mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-12526 uprops.icu formatVersion 7.3: support new fraction numeric values like 3/80; ppucd.txt mostly no block compression for String/Misc properties; minor bug fixes
X-SVN-Rev: 38706
This commit is contained in:
parent
6e907382d0
commit
3e5578f3bf
4 changed files with 79 additions and 15 deletions
|
@ -1053,7 +1053,7 @@ static int32_t indexes[UCASE_IX_TOP]={
|
|||
0, 0, 0, 0
|
||||
};
|
||||
|
||||
static uint8_t trieBlock[40000];
|
||||
static uint8_t trieBlock[100000];
|
||||
static int32_t trieSize;
|
||||
|
||||
void
|
||||
|
@ -1083,15 +1083,20 @@ CasePropsBuilder::build(UErrorCode &errorCode) {
|
|||
}
|
||||
}
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to set UCASE_SENSITIVE: %s\n",
|
||||
fprintf(stderr, "genprops/case error: unable to set UCASE_SENSITIVE: %s\n",
|
||||
u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
|
||||
utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops/case error: utrie2_freeze() failed: %s\n",
|
||||
u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
trieSize=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops error: utrie2_freeze()+utrie2_serialize() failed: %s (length %ld)\n",
|
||||
fprintf(stderr, "genprops/case error: utrie2_serialize() failed: %s (length %ld)\n",
|
||||
u_errorName(errorCode), (long)trieSize);
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ the udata API for loading ICU data. Especially, a UDataInfo structure
|
|||
precedes the actual data. It contains platform properties values and the
|
||||
file format version.
|
||||
|
||||
The following is a description of format version 7.2 .
|
||||
The following is a description of format version 7.3 .
|
||||
|
||||
Data contents:
|
||||
|
||||
|
@ -146,8 +146,7 @@ Encoding of numeric type and value in the 10-bit ntv field:
|
|||
0 U_NT_NONE 0
|
||||
1..10 U_NT_DECIMAL 0..9
|
||||
11..20 U_NT_DIGIT 0..9
|
||||
21..0x2ff U_NT_NUMERIC see below
|
||||
0x300..0x3ff reserved
|
||||
21..0x3ff U_NT_NUMERIC see below
|
||||
|
||||
For U_NT_NUMERIC:
|
||||
ntv value
|
||||
|
@ -157,6 +156,11 @@ Encoding of numeric type and value in the 10-bit ntv field:
|
|||
(only one significant decimal digit)
|
||||
0x300..0x323 base-60 (sexagesimal) integer (new in format version 7.1)
|
||||
((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4)
|
||||
0x324..0x33b fraction-20 (new in format version 7.3)
|
||||
frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640
|
||||
numerator: num = 2*(frac20&3)+1
|
||||
denominator: den = 20<<(frac20>>2)
|
||||
0x33c..0x3ff reserved
|
||||
|
||||
--- Additional properties (new in format version 2.1) ---
|
||||
|
||||
|
@ -250,6 +254,10 @@ ICU 57 adds 4 Emoji properties to vector word 2.
|
|||
http://bugs.icu-project.org/trac/ticket/11802
|
||||
http://www.unicode.org/reports/tr51/#Emoji_Properties
|
||||
|
||||
--- Changes in format version 7.3 ---
|
||||
|
||||
ICU 58 adds fraction-20 numeric values for new Unicode 9 Malayalam fraction characters.
|
||||
|
||||
----------------------------------------------------------------------------- */
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
@ -265,7 +273,7 @@ static UDataInfo dataInfo={
|
|||
0,
|
||||
|
||||
{ 0x55, 0x50, 0x72, 0x6f }, /* dataFormat="UPro" */
|
||||
{ 7, 2, 0, 0 }, /* formatVersion */
|
||||
{ 7, 3, 0, 0 }, /* formatVersion */
|
||||
{ 8, 0, 0, 0 } /* dataVersion */
|
||||
};
|
||||
|
||||
|
@ -314,10 +322,29 @@ CorePropsBuilder::setUnicodeVersion(const UVersionInfo version) {
|
|||
uprv_memcpy(dataInfo.dataVersion, version, 4);
|
||||
}
|
||||
|
||||
static int32_t encodeFractional20(int32_t value, int32_t den) {
|
||||
if(den<20 || 640<den) { return -1; }
|
||||
int32_t frac20;
|
||||
if(value==1 || value==3 || value==5 || value==7) {
|
||||
frac20=value/2;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
// Denominator: 20 times which power of 2: 0..5 into bits 4..2
|
||||
do {
|
||||
if(den==20) {
|
||||
return UPROPS_NTV_FRACTION20_START+frac20;
|
||||
}
|
||||
den/=2;
|
||||
frac20+=4;
|
||||
} while(den>=20);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// For nt=U_NT_NUMERIC.
|
||||
static int32_t
|
||||
encodeNumericValue(UChar32 start, const char *s, UErrorCode &errorCode) {
|
||||
const char *original;
|
||||
const char *original=s;
|
||||
/* get a possible minus sign */
|
||||
UBool isNegative;
|
||||
if(*s=='-') {
|
||||
|
@ -407,6 +434,8 @@ encodeNumericValue(UChar32 start, const char *s, UErrorCode &errorCode) {
|
|||
} else if(exp==0 && value==-1 && den==0) {
|
||||
/* -1 parsed with den=0, encoded as pseudo-fraction -1/1 */
|
||||
ntv=((value+12)<<4);
|
||||
} else if(exp==0 && (ntv=encodeFractional20(value, den))>=0) {
|
||||
// fits into fractional-20 format
|
||||
} else {
|
||||
ntv=-1;
|
||||
}
|
||||
|
@ -669,7 +698,7 @@ static int32_t indexes[UPROPS_INDEX_COUNT]={
|
|||
0, 0, 0, 0
|
||||
};
|
||||
|
||||
static uint8_t trieBlock[40000];
|
||||
static uint8_t trieBlock[100000];
|
||||
static int32_t trieSize;
|
||||
static uint8_t props2TrieBlock[100000];
|
||||
static int32_t props2TrieSize;
|
||||
|
@ -681,18 +710,23 @@ CorePropsBuilder::build(UErrorCode &errorCode) {
|
|||
if(U_FAILURE(errorCode)) { return; }
|
||||
|
||||
utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr,
|
||||
"genprops/core error: utrie2_freeze(main trie) failed: %s\n",
|
||||
u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
trieSize=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr,
|
||||
"genprops error: utrie2_freeze(main trie)+utrie2_serialize() "
|
||||
"failed: %s (length %ld)\n",
|
||||
"genprops/core error: utrie2_serialize(main trie) failed: %s (length %ld)\n",
|
||||
u_errorName(errorCode), (long)trieSize);
|
||||
return;
|
||||
}
|
||||
|
||||
props2Trie=upvec_compactToUTrie2WithRowIndexes(pv, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to build trie for additional properties: %s\n",
|
||||
fprintf(stderr, "genprops/core error: unable to build trie for additional properties: %s\n",
|
||||
u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
|
@ -702,7 +736,8 @@ CorePropsBuilder::build(UErrorCode &errorCode) {
|
|||
&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr,
|
||||
"genprops error: utrie2_freeze(additional properties)+utrie2_serialize() failed: %s\n",
|
||||
"genprops/core error: utrie2_freeze(additional properties)+utrie2_serialize() "
|
||||
"failed: %s\n",
|
||||
u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2000-2015, International Business Machines
|
||||
* Copyright (C) 2000-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -656,6 +656,13 @@ readAnElement(FILE *data,
|
|||
buffer[--buflen] = 0;
|
||||
}
|
||||
|
||||
if(buflen >= 3 && buffer[0] == (char)0xef &&
|
||||
buffer[1] == (char)0xbb && buffer[2] == (char)0xbf) {
|
||||
// U+FEFF UTF-8 signature byte sequence.
|
||||
// Ignore, assuming it is at the start of the file.
|
||||
buflen -= 3;
|
||||
uprv_memmove(buffer, buffer + 3, buflen + 1); // +1: including NUL terminator
|
||||
}
|
||||
if(buffer[0] == 0 || buffer[0] == '#') {
|
||||
return FALSE; // just a comment, skip whole line
|
||||
}
|
||||
|
|
|
@ -58,6 +58,8 @@ _scripts_only_in_iso15924 = (
|
|||
|
||||
# Properties --------------------------------------------------------------- ***
|
||||
|
||||
# Properties that we do not want to store in ppucd.txt.
|
||||
# Not a frozenset so that we can add aliases for simpler subsequent testing.
|
||||
_ignored_properties = set((
|
||||
# Other_Xyz only contribute to Xyz, store only the latter.
|
||||
"OAlpha",
|
||||
|
@ -95,6 +97,16 @@ _ignored_properties = set((
|
|||
"cjkRSUnicode"
|
||||
))
|
||||
|
||||
# These properties (short names) map code points to
|
||||
# strings or other unusual values (property types String or Miscellaneous)
|
||||
# that cannot be block-compressed (or would be confusing).
|
||||
_uncompressible_props = frozenset((
|
||||
"bmg", "bpb", "cf", "Conditional_Case_Mappings", "dm", "FC_NFKC",
|
||||
"isc", "lc", "na", "na1", "Name_Alias", "NFKC_CF",
|
||||
# scx is block-compressible.
|
||||
"scf", "slc", "stc", "suc", "tc", "Turkic_Case_Folding", "uc"
|
||||
))
|
||||
|
||||
# Dictionary of properties.
|
||||
# Keyed by normalized property names and aliases.
|
||||
# Each value is a tuple with
|
||||
|
@ -985,7 +997,12 @@ def CompactBlock(b, i):
|
|||
if count == 1: num_unique += 1
|
||||
if max_value != _null_or_defaults[pname]:
|
||||
# Avoid picking randomly among several unique values.
|
||||
if (max_count > 1 or num_unique == 1):
|
||||
# Do not compress uncompressible properties,
|
||||
# with an exception for many empty-string values in a block
|
||||
# (NFKC_CF='' for tags and variation selectors).
|
||||
if ((max_count > 1 or num_unique == 1) and
|
||||
((pname not in _uncompressible_props) or
|
||||
(max_value == '' and max_count >= 12))):
|
||||
b_props[pname] = max_value
|
||||
# For each range and property, remove the default+block value
|
||||
# but set the default value if that property was not set
|
||||
|
|
Loading…
Add table
Reference in a new issue