mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-7264 add ScriptExtensions.txt, new scripts, new blocks, fix genpname/preparse.pl
X-SVN-Rev: 28359
This commit is contained in:
parent
6f17ff12b4
commit
e72d90de1a
8 changed files with 2340 additions and 2235 deletions
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2009, International Business Machines
|
||||
* Copyright (C) 1999-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -165,6 +165,7 @@ enum {
|
|||
UNI_5_0,
|
||||
UNI_5_1,
|
||||
UNI_5_2,
|
||||
UNI_6_0,
|
||||
UNI_VER_COUNT
|
||||
};
|
||||
|
||||
|
@ -181,7 +182,8 @@ unicodeVersions[]={
|
|||
{ 4, 1, 0, 0 },
|
||||
{ 5, 0, 0, 0 },
|
||||
{ 5, 1, 0, 0 },
|
||||
{ 5, 2, 0, 0 }
|
||||
{ 5, 2, 0, 0 },
|
||||
{ 6, 0, 0, 0 }
|
||||
};
|
||||
|
||||
static int32_t ucdVersion=UNI_5_2;
|
||||
|
@ -1220,6 +1222,11 @@ generateAlgorithmicData(UNewDataMemory *pData, Options *storeOptions) {
|
|||
0, 5,
|
||||
sizeof(AlgorithmicRange)+PREFIX_LENGTH_4
|
||||
};
|
||||
static AlgorithmicRange cjkExtD={
|
||||
0x2b740, 0x2b81d,
|
||||
0, 5,
|
||||
sizeof(AlgorithmicRange)+PREFIX_LENGTH_4
|
||||
};
|
||||
|
||||
static char jamo[]=
|
||||
"HANGUL SYLLABLE \0"
|
||||
|
@ -1266,6 +1273,9 @@ generateAlgorithmicData(UNewDataMemory *pData, Options *storeOptions) {
|
|||
/* number of ranges of algorithmic names */
|
||||
if(!storeOptions->storeNames) {
|
||||
countAlgRanges=0;
|
||||
} else if(ucdVersion>=UNI_6_0) {
|
||||
/* Unicode 6.0 and up has 6 ranges including CJK Extension D */
|
||||
countAlgRanges=6;
|
||||
} else if(ucdVersion>=UNI_5_2) {
|
||||
/* Unicode 5.2 and up has 5 ranges including CJK Extension C */
|
||||
countAlgRanges=5;
|
||||
|
@ -1358,6 +1368,19 @@ generateAlgorithmicData(UNewDataMemory *pData, Options *storeOptions) {
|
|||
}
|
||||
}
|
||||
|
||||
/* range 5: cjk extension d */
|
||||
if(countAlgRanges>=6) {
|
||||
if(pData!=NULL) {
|
||||
udata_writeBlock(pData, &cjkExtD, sizeof(AlgorithmicRange));
|
||||
udata_writeString(pData, prefix, PREFIX_LENGTH);
|
||||
if(PREFIX_LENGTH<PREFIX_LENGTH_4) {
|
||||
udata_writePadding(pData, PREFIX_LENGTH_4-PREFIX_LENGTH);
|
||||
}
|
||||
} else {
|
||||
size+=sizeof(AlgorithmicRange)+PREFIX_LENGTH_4;
|
||||
}
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
######################################################################
|
||||
# Copyright (c) 2003-2005, International Business Machines
|
||||
# Copyright (c) 2003-2010, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
######################################################################
|
||||
# Author: Alan Liu
|
||||
|
@ -14,11 +14,12 @@
|
|||
# ================================================
|
||||
|
||||
# ================================================
|
||||
# Non-enumerated Properties
|
||||
# Miscellaneous Properties
|
||||
# ================================================
|
||||
scx; Script_Extensions
|
||||
|
||||
# ================================================
|
||||
# Enumerated Non-Binary Properties
|
||||
# Enumerated Properties
|
||||
# ================================================
|
||||
|
||||
# lccc(c)=ccc(NFD(c)[0])
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
########################################################################
|
||||
# Copyright (c) 2006-2009, International Business Machines
|
||||
# Copyright (c) 2006-2010, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
########################################################################
|
||||
# file name: SyntheticPropertyValueAliases.txt
|
||||
|
@ -24,9 +24,7 @@
|
|||
|
||||
# Script (sc)
|
||||
|
||||
sc ; Batk ; Batk
|
||||
sc ; Blis ; Blis
|
||||
sc ; Brah ; Brah
|
||||
sc ; Cirt ; Cirt
|
||||
sc ; Cyrs ; Cyrs
|
||||
sc ; Egyd ; Egyd
|
||||
|
@ -41,7 +39,6 @@ sc ; Jpan ; Jpan
|
|||
sc ; Latf ; Latf
|
||||
sc ; Latg ; Latg
|
||||
sc ; Lina ; Lina
|
||||
sc ; Mand ; Mand
|
||||
sc ; Maya ; Maya
|
||||
sc ; Mero ; Mero
|
||||
sc ; Moon ; Moon
|
||||
|
@ -66,3 +63,17 @@ sc ; Zmth ; Zmth
|
|||
sc ; Zsym ; Zsym
|
||||
|
||||
sc ; Nkgb ; Nkgb
|
||||
|
||||
sc ; Bass ; Bass
|
||||
sc ; Dupl ; Dupl
|
||||
sc ; Elba ; Elba
|
||||
sc ; Gran ; Gran
|
||||
sc ; Kpel ; Kpel
|
||||
sc ; Loma ; Loma
|
||||
sc ; Mend ; Mend
|
||||
sc ; Merc ; Merc
|
||||
sc ; Narb ; Narb
|
||||
sc ; Nbat ; Nbat
|
||||
sc ; Palm ; Palm
|
||||
sc ; Sind ; Sind
|
||||
sc ; Wara ; Wara
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,7 +1,7 @@
|
|||
#!/bin/perl -w
|
||||
#*******************************************************************
|
||||
# COPYRIGHT:
|
||||
# Copyright (c) 2002-2009, International Business Machines Corporation and
|
||||
# Copyright (c) 2002-2010, International Business Machines Corporation and
|
||||
# others. All Rights Reserved.
|
||||
#*******************************************************************
|
||||
|
||||
|
@ -14,17 +14,6 @@
|
|||
#
|
||||
# See usage note below.
|
||||
#
|
||||
# TODO: The Property[Value]Alias.txt files state that they can support
|
||||
# more than 2 names per property|value. Currently (Unicode 3.2) there
|
||||
# are always 1 or 2 names. If more names were supported, presumably
|
||||
# the format would be something like:
|
||||
# nv ; Numeric_Value
|
||||
# nv ; Value_Numerique
|
||||
# CURRENTLY, this script assumes that there are 1 or two names. Any
|
||||
# duplicates it sees are flagged as an error. If multiple aliases
|
||||
# appear in a future version of Unicode, modify this script to support
|
||||
# that.
|
||||
#
|
||||
# NOTE: As of ICU 2.6, this script has been modified to know about the
|
||||
# pseudo-property gcm/General_Category_Mask, which corresponds to the
|
||||
# uchar.h property UCHAR_GENERAL_CATEGORY_MASK. This property
|
||||
|
@ -70,23 +59,17 @@ my $propNA = 0;
|
|||
my $valueNA = 0;
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Top level property keys for binary, enumerated, string, and double props
|
||||
my @TOP = qw( _bp _ep _sp _dp _mp );
|
||||
# Top level property keys for binary, enumerated, string, double, and other props
|
||||
my @TOP = qw( _bp _ep _sp _dp _op );
|
||||
|
||||
# This hash governs how top level properties are grouped into output arrays.
|
||||
#my %TOP_PROPS = ( "VALUED" => [ '_bp', '_ep' ],
|
||||
# "NO_VALUE" => [ '_sp', '_dp' ] );m
|
||||
#my %TOP_PROPS = ( "BINARY" => [ '_bp' ],
|
||||
# "ENUMERATED" => [ '_ep' ],
|
||||
# "STRING" => [ '_sp' ],
|
||||
# "DOUBLE" => [ '_dp' ] );
|
||||
my %TOP_PROPS = ( "" => [ '_bp', '_ep', '_sp', '_dp', '_mp' ] );
|
||||
# Top level properties are grouped into output arrays.
|
||||
my %TOP_PROPS = ( "" => [ '_bp', '_ep', '_sp', '_dp', '_op' ] );
|
||||
|
||||
my %PROP_TYPE = (Binary => "_bp",
|
||||
String => "_sp",
|
||||
Double => "_dp",
|
||||
Enumerated => "_ep",
|
||||
Bitmask => "_mp");
|
||||
Other => "_op");
|
||||
#----------------------------------------------------------------------
|
||||
|
||||
# Properties that are unsupported in ICU
|
||||
|
@ -1079,7 +1062,7 @@ sub read_uscript {
|
|||
# @param a filename for uchar.h
|
||||
#
|
||||
# @return a ref to a hash. The keys of the hash are '_bp' for binary
|
||||
# properties, '_ep' for enumerated properties, '_dp'/'_sp'/'_mp' for
|
||||
# properties, '_ep' for enumerated properties, '_dp'/'_sp'/'_op' for
|
||||
# double/string/other properties, and 'gc', 'gcm', 'bc', 'blk',
|
||||
# 'ea', 'dt', 'jt', 'jg', 'lb', or 'nt' for corresponding property
|
||||
# value aliases. The values of the hash are subhashes. The subhashes
|
||||
|
@ -1137,9 +1120,13 @@ sub read_uchar {
|
|||
|
||||
elsif (m|^\s*/\*\*\s*(\w+)\s+property\s+(\w+)|i) {
|
||||
die "Error: Unmatched tag $submode" if ($submode);
|
||||
die "Error: Unrecognized UProperty comment: $_"
|
||||
unless (exists $PROP_TYPE{$1});
|
||||
$key = $PROP_TYPE{$1};
|
||||
#die "Error: Unrecognized UProperty comment: $_"
|
||||
# unless (exists $PROP_TYPE{$1});
|
||||
if (exists $PROP_TYPE{$1}) {
|
||||
$key = $PROP_TYPE{$1};
|
||||
} else {
|
||||
$key = $PROP_TYPE{"Other"};
|
||||
}
|
||||
$submode = $2;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#!/usr/bin/python2.4
|
||||
# Copyright (c) 2009 International Business Machines
|
||||
# Copyright (c) 2009-2010 International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# file name: ucdcopy.py
|
||||
|
@ -120,6 +120,7 @@ _unidata_files = {
|
|||
"NormalizationCorrections.txt": shutil.copy,
|
||||
"PropertyAliases.txt": shutil.copy,
|
||||
"PropertyValueAliases.txt": shutil.copy,
|
||||
"ScriptExtensions.txt": shutil.copy,
|
||||
"SpecialCasing.txt": shutil.copy,
|
||||
"UnicodeData.txt": shutil.copy,
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/errorcode.h"
|
||||
|
|
|
@ -19,9 +19,8 @@ There are autoconf makefiles (Makefile.in) and Visual C++ project files (.vcproj
|
|||
in the subfolders. They are copied over from the ICU source tree and will not
|
||||
work without modifications. However, I started to use CMake (CMakeLists.txt)
|
||||
which is much simpler, and if it works well enough then I plan to just
|
||||
delete the old makefiles and project files. The CMake files will currently
|
||||
work only on Linux, just because I hardcoded the ICU library filenames
|
||||
(e.g., libicuuc.so).
|
||||
delete the old makefiles and project files. The CMake files should
|
||||
work on Linux and Mac OS X.
|
||||
I should use more variables to make the CMake files more portable, and should
|
||||
use ICU's installed icu-config or Makefile.inc to get the values for these
|
||||
variables.
|
||||
|
|
Loading…
Add table
Reference in a new issue