mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 22:15:31 +00:00
ICU-7264 scripts for data file generation
X-SVN-Rev: 28364
This commit is contained in:
parent
a2605b9c83
commit
58d21ee915
5 changed files with 81 additions and 9 deletions
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2009, International Business Machines
|
||||
* Copyright (C) 1999-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -466,7 +466,9 @@ generateData(const char *dataDir, UBool csource) {
|
|||
pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo,
|
||||
haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "genprops: unable to create data memory, %s\n", u_errorName(errorCode));
|
||||
fprintf(stderr, "genprops: udata_create(%s, %s.%s) failed - %s\n",
|
||||
dataDir, DATA_NAME, DATA_TYPE,
|
||||
u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
|
||||
|
|
16
tools/unicode/makedefs.sh
Executable file
16
tools/unicode/makedefs.sh
Executable file
|
@ -0,0 +1,16 @@
|
|||
# Copyright (C) 2010, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# Basic definitions for building ICU Unicode data.
|
||||
# Sourced by other scripts, for example makeprops.sh.
|
||||
UNICODE_VERSION=6.0
|
||||
# Assume that there are parallel src & bld trees with the Unicode tools
|
||||
# source files and the out-of-source build files.
|
||||
# Assume that the current folder is some/path/src/unicode
|
||||
UNITOOLS_BLD=../../bld/unicode
|
||||
# The sourcing script must define ICU_SRC and ICU_BLD for the ICU library
|
||||
# source files and the out-of-source build files.
|
||||
UNIDATA=$ICU_SRC/source/data/unidata
|
||||
COMMON=$ICU_SRC/source/common
|
||||
SRC_DATA_IN=$ICU_SRC/source/data/in
|
||||
BLD_DATA_FILES=$ICU_BLD/data/out/build/icudt45l
|
38
tools/unicode/makeprops.sh
Executable file
38
tools/unicode/makeprops.sh
Executable file
|
@ -0,0 +1,38 @@
|
|||
# Copyright (C) 2010, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# Parses Unicode Character Database files and builds ICU core properties files.
|
||||
#
|
||||
# Invoke as
|
||||
# ./makeprops.sh path/to/ICU/src/tree path/to/ICU/build/tree
|
||||
ICU_SRC=$1
|
||||
ICU_BLD=$2
|
||||
source ./makedefs.sh
|
||||
|
||||
# uprops.icu
|
||||
$UNITOOLS_BLD/c/genprops/genprops -d $SRC_DATA_IN -s $UNIDATA -i $BLD_DATA_FILES -u $UNICODE_VERSION
|
||||
$UNITOOLS_BLD/c/genprops/genprops -d $COMMON --csource -s $UNIDATA -i $BLD_DATA_FILES -u $UNICODE_VERSION
|
||||
|
||||
# ubidi.icu
|
||||
$UNITOOLS_BLD/c/genbidi/genbidi -d $SRC_DATA_IN -s $UNIDATA -i $BLD_DATA_FILES -u $UNICODE_VERSION
|
||||
$UNITOOLS_BLD/c/genbidi/genbidi -d $COMMON --csource -s $UNIDATA -i $BLD_DATA_FILES -u $UNICODE_VERSION
|
||||
|
||||
# ucase.icu
|
||||
$UNITOOLS_BLD/c/gencase/gencase -d $SRC_DATA_IN -s $UNIDATA -i $BLD_DATA_FILES -u $UNICODE_VERSION
|
||||
$UNITOOLS_BLD/c/gencase/gencase -d $COMMON --csource -s $UNIDATA -i $BLD_DATA_FILES -u $UNICODE_VERSION
|
||||
|
||||
# unames.icu
|
||||
$UNITOOLS_BLD/c/gennames/gennames -d $SRC_DATA_IN -1 -q $UNIDATA/UnicodeData.txt $UNIDATA/NameAliases.txt -u $UNICODE_VERSION
|
||||
|
||||
# unidata/norm2/*.txt
|
||||
$UNITOOLS_BLD/c/gennorm/gennorm -d $UNIDATA/norm2 -s $UNIDATA -i $BLD_DATA_FILES
|
||||
|
||||
# *.nrm
|
||||
export LD_LIBRARY_PATH=$ICU_BLD/lib
|
||||
$ICU_BLD/bin/gennorm2 -o $SRC_DATA_IN/nfc.nrm -s $UNIDATA/norm2 nfc.txt -u $UNICODE_VERSION
|
||||
$ICU_BLD/bin/gennorm2 -o $SRC_DATA_IN/nfkc.nrm -s $UNIDATA/norm2 nfkc.txt -u $UNICODE_VERSION
|
||||
$ICU_BLD/bin/gennorm2 -o $SRC_DATA_IN/nfkc_cf.nrm -s $UNIDATA/norm2 nfkc.txt nfkc_cf.txt -u $UNICODE_VERSION
|
||||
$ICU_BLD/bin/gennorm2 -o $SRC_DATA_IN/uts46.nrm -s $UNIDATA/norm2 nfc.txt uts46.txt -u $UNICODE_VERSION
|
||||
|
||||
# UCA
|
||||
$UNITOOLS_BLD/c/genuca/genuca -d $SRC_DATA_IN/coll -s $UNIDATA -i $BLD_DATA_FILES
|
15
tools/unicode/makeuca.sh
Executable file
15
tools/unicode/makeuca.sh
Executable file
|
@ -0,0 +1,15 @@
|
|||
# Copyright (C) 2010, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# Parses Unicode Character Database files and builds ICU UCA data files.
|
||||
#
|
||||
# Requires: 1. run makeprops.sh 2. rebuild ICU & Unicode tools
|
||||
# See (ICU)/source/data/unidata/changes.txt
|
||||
#
|
||||
# Invoke as
|
||||
# ./makeuca.sh path/to/ICU/src/tree path/to/ICU/build/tree
|
||||
ICU_SRC=$1
|
||||
ICU_BLD=$2
|
||||
source ./makedefs.sh
|
||||
|
||||
$UNITOOLS_BLD/c/genuca/genuca -d $SRC_DATA_IN/coll -s $UNIDATA -i $BLD_DATA_FILES
|
|
@ -24,17 +24,18 @@ replacements = [
|
|||
(re.compile(r"005B..0060 ; disallowed"), "# 005B..0060 (allow ASCII)"),
|
||||
(re.compile(r"007B..00A0 ; disallowed #"),
|
||||
"0080..00A0 >FFFD # (allow ASCII)"),
|
||||
# Several versions of avoiding circular FFFD>FFFD mappings,
|
||||
# depending on the version of the input file.
|
||||
(re.compile(r"FFFD ; disallowed"), "# FFFD (avoid circular mapping)"),
|
||||
(re.compile(r"\.\.FFFD"), "..FFFC"),
|
||||
(re.compile(r"(FFF[^E])\.\.FFFF"), "\1..FFFC"),
|
||||
# Normal transformations.
|
||||
(re.compile(r"; disallowed "), ">FFFD"),
|
||||
(re.compile(r"; ignored "), ">"),
|
||||
(re.compile(r"^([^;]+) ; valid"), r"# \1valid"),
|
||||
(re.compile(r"; mapped ; "), ">"),
|
||||
(re.compile(r"^([^;]+) ; deviation"), r"# \1deviation"),
|
||||
(re.compile(r" +(\# [^\#]+)$"), r" \1"),
|
||||
# Two versions of avoiding circular FFFD>FFFD mappings,
|
||||
# depending on the version of the input file.
|
||||
(re.compile(r"\.\.FFFD"), "..FFFC"),
|
||||
(re.compile(r"(FFF[^E])\.\.FFFF"), "\1..FFFC")
|
||||
(re.compile(r" +(\# [^\#]+)$"), r" \1")
|
||||
]
|
||||
|
||||
in_file = open("IdnaMappingTable.txt", "r")
|
||||
|
@ -61,8 +62,8 @@ for line in in_file:
|
|||
# they are handled in code.
|
||||
# Deviation characters are also handled in code.
|
||||
#
|
||||
# A circular mapping FFFD>FFFD is avoided by rewriting the line that contains
|
||||
# ..FFFD to contain ..FFFC instead.
|
||||
# A circular mapping FFFD>FFFD is avoided by
|
||||
# rewriting the line that contains FFFD.
|
||||
#
|
||||
# Use this file as the second gennorm2 input file after nfc.txt.
|
||||
# ================================================
|
||||
|
|
Loading…
Add table
Reference in a new issue