ICU-8972 generate norm2/nfkc_cf.txt from preparseucd.py

X-SVN-Rev: 31197
This commit is contained in:
Markus Scherer 2012-01-10 22:59:14 +00:00
parent 37c81c2566
commit b2a9c8508e

View file

@ -1206,9 +1206,59 @@ def WriteNorm2TextFile(path, filename, with_compat):
WriteAllMappings(out_file, with_compat)
def WriteNorm2NFKC_CFTextFile(path):
    """Write nfkc_cf.txt, the NFKC_CF mappings in gennorm2 syntax, into `path`.

    Walks the module-level range table: `_starts[i]` is the first code point
    of range i, `_starts[i + 1] - 1` its last, and `_props[i]` its property
    dict. Adjacent ranges that carry the same "NFKC_CF" value are merged and
    written as one line:

        XXXX>mapping          for a single code point
        XXXX..YYYY>mapping    for a longer range

    Ranges with no "NFKC_CF" entry are skipped, as are ranges whose value
    starts with '<' (presumably a placeholder rather than a real mapping --
    confirm against the parser). An empty-string mapping is written as-is.

    The copyright line in the file header uses the current calendar year.

    Args:
      path: Directory in which nfkc_cf.txt is created or overwritten.
    """
    year = datetime.date.today().strftime("%Y")

    def write_range(out_file, first, last, mapping):
        # A single code point gets the short form, anything longer the
        # first..last form.
        if first == last:
            out_file.write("%04X>%s\n" % (first, mapping))
        else:
            out_file.write("%04X..%04X>%s\n" % (first, last, mapping))

    with open(os.path.join(path, "nfkc_cf.txt"), "w") as out_file:
        out_file.write(
"""# Unicode Character Database
# Copyright (c) 1991-""" + year + """ Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# file name: nfkc_cf.txt
#
# machine-generated by ICU preparseucd.py
#
# This file contains the Unicode NFKC_CF mappings,
# extracted from the UCD file DerivedNormalizationProps.txt,
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
# Use this file as the second gennorm2 input file after nfkc.txt.
""")
        out_file.write("* Unicode " + _ucd_version + "\n\n")
        # The pending run that has been collected but not yet written.
        # prev_nfkc_cf is None while no run is pending.
        prev_start = 0
        prev_end = 0
        prev_nfkc_cf = None
        # range() iterates the same indexes as xrange() and exists on both
        # Python 2 and Python 3.
        for i in range(len(_starts) - 1):
            nfkc_cf = _props[i].get("NFKC_CF")
            # Skip ranges without a value and values that start with '<'.
            if nfkc_cf is None or (nfkc_cf and nfkc_cf[0] == '<'):
                continue
            start = _starts[i]
            end = _starts[i + 1] - 1
            # Merge with the previous range if possible,
            # or remember this range for merging.
            if nfkc_cf == prev_nfkc_cf and (prev_end + 1) == start:
                prev_end = end
                continue
            if prev_nfkc_cf is not None:
                write_range(out_file, prev_start, prev_end, prev_nfkc_cf)
            prev_start = start
            prev_end = end
            prev_nfkc_cf = nfkc_cf
        # Flush the last pending run, if any.
        if prev_nfkc_cf is not None:
            write_range(out_file, prev_start, prev_end, prev_nfkc_cf)
def WriteNorm2(path):
    """Write all three Normalizer2 text files into the directory `path`.

    nfc.txt and nfkc.txt are produced by WriteNorm2TextFile (without and with
    compatibility mappings, respectively), followed by nfkc_cf.txt from
    WriteNorm2NFKC_CFTextFile.
    """
    for filename, with_compat in (("nfc.txt", False), ("nfkc.txt", True)):
        WriteNorm2TextFile(path, filename, with_compat)
    WriteNorm2NFKC_CFTextFile(path)
# Preprocessing ------------------------------------------------------------ ***