mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-7144 script to turn Unicode's IdnaMappingTable.txt into ICU gennorm2 source file format
X-SVN-Rev: 27791
This commit is contained in:
parent
2f3cf12d53
commit
46ec4b3cc7
1 changed files with 65 additions and 0 deletions
65
tools/unicode/py/idna2nrm.py
Executable file
65
tools/unicode/py/idna2nrm.py
Executable file
|
@ -0,0 +1,65 @@
|
|||
# Copyright (C) 2010, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# file name: idna2nrm.py
|
||||
# encoding: US-ASCII
|
||||
# tab size: 8 (not used)
|
||||
# indentation:4
|
||||
#
|
||||
# created on: 2010jan28
|
||||
# created by: Markus W. Scherer
|
||||
|
||||
"""Turn Unicode IdnaMappingTable.txt into ICU gennorm2 source file format."""
|
||||
|
||||
__author__ = "Markus Scherer"
|
||||
|
||||
import re
|
||||
|
||||
# (compiled pattern, replacement) pairs, applied in list order to every input
# line to rewrite IdnaMappingTable.txt status fields into the mapping syntax
# written to the gennorm2 source file.
replacements = [
    # Disallowed code points get U+FFFD as their mapping target.
    (re.compile(r"; disallowed "), ">FFFD"),
    # Ignored code points get an empty mapping target.
    (re.compile(r"; ignored "), ">"),
    # Valid code points need no mapping: prefix the line with "# ".
    (re.compile(r"^([^;]+) ; valid"), r"# \1valid"),
    # Mapped and deviation entries keep their target after a ">".
    (re.compile(r"; mapped ; "), ">"),
    (re.compile(r"; deviation ; "), ">"),
    # Collapse the run of spaces in front of the trailing "# ..." comment.
    (re.compile(r" +(\# [^\#]+)$"), r" \1"),
    # Truncate a range that ends exactly at FFFF so that it stops at FFFC,
    # which keeps FFFD out of the range (no circular FFFD>FFFD mapping).
    # The negative lookahead prevents matching the prefix of a longer end
    # point such as "..FFFFD" or "..FFFFF", which must be left untouched.
    (re.compile(r"\.\.FFFF(?![0-9A-Fa-f])"), "..FFFC"),
]
|
||||
|
||||
# Convert IdnaMappingTable.txt (read from the current directory) into
# uts46.txt. Both files are managed by "with" so they are closed, and the
# output flushed, even if the conversion raises part-way through.
# The input is opened first so that a missing input file does not leave an
# empty output file behind.
with open("IdnaMappingTable.txt", "r") as in_file:
    with open("uts46.txt", "w") as out_file:
        out_file.write("# Original file:\n")
        for line in in_file:
            if line.startswith("# For documentation, see"):
                # Copy this header line verbatim, then insert the
                # explanation of how the file was reformatted, together with
                # the three "NNNN-" lines that the explanation describes as
                # removing conflicting NFC mappings.
                out_file.write(line)
                out_file.write(r"""
# ================================================
# This file has been reformatted into syntax for the
# gennorm2 Normalizer2 data generator tool.
# Reformatting via regular expressions:
# s/; disallowed />FFFD/
# s/; ignored />/
# s/^([^;]+) ; valid/# \1valid/
# s/; mapped ; />/
# s/; deviation ; />/
# s/ +(\# [^\#]+)$/ \1/
#
# Plus, the following NFC mappings are removed to avoid a conflict
# with mappings in this file.
2260-
226E-
226F-
#
# A circular mapping FFFD>FFFD is avoided by rewriting the line that starts with
# FFEF..FFFF to two lines, splitting this range and omitting FFFD.
#
# Use this file as the second gennorm2 input file after nfc.txt.
# ================================================
""")
                continue
            # Apply every (pattern, replacement) pair in order.
            for rep in replacements:
                line = rep[0].sub(rep[1], line)
            out_file.write(line)
            # A line containing "..FFFC" is taken to be the range that was
            # truncated to end at FFFC; emit the rest of the split range so
            # that only FFFD is left out.
            if "..FFFC" in line:
                out_file.write("FFFE..FFFF >FFFD\n")
|
Loading…
Add table
Reference in a new issue