ICU-7144 handle deviation characters and disallowed ASCII characters in code

X-SVN-Rev: 27999
This commit is contained in:
Markus Scherer 2010-04-29 00:33:58 +00:00
parent df7d633f59
commit ecd660579b

View file

@ -17,12 +17,22 @@ __author__ = "Markus Scherer"
import re
# Table of (compiled regex, replacement string) pairs used to rewrite lines
# of the input file.
# NOTE(review): the entries are presumably applied in list order to every
# input line (the loop is outside this view) -- the ASCII pass-through rules
# only work if they run before the generic "; disallowed " rule below them.
replacements = [
    # Pass through disallowed ASCII characters: Handled in code.
    # The ".." range separators are escaped so that they match literally.
    (re.compile(r"0000\.\.002C ; disallowed"), "# 0000..002C (allow ASCII)"),
    (re.compile(r"002F ; disallowed"), "# 002F (allow ASCII)"),
    (re.compile(r"003A\.\.0040 ; disallowed"), "# 003A..0040 (allow ASCII)"),
    (re.compile(r"005B\.\.0060 ; disallowed"), "# 005B..0060 (allow ASCII)"),
    # Only the ASCII part (007B..007F) is passed through; the rest of the
    # range (0080..00A0) is still mapped to FFFD.
    (re.compile(r"007B\.\.00A0 ; disallowed #"),
     "0080..00A0 >FFFD # (allow ASCII)"),
    # Normal transformations.
    (re.compile(r"; disallowed "), ">FFFD"),
    (re.compile(r"; ignored "), ">"),
    (re.compile(r"^([^;]+) ; valid"), r"# \1valid"),
    (re.compile(r"; mapped ; "), ">"),
    (re.compile(r"; deviation ; "), ">"),
    (re.compile(r"^([^;]+) ; deviation"), r"# \1deviation"),
    (re.compile(r" +(\# [^\#]+)$"), r" \1"),
    # Two versions of avoiding circular FFFD>FFFD mappings,
    # depending on the version of the input file.
    (re.compile(r"\.\.FFFD"), "..FFFC"),
    # The replacement must be a raw string: in a normal string literal "\1"
    # is the control character U+0001, not a backreference to group 1.
    (re.compile(r"(FFF[^E])\.\.FFFF"), r"\1..FFFC"),
]
@ -44,9 +54,13 @@ for line in in_file:
# s/; ignored />/
# s/^([^;]+) ; valid/# \1valid/
# s/; mapped ; />/
# s/; deviation ; />/
# s/^([^;]+) ; deviation/# \1deviation/
# s/ +(\# [^\#]+)$/ \1/
#
# Except: Disallowed ASCII characters are passed through;
# they are handled in code.
# Deviation characters are also handled in code.
#
# A circular mapping FFFD>FFFD is avoided by rewriting the line whose range
# ends in ..FFFD (or, in another version of the input file, a range
# FFFx..FFFF) so that it ends in ..FFFC instead.
#