mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-7144 handle deviation characters and disallowed ASCII characters in code
X-SVN-Rev: 27999
This commit is contained in:
parent
df7d633f59
commit
ecd660579b
1 changed files with 16 additions and 2 deletions
|
@ -17,12 +17,22 @@ __author__ = "Markus Scherer"
|
|||
import re
|
||||
|
||||
# Ordered rewrite rules: each entry pairs a compiled pattern with the
# replacement text substituted for every match of that pattern.
# NOTE(review): the consuming loop is not visible in this view; the rules are
# presumably applied to each input line in list order -- the ASCII
# special cases rely on running before the generic "; disallowed " rule.
replacements = [
    # Pass through disallowed ASCII characters: Handled in code.
    # These rules turn the matching range lines into comments.
    (re.compile(r"0000..002C ; disallowed"), "# 0000..002C (allow ASCII)"),
    (re.compile(r"002F ; disallowed"), "# 002F (allow ASCII)"),
    (re.compile(r"003A..0040 ; disallowed"), "# 003A..0040 (allow ASCII)"),
    (re.compile(r"005B..0060 ; disallowed"), "# 005B..0060 (allow ASCII)"),
    # This range straddles the ASCII boundary: only the non-ASCII part
    # 0080..00A0 keeps its >FFFD mapping. The pattern consumes the "#" that
    # starts the trailing comment, so the replacement re-emits it.
    (re.compile(r"007B..00A0 ; disallowed #"),
     "0080..00A0 >FFFD # (allow ASCII)"),
    # Normal transformations.
    (re.compile(r"; disallowed "), ">FFFD"),
    (re.compile(r"; ignored "), ">"),
    (re.compile(r"^([^;]+) ; valid"), r"# \1valid"),
    (re.compile(r"; mapped ; "), ">"),
    # Deviation characters are also handled in code: a deviation line that
    # carries a mapping becomes a plain mapping, one without becomes a comment.
    (re.compile(r"; deviation ; "), ">"),
    (re.compile(r"^([^;]+) ; deviation"), r"# \1deviation"),
    # Collapse the run of spaces before the trailing "# ..." comment to one.
    (re.compile(r" +(\# [^\#]+)$"), r" \1"),
    # Two versions of avoiding circular FFFD>FFFD mappings,
    # depending on the version of the input file.
    (re.compile(r"\.\.FFFD"), "..FFFC"),
    # The replacement must be a raw string: in a normal string literal "\1"
    # is the octal escape for U+0001, not a backreference to group 1, and the
    # start of the range would be lost.
    (re.compile(r"(FFF[^E])\.\.FFFF"), r"\1..FFFC"),
]
|
||||
|
@ -44,9 +54,13 @@ for line in in_file:
|
|||
# s/; ignored />/
|
||||
# s/^([^;]+) ; valid/# \1valid/
|
||||
# s/; mapped ; />/
|
||||
# s/; deviation ; />/
|
||||
# s/^([^;]+) ; deviation/# \1deviation/
|
||||
# s/ +(\# [^\#]+)$/ \1/
|
||||
#
|
||||
# Except: Disallowed ASCII characters are passed through;
|
||||
# they are handled in code.
|
||||
# Deviation characters are also handled in code.
|
||||
#
|
||||
# A circular mapping FFFD>FFFD is avoided by rewriting the line that contains
|
||||
# ..FFFD to contain ..FFFC instead.
|
||||
#
|
||||
|
|
Loading…
Add table
Reference in a new issue