From 9bbee2c4ba13d685eace62f1b5b6c47222fab328 Mon Sep 17 00:00:00 2001
From: Markus Scherer
Date: Mon, 19 Apr 2010 21:05:15 +0000
Subject: [PATCH] ICU-7144 adjust to IdnaMappingTable.txt separating reserved and assigned ranges

X-SVN-Rev: 27949
---
Note: the replacement template of the new (FFF[^E])\.\.FFFF rule is a raw
string, so \1 reaches re.sub() as a backreference to the captured range
start; in a plain literal "\1" is the octal escape for U+0001.

 tools/unicode/py/idna2nrm.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tools/unicode/py/idna2nrm.py b/tools/unicode/py/idna2nrm.py
index b244935a141..58bb98c8d80 100755
--- a/tools/unicode/py/idna2nrm.py
+++ b/tools/unicode/py/idna2nrm.py
@@ -1,3 +1,4 @@
+#!/usr/bin/python2.4
 # Copyright (C) 2010, International Business Machines
 # Corporation and others. All Rights Reserved.
 #
@@ -22,7 +23,8 @@ replacements = [
   (re.compile(r"; mapped ; "), ">"),
   (re.compile(r"; deviation ; "), ">"),
   (re.compile(r" +(\# [^\#]+)$"), r" \1"),
-  (re.compile(r"\.\.FFFF"), "..FFFC")
+  (re.compile(r"\.\.FFFD"), "..FFFC"),
+  (re.compile(r"(FFF[^E])\.\.FFFF"), r"\1..FFFC")
 ]

 in_file = open("IdnaMappingTable.txt", "r")
@@ -30,6 +32,7 @@ out_file = open("uts46.txt", "w")
 out_file.write("# Original file:\n")
 for line in in_file:
+  orig_line = line
   if line.startswith("# For documentation, see"):
     out_file.write(line)
     out_file.write(r"""

@@ -44,8 +47,8 @@ for line in in_file:
 # s/; deviation ; />/
 # s/ +(\# [^\#]+)$/ \1/
 #
-# A circular mapping FFFD>FFFD is avoided by rewriting the line that starts with
-# FFEF..FFFF to two lines, splitting this range and omitting FFFD.
+# A circular mapping FFFD>FFFD is avoided by rewriting the line that contains
+# ..FFFD to contain ..FFFC instead.
 #
 # Use this file as the second gennorm2 input file after nfc.txt.
 # ================================================
@@ -53,7 +56,7 @@ for line in in_file:
     continue
   for rep in replacements: line = rep[0].sub(rep[1], line)
   out_file.write(line)
-  if "..FFFC" in line:
+  if "..FFFF" in orig_line and "..FFFC" in line:
     out_file.write("FFFE..FFFF >FFFD\n");
 in_file.close()
 out_file.close()