ICU-2194 data for IDNA

X-SVN-Rev: 11195
This commit is contained in:
Ram Viswanadha 2003-02-28 21:36:17 +00:00
parent 7da935c904
commit 71eb8f87f1
6 changed files with 2050 additions and 1 deletions

View file

@ -241,7 +241,7 @@ BRK_FILES = "$(ICUBLD)\$(ICUDT)sent.brk" "$(ICUBLD)\$(ICUDT)char.brk" "$(ICUBLD)
# move the .dll and .lib files to their final destination afterwards.
# The $(U_ICUDATA_NAME).lib and $(U_ICUDATA_NAME).exp should already be in the right place due to stubdata.
#
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata.exe" $(CNV_FILES) $(BRK_FILES) "$(ICUBLD)\$(ICUDT)uprops.icu" "$(ICUBLD)\$(ICUDT)unames.icu" "$(ICUBLD)\$(ICUDT)pnames.icu" "$(ICUBLD)\$(ICUDT)unorm.icu" "$(ICUBLD)\$(ICUDT)cnvalias.icu" "$(ICUBLD)\$(ICUDT)tz.icu" "$(ICUBLD)\$(ICUDT)ucadata.icu" "$(ICUBLD)\$(ICUDT)invuca.icu" $(ALL_RES) "$(ICUBLD)\$(ICUDT)icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata.exe" $(CNV_FILES) $(BRK_FILES) "$(ICUBLD)\$(ICUDT)uprops.icu" "$(ICUBLD)\$(ICUDT)unames.icu" "$(ICUBLD)\$(ICUDT)pnames.icu" "$(ICUBLD)\$(ICUDT)unorm.icu" "$(ICUBLD)\$(ICUDT)cnvalias.icu" "$(ICUBLD)\$(ICUDT)tz.icu" "$(ICUBLD)\$(ICUDT)ucadata.icu" "$(ICUBLD)\$(ICUDT)invuca.icu" "$(ICUBLD)\$(ICUDT)uidna.icu" $(ALL_RES) "$(ICUBLD)\$(ICUDT)icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
@echo Building icu data
@cd "$(ICUBLD)"
@"$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata" -f -e $(U_ICUDATA_NAME) -v -m dll -c -p $(ICUPKG) -O "$(PKGOPT)" -d "$(ICUBLD)" -s . <<pkgdatain.txt
@ -253,6 +253,7 @@ $(ICUDT)cnvalias.icu
$(ICUDT)tz.icu
$(ICUDT)ucadata.icu
$(ICUDT)invuca.icu
$(ICUDT)uidna.icu
$(CNV_FILES:.cnv =.cnv
)
$(ALL_RES:.res =.res
@ -401,6 +402,10 @@ res_index {
@set ICU_DATA=$(ICUBLD)
@"$(ICUTOOLS)\genuca\$(CFG)\genuca" -s "$(ICUUNIDATA)"
# Targets for uidna.icu
# Builds the IDNA data file uidna.icu in $(ICUBLD). It is rebuilt whenever any
# .txt file under $(ICUUNIDATA) or $(ICUMISC) is newer than the target.
# genidna is run through its build-output path, like the other data tools in
# this makefile (pkgdata, genuca, makeconv), instead of relying on PATH.
# Presumably -s is the source data directory and -d the destination directory,
# with the doubled trailing backslash protecting the closing quote - confirm.
# NOTE(review): the prerequisites live under $(ICUUNIDATA) and $(ICUMISC) while
# -s passes $(ICUDATA) - confirm genidna resolves both subdirectories from it.
"$(ICUBLD)\$(ICUDT)uidna.icu" : "$(ICUUNIDATA)\*.txt" "$(ICUMISC)\*.txt"
	"$(ICUTOOLS)\genidna\$(CFG)\genidna" -s "$(ICUDATA)" -d "$(ICUBLD)\\"
# Dependencies on the tools for the batch inference rules
# Every file in $(UCM_SOURCE) is made to depend on makeconv.exe, which NMAKE
# looks up in the makeconv build-output directory ({directory}dependent syntax).
# NOTE(review): presumably this forces the converter data to be regenerated
# whenever makeconv itself is rebuilt - confirm against the inference rules.
$(UCM_SOURCE) : {"$(ICUTOOLS)\makeconv\$(CFG)"}makeconv.exe

View file

@ -0,0 +1,405 @@
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# This file contains code points from Table A.1 from RFC 3454
0221;
0234..024F;
02AE..02AF;
02EF..02FF;
0350..035F;
0370..0373;
0376..0379;
037B..037D;
037F..0383;
038B;
038D;
03A2;
03CF;
03F7..03FF;
0487;
04CF;
04F6..04F7;
04FA..04FF;
0510..0530;
0557..0558;
0560;
0588;
058B..0590;
05A2;
05BA;
05C5..05CF;
05EB..05EF;
05F5..060B;
060D..061A;
061C..061E;
0620;
063B..063F;
0656..065F;
06EE..06EF;
06FF;
070E;
072D..072F;
074B..077F;
07B2..0900;
0904;
093A..093B;
094E..094F;
0955..0957;
0971..0980;
0984;
098D..098E;
0991..0992;
09A9;
09B1;
09B3..09B5;
09BA..09BB;
09BD;
09C5..09C6;
09C9..09CA;
09CE..09D6;
09D8..09DB;
09DE;
09E4..09E5;
09FB..0A01;
0A03..0A04;
0A0B..0A0E;
0A11..0A12;
0A29;
0A31;
0A34;
0A37;
0A3A..0A3B;
0A3D;
0A43..0A46;
0A49..0A4A;
0A4E..0A58;
0A5D;
0A5F..0A65;
0A75..0A80;
0A84;
0A8C;
0A8E;
0A92;
0AA9;
0AB1;
0AB4;
0ABA..0ABB;
0AC6;
0ACA;
0ACE..0ACF;
0AD1..0ADF;
0AE1..0AE5;
0AF0..0B00;
0B04;
0B0D..0B0E;
0B11..0B12;
0B29;
0B31;
0B34..0B35;
0B3A..0B3B;
0B44..0B46;
0B49..0B4A;
0B4E..0B55;
0B58..0B5B;
0B5E;
0B62..0B65;
0B71..0B81;
0B84;
0B8B..0B8D;
0B91;
0B96..0B98;
0B9B;
0B9D;
0BA0..0BA2;
0BA5..0BA7;
0BAB..0BAD;
0BB6;
0BBA..0BBD;
0BC3..0BC5;
0BC9;
0BCE..0BD6;
0BD8..0BE6;
0BF3..0C00;
0C04;
0C0D;
0C11;
0C29;
0C34;
0C3A..0C3D;
0C45;
0C49;
0C4E..0C54;
0C57..0C5F;
0C62..0C65;
0C70..0C81;
0C84;
0C8D;
0C91;
0CA9;
0CB4;
0CBA..0CBD;
0CC5;
0CC9;
0CCE..0CD4;
0CD7..0CDD;
0CDF;
0CE2..0CE5;
0CF0..0D01;
0D04;
0D0D;
0D11;
0D29;
0D3A..0D3D;
0D44..0D45;
0D49;
0D4E..0D56;
0D58..0D5F;
0D62..0D65;
0D70..0D81;
0D84;
0D97..0D99;
0DB2;
0DBC;
0DBE..0DBF;
0DC7..0DC9;
0DCB..0DCE;
0DD5;
0DD7;
0DE0..0DF1;
0DF5..0E00;
0E3B..0E3E;
0E5C..0E80;
0E83;
0E85..0E86;
0E89;
0E8B..0E8C;
0E8E..0E93;
0E98;
0EA0;
0EA4;
0EA6;
0EA8..0EA9;
0EAC;
0EBA;
0EBE..0EBF;
0EC5;
0EC7;
0ECE..0ECF;
0EDA..0EDB;
0EDE..0EFF;
0F48;
0F6B..0F70;
0F8C..0F8F;
0F98;
0FBD;
0FCD..0FCE;
0FD0..0FFF;
1022;
1028;
102B;
1033..1035;
103A..103F;
105A..109F;
10C6..10CF;
10F9..10FA;
10FC..10FF;
115A..115E;
11A3..11A7;
11FA..11FF;
1207;
1247;
1249;
124E..124F;
1257;
1259;
125E..125F;
1287;
1289;
128E..128F;
12AF;
12B1;
12B6..12B7;
12BF;
12C1;
12C6..12C7;
12CF;
12D7;
12EF;
130F;
1311;
1316..1317;
131F;
1347;
135B..1360;
137D..139F;
13F5..1400;
1677..167F;
169D..169F;
16F1..16FF;
170D;
1715..171F;
1737..173F;
1754..175F;
176D;
1771;
1774..177F;
17DD..17DF;
17EA..17FF;
180F;
181A..181F;
1878..187F;
18AA..1DFF;
1E9C..1E9F;
1EFA..1EFF;
1F16..1F17;
1F1E..1F1F;
1F46..1F47;
1F4E..1F4F;
1F58;
1F5A;
1F5C;
1F5E;
1F7E..1F7F;
1FB5;
1FC5;
1FD4..1FD5;
1FDC;
1FF0..1FF1;
1FF5;
1FFF;
2053..2056;
2058..205E;
2064..2069;
2072..2073;
208F..209F;
20B2..20CF;
20EB..20FF;
213B..213C;
214C..2152;
2184..218F;
23CF..23FF;
2427..243F;
244B..245F;
24FF;
2614..2615;
2618;
267E..267F;
268A..2700;
2705;
270A..270B;
2728;
274C;
274E;
2753..2755;
2757;
275F..2760;
2795..2797;
27B0;
27BF..27CF;
27EC..27EF;
2B00..2E7F;
2E9A;
2EF4..2EFF;
2FD6..2FEF;
2FFC..2FFF;
3040;
3097..3098;
3100..3104;
312D..3130;
318F;
31B8..31EF;
321D..321F;
3244..3250;
327C..327E;
32CC..32CF;
32FF;
3377..337A;
33DE..33DF;
33FF;
4DB6..4DFF;
9FA6..9FFF;
A48D..A48F;
A4C7..ABFF;
D7A4..D7FF;
FA2E..FA2F;
FA6B..FAFF;
FB07..FB12;
FB18..FB1C;
FB37;
FB3D;
FB3F;
FB42;
FB45;
FBB2..FBD2;
FD40..FD4F;
FD90..FD91;
FDC8..FDCF;
FDFD..FDFF;
FE10..FE1F;
FE24..FE2F;
FE47..FE48;
FE53;
FE67;
FE6C..FE6F;
FE75;
FEFD..FEFE;
FF00;
FFBF..FFC1;
FFC8..FFC9;
FFD0..FFD1;
FFD8..FFD9;
FFDD..FFDF;
FFE7;
FFEF..FFF8;
10000..102FF;
1031F;
10324..1032F;
1034B..103FF;
10426..10427;
1044E..1CFFF;
1D0F6..1D0FF;
1D127..1D129;
1D1DE..1D3FF;
1D455;
1D49D;
1D4A0..1D4A1;
1D4A3..1D4A4;
1D4A7..1D4A8;
1D4AD;
1D4BA;
1D4BC;
1D4C1;
1D4C4;
1D506;
1D50B..1D50C;
1D515;
1D51D;
1D53A;
1D53F;
1D545;
1D547..1D549;
1D551;
1D6A4..1D6A7;
1D7CA..1D7CD;
1D800..1FFFD;
2A6D7..2F7FF;
2FA1E..2FFFD;
30000..3FFFD;
40000..4FFFD;
50000..5FFFD;
60000..6FFFD;
70000..7FFFD;
80000..8FFFD;
90000..9FFFD;
A0000..AFFFD;
B0000..BFFFD;
C0000..CFFFD;
D0000..DFFFD;
E0000;
E0002..E001F;
E0080..EFFFD;
# Total code points 3653

View file

@ -0,0 +1,36 @@
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# This file contains code points from Table B.1 from RFC 3454
00AD; ; Map to nothing
034F; ; Map to nothing
1806; ; Map to nothing
180B; ; Map to nothing
180C; ; Map to nothing
180D; ; Map to nothing
200B; ; Map to nothing
200C; ; Map to nothing
200D; ; Map to nothing
2060; ; Map to nothing
FE00; ; Map to nothing
FE01; ; Map to nothing
FE02; ; Map to nothing
FE03; ; Map to nothing
FE04; ; Map to nothing
FE05; ; Map to nothing
FE06; ; Map to nothing
FE07; ; Map to nothing
FE08; ; Map to nothing
FE09; ; Map to nothing
FE0A; ; Map to nothing
FE0B; ; Map to nothing
FE0C; ; Map to nothing
FE0D; ; Map to nothing
FE0E; ; Map to nothing
FE0F; ; Map to nothing
FEFF; ; Map to nothing
# Total code points 27

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,180 @@
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.1.1
0020; SPACE
# Total code points 1
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.1.2
00A0; NO-BREAK SPACE
1680; OGHAM SPACE MARK
2000; EN QUAD
2001; EM QUAD
2002; EN SPACE
2003; EM SPACE
2004; THREE-PER-EM SPACE
2005; FOUR-PER-EM SPACE
2006; SIX-PER-EM SPACE
2007; FIGURE SPACE
2008; PUNCTUATION SPACE
2009; THIN SPACE
200A; HAIR SPACE
200B; ZERO WIDTH SPACE
202F; NARROW NO-BREAK SPACE
205F; MEDIUM MATHEMATICAL SPACE
3000; IDEOGRAPHIC SPACE
# Total code points 17
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.2.1
0000..001F; [CONTROL CHARACTERS]
007F; DELETE
# Total code points 33
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.2.2
0080..009F; [CONTROL CHARACTERS]
06DD; ARABIC END OF AYAH
070F; SYRIAC ABBREVIATION MARK
180E; MONGOLIAN VOWEL SEPARATOR
200C; ZERO WIDTH NON-JOINER
200D; ZERO WIDTH JOINER
2028; LINE SEPARATOR
2029; PARAGRAPH SEPARATOR
2060; WORD JOINER
2061; FUNCTION APPLICATION
2062; INVISIBLE TIMES
2063; INVISIBLE SEPARATOR
206A..206F; [CONTROL CHARACTERS]
FEFF; ZERO WIDTH NO-BREAK SPACE
FFF9..FFFC; [CONTROL CHARACTERS]
1D173..1D17A; [MUSICAL CONTROL CHARACTERS]
# Total code points 62
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.3
E000..F8FF; [PRIVATE USE, PLANE 0]
F0000..FFFFD; [PRIVATE USE, PLANE 15]
100000..10FFFD; [PRIVATE USE, PLANE 16]
# Total code points 137468
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.4
FDD0..FDEF; [NONCHARACTER CODE POINTS]
FFFE..FFFF; [NONCHARACTER CODE POINTS]
1FFFE..1FFFF; [NONCHARACTER CODE POINTS]
2FFFE..2FFFF; [NONCHARACTER CODE POINTS]
3FFFE..3FFFF; [NONCHARACTER CODE POINTS]
4FFFE..4FFFF; [NONCHARACTER CODE POINTS]
5FFFE..5FFFF; [NONCHARACTER CODE POINTS]
6FFFE..6FFFF; [NONCHARACTER CODE POINTS]
7FFFE..7FFFF; [NONCHARACTER CODE POINTS]
8FFFE..8FFFF; [NONCHARACTER CODE POINTS]
9FFFE..9FFFF; [NONCHARACTER CODE POINTS]
AFFFE..AFFFF; [NONCHARACTER CODE POINTS]
BFFFE..BFFFF; [NONCHARACTER CODE POINTS]
CFFFE..CFFFF; [NONCHARACTER CODE POINTS]
DFFFE..DFFFF; [NONCHARACTER CODE POINTS]
EFFFE..EFFFF; [NONCHARACTER CODE POINTS]
FFFFE..FFFFF; [NONCHARACTER CODE POINTS]
10FFFE..10FFFF; [NONCHARACTER CODE POINTS]
# Total code points 66
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.5
D800..DFFF; [SURROGATE CODES]
# Total code points 2048
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.6
FFF9; INTERLINEAR ANNOTATION ANCHOR
FFFA; INTERLINEAR ANNOTATION SEPARATOR
FFFB; INTERLINEAR ANNOTATION TERMINATOR
FFFC; OBJECT REPLACEMENT CHARACTER
FFFD; REPLACEMENT CHARACTER
# Total code points 5
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.7
2FF0..2FFB; [IDEOGRAPHIC DESCRIPTION CHARACTERS]
# Total code points 12
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.8
0340; COMBINING GRAVE TONE MARK
0341; COMBINING ACUTE TONE MARK
200E; LEFT-TO-RIGHT MARK
200F; RIGHT-TO-LEFT MARK
202A; LEFT-TO-RIGHT EMBEDDING
202B; RIGHT-TO-LEFT EMBEDDING
202C; POP DIRECTIONAL FORMATTING
202D; LEFT-TO-RIGHT OVERRIDE
202E; RIGHT-TO-LEFT OVERRIDE
206A; INHIBIT SYMMETRIC SWAPPING
206B; ACTIVATE SYMMETRIC SWAPPING
206C; INHIBIT ARABIC FORM SHAPING
206D; ACTIVATE ARABIC FORM SHAPING
206E; NATIONAL DIGIT SHAPES
206F; NOMINAL DIGIT SHAPES
# Total code points 15
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.9
E0001; LANGUAGE TAG
E0020..E007F; [TAGGING CHARACTERS]
# Total code points 97

View file

@ -0,0 +1,43 @@
# NormalizationCorrections-4.0.0.txt
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# The normalization stabilization policy of the Unicode
# Consortium ordinarily precludes any change to the decomposition
# for any character, once established in a relevant version
# of the UnicodeData.txt data file. However, under certain
# exceptional (and rare) conditions, an error in a decomposition
# mapping may be discovered that is truly just an unintended
# typo in the data, and not a matter of dubious interpretation.
#
# Whenever such an error may be found, and if it meets the
# requirements for possible exceptions to normalization
# stability, the correction is entered in this data file,
# so that any implementation depending on absolute stability
# of normalization, *including* any errors in the data, can
# safely reconstruct the exact state of the data tables at
# any given version of Unicode.
#
# Currently this list has exactly six entries in it, one for the
# typo found and corrected in Corrigendum #3, and five for
# the typos and misidentifications found and corrected in
# Corrigendum #4. All efforts
# will be made to keep the entries limited to just those fixes.
#
# Interpretation of the fields:
# Field 1: Unicode code point
# Field 2: Original (erroneous) decomposition
# Field 3: Corrected decomposition
# Field 4: Version of Unicode for which the correction was
# entered into UnicodeData.txt, in n.n.n format.
# Comment: Indicates the Unicode Corrigendum which documents
# the correction
#
#
F951;96FB;964B;3.2.0 # Corrigendum 3
2F868;2136A;36FC;4.0.0 # Corrigendum 4
2F874;5F33;5F53;4.0.0 # Corrigendum 4
2F91F;43AB;243AB;4.0.0 # Corrigendum 4
2F95F;7AAE;7AEE;4.0.0 # Corrigendum 4
2F9BF;4D57;45D7;4.0.0 # Corrigendum 4