ICU-10168 Unicode 6.3 data files as of 2013-aug-27

X-SVN-Rev: 34104
This commit is contained in:
Markus Scherer 2013-08-28 21:08:11 +00:00
parent 4dc67e0203
commit fe1a149ca5
6 changed files with 13 additions and 18 deletions

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:20edb1901887f32f7e831035d158947e48b538440fedbadf3de398bc54648e16
oid sha256:a832781610985c50e36fcd45f8b5c7a7fcb8d358edeeb6d89aaed10f01b810b1
size 10966489

View file

@ -1,5 +1,5 @@
# CompositionExclusions-6.1.0.txt
# Date: 2011-07-12, 00:13:00 GMT [KW, LI]
# CompositionExclusions-6.3.0.txt
# Date: 2012-12-11, 11:23:00 GMT [KW, LI]
#
# This file lists the characters for the Composition Exclusion Table
# defined in UAX #15, Unicode Normalization Forms.
@ -7,7 +7,7 @@
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2011 Unicode, Inc.
# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# For more information, see
@ -203,3 +203,4 @@ FB4E # HEBREW LETTER PE WITH RAFE
# Total code points: 4
# EOF

View file

@ -1,5 +1,5 @@
# SpecialCasing-6.3.0.txt
# Date: 2013-03-12, 22:36:00 GMT [LI temp]
# Date: 2013-05-08, 13:54:51 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2013 Unicode, Inc.
@ -273,3 +273,4 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
# EOF

View file

@ -7743,10 +7743,10 @@
2305;PROJECTIVE;So;0;ON;;;;;N;;;;;
2306;PERSPECTIVE;So;0;ON;;;;;N;;;;;
2307;WAVY LINE;So;0;ON;;;;;N;;;;;
2308;LEFT CEILING;Sm;0;ON;;;;;Y;;;;;
2309;RIGHT CEILING;Sm;0;ON;;;;;Y;;;;;
230A;LEFT FLOOR;Sm;0;ON;;;;;Y;;;;;
230B;RIGHT FLOOR;Sm;0;ON;;;;;Y;;;;;
2308;LEFT CEILING;Ps;0;ON;;;;;Y;;;;;
2309;RIGHT CEILING;Pe;0;ON;;;;;Y;;;;;
230A;LEFT FLOOR;Ps;0;ON;;;;;Y;;;;;
230B;RIGHT FLOOR;Pe;0;ON;;;;;Y;;;;;
230C;BOTTOM RIGHT CROP;So;0;ON;;;;;N;;;;;
230D;BOTTOM LEFT CROP;So;0;ON;;;;;N;;;;;
230E;TOP RIGHT CROP;So;0;ON;;;;;N;;;;;

View file

@ -660,10 +660,6 @@ public class RBBITest extends TestFmwk {
}
}
}
// KIND_WORD "en_US_POSIX"
final String posxWordText = "Can't have breaks in xx:yy or struct.field for CS-types.";
final int[] posxWordTOffsets = { 5, 6, 10, 11, 17, 18, 20, 21, 23, 24, 26, 27, 29, 30, 36, 37, 42, 43, 46, 47, 49, 50, 55, 56 };
final int[] posxWordROffsets = { 5, 6, 10, 11, 17, 18, 20, 21, 23, 24, 26, 27, 29, 30, 42, 43, 46, 47, 49, 50, 55, 56 };
// KIND_SENTENCE "el"
final String elSentText = "\u0391\u03B2, \u03B3\u03B4; \u0395 \u03B6\u03B7\u037E \u0398 \u03B9\u03BA. " +
"\u039B\u03BC \u03BD\u03BE! \u039F\u03C0, \u03A1\u03C2? \u03A3";
@ -682,8 +678,6 @@ public class RBBITest extends TestFmwk {
// 29, 32, 33, 35, 37, 38, 40, 41 };
final TBItem[] tests = {
new TBItem( BreakIterator.KIND_WORD, new ULocale("en_US_POSIX"), posxWordText, posxWordTOffsets ),
new TBItem( BreakIterator.KIND_WORD, ULocale.ROOT, posxWordText, posxWordROffsets ),
new TBItem( BreakIterator.KIND_SENTENCE, new ULocale("el"), elSentText, elSentTOffsets ),
new TBItem( BreakIterator.KIND_SENTENCE, ULocale.ROOT, elSentText, elSentROffsets ),
new TBItem( BreakIterator.KIND_CHARACTER, new ULocale("th"), thCharText, thCharTOffsets ),

View file

@ -711,17 +711,16 @@ Bangkok)•</data>
# UBreakIteratorType UBRK_WORD, Locale "en_US_POSIX"
# Words don't include colon or period (cldrbug #1969).
# Unicode 6.3 change: colon now breaks words.
<locale en_US>
<word>
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx<200>:•yy<200> •or<200> •struct.field<200> \
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx:yy<200> •or<200> •struct.field<200> \
•for<200> •CS<200>-•types<200>.•</data>
<data>•\uFF92\uFF76\uFF9E<400> •</data>
<locale en_US_POSIX>
<word>
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx<200>:•yy<200> •or<200> •struct<200>.•field<200> \
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx:yy<200> •or<200> •struct<200>.•field<200> \
•for<200> •CS<200>-•types<200>.•</data>
<data>•\u06c9<200>\uc799\ufffa•</data>
<data>•\uFF92\uFF76\uFF9E<400> •</data>