ICU-21635 Unicode 14 data files 20210820, line break LB30b.2

See #1807
This commit is contained in:
Markus Scherer 2021-08-23 03:24:02 +00:00
parent 4a9d2bd0b8
commit 41aa7159ea
49 changed files with 838 additions and 700 deletions

View file

@ -649,11 +649,11 @@ static const uint16_t ucase_props_trieIndex[12908]={
0x1392,0x1392,0x1392,0,0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0,0x1392,0x1392,0,0xec91,
0xec91,0xec91,0xec91,0xec91,0xec91,0xec91,0xec91,0xec91,0xec91,0xec91,0,0xec91,0xec91,0xec91,0xec91,0xec91,
0xec91,0xec91,0xec91,0xec91,0xec91,0xec91,0xec91,0xec91,0xec91,0xec91,0,0xec91,0xec91,0xec91,0xec91,0xec91,
0xec91,0xec91,0,0xec91,0xec91,0,0,0,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,0,4,4,4,4,4,4,
4,4,4,0,0,0,0,0,4,4,4,4,4,4,0,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,0,4,4,4,0,4,4,0,
0xec91,0xec91,0,0xec91,0xec91,0,0,0,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,
5,5,5,0,0,0,0,0,5,4,4,5,5,5,0,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,0,4,4,4,0,4,4,0,
0,0,0,0,4,0x64,4,0x44,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x64,0x64,0,0,0,0,0x64,0,0,0,0,0,0x44,0x64,0,

File diff suppressed because it is too large Load diff

View file

@ -76,6 +76,8 @@ $ZWJ = [:LineBreak = ZWJ:];
$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general category of Mn or Mc also resolve to CM.
@ -353,8 +355,9 @@ $RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $
# because of the chain-out behavior difference. The rule must chain out only from the [set characters],
# not from the preceding $RI or $CM, which it would be able to do if the set were optional.
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
# LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
$EB $CM* $EM;
$ExtPictUnassigned $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.

View file

@ -77,6 +77,8 @@ $ZWJ = [:LineBreak = ZWJ:];
$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general category of Mn or Mc also resolve to CM.
@ -354,8 +356,9 @@ $RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $
# because of the chain-out behavior difference. The rule must chain out only from the [set characters],
# not from the preceding $RI or $CM, which it would be able to do if the set were optional.
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
# LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
$EB $CM* $EM;
$ExtPictUnassigned $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.

View file

@ -83,6 +83,8 @@ $ZWJ = [:LineBreak = ZWJ:];
$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general category of Mn or Mc also resolve to CM.
@ -364,8 +366,9 @@ $RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $
# because of the chain-out behavior difference. The rule must chain out only from the [set characters],
# not from the preceding $RI or $CM, which it would be able to do if the set were optional.
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
# LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
$EB $CM* $EM;
$ExtPictUnassigned $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.

View file

@ -94,6 +94,8 @@ $ZWJ = [:LineBreak = ZWJ:];
$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general category of Mn or Mc also resolve to CM.
@ -379,8 +381,9 @@ $RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $
# because of the chain-out behavior difference. The rule must chain out only from the [set characters],
# not from the preceding $RI or $CM, which it would be able to do if the set were optional.
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
# LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
$EB $CM* $EM;
$ExtPictUnassigned $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.

View file

@ -78,6 +78,8 @@ $ZWJ = [:LineBreak = ZWJ:];
$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general category of Mn or Mc also resolve to CM.
@ -355,8 +357,9 @@ $RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $
# because of the chain-out behavior difference. The rule must chain out only from the [set characters],
# not from the preceding $RI or $CM, which it would be able to do if the set were optional.
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
# LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
$EB $CM* $EM;
$ExtPictUnassigned $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.

View file

@ -82,6 +82,8 @@ $ZWJ = [:LineBreak = ZWJ:];
$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general category of Mn or Mc also resolve to CM.
@ -362,8 +364,9 @@ $RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $
# because of the chain-out behavior difference. The rule must chain out only from the [set characters],
# not from the preceding $RI or $CM, which it would be able to do if the set were optional.
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
# LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
$EB $CM* $EM;
$ExtPictUnassigned $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -1,5 +1,5 @@
# DerivedCoreProperties-14.0.0.txt
# Date: 2021-06-08, 00:30:52 GMT
# Date: 2021-08-12, 23:12:53 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -2028,6 +2028,10 @@ FF41..FF5A ; Lowercase
105A3..105B1 ; Lowercase
105B3..105B9 ; Lowercase
105BB..105BC ; Lowercase
10780 ; Lowercase
10783..10785 ; Lowercase
10787..107B0 ; Lowercase
107B2..107BA ; Lowercase
10CC0..10CF2 ; Lowercase
118C0..118DF ; Lowercase
16E60..16E7F ; Lowercase
@ -2063,7 +2067,7 @@ FF41..FF5A ; Lowercase
1DF0B..1DF1E ; Lowercase
1E922..1E943 ; Lowercase
# Total code points: 2416
# Total code points: 2471
# ================================================
@ -2855,6 +2859,10 @@ FF41..FF5A ; Cased
105A3..105B1 ; Cased
105B3..105B9 ; Cased
105BB..105BC ; Cased
10780 ; Cased
10783..10785 ; Cased
10787..107B0 ; Cased
107B2..107BA ; Cased
10C80..10CB2 ; Cased
10CC0..10CF2 ; Cased
118A0..118DF ; Cased
@ -2896,7 +2904,7 @@ FF41..FF5A ; Cased
1F150..1F169 ; Cased
1F170..1F189 ; Cased
# Total code points: 4398
# Total code points: 4453
# ================================================

View file

@ -1,5 +1,5 @@
# Fractional UCA Table, generated from the UCA DUCET
# 2021-06-08 [MS]
# 2021-08-17 [MS]
# VERSION: UCA=14.0.0, UCD=14.0.0
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
@ -18,7 +18,7 @@
[radical 6=⼅亅:亅𠄌了𠄍-𠄐亇𠄑𬼶-𬼸予㐧𠄒-𠄔𰁒争𠀩𠄕𬼹亊𠄖-𠄘𪜜事㐨𠄙𬼺𠄚𰁓𰁔𠄛𪜝𬼻𠄜𠄝𬼼]
[radical 7=⼆二:二𠄞-𠄠亍-亏𬼽云-亖𠄡𰁕亗𰁖亘-亚𠄢-𠄧𫡱𰁗些亜𠀥𠄨-𠄪𫡲亝-亟㐩𠄫-𠄯𫡳𬼾𠄰-𠄳𬼿𬽀𰁘𠄵-𠄸𬽁𠄹𠄺𫡴𠄻𬽂𠄼-𠄾𫡵𫡶𰁙𠄿𪜞𫡷𰁚𠅀𪜟𰁛𫡸𫡹]
[radical 8=⼇亠:亠-亣𠅁𠅂𪜠𪜡𬽃交-产㐪㐫𠅃-𠅇𫝅𬽄𰁜亨-亪㐬𠅈-𠅋𪜢𫡺𬽅-𬽇享京㐭𠅌-𠅒𬽈𬽉亭-亲𠅓-𠅕𬽊-𬽌亳𠅖𠅘-𠅝𫡻𬽍𠅞-𠅧𪜣𬽎-𬽒𰁝亴亵𠅨-𠅯𠅲𪜤𫡼𬽓𬽔𰁞𰁟亶亷㐮𠅳-𠅸𠅺-𠅼𠆀𫡽𫡾𬽕-𬽘𠅾𠅿𫡿𫢀𠆁-𠆉𪜥𬽙𬽚𰁠亸𠆋-𠆒𠆔𠆖-𠆘𬽛𬽜㐯𫢁𫢂𬽝𬽞𠆚𠆝𪜦𫢃亹𠆞𬽟𫢄𬽠𠆟-𠆡]
[radical 9=⼈人:人亻𠆢亼-亿𠆣-𠆥𫢅什-仓𠆦-𠆨𪜧𫢆-𫢈𬽡𬽢𰁡仔-仭㐰-㐷𠆩-𠆭𠆯𠆰𠆳𫝆𫢉𫢊𬽣-𬽨𰁢𰁣仮-伬佤㐸-㑀𠆴-𠇑𠇓-𠇕𪜨-𪜫𫢋-𫢐𬽩-𬽭𰁤-𰁪伭-佣佥-佨㑁-㑈𠇖-𠈁𪜬-𪜴𫢑-𫢘𬽮-𬽺𰁫-𰁰佩-侭鿇㑉-㑖𠈃-𠈣𠈥-𠈬𪜵-𪜺𫝇𫝈𫢙-𫢝𬽻-𬾄𰁱-𰂄侮-俭㑗-㑢𠈭-𠉢𪜻-𪝂𫢞-𫢩𬾅-𬾙𰂅-𰂎修-倿㑣㑥-㑭𠉣-𠊦𠋟𪝃-𪝊𫝉𫢪-𫢼𬾚-𬾩𰂏-𰂖偀-偿㑤㑮-㑲𠊧-𠋐𠋒-𠋞𠋠-𠋬𪝋-𪝕𫝊𫢽-𫣃𬾪-𬾼𰂗-𰂡傀-傩鿘㑳-㑺𠋭-𠌅𠌇-𠌝𠌠-𠌩𪝖-𪝟𫣄-𫣐𬾽-𬿏𰂢-𰂩傪-働鿙㑻-㒁𠌫-𠍡𠍣𠍤𠍦-𠍬𠎵𪝠-𪝤𫝋𫣑-𫣖𬿐-𬿠𰂪-𰂬僎-僱僳僴僷㒂-㒐𠍥𠍭-𠎗𠎙𠎚𠎝-𠎴𠏐𪝥-𪝪𫣗-𫣢𬿡-𬿮𰂭-𰂲僲僵僶僸-儏㒑-㒘𠎷-𠏏𠏑𠏔-𠏦𪝫-𪝯𫣣-𫣬𬿯-𬿺𰂳-𰂹儐-儞儫鿚㒙-㒜𠏒𠏨-𠐇𪝰-𪝳𫣭-𫣱𬿻-𭀀𰂺-𰂽償-優儬㒝㒞𠐈-𠐛𠐝-𠐢𪝴-𪝶𫣲-𫣸𭀁-𭀅𰂾-𰃁儭-儲㒟㒠𠐣-𠐺𪝷-𪝹𫣹-𫤃𭀆-𭀌𰃂-𰃆儳-儵㒡-㒣𠐻-𠑁𠑃-𠑌𪝺-𪝼𫤄-𫤈𭀍-𭀏𰃇𰃈儶㒤-㒦𠑍-𠑖𠑞𪝽𫤉-𫤋𭀐𭀑儷-儺㒧㒨𠑗-𠑝𠑟-𠑣𪝾𫤌-𫤑儻儼𠑤-𠑩𭀒儽㒩𠑪-𠑯𫤒-𫤔𭀓𭀔𰃉儾𠑰𫤕𭀕𫤖㒪𠑲𠑳𠑵]
[radical 9=⼈人:人亻𠆢亼-亿𠆣-𠆥𫢅什-仓𠆦-𠆨𪜧𫢆-𫢈𬽡𬽢𰁡仔-仭㐰-㐷𠆩-𠆭𠆯𠆰𠆳𫝆𫢉𫢊𬽣-𬽨𰁢𰁣仮-伬佤㐸-㑀𠆴-𠇑𠇓-𠇕𪜨-𪜫𫢋-𫢐𬽩-𬽭𰁤-𰁪伭-佣佥-佨㑁-㑈𠇖-𠈁𪜬-𪜴𫢑-𫢘𬽮-𬽺𰁫-𰁰佩-侭鿇㑉-㑖𠈃-𠈣𠈥-𠈬𪜵-𪜺𫝇𫝈𫢙-𫢝𬽻-𬾄𰁱-𰂄侮-俭㑗-㑢𠈭-𠉢𪜻-𪝂𫢞-𫢩𬾅-𬾙𰂅-𰂎修-倿㑣㑥-㑭𠉣-𠊦𠋟𪝃-𪝊𫝉𫢪-𫢼𬾚-𬾩𰂏-𰂖偀-偿㑤㑮-㑲𠊧-𠋐𠋒-𠋞𠋠-𠋬𪝋-𪝕𫝊𫢽-𫣃𬾪-𬾼𰂗-𰂡傀-傩鿘㑳-㑺𠋭-𠌅𠌇-𠌝𠌠-𠌩𪝖-𪝟𫣄-𫣐𬾽-𬿏𰂢-𰂩傪-働鿙㑻-㒁𠌫-𠍡𠍣𠍤𠍦-𠍬𠎵𪝠-𪝤𫝋𫣑-𫣖𫣞𬿐-𬿠𰂪-𰂬僎-僱僳僴僷㒂-㒐𠍥𠍭-𠎗𠎙𠎚𠎝-𠎴𠏐𪝥-𪝪𫣗𫣘𫣛-𫣝𫣟-𫣢𬿡-𬿮𰂭-𰂲僲僵僶僸-儏㒑-㒘𠎷-𠏏𠏑𠏔-𠏦𪝫-𪝯𫣣-𫣬𬿯-𬿺𰂳-𰂹儐-儞儫鿚㒙-㒜𠏒𠏨-𠐇𪝰-𪝳𫣭-𫣱𬿻-𭀀𰂺-𰂽償-優儬㒝㒞𠐈-𠐛𠐝-𠐢𪝴-𪝶𫣲-𫣸𭀁-𭀅𰂾-𰃁儭-儲㒟㒠𠐣-𠐺𪝷-𪝹𫣹-𫤃𭀆-𭀌𰃂-𰃆儳-儵㒡-㒣𠐻-𠑁𠑃-𠑌𪝺-𪝼𫤄-𫤈𭀍-𭀏𰃇𰃈儶㒤-㒦𠑍-𠑖𠑞𪝽𫤉-𫤋𭀐𭀑儷-儺㒧㒨𠑗-𠑝𠑟-𠑣𪝾𫤌-𫤑儻儼𠑤-𠑩𭀒儽㒩𠑪-𠑯𫤒-𫤔𭀓𭀔𰃉儾𠑰𫤕𭀕𫤖㒪𠑲𠑳𠑵]
[radical 10=⼉儿:儿-元𠑶𭀖-𭀘兄𠑷𠑸𭀙-𭀝𰃊充-兊㒫𠑹-𠑼𫤗克-兑𠑽-𠒂𭀞-𭀠𰃋兒-兖㒬𠒃-𠒋𪝿𫤘𭀡-𭀤𰃌兗-兙𠒌-𠒏𪞀𫝌𫤙𭀥党兛㒭𠒐-𠒖𪞁𭀦𭀧兜-兞㒮𠒗-𠒝𫤚𫤛𭀨𰃍兟兠𠒞-𠒣𫤜𫤝𭀩-𭀭𰃎兡𠒤-𠒦𪞂𫤞𭀮𭀯𰃏𰃐兢𠒧-𠒯𫤟𫤠𭀰-𭀳𰃑𰃒𠒰-𠒲𠒴-𠒷𪞃-𪞅𭀴兣𠒳𠒸-𠒾𭀵𰃓𰃔𠒿-𠓅𠓇𫤡𫤢𭀶-𭀸㒯𠓆𠓈-𠓊𫤣-𫤦𭀹𭀺𠓋𠓎𪞆𫤧𰃕𠓍𠓏𠓐兤𠓑-𠓔𫤨𠓕-𠓚]
[radical 11=⼊入:入兦𠓛內𠓜-𠓞㒰㒱𠓟𭀻全氽㒲𠇒𠓠-𠓣𰃖㒳㒴𠓤-𠓨𭀼-𭀾兩𠓩𭀿𰃗-𰃙兪𠓪-𠓬𭁀𰃚𰃛𠓭-𠓰𪞇𫤩𠓱𰃜𠌆𠓲𠓳𫤪𠓴-𠓼𭁁𠓽𭁂𠓾𠓿𭁃]
[radical 12=⼋八:八𠔀公-兯𠔁-𠔄兰𠔅𠔆𭁄共-兴龹𠔈𠔉𪞈𫤫𫤬𭁅𭁆𰃝-𰃟兵㒵-㒷𠔊-𠔏𭁇其-典𠔐𠔑𠩖𫤭𭁈-𭁋𰃠兹养㒸𠔒-𠔗𭁌-𭁎𰃡兺兼𠔙-𠔛𫤮𫤯𭁏𭁐𰃢𰃣兽𠔜𠔝𫤰𭁑-𭁓𰃤𠔞-𠔧𪞉𫤱𭁔-𭁖兾兿𠔨-𠔮𪞊𫤲𭁗𭁘𰃥𠔯-𠔱𠭻𪞋𭁙𪞌𭁚-𭁜冀𠔳-𠔵𪞍𠁜冁𠔶𠔷𫤳𠔸-𠔺𫤴𫤵𭁝㒹𭁞𫤶]
@ -35,7 +35,7 @@
[radical 23=⼖匸:匸-区𠥭𠤲𠥮𠥯𰅪医㔷𠥰-𠥲𰅫匼𠥳匽𠥃𠥴𠥵𭅟𠥶𰅬匾-區㔸𠥷-𠥺]
[radical 24=⼗十:十-千卅-午㔹𠥻𠥼𠥿𭅠𰅭𰅮卉半卌𠆱𠥽𠥾𠦀𠦁𰅯卋卍-卐㔺㔻𠦂-𠦐𰅰𰅱𠦑-𠦗𪟳𫝒𫧞𰅲-𰅴卑-卖龺𠦘-𠦢𣥥𫝓𫧟𫧠𰅵南単𫝔𫧡𫧢𭅡𭅢𰅶𰅷𠦣-𠦩𪟴𭅣𰅸𰅹卙𠦪-𠦯𪟵𭅤𰅺𰅻博𠦰-𠦳𫧣𭅥𭅦㔼𠦴-𠦼𪟶𫧤𫧥𭅧𰅼𠦽-𠦿𪟷𫧦𰅽𰅾𠧀-𠧄𪟸𪟹𫧧𫧨𭅨-𭅪𠧅-𠧈𪟺𫧩-𫧫𰅿𠧉-𠧍𪟻𭅫𭅬𰆀𰆁𠧎𫧬𪟼𭅭卛𠧏𫧭𠧐𭅮𠧑𰆂]
[radical 25=⼘卜:卜𠧒卝卞𪟽卟-卢𠧓𠀝𠧔-𠧘𭅯卣卤𠧙-𠧜𡥋𪟾𭅰卥-卧㔽𠧞-𠧨𫧮𰆃𠧩-𠧲𠭉𫧯𰆄-𰆆𠧳-𠧷𠧹𪟿𫧰卨𠧺-𠧿𭅱𠨀-𠨂𠨄𫧱𫧲𰆇𰆈𠨅𫧳𫧴𠨆𠨇𪠀𫧵𠨈-𠨊𰆉𫧶-𫧸𠨋𠨌]
[radical 26=⼙卩:卩𰆊卪卫㔾㔿𭅲卬𠨍-𠨐𫧹𭅳𰆋卭-卯𠨑𭅴印危𠨒-𠨗𫧺𭅵𰆌𰆍卲-卵㕀𠨘𰆎卶-卺㕁𪠁𭅶-𭅺𰆏卻-卽𠨙-𠨝𪠂𰆐𠨞𭅻𭅼卾𫧻𰆑𰆒𠨟-𠨡𫧼厀厁𠨢-𠨤𫧽𭅽𭅾𠨥𫧾𫧿𭅿𰆓𫨀𰆔𠨦-𠨪𫨁𠨫]
[radical 26=⼙卩:卩𰆊卪卫㔾㔿𭅲卬𠨍-𠨐𫧹𭅳𰆋卭-卯𠨑𭅴印危𠨒-𠨗𫧺𭅵𰆌𰆍卲-卵㕀𠨘𰆎卶-卺㕁𪠁𭅶-𭅺𰆏卻-卽𠨙-𠨝𪠂𰆐𠨞𭅻𭅼卾𫧻𰆑𰆒𠨟-𠨡𫧼厀厁𠨢-𠨤𫧽𭅽𭅾𠨥𫧾𫧿𭅿𰆓𫨀𰆔𠨦-𠨪𫨁𠨫]
[radical 27=⼚厂:厂-历𠨬厇-厉𠨭-𠨳𪠃𫨂𭆀𰆕厊-厍㕂-㕄𠂬𠂮𠨴-𠨿𭆁𰆖𰆗厎-厑㕅-㕇𠂰𠩀-𠩉𪠄𫝕𫨃𫨄𭆂𭆃𰆘厒-厕㕈㕉𠩊-𠩕𪠅𪠆𫨅-𫨈𭆄厖-厛㕊𠩗-𠩦𪠇-𪠊𫨉𫨊𭆅𰆙𰆚厜-原虒㕋-㕍𠩧-𠩷𪠋-𪠎𫝖𫝗𫨋𭆆-𭆈𰆛-𰆟厠-厣厩𠩸-𠩾𠪀-𠪆𪠏𪠐𫨌-𫨐𭆉-𭆋厤-厨㕎𠪇-𠪐𪠑𪠒𫨑-𫨗𭆌-𭆐𰆠-𰆦厪厫㕏𠩿𠪑-𠪘𪠓-𪠕𫨘-𫨛𭆑𭆒𰆧厬-厰㕐㕑𠪙-𠪬𪠖𪠗𫨜𫨝𭆓-𭆕𰆨𰆩厱厲㕒𠪭-𠪸𪠘𪠙𫨞𫨟𭆖𠪺-𠪾𫨠-𫨢𭆗厳㕓𠪿𪠚𫨣𰆪𰆫𠫀-𠫆厴𠫇𪠛𫨤𰆬𠫉-𠫎㕔𠫏𫨥𭆘𠫐厵𠫑𠫒]
[radical 28=⼛厶:厶𠫓-𠫖厷-厹㕕𠫗𠫘𫨦厺-厼𠫙-𠫝𰆭厽厾𠫞-𠫥𭆙𭆚县𠫦-𠫨𫨧叀-参𠫩-𠫭𠫯-𠫲𠫴-𠫸𪠜𫨨𫨩𭆛-𭆝𰆮㕖𠫹-𠫼𪠝𫨪𭆞-𭆢參叄㕗㕘𠫽𠫾𪠞𫨫𭆣叅𠫿-𠬅𦎅𪠟𰆯𠬆-𠬌𪠠𫨬-𫨮𭆤叆𠔲𠬍-𠬐叇㕙𠬑𠬒𪠡𠬓𠬕-𠬗𫨯𫨰𠬔𠬘𠬙𪠢𫨱]
[radical 29=⼜又:又叉𭆥及-収㕚㕛𠬚-𠬠𪠣𪠤𫨲叏-发𠬡-𠬨𭆦𰆰叒㕜𠬩-𠬰𪠥𫨳𭆧𭆨𰆱-𰆶叓㕝𠬱-𠬸𪠦𫨴-𫨸𰆷-𰆺叔-变㕞𠬹-𠬾𪠧𪠨𫨹𭆩-𭆫𰆻-𰆽叙-叝㕟𠫳𠬿-𠭈𪠩𪠪𫨺𫨻𭆬-𭆮𰆾𰆿叞叟𠭊-𠭔𪠫𪠬𫨼-𫨿𭆯𭆰𠭕-𠭤𫩀-𫩃𭆱𭆲𰇀𰇁㕠𠭥-𠭮𪠭𪠮𫩄𫩅𭆳𭆴𰇂𰇃叠﨎𠭯-𠭵𠭷-𠭺𪠯𫩆𫩇𰇄㕡𠭼-𠮂𡪞𪠰𫩈𫩉𭆵𭆶𰇅𠮃-𠮆𫩊𭆷叡𠮇-𠮌𪠱𫩋𫩌𠮍-𠮏𫩍𫩎叢𠮐壡𠮑𠮒𰇆𠮓-𠮘]
@ -53,7 +53,7 @@
[radical 41=⼨寸:寸𡬝对㝳寺-导𡬞𪧷𭔩寽-寿㝴𭔪尀㝵𡬟-𡬦𭔫𭔬封専将𡬧-𡬬𫴬𭔭-𭔯尃-尅㝶𡬭𫴭𭔰-𭔲將-尉𡬮-𡬲𪧸𪧹𭔳-𭔵尊-尌㝷𡬳-𡬸𪧺𫴮𫴯𭔶-𭔸𡬹-𡬼𫴰-𫴳𭔹-𭔼對𡬽-𡭂𭔽-𭔿𰍥導𡭃-𡭉𣊒𪧻𪧼𫴴𫴵𭕀𭕁𰍦𡭊-𡭌𪧽𭕂𡭍-𡭏𪧾𡭐𡭑𭕃𡭒𡭓𫴶𫴷]
[radical 42=⼩小:小𡭔𭕄𰍧𰍨尐少𡭕𡭖尒-尕龸𡭗𡭘𰍩尖-尘𡭙-𡭜𭕅𡭝-𡭤𪧿𫴸𫴹𭕆𭕇𰍪尙尚𠈤𡭥-𡭩𪨀𫴺𫴻𭕈𭕉尛-尝𡭪-𡭳𫴼𫴽𭕊𰍫㝸𡭴-𡭼𪨁𪨂𡭽-𡮍𫴾-𫵃尞㝹𡮎-𡮗𫵄𫵅尟尠𠅽𡮘-𡮝𭕋尡㝺𡮞-𡮥𪨃𫵆𫵇㝻𡮦-𡮯𪨄𫵈𫵉𡮰-𡮳𪨅𫵊-𫵌𭕌𡮴-𡮶𢇔𪨆𰍬𡮷-𡮻𤯏𡮼-𡮾𫵍𡮿𡯀]
[radical 43=⼪尢:尢尣𡯁𡯂尤𡯃-𡯊𫵎尥-尧㝼𡯋-𡯎𭕍尨-尬㝽𡯏-𡯙𫵏尭㝾㝿𡯚-𡯡𫵐𭕎𰍭尮尯㞀㞁𡯢-𡯤𡯦𡯧𰍮㞂𡯨-𡯲𰍯㞃-㞆𡯳-𡯸𫵑𭕏𭕐尰就㞇㞈𡯹-𡰂𪨇尲-尴㞉㞊𡰃-𡰊𪨈𭕑𭕒𡰋-𡰏𭕓尵𡰐-𡰕𫵒𡰖-𡰚尶尷𡰛-𡰢]
[radical 44=⼫尸:尸𡰣尺𪛛𰍰尻尼㞋𡰤-𡰨𡰴𭕔尽㞌㞍𡰩-𡰳𫝲尾-屃㞎𡰵-𡰽𪨉-𪨋𫵓-𫵕𰍱𰍲屄-届㞏-㞑𡰾-𡱋𪨌𫵖-𫵘𭕕-𭕘𰍳𰍴屋-屏㞒-㞖𡱌-𡱡𢇀𪨍𪨎𫵙-𫵝𭕙-𭕝𰍵-𰍷屐-屘㞗𡱢-𡱹𪨏𪨐𫵞𫵟𭕞𭕟𰍸-𰍻屙-屝㞘㞙𡱺-𡲓𪨑𫵠𭕠-𭕣𰍼-𰍾属-屡㞚㞛𡲔-𡲫𪨕-𪨘𭕤𰍿𰎀𡲬-𡳃𪨒-𪨔𫵡𭕥-𭕭𰎁屢屣㞜-㞞𡳄-𡳏𪨙𪨚𫵢-𫵥𭕮𭕯𰎂層-屧㞟㞠𡳐-𡳞𪨛𪨜𭕰-𭕵𰎃-𰎅𡳟-𡳧𪨝𫵦𫵧𰎆屨𡳨𡳩𪨞𫵨𭕶屩屪𡳪𡳫𫵩𭕷𭕸屫㞡𡳬-𡳰𪨟𪨠𫵪𫵫𭕹𭕺𰎇𡳱-𡳴屬𡳵-𡳷𫵬𫵭𭕻𡳸-𡳺𪨡𭕼屭𡳻𭕽𡳼𡳽]
[radical 44=⼫尸:尸𡰣尺𪛛𰍰尻尼㞋𡰤-𡰨𡰴𭕔尽㞌㞍𡰩-𡰳𫝲尾-屃㞎𡰵-𡰽𪨉-𪨋𫵓-𫵕𰍱𰍲屄-届㞏-㞑𡰾-𡱋𪨌𫵖-𫵘𭕕-𭕘𰍳𰍴屋-屏㞒-㞖𡱌-𡱡𢇀𪨍𪨎𫵙-𫵝𭕙-𭕝𰍵-𰍷屐-屘㞗𡱢-𡱹𪨏𪨐𫵞𫵟𭕞𭕟𰍸-𰍻屙-屝㞘㞙𡱺-𡲓𪨑𫵠𭕠-𭕣𰍼-𰍾属-屡㞚㞛𡲔-𡲫𪨕-𪨘𫵡𭕤𰍿𰎀𡲬-𡳃𪨒-𪨔𭕥-𭕭𰎁屢屣㞜-㞞𡳄-𡳏𪨙𪨚𫵢-𫵥𭕮𭕯𰎂層-屧㞟㞠𡳐-𡳞𪨛𪨜𭕰-𭕵𰎃-𰎅𡳟-𡳧𪨝𫵦𫵧𰎆屨𡳨𡳩𪨞𫵨𭕶屩屪𡳪𡳫𫵩𭕷𭕸屫㞡𡳬-𡳰𪨟𪨠𫵪𫵫𭕹𭕺𰎇𡳱-𡳴屬𡳵-𡳷𫵬𫵭𭕻𡳸-𡳺𪨡𭕼屭𡳻𭕽𡳼𡳽]
[radical 45=⼬屮:屮䶹𡳾屯㞢𡳿𭕾屰𡴀-𡴅𭕿㞣㞷𡴆-𡴚𫵮𡴛-𡴣𰎈𡴤𡴥𫵯𫵰𡴦-𡴬]
[radical 46=⼭山:山乢屲𡴭-𡴯屳-屷㞤-㞧𡴰-𡴽𫵱-𫵴𭖀屸-岃㞨-㞯𡴾-𡵒𪨢-𪨥𫝳𫵵𭖁-𭖄𰎉岄-岌岎-岜㞰-㞶㞸𡵓-𡶂𪨦-𪨨𫵶𫵷𭖅-𭖊𰎊-𰎍岝-峅㞹-㟃𡶃-𡶣𡶥-𡶪𪨩-𪨮𫵸𫵹𭖋-𭖔𰎎-𰎓岍峆-峧㟄-㟆𡶫-𡷔𪨯-𪨴𫝴𫵺𫵻𭖕-𭖞𰎔-𰎞峨-崅㟇-㟖𡷕-𡸐𦊤𪨵-𪨺𫝵𫵼-𫶀𭖟-𭖨𰎟-𰎤崆-崰㟗-㟥𡸑-𡹩𪨻-𪩁𫶁-𫶅𭖩-𭖱𰎥-𰎨崱-嵉嵋-嵝﨑㟦-㟯𡹪-𡺩𪩂-𪩇𫶆-𫶉𭖲-𭖿𰎩-𰎭嵊嵞-嵶㟰-㟸𡺪-𡻗𪩈-𪩋𫶊-𫶐𭗀-𭗆𰎮-𰎰嵷-嶎㟹-㠄𡻘-𡼉𪩌-𪩒𫶑-𫶔𭗇-𭗓𰎱-𰎴嶏-嶥㠅-㠐𡼊-𡼼𪩓-𪩖𫶕𫶖𭗔-𭗞𰎵-𰎸嶦-嶶㠑-㠗𡼽-𡽈𡽊-𡽛𪩗-𪩚𫶗-𫶜𭗟𭗠𰎹𰎺嶷-嶺嶼-嶿㠘-㠜𡽉𡽜-𡾁𡾻𪩛-𪩝𫶝𭗡-𭗤𰎻巀-巂㠝-㠟𡾂-𡾖𡾠𫶞𭗥-𭗫𰎼-𰎾嶻巃-巅㠠-㠣𡾘-𡾟𡾡-𡾭𣦭𪩞𫶟-𫶣𭗬-𭗱𰎿-𰏁巆-巌㠤𡾮-𡾺𭗲-𭗴巍巏巐㠥㠦𡾼-𡿆𪩟𫶤𭗵𭗶巎巑-巕巗㠧𡿇-𡿑𪩠𭗷巖巘-巚𡿒𡿓𭗸𡿔-𡿜𫶥𭗹𡿝-𡿟𫶦𰏂𡿠𡿡𡿣𡿤㠨𡿢𡿥]
[radical 47=⼮巛:巛-川𡿦𡿨𫶧𡿧𭗺州巟㠩𡿩-𡿭𭗻𭗼巠𡿮-𡿰𪩡𭗽𭗾𡿱-𡿶𭗿𡿷-𡿻𪩢𭘀𰏃𰏄𠙗𡿼𡿽𫶨𭘁巢巣𡿾-𢀀𫶩𢀁𢀂𰏅巤𢀃𰏆𢀄-𢀋𫶪𢀌-𢀐]
@ -68,12 +68,12 @@
[radical 56=⼷弋:弋𢍺𫠠弌𢍻𢍼弍𭚟-𭚡弎-弐𢍽𢍾㢤𢍿-𢎅𰐍㢥𢎆-𢎌𰐎-𰐐𢎍𭚢弑𢎎弒㢦𢎏𢎐𣦏𭚣𭚤𰐑𢎑𢎒𰐒𢎓-𢎖]
[radical 57=⼸弓:弓𢎗𢎘弔-弖㢧𢎙-𢎡𭚥弗弘𢎢-𢎪弙-弜㢨-㢫𢎫-𢎶𪪺𫸥-𫸧𭚦弝-张㢬㢭𢎷-𢏅𫸨-𫸪𭚧𭚨弡-弪㢮-㢱𢏆-𢏓𪪻𫸫-𫸭𭚩-𭚫𰐓𰐔弫-弯㢲-㢷𠄴𠔘𢏔-𢏣𪪼𪪽𫸮-𫸲𭚬-𭚰𰐕𰐖弰-弳㢸㢹𢏤-𢏭𪪾𫸳-𫸶𭚱-𭚵𰐗𰐘弴-弹㢺-㢼𢏮-𢏿𪪿𫸷𫸸𭚶-𭚹𰐙-𰐛强-弾㢽-㢿𢐀-𢐉𪫀𪫁𫸹𫸺𭚺-𭛁𰐜-𰐞弿-彂㣀-㣂𢐊-𢐑𭛂𭛃彃-彅㣃𢐒-𢐝𪫂𫸻𭛄𰐟彆-彉㣄㣅𢐞-𢐤𫸼𫸽𭛅-𭛇𰐠彊彋𢐥-𢐪𫸾𫸿𭛈𭛉彌𢐬-𢐱𪫃𫹀𭛊𭛋彍𢐲-𢐷𫹁𭛌𢐸-𢐾𤯽𢐿-𢑅𫹂𰐡㣆𢑆-𢑉𪫄𫹃𰐢彎𢑊彏𢑋𢑌𪫅𢑍𢑎]
[radical 58=⼹彐:彐-当𢑏-𢑒𪫆𢑓彔录㣇𢑔𢑕𫹄𭛍𭛎彖𢑗-𢑚𭛏𢑛-𢑝彗𢑞-𢑡𭛐𰐣彘𢑢-𢑤𫹅𫹆𭛑𭛒彙彚𢑥-𢑧𰐤𰐥𢑨-𢑮𣼙𫹇𫹈彛彜㣈𢑯𢑱𢑲彝彞𢑳-𢑶𪫇彟𢑷-𢑾彠𢑿]
[radical 59=⼺彡:彡𢒀-𢒂㣉𢒃-𢒆形-彤㣊𢒇𢒈𰐦㣋-㣍𢒉-𢒍𭛓𰐧彥彦𢒎𫝸𰐨彧彨𢒏-𢒓𣥲𪫈𫹉𰐩𰐪彩彫彬㣎𢒔-𢒝彭㣏㣐𢒞-𢒣𰐫彮𢒤-𢒨𭛔𭛕彯彰㣑𢒩𢒪𭛖-𭛘影㣒𢒫-𢒯𪫉𭛙-𭛛㣓𢒰-𢒴𪫊𫹊𢒵𢒷𢒶彲𢒸-𢒻]
[radical 59=⼺彡:彡𢒀-𢒂㣉𢒃-𢒆形-彤㣊𢒇𢒈𰐦㣋-㣍𢒉-𢒍𭛓𰐧彥彦𢒎𫝸𰐨彧彨𢒏-𢒓𣥲𪫈𫹉𰐩𰐪彩彫彬㣎𢒔-𢒝彭㣏㣐𢒞-𢒣𰐫彮𢒤-𢒨𭛔𭛕彯彰㣑𢒩𢒪𭛖-𭛘影㣒𢒫-𢒯𪫉𭛙-𭛛㣓𢒰-𢒴𪫊𢒵𫹊𢒷𢒶彲𢒸-𢒻]
[radical 60=⼻彳:彳𢒼𢒽㣔𢒾𢒿𫹋彴彵㣕𢓀-𢓃𫹌𭛜𭛝𰐬彶-彻鿈鿉㣖㣗𢓄-𢓑𪫋𫹍-𫹐𭛞彼-径㣘㣙𢓒-𢓛𪫌𫹑𭛟-𭛦𰐭待-徍徔㣚-㣡𢓜-𢓩𫹒𫹓𭛧-𭛩徎-従徕㣢㣣𢓪-𢔀𪫍𪫎𫹔𫹕𭛪-𭛮𰐮𰐯徖-徙徛-徤㣤-㣩𢔁-𢔞𪫏𪫐𫹖-𫹘𭛯-𭛶𰐰-𰐳徚徥-徫㣪-㣮𢔟-𢔫𢔭-𢔱𪫑-𪫓𫹙-𫹝𭛷-𭛼𰐴𰐵徬-徰㣯𢔲-𢕎𢕧𪫔𪫕𫹞-𫹠𭛽𰐶徱徳徴㣰-㣲𢕏-𢕦𢕨𢕩𪫖-𪫘𫹡𫹢𭛾-𭜀𰐷徲徵-徺㣳㣴𢕪-𢕸𫹣-𫹥𭜁-𭜅徻徼㣵㣶𢕹-𢖄𢖆𢖇𪫙𭜆-𭜈𰐸徽徾㣷𢖈-𢖊𢖌-𢖎𪫚𫹦𢖏-𢖗𪫛𪫜𫹧𫹨𭜉𰐹徿㣸𢖘-𢖚𢖜𭜊忀忁㣹𢖝-𢖠𢖢忂𢖣𢖤𫹩𢖥𢖦𫹪𢖧𢖨]
[radical 61=⼼心:心忄𢖩必忆㣺𢖪𰐺忇-忊㣻𢖫-𢖱𪫝𫹫-𫹭𰐻𰐼忋-応㣼-㤀𢖲-𢗈𪫞𫹮-𫹱𭜋-𭜎𰐽-𰑀忝-怆㤁-㤋𢗉-𢗾𪫟-𪫥𫝹𫹲-𫹹𭜏-𭜘𰑁-𰑉怇-怿㤌-㤙𢗿-𢘳𪫦-𪫬𫹺-𫹿𭜙-𭜧𰑊-𰑓恀-恽㤚-㤭㤺𢘴-𢙰𪫭-𪫴𫺀-𫺋𭜨-𭜵𰑔-𰑛恾-悯㤮-㤹㤻-㤽𢙱-𢚿𪫵-𪫾𫺌-𫺕𭜶-𭝋𰑜-𰑦悰-惯㤾-㥙𢛀-𢜧𪫿-𪬆𫺖-𫺟𭝌-𭝠𰑧-𰑯惰-愦慨㥚-㥫𢜨-𢞌𪬇-𪬒𫺠-𫺭𭝡-𭝶𰑰-𰑻愧-慑㥬-㥻𢞍-𢟟𪬓-𪬞𫺮-𫺷𭝷-𭞌𰑼-𰒄慒-慧慩-慬慮-憈㥼-㦈𠕫𢟠-𢠰𦑑𪬟-𪬧𫺸-𫻂𭞍-𭞥𰒅-𰒊慭憉-憳㦉-㦖𢠱-𢢑𪬨-𪬭𫻃-𫻉𭞦-𭞹𰒋-𰒓憴-憺憼-懔㦗-㦙𢢒-𢣍𦡗𪬮-𪬶𫻊-𫻏𭞺-𭟄𰒔-𰒖憻懕-懨㦚-㦠𢣎-𢣺𪬷-𪬹𫻐-𫻒𭟅-𭟌𰒗-𰒚懩-懴㦡㦢𢣻-𢤥𦻇𪬺-𪬾𫻓-𫻙𭟍-𭟗𰒛-𰒞懵-懸㦣-㦧𢤦-𢥊𪬿𪭀𫻚-𫻜𭟘-𭟞𰒟𰒠懹-懻㦨-㦪𢥋-𢥖𪭁𪭂𫻝-𫻡𭟟-𭟢𰒡-𰒣懼-懿𢥗-𢥤𪭃𪭄𫻢-𫻤𭟣-𭟦戀-戂㦫㦬𢥥-𢥮𪭅𪭆𭟧戃戄𢥯-𢥺𪭇戅戆𢥻-𢦂𭟨-𭟪𪭈𭟫𭟬𢦃𢦄𭟭戇㦭𢦅-𢦇𫻥𢦈-𢦋]
[radical 62=⼽戈:戈-戋𢦌𢦍𪭉𭟮戌-成𢦎-𢦑𪭊𫻦𫻧我-戓㦮㦯𢦒-𢦙𫻨𭟯戔-戗㦰-㦲𢦚-𢦥𫻩𫻪𭟰-𭟲𰒤𰒥战㦳𢦦-𢦶𪭋𪭌𫻫𫻬𭟳𰒦-𰒨戙㦴-㦶𢦷-𢦾𪭍𪭎𫻭-𫻱𰒩戚-戝㦷𢦿-𢧇𪭏𪭐𫻲-𫻶𭟴𭟵𰒪戞戟戦㦸𢧈-𢧓𪭑𫻷-𫻾𭟶𰒫戠-戥㦹𢧔-𢧡𪭒𫻿𫼀𭟷𰒬-𰒮戧-戬㦺𠎶𢧢-𢧧𢧩-𢧱𪭓𫼁𭟸𭟹𰒯戭-戯戱㦻㦼𢧨𢧲-𢨆𫼂𫼃𭟺-𭟼𰒰𰒱戰𢨇-𢨏𢨑𢨒𪭔-𪭖𫼄-𫼈𭟽𰒲𰒳戲戴㦽𢨓-𢨗𭟾戳𢨘-𢨣戵𪭗]
[radical 63=⼾戶:戶-戹𢨤𢨥𭟿𢨦-𢨩戺-戼㦾𢨪-𢨮𭠀戽-所㦿㧀𢨯-𢨵𫼉𭠁𭠂扁-扃㧁㧂𢨶-𢩄𪭘𫼊𭠃-𭠆扄-扇𢩅-𢩌𫼋𫼌扈𢩍-𢩐𪭙𫼍𫼎扉扊𢩑-𢩕𫼏𫼐𭠇-𭠉𰒴𢩖𢩗𤟵𫼑𢩘-𢩞𪭚𭠊𰒵𢩟𭠋𰒶𢩠𢩡𨐳𢩢𪭛𢩣𢩤𭠌𫼒]
[radical 64=⼿手:手-才龵𭠍扎𢩥-𢩧𫼓扐-扖㧃-㧅𢩨-𢩬𫼔𭠎𰒷-𰒻扏扗-扞扠-扬㧆-㧈𢩭-𢪂𪭜𫼕-𫼗𭠏𭠐𰒼𰒽扟扭-扷批-报㧉-㧑𢪃-𢪳𪭝-𪭣𫼘-𫼛𭠑-𭠖𰒾-𰓋扸抦-择㧒-㧠𢪴-𢫣𪭤-𪭪𫝺𫼜-𫼡𭠗-𭠡𰓌-𰓖拪-挧㧡-㧱𢫤-𢬮𪭫-𪭳𫝻𫼢-𫼰𭠢-𭠲𰓗-𰓪挨-捤㧲-㧸𢬯-𢮀𪭴-𪮀𫼱-𫽀𭠳-𭡈𰓫-𰓹捥-掱掳-掽㧹-㨈𢮁-𢯨𪮁-𪮋𫽁-𫽖𭡉-𭡝𰓺-𰔄掲掾-揄揆-搅摒㨉-㨕㨗㨘𢯩-𢰾𢱀-𢱞𪮌-𪮖𫝼𫽗-𫽨𭡞-𭡱𰔅-𰔑搆-搰搲-摊㨖㨙-㨬𢱟-𢲲𪮗-𪮢𫝽𫽩-𫽶𭡲-𭢆𰔒-𰔞揅摋-摑摓-摕摗-撄撇㨭-㩂𢲳-𢴔𢴖-𢴟𪮣-𪮪𫝾𫽷-𫾁𭢇-𭢖𰔟-𰔥搱摖撅撆撈-撺擆㩃-㩑𢴕𢴠-𢵾𢷅𪮫-𪮴𫾂-𫾉𭢗-𭢢𰔦-𰔩撻-擅擇-擞㩒-㩙𢵿-𢷄𢷆-𢷊𪮵-𪮷𫾊-𫾏𭢣-𭢱𰔪-𰔲擟-擤擦-擱㩚-㩟𢷋-𢷵𪮸-𪮺𫾐-𫾒𭢲-𭢽𰔳-𰔷擥擲-攆㩠-㩫𢷶-𢸢𪮻-𪯀𫝿𫾓-𫾘𭢾-𭣃𰔸𰔹攇-攐攒㩬㩭𢸣-𢹌𪯁𫾙𫾚𭣄-𭣋𰔺攑攓-攚㩮-㩰𢹍-𢹡𢹣-𢹫𪯂𫾛𭣌-𭣐𰔻𰔼攛-攝㩱-㩴𢹢𢹬-𢺁𫾜-𫾞𭣑𭣒𰔽𰔾攞-攤攦攧鿜𢺂-𢺕𪯃𫾟𫾠𰔿攥攨-攫㩵-㩷𢺖-𢺝𪯄𫾡攬攭𢺞-𢺫𫾢攮㩸㩹𢺬-𢺳𭣓𢺴]
[radical 64=⼿手:手-才龵𭠍扎𢩥-𢩧𫼓扐-扖㧃-㧅𢩨-𢩬𫼔𭠎𰒷-𰒻扏扗-扞扠-扬㧆-㧈𢩭-𢪂𪭜𫼕-𫼗𭠏𭠐𰒼𰒽扟扭-扷批-报㧉-㧑𢪃-𢪳𪭝-𪭣𫼘-𫼛𭠑-𭠖𰒾-𰓋扸抦-择㧒-㧠𢪴-𢫣𪭤-𪭪𫝺𫼜-𫼡𭠗-𭠡𰓌-𰓖拪-挧㧡-㧱𢫤-𢬮𪭫-𪭳𫝻𫼢-𫼰𭠢-𭠲𰓗-𰓪挨-捤㧲-㧸𢬯-𢮀𪭴-𪮀𫼱-𫽀𭠳-𭡈𰓫-𰓹捥-掱掳-掽㧹-㨈𢮁-𢯨𪮁-𪮋𫽁-𫽖𭡉-𭡝𰓺-𰔄掲掾-揄揆-搅摒㨉-㨕㨗㨘𢯩-𢰾𢱀-𢱞𪮌-𪮖𫝼𫽗-𫽨𭡞-𭡱𰔅-𰔑搆-搰搲-摊㨖㨙-㨬𢱟-𢲲𪮗-𪮢𫝽𫽩-𫽶𭡲-𭢆𰔒-𰔞揅摋-摑摓-摕摗-撄撇㨭-㩂𢲳-𢴔𢴖-𢴟𪮣-𪮪𫝾𫽷-𫾁𭢇-𭢖𰔟-𰔥搱摖撅撆撈-撺擆㩃-㩑𢴕𢴠-𢵾𢷅𪮫-𪮴𫾂-𫾉𭢗-𭢢𰔦-𰔩撻-擅擇-擞㩒-㩙𢵿-𢷄𢷆-𢷊𪮵-𪮷𫾊-𫾏𭢣-𭢱𰔪-𰔲擟-擤擦-擱㩚-㩟𢷋-𢷵𪮸-𪮺𫾐-𫾒𭢲-𭢽𰔳-𰔷擥擲-攆㩠-㩫𢷶-𢸢𪮻-𪯀𫝿𫾓-𫾘𭢾-𭣃𰔸𰔹攇-攐攒㩬㩭𢸣-𢹌𪯁𫾙𫾚𭣄-𭣋𰔺攑攓-攚㩮-㩰𢹍-𢹡𢹣-𢹫𪯂𫾛𭣌-𭣐𰔻𰔼攛-攝㩱-㩴𢹢𢹬-𢺁𫾜𫾞𭣑𭣒𰔽𰔾攞-攤攦攧鿜𢺂-𢺕𪯃𫾝𫾟𫾠𰔿攥攨-攫㩵-㩷𢺖-𢺝𪯄𫾡攬攭𢺞-𢺫𫾢攮㩸㩹𢺬-𢺳𭣓𢺴]
[radical 65=⽀支:支𭣔攰𢺵𭣕𢺶𭣖𰕀㩺𢺷-𢺽𪯆攱𢺾-𢻀𭣗𭣘㩻㩼𢻁-𢻇𢻉𰕁㩽𢻊-𢻍𪯅攲㩾𢻎-𢻙𭣙𭣚𰕂𢻚-𢻜𫾣𢻝-𢻟𪯇𭣛攳𢻠-𢻤𫾤𭣜𭣝𢻥-𢻩𭣞𫾥𭣟𢻪𭣠]
[radical 66=⽁攴:攴-攷𫾦𭣡𰕃攸-攼㩿㪀𢻫-𢻲𫾧𭣢𭣣攽-政㪁㪂𢻳-𢼈𪯈𫾨𭣤𰕄-𰕆敀-故㪃-㪆𢼉-𢼚𪯉𪯊𫾩𭣥-𭣩𰕇敆-敌㪇-㪉𢼛-𢼸𣁋𪯋𪯌𫾪-𫾰𭣪-𭣱𰕈啟敍-敛㪊-㪎𢼹-𢽙𢽜𢽷𪯍𪵊𫾱-𫾸𭣲-𭣵𰕉𰕊敜-敪敭㪏-㪚𢽚𢽛𢽝-𢽶𢽸-𢽾𢾣𪯎𪯏𫾹-𫿀𭣶-𭣻𰕋-𰕎敫敬敮-数㪛-㪟𢽿-𢾢𢾤𪯐𫿁-𫿇𭣼-𭤂𰕏𰕐敱-敳㪠-㪣𢾦-𢿅𪯑𪯒𫿈-𫿌𭤃-𭤇𰕑整-敻㪤-㪦𢿇-𢿝𣁛𪯓-𪯖𫿍-𫿐𭤈-𭤋𰕒𰕓敼-敿㪧-㪪𢿞-𢿽𪯗-𪯙𫿑-𫿗𰕔𰕕斀-斂𢿾-𣀃𣀅-𣀏𪯚𫿘-𫿚𭤌𭤍𰕖𰕗斃贁㪫㪬𣀄𣀐-𣀚𪯛𪯜𫿛-𫿝𭤎𭤏斄𣀛-𣀢𩠮𫿞-𫿢𰕘斅斆㪭𣀣-𣀭𩠰𫿣-𫿦𰕙㪮𣀮-𣀲𪯝𪯞𫿧𫿨𭤐𭤑𣀳𣀴𫿩𭤒𣀵-𣀺𪯟𫿪-𫿬𣀻𣀼𭤓𣀽-𣁀]
[radical 67=⽂文:文𣁁𫿭斈㪯𣁂𪯠𭤔斉𣁃-𣁅𪯡𪯢𣁆-𣁉𪯣𫿮𫿯𭤕-𭤙𰕚斊斋㪰𣁊𣁌𣁍𪯤𫿰斌-斏𣁎-𣁓𣁚斐斑𣁔-𣁖𪯥-𪯧𫿱斒㪱𣁗-𣁙𩖰𪯨-𪯪𰕛𣁜-𣁡斓𣁢-𣁤𭤚𭤛𣁥𣁦𰕜斔𣁧𣁨𫿲斕𣁩𣁪斖𣁫]
@ -84,7 +84,7 @@
[radical 72=⽇日:日-旧𣄻-𣄽𭥋旨-旯㫐㫑𣄾-𣅃𣅅𬀦𭥌-𭥎𰕱𰕲旰-旸㫒-㫗𣅄𣅆-𣅙𪰆𪰇𬀧𬀨𭥏-𭥑𰕳旹-昙㫘-㫚𣅚-𣅶𪰈-𪰎𬀩-𬀫𭥒-𭥠𰕴-𰕷昚-昿㫛-㫤𣅷-𣆈𣆊-𣆑𪰏-𪰘𫞂𫞃𬀬-𬀮𭥡-𭥭𰕸-𰕾晀-晖㫥-㫮𣆒-𣆱𣈰𪰙-𪰤𫞄𬀯-𬀴𭥮-𭦃𰕿-𰖆勗晗-晩曽㫯-㫴𣆲-𣇡𪰥-𪰨𬀵-𬀺𭦄-𭦘𰖇-𰖌晪-暃暑㫵-㬀𣇢-𣈣𪰩-𪰮𬀻-𬁁𭦙-𭦧𰖍-𰖕暄-暐暒-暙㬁-㬌𣈤-𣈯𣈱-𣉑𪰯-𪰷𬁂-𬁉𭦨-𭧁𰖖-𰖚暚-暨㬍-㬐𣉒-𣉠𣉢-𣉺𪰸-𪰽𬁊-𬁏𭧂-𭧑𰖛-𰖠暩-暷㬑-㬖𣉻-𣊈𣊊-𣊑𣊓-𣊝𣋇𪰾-𪱅𬁐-𬁒𭧒-𭧡𰖡-𰖪暸-曍龧㬗-㬟𣊉𣊞-𣋆𣋈𪱇-𪱌𬁓-𬁘𭧢-𭧴𰖫-𰖮曎-曗曚㬠-㬢𠁞𣋉-𣋝𪱆𪱍𬁙-𬁛𭧵-𭧾𰖯-𰖲曘曙曛曜㬣-㬨𠆛𣋞-𣋱𥫂𪱎-𪱐𬁜𭧿-𭨉𰖳-𰖵曝-曢㬩㬪𣋲-𣋾𭨊-𭨌𰖶-𰖸曣-曨㬫𣋿-𣌇𬁝𬁞𭨍-𭨐曩𣌈-𣌌𪱑-𪱓𭨑𰖹-𰖻㬬㬭𣌍-𣌕𰖼曪-曬㬮𣌖𭨒𰖽曭曮𣌗-𣌙𣌛𪱔𭨓𭨔曯㬯𣌜𣌝𭨕𭨖𰖾𭨗𣌟𣌠]
[radical 73=⽈曰:曰-曳㬰𭨘更曵𣌡𣌢𬁟𬁠曶𣌣-𣌦𬁡曷𣆉𣌧-𣌬𭨙𭨚書曺𣌭-𣌶𬁢𭨛𭨜曹曻曼𣌷-𣌺𬁣𬁤曾替朁朂𣌻-𣌽𬁥-𬁧𭨝𰖿𰗀會𣌾-𣍂朄朅𣍃-𣍅𬁨-𬁪𭨞𰗁𣍆-𣍊𪱕𬁫𭨟-𭨢朆㬱𣍋-𣍎𪱖𪱗㬲𣍏𣍐𬁬𣍑-𣍕𰗂𣍖𣍗𪱘𣍘𣍙𬁭-𬁯朇𭨣𣍚𠑱𰗃𣍛𣍜]
[radical 74=⽉月:月𣍝有𣍞𪱙𬁰𭨤𭨥𣍟-𣍣𪱚𬁱-𬁳𭨦𭨧朊-服㬳䶺䶻𣍤𦙗𪱛𬁴-𬁸𭨨-𭨭𰗄朎-朐𣍥-𣍧𣍩-𣍬𪱜𪱝𬁹𭨮-𭨱𰗅朒-朕㬴㬵𣍭𣍮𪱞𫞅𬁺-𬁼𭨲𭨳𰗆𰗇朖-望㬶㬷𣍱-𣍶𪱠-𪱢𬁽-𬂁𭨴-𭨸朜-期㬸䶼𣍷-𣎃𦜳𪱣𫞆𫞇𬂂-𬂅𭨹-𭨽𰗈𰗉朠䶽𣎄-𣎌𪱤-𪱧𬂆𭨾-𭩀𰗊朢㬺㬻𣎍-𣎑𪱟𪱨-𪱪𬂇-𬂌𭩁-𭩃𰗋膤㬼-㬾𣎒-𣎙𪱫𪱬𬂍𬂎𭩄𭩅𰗌𰗍朣朤㬿𣎚𣎛𣎝-𣎢𪱭𬂏𬂐𭩆-𭩉𰗎㭀𣎜𣎣-𣎨𦡖𬂑-𬂔𭩊-𭩎朦𣎩-𣎭𬂕𬂖𭩏-𭩑𰗏𬂗𬂘𭩒-𭩕朧𣎮𣎯𪱮𪱯𬂙𣎰𣎱𫇃𭩖𣎲𪱰𭩗𬂚]
[radical 75=⽊木:木朩𣎳𣎴𬂛未-朰𣎵-𣎷𬂜𭩘朱-权㭁𣎸-𣏀𰗐-𰗒杄-杩极㭂-㭆𣏁-𣏓𪱱-𪱵𬂝-𬂟𭩙-𭩛𰗓杪-枀枂-枭㭇-㭐𣏔-𣐇𪱶-𪱹𫞈𬂠-𬂤𭩜-𭩡𰗔-𰗗枮-树㭑-㭗𣐈-𣐴𪱺-𪲂𬂥𬂦𭩢-𭩲𰗘-𰗛栒-桪鿄㭘-㭧𣐵-𣑦𣑨-𣑾𪲃-𪲏𫞉𫞊𬂧-𬂱𭩳-𭪂𰗜-𰗦桫-棂㭨-㭵㭷𣑿-𣓀𪲐-𪲞𬂲-𬂼𭪃-𭪗𰗧-𰗰棃-椮㭶㭸-㮊𣓁-𣔰𪲟-𪲯𬂽-𬃓𭪘-𭪵𰗱-𰗹椯-榉榋榌榔榘﨓㮋-㮠㮢㮣𣔱-𣕣𣕥-𣖩𪲰-𪳇𬃔-𬃮𭪶-𭫎𰗺-𰘆榊榍-榓榕-榗榙-槡槩樮﨔㮤-㮽𣕤𣖪-𣘔𪳈-𪳜𫞋𫞌𬃯-𬄆𭫏-𭫟𰘇-𰘕槢-槨槪-樭樯-樱橥橴㮾-㯏㯑㯒㯠𣘕-𣙑𣙓-𣙺𪳝-𪳬𫞍-𫞏𬄇-𬄘𭫠-𭫺𰘖-𰘤樲-橤橦-橳橵-橼㯐㯓-㯟㯡-㯩𣙒𣙻-𣛯𪳭-𪳼𫞐𬄙-𬄫𭫻-𭬉𰘥-𰘲橽-檪㯪-㯶𣛰-𣜡𣜣-𣝀𪳽-𪴈𫞑𬄬-𬄶𭬊-𭬑𰘳-𰘺檫-櫊㯷-㯽𣝁-𣝘𣝚-𣞎𪴉-𪴏𫞒𬄷-𬅀𭬒-𭬝𰘻-𰘽櫋-櫦櫫櫭㯾-㰈𣝙𣞐-𣟃𣟑𪴐-𪴕𬅁-𬅈𭬞-𭬨𰘾-𰙄櫧-櫪櫬櫮-櫶㰉-㰓㰖𣟄-𣟐𣟒-𣟮𪴖-𪴚𬅉𬅊𭬩-𭬳櫸-欅欌㰔㰕𣟯-𣠁𣠃-𣠘𪴛-𪴞𬅋-𬅏𭬴-𭬷𰙅-𰙈櫷欆-欋欍欎㰗㰘𣠂𣠙-𣠶𪴟-𪴡𬅐-𬅓𭬸-𭬺欏-欒㰙-㰜𣠷-𣡊𪴢𪴣𬅔𭬻-𭭀欓-欕𣡋-𣡝𥤔𪴤𪴥𬅕-𬅘欖-欛𣡞-𣡨𣡮𪴦欜欝𣡩-𣡭𣡯-𣡱𪴨𬅙𬅚𭭁鿝𣡲𣡴𣡵𪴧𬅛欞欟𣡶-𣡸𰙉𣡹𭭂𣡺-𣡼𬅜𭭃𣡽-𣡿]
[radical 75=⽊木:木朩𣎳𣎴𬂛未-朰𣎵-𣎷𬂜𭩘朱-权㭁𣎸-𣏀𰗐-𰗒杄-杩极㭂-㭆𣏁-𣏓𪱱-𪱵𬂝-𬂟𭩙-𭩛𰗓杪-枀枂-枭㭇-㭐𣏔-𣐇𪱶-𪱹𫞈𬂠-𬂤𭩜-𭩡𰗔-𰗗枮-树㭑-㭗𣐈-𣐴𪱺-𪲂𬂥𬂦𭩢-𭩲𰗘-𰗛栒-桪鿄㭘-㭧𣐵-𣑦𣑨-𣑾𪲃-𪲏𫞉𫞊𬂧-𬂱𭩳-𭪂𰗜-𰗦桫-棂㭨-㭵㭷𣑿-𣓀𪲐-𪲞𬂲-𬂷𬂹-𬂼𭪃-𭪗𰗧-𰗰棃-椮㭶㭸-㮊𣓁-𣔰𪲟-𪲯𬂸𬂽-𬃓𭪘-𭪵𰗱-𰗹椯-榉榋榌榔榘﨓㮋-㮠㮢㮣𣔱-𣕣𣕥-𣖩𪲰-𪳇𫣙𫣚𬃔-𬃮𭪶-𭫎𰗺-𰘆榊榍-榓榕-榗榙-槡槩樮﨔㮤-㮽𣕤𣖪-𣘔𪳈-𪳜𫞋𫞌𬃯-𬄆𭫏-𭫟𰘇-𰘕槢-槨槪-樭樯-樱橥橴㮾-㯏㯑㯒㯠𣘕-𣙑𣙓-𣙺𪳝-𪳬𫞍-𫞏𬄇-𬄘𭫠-𭫺𰘖-𰘤樲-橤橦-橳橵-橼㯐㯓-㯟㯡-㯩𣙒𣙻-𣛯𪳭-𪳼𫞐𬄙-𬄫𭫻-𭬉𰘥-𰘲橽-檪㯪-㯶𣛰-𣜡𣜣-𣝀𪳽-𪴈𫞑𬄬-𬄶𭬊-𭬑𰘳-𰘺檫-櫊㯷-㯽𣝁-𣝘𣝚-𣞎𪴉-𪴏𫞒𬄷-𬅀𭬒-𭬝𰘻-𰘽櫋-櫦櫫櫭㯾-㰈𣝙𣞐-𣟃𣟑𪴐-𪴕𬅁𬅃-𬅈𭬞-𭬨𰘾-𰙄櫧-櫪櫬櫮-櫶㰉-㰓㰖𣟄-𣟐𣟒-𣟮𪴖-𪴚𬅂𬅉𬅊𭬩-𭬳櫸-欅欌㰔㰕𣟯-𣠁𣠃-𣠘𪴛-𪴞𬅋-𬅏𭬴-𭬷𰙅-𰙈櫷欆-欋欍欎㰗㰘𣠂𣠙-𣠶𪴟-𪴡𬅐-𬅓𭬸-𭬺欏-欒㰙-㰜𣠷-𣡊𪴢𪴣𬅔𭬻-𭭀欓-欕𣡋-𣡝𥤔𪴤𪴥𬅕-𬅘欖-欛𣡞-𣡨𣡮𪴦欜欝𣡩-𣡭𣡯-𣡱𪴨𬅙𬅚𭭁鿝𣡲𣡴𣡵𪴧𬅛欞欟𣡶-𣡸𰙉𣡹𭭂𣡺-𣡼𬅜𭭃𣡽-𣡿]
[radical 76=⽋欠:欠-欢𣢀𭭄欤㰝㰞𣢁-𣢈𬅝𬅞欣欥-欧㰟-㰢𣢉-𣢖𪴩𬅟𭭅𭭆𰙊𰙋欨-欪㰣-㰨𣢗-𣢦𪴪𪴫𬅠𬅡𰙌欫-欱㰩-㰭䶾𣢧-𣢹𬅢𬅣𭭇-𭭋欲-欸㰮-㰱𣢺-𣣇𪴬𪴭𬅤-𬅨𭭌𰙍𰙎欹-欿㰲-㰶㰸𣣈-𣣜𪴮𬅩𬅪𭭍-𭭏𰙏-𰙑歀-歈㰹-㰾𣣝-𣣫𣣭-𣣱𪴯𪴰𬅫𬅬𭭐𭭑𰙒-𰙕歉-歍㰿-㱁𣣲-𣤇𪴱𬅭-𬅯𭭒𰙖𰙗歎-歓㱂㱃𣤈-𣤗𪴲𭭓𭭔𰙘歔-歚㱄㱅𣤘-𣤟𬅰𬅱𭭕𭭖歛-歝㱆-㱉𣤠-𣤧𬅲𰙙歞歟𣤨-𣤫𬅳歠㱊𣤬-𣤰㱋𣤱-𣤳𪴳𭭗𣤴𣤵𣤷𣤸𬅴歡㱌𣤶𣤹-𣤼㱍𣤽𬅵𣤾𪴴㱎𣤿-𣥁]
[radical 77=⽌止:止龰𣥂正𣥃𣥄此㱏𣥅𣥆𬅶𬅷𭭘-𭭚𰙚步㱐㱑𣥇𣥈𣥊-𣥌𬅸𰙛𰙜武-歩𣥉𣥍-𣥡𪴵𬅹𭭛-𭭝𰙝-𰙟歪歫㱒-㱔𣥢-𣥤𬅺𬅻𭭞-𭭠𰙠-𰙣歬歭𣥦-𣥱𪴶𬅼𬅽𭭡𰙤-𰙩𣥳-𣥸𪴷𬅾-𬆀𭭢-𭭥𰙪歮歯㱕㱖𣥹-𣦆𪴸𬆁-𬆃𭭦-𭭩𰙫-𰙮歰-歳𣦇-𣦊𣦌-𣦎𤚜𪴹𪴺𫞓𬆄𬆅𭭪𭭫𰙯-𰙳歴㱗𣦐-𣦓𪴻𬆆𬆇𭭬𭭭𰙴𰙵歵歶𣦔-𣦛𪴼𬆈𬆉𭭮-𭭱𰙶𰙷歷𣦜-𣦠𪴽𪴾𬆊𬆋𭭲-𭭴𰙸𰙹𣦡-𣦥𪴿𬆌𬆍𭭵𭭶歸㱘𣦦-𣦫𬆎𭭷𣦬𬆏𬆐𰙺𣦮𭭸-𭭻𰙻𣦯-𣦴𭭼]
[radical 78=⽍歹:歹𣦵𣦶歺𣦷死㱙𣦸-𣦾𭭽-𭭿歼𣦿-𣧈𭮀𭮁𰙼歽-殁㱚-㱝𣧉-𣧜𭮂-𭮅殂-殇㱞-㱠𣧝-𣧱𪵀𪵁𬆑𬆒𭮆-𭮈殈-残㰷㱡𣧲-𣨄𪵂𪵃𬆓-𬆕𭮉-𭮑𰙽𰙾殌-殓㱢㱣𣨅-𣨕𪵄𬆖𬆗𭮒-𭮖𰙿𰚀殔-殚㱤-㱩𣨖-𣨰𪵅𬆘-𬆚𭮗𭮘殛殜㱪-㱮𣨱-𣩃𬆛𭮙-𭮛殝-殡㱯-㱲𣩄-𣩌𪵆𭮜殢-殦㱳㱴𣩍-𣩜𫞔𭮝𭮞𰚁殧-殫㱵-㱷𣩝-𣩧𭮟-𭮣殬-殮㱸𣩨-𣩮𪵇殯𣩯-𣩶𭮤-𭮧殰殱𣩷𣩸𬆜鿞㱹㱺𣩹-𣩻𬆝𰚂殲𣩼-𣩾㱻𣩿-𣪁]
@ -143,7 +143,7 @@
[radical 129=⾀聿:聿肀𦘒𮌁𰭼𦘓𬚪𮌂𦘔𮌃肁-肃𦘕-𦘗𰭽𦘘-𦘚肄-肆䏋𦘛𦘜𫆔𬚫𮌄肇肈𦘝-𦘟𫆕𦘠-𦘢𬚬𮌅𮌆𦘣𬚭𬚮𦘤-𦘨]
[radical 130=⾁肉:肉𮌇肊𰭾肋-肏䏌䏍𦘩-𦘳𰭿-𰮂肐-肠䏎䏏𦘴-𦘾𬚯𮌈-𮌊𰮃-𰮆股-胁䏐-䏝𦘿-𦙖𦙘-𦙩𫆖-𫆘𬚰-𬚳𮌋𰮇-𰮑朑胂-胬脉䏞-䏥𡋜𣍨𦙪-𦚜𫆙-𫆜𬚴-𬚶𮌌-𮌏𰮒-𰮜胭-脈脊-脔䏦-䏮𣍯𣍰𦚝-𦛓𫆝𫆞𬚷-𬚺𮌐-𮌓𰮝-𰮤脕-脸䏯-䏻𦛔-𦜅𫆟-𫆡𬚻-𬛁𮌔-𮌙𰮥-𰮪脹-腚㬹䏼-䐌𦜆-𦜲𦜴-𦝙𫆢-𫆥𬛂-𬛇𮌚-𮌠𰮫-𰮴幐朡腛-腾䐍-䐛𦝚-𦞘𦟓𦟔𫆦-𫆨𬛈-𬛋𮌡-𮌥𰮵-𰯀腿-膑䐜-䐪𦞙-𦟒𦟕-𦟗𫆩-𫆮𫟊𬛌-𬛎𮌦-𮌰𰯁-𰯋膒-膣䐫-䐴𦟘-𦠂𫆯-𫆴𬛏-𬛓𮌱-𮌹𰯌-𰯓朥膥-膶䐵-䐽𦠃-𦠽𦡄𦡮𫆵-𫆸𫜶𬛔-𬛘𮌺-𮌿𰯔-𰯝膷-臌䐾-䑁𦠾-𦡃𦡅-𦡍𦡏-𦡕𦡘-𦡭𦡯𫆹-𫆻𬛙-𬛛𮍀𮍁𰯞-𰯠臍-臓䑂-䑅𤑃𦡎𦡰-𦢉𫆼𫆽𬛜𬛝𮍂-𮍆𰯡-𰯣臔臕臗臘䑆-䑈𦢊-𦢤𫆾𫆿𮍇-𮍉𰯤-𰯧臖臙-臜䑉䑊𦢥-𦢷𫇀𬛞𮍊𰯨-𰯪臝㔮䑋-䑎𦢸-𦢾𫇁𬛟臞臟䑏𦢿-𦣅𫇂𰯫臠-臢𤓏𦣆-𦣐𬛠𦣑-𦣕𬛡𮍋𦣖𦣗𦣙𰯬𦣘𦣚-𦣜𰯭𫇄]
[radical 131=⾂臣:臣𦣞𮍌𦣝臤臥𮍍𦣟𰯮𦣠-𦣣𫇅𬛢𬛣𰯯䑐𦣤𦣥𬛤𮍎𮍏𰯰臦𦣦-𦣩𫇆𬛥𰯱𫇇𬛦臧𦣫𦣬𦣮𫇈𮍐-𮍒𰯲𰯳𦣭𦣯𬛧𬛨𦣪𮍓臨臩𦣰𦣱𬛩䑑𦣲𦣳𫇉𦣴𦣵𬛪𦣶𬛫𦣷𦣸]
[radical 132=⾃自:自𦣹臫𦣺-𦤀𰯴臬臭𦤁-𦤋𮍔𮍕𦤌-𦤏𬛬𮍖-𮍘𰯵臮-臰𦤐-𦤓𫇊𮍙𰯶𦤔𮍚𮍛𦤕-𦤗𬛭𮍜臱𦤘-𦤝𫇋臲𦤞-𦤤𬛮𦤥𫇌𦤦-𦤪𬛯𰯷𦤫-𦤮𬛰𮍝𮍞𦤯-𦤱𮍟𦤲𫇍]
[radical 132=⾃自:自𦣹臫𦣺-𦤀𰯴臬臭𦤁-𦤋𮍔𮍕𦤌-𦤏𬛬𮍖-𮍘𰯵臮-臰𦤐-𦤓𫇊𮍙𰯶𦤔𮍚𮍛𦤕-𦤗𬛭𮍜臱𦤘-𦤝𫇋臲𦤞-𦤤𬛮𦤥𫇌𦤦-𦤪𬛯𰯷𦤫-𦤮𬛰𮍝𮍞𦤯-𦤱𮍟𫇍𦤲]
[radical 133=⾄至:至𦤳-𦤵𬛱致䑒𦤶-𦤺𬛲𮍠𮍡𦤻-𦤾臵-臸𦤿𦥀𬛳𮍢臹䑓𦥁𫇎臺𦥂-𦥄𫇏𫇐𮍣𦥅-𦥇臻𦥉-𦥍𬛴𬛵𦥎-𦥐𫇑𬛶𮍤𬛷]
[radical 134=⾅臼:臼𦥑-𦥔臽臾𦥕𦥖𦥙𮍥臿𦥗𦥘𦥚-𦥜𦥧𬛸𬛹舀舁䑔𦥝-𦥤𮍦𮍧舂䑕𦥥𦥦𦥨-𦥬舃舄𦥭-𦥲𫟋𮍨-𮍬舅𦥳-𦥺𬛺𮍭𮍮與䑖𦥻-𦦄𮍯𰯸𰯹興䑗𦦅-𦦎𬛻𮍰𮍱𰯺舆舉䑘𦦏-𦦓𦦕-𦦗𬛼𦦘-𦦠𬛽舊𦦡-𦦧𫇒𬛾𰯻舋𠤫𦦩-𦦬𦦯𬛿𬜀𦦭𦦮𦦰-𦦹𰯼𦦺𦦻𬜁𤓟𦦼𬜂𦦽-𦦿𮍲𦧀𫇓𬜃𦧁𦧂𮍳𬜄𦧃-𦧅𠔻]
[radical 135=⾆舌:舌𦧆舍-舏𫇔𦧇𮍴𮍵舐䑙䑚𦧈-𦧐舑䑛𦧑-𦧔𬜅𬜆𮍶𮍷𰯽-𰯿舒𦧕-𦧜𰰀𦧝𦧞𫇕𫇖𬜇𮍸𰰁-𰰅舓-舕𦧟-𦧦𬜈𮍹𰰆舖舗䑜𦧧-𦧬𬜉-𬜌𮍺-𮍾𰰇舘𦧭-𦧰𫇗𮍿𦧱-𦧳𬜍𰰈舙𦧴-𦧶𬜎𰰉舚𦧷𫇘𬜏𬜐𦧸-𦧺𫇙𦧻𦧼𮎀𦧽]
@ -151,9 +151,9 @@
[radical 137=⾈舟:舟𠂨𦨇舠䑠𦨈-𦨍𬜑舡-舤䑡-䑣𦨎-𦨔𫇚𬜒舥-舱䑤䑥𦨕-𦨠𮎃𮎄𰰋舲-舻䑦-䑩𦨡-𦨮𮎅-𮎊𰰌舼-舿䑪-䑮𦨯-𦨲𦨴-𦨼𬜓𮎋𮎌艀-艉䑯䑰𦨳𦨽-𦩉𬜔𮎍-𮎑𰰍艊-艍䑱-䑸𦩊-𦩜𨦞𫇛𬜕𬜖𮎒𮎓𰰎艎-艔䑹-䑻𦩝-𦩳𦩿𫇜𫇝𬜗𰰏-𰰑艕-艙䑼-䑾𦩴-𦩾𦪀-𦪅𬜘-𬜚𰰒艚-艝䑿-䒅𦪆-𦪐𫇞𬜛𮎔艞-艠䒆-䒈𦪑-𦪧𫇟𰰓艡-艥𦪨-𦪰艦-艩䒉𦪱-𦪵𫇠艪𦪶-𦪸𦪺-𦪼𬜜𮎕艫𦪽-𦫂𫇡-𫇣𮎖𮎗艬𦫃𦫄艭𦫅-𦫇𦫉𦫈𦫊]
[radical 138=⾉艮:艮-艰𦫋𰰔𦫌-𦫏𮎘𮎙𦫐艱𦫑𮎚𦫒鿥]
[radical 139=⾊色:色𮎛𮎜𰰕䒊𦫓艳艴䒋𦫔-𦫙𰰖𰰗𦫚𬜝𦫛-𦫝𰰘艵𦫞-𦫣𬜞𰰙𦫤-𦫩𮎝䒌䒍𦫪-𦫮𫇤𰰚䒎䒏𰰛艶𦫯-𦫱𮎞䒐艷𦫲𬜟]
[radical 140=⾋艸:艸艹䒑艺龷𦫳-𦫵艻-节䒒-䒔𦫶-𦬀𫇥𫇦𬜠𰰜-𰰟芃-芗䒕-䒙𦬁-𦬒𫇧𫇨𬜡𬜢𰰠-𰰢芘-芾苀-苏茾䒚-䒥𦬓-𦬶𫇩-𫇭𬜣-𬜥𮎟-𮎣𰰣-𰰪芿苐-茑茓-茕䒦-䒯𦬷-𦭬𫇮-𫇵𫟌𫟍𬜦𬜧𮎤-𮎦𰰫-𰰱茒茖-茜茞-茢茤-茽茿-荒荔-药鿒䒰-䓁𦭭-𦮵𫇶-𫇾𫟎𬜨-𬜮𮎧-𮎸𰰲-𰰽茝茣荰-莬莮-莽鿊䓂-䓔䓖𦮶-𦯰𦯲-𦰕𦰗-𦰩𫇿-𫈏𫟏-𫟑𬜯-𬜺𮎹-𮏏𰰾-𰱎荓莾-萨著龿䓕䓗-䓭𦯱𦰖𦰪-𦳀𫈐-𫈠𬜻-𬝋𮏐-𮏢𰱏-𰱜莭萩-萫萭-葖葘-蒏鿓䓮-䓻𦳁-𦴵𦴷-𦵆𦵈-𦵠𫈡-𫈵𫟒𫟓𬝌-𬝙𮏣-𮐂𰱝-𰱪蒐-蒷蒹-蓤蓦䓼-䔍𤎀𦵇𦵡-𦷾𫈶-𫉇𫟔𬝚-𬝳𮐃-𮐠𰱫-𰱱蓥蓧-蔼䔎-䔬𦷿-𦸼𦸾-𦺄𫉈-𫉜𫟕𬝴-𬞍𮐡-𮐸𰱲-𰱵蔽-蕵䔭-䕄𦸽𦺆-𦻆𦻈-𦻽𦻿-𦼧𦽭𫉝-𫉤𫟖𬞎-𬞙𮐹-𮑙𰱶-𰱼蕶-薨薪-薬薮龩﨟䕅-䕑𦻾𦼨-𦽬𦽮-𦾩𦿡𦿸𫉥-𫉴𬞚-𬞬𮑚-𮑻𰱽-𰲇薩薭薯-藓蘤䕒-䕝𦾪-𦿓𦿖-𦿜𦿞-𦿠𦿢-𦿷𦿹-𧀟𫉵-𫊄𬞭-𬞾𮑼-𮒖𰲈𰲉藔-藭藯-藵䕞-䕥𦿔𦿕𦿝𧀠-𧁺𧃌𫊅-𫊋𬞿-𬟍𮒗-𮒨𰲊-𰲐藮藶-蘑蘓蘔蘢䕦-䕲𧁻-𧃋𧃎𫊌-𫊏𬟎-𬟕𮒩-𮒷𰲑-𰲓蘒蘕-蘡蘣蘥-蘰䕳-䕷𧃍𧃏-𧄌𫊐-𫊓𬟖-𬟛𮒸-𮓃𰲔𰲕蘲-蘷鿀鿦䕸-䕺𧄍-𧄵𫊔-𫊖𬟜-𬟟𮓄-𮓍𰲖蘱蘸-虁䕻-䕽𧄶-𧅕𫊗𬟠-𬟤𮓎-𮓒虂-虅䕾𧅖-𧅭𫊘-𫊚𬟥𮓓虆-虉䕿䖀𧅮-𧅴𧅶-𧅹𬟦𮓔𮓕䖁𧅺-𧆅𫊛𫊜𮓖𰲗虊䖂-䖄𧆇-𧆎䖅䖆𧅵𧆏-𧆒虋虌𧆔𧆕𦺅𧆖𧆚𧆗䖇𧆙𧆓𧆘]
[radical 140=⾋艸:艸艹䒑艺龷𦫳-𦫵艻-节䒒-䒔𦫶-𦬀𫇥𫇦𬜠𰰜-𰰟芃-芗䒕-䒙𦬁-𦬒𫇧𫇨𬜡𬜢𰰠-𰰢芘-芾苀-苏茾䒚-䒥𦬓-𦬶𫇩-𫇭𬜣-𬜥𮎟-𮎣𰰣-𰰪芿苐-茑茓-茕䒦-䒯𦬷-𦭬𫇮-𫇵𫟌𫟍𬜦𬜧𮎤-𮎦𰰫-𰰱茒茖-茜茞-茢茤-茽茿-荒荔-药鿒䒰-䓁𦭭-𦮵𫇶-𫇾𫟎𬜨-𬜮𮎧-𮎸𰰲-𰰽茝茣荰-莬莮-莽鿊䓂-䓔䓖𦮶-𦯰𦯲-𦰕𦰗-𦰩𫇿-𫈏𫟏-𫟑𬜯-𬜺𮎹-𮏏𰰾-𰱎荓莾-萨著龿䓕䓗-䓭𦯱𦰖𦰪-𦳀𫈐-𫈠𬜻-𬝋𮏐-𮏢𰱏-𰱜莭萩-萫萭-葖葘-蒏鿓䓮-䓻𦳁-𦴵𦴷-𦵆𦵈-𦵠𫈡-𫈵𫟒𫟓𬝌-𬝙𮏣-𮐂𰱝-𰱪蒐-蒷蒹-蓤蓦䓼-䔍𤎀𦵇𦵡-𦷾𫈶-𫉇𫟔𬝚-𬝳𮐃-𮐠𰱫-𰱱蓥蓧-蔼䔎-䔬𦷿-𦸼𦸾-𦺄𫉈-𫉜𫟕𬝴-𬞍𮐡-𮐸𰱲-𰱵蔽-蕵䔭-䕄𦸽𦺆-𦻆𦻈-𦻽𦻿-𦼧𦽭𫉝-𫉤𫟖𬞎-𬞙𮐹-𮑙𰱶-𰱼蕶-薨薪-薬薮龩﨟䕅-䕑𦻾𦼨-𦽬𦽮-𦾩𦿡𦿸𫉥-𫉴𬞚-𬞬𮑚-𮑻𰱽-𰲇薩薭薯-藓蘤䕒-䕝𦾪-𦿓𦿖-𦿜𦿞-𦿠𦿢-𦿷𦿹-𧀟𫉵-𫊄𬞭-𬞾𬟃𮑼-𮒖𰲈𰲉藔-藭藯-藵䕞-䕥𦿔𦿕𦿝𧀠-𧁺𧃌𫊅-𫊋𬞿-𬟂𬟄-𬟍𮒗-𮒨𰲊-𰲐藮藶-蘑蘓蘔蘢䕦-䕲𧁻-𧃋𧃎𫊌-𫊏𬟎-𬟕𮒩-𮒷𰲑-𰲓蘒蘕-蘡蘣蘥-蘰䕳-䕷𧃍𧃏-𧄌𫊐-𫊓𬟖-𬟛𮒸-𮓃𰲔𰲕蘲-蘷鿀鿦䕸-䕺𧄍-𧄵𫊔-𫊖𬟜-𬟟𮓄-𮓍𰲖蘱蘸-虁䕻-䕽𧄶-𧅕𫊗𬟠-𬟤𮓎-𮓒虂-虅䕾𧅖-𧅭𫊘-𫊚𬟥𮓓虆-虉䕿䖀𧅮-𧅴𧅶-𧅹𬟦𮓔𮓕䖁𧅺-𧆅𫊛𫊜𮓖𰲗虊䖂-䖄𧆇-𧆎䖅䖆𧅵𧆏-𧆒虋虌𧆔𧆕𦺅𧆖𧆚𧆗䖇𧆙𧆓𧆘]
[radical 141=⾌虍:虍𮓗虎虏䖈𧆛𮓘-𮓚彪虐䖉𧆜-𧆞𫊝𫊞𬟧𮓛𮓜虑虓虔䖊-䖍𧆟-𧆢𫊟𬟨𬟩𮓝-𮓠𰲘-𰲚處-虚䖎䖏𧆣-𧆰𮓡𰲛-𰲝虛-虝䖐䖑𧆱-𧆹𫊠𫊡𬟪𮓢-𮓤𰲞𰲟虞號䖒-䖖𧆺-𧇀𬟫-𬟭𮓥𮓦𰲠-𰲣虠虡𧇁-𧇎𫊢𰲤-𰲦虢虣䖗𧇏-𧇛𮓧𮓨𰲧虤-虦䖘𧇜-𧇰𬟮-𬟰𮓩𮓪虧虨䖙𧇱-𧇺𫊣𮓫𮓬虩䖚䖛𧇻-𧈁𧈃𧈄𬟱𮓭䖜𧈂𧈅-𧈊𬟲𮓮𧈋-𧈏𮓯𧈑-𧈗𰲨𧈘-𧈛𬟳虪𧈜]
[radical 142=⾍虫:虫𮓰虬䖝𧈝𧈞𮓱虭-虲𧈟-𧈧𫊤𮓲𰲩虳-蚃䖞-䖠𡯥𧈨-𧈺𫊥𫊦𬟴𬟵𮓳-𮓷𰲪蚄-蚇蚉-蚪蚬䖡-䖣𧈻-𧉝𫊧-𫊫𬟶-𬟻𮓸-𮓼𰲫-𰲲蚫蚭-蛏﨡䖤-䖬𧉞-𧊎𫊬-𫊲𬟼-𬠁𮓽-𮔀𰲳-𰲷蚈蛐-蛕蛗-蛴䖭-䖵𧊏-𧊪𧊬𧊭𧊯-𧋇𫊳-𫊼𬠂-𬠇𮔁-𮔈𰲸𰲹蛖蛵-蜗蜫蝆䖶-䖾𧊫𧊮𧋈-𧋝𧋟-𧌂𧌞𫊽-𫋁𬠈-𬠍𮔉-𮔖𰲺-𰲾蜘-蜪蜬-蝅蝇-蝋䖿-䗊䗕𧋞𧌃-𧌝𧌟-𧌯𧌱-𧍑𫋂-𫋉𬠎-𬠚𮔗-𮔝𰲿-𰳉蝌-蝸蝺-螀蟡䗋-䗔䗖𧌰𧍒-𧎠𧏇𫋊-𫋎𬠛-𬠞𮔞-𮔧𰳊-𰳍蝹螁-螩蟗䗗-䗞𧎡-𧏆𧏈-𧏷𫋏-𫋕𬠟-𬠤𮔨-𮔲𰳎-𰳕螪-蟒蠁䗟-䗮𢋜𧏸-𧐯𧐱-𧑃𫋖-𫋙𬠥-𬠨𮔳-𮔺𰳖-𰳚蟓-蟖蟘-蟠蟢-蟵蠎䗯-䗳𧑄-𧒍𫋚-𫋞𬠩-𬠮𮔻-𮕁𰳛𰳜蟶-蠀蠂-蠍蠏䗴-䗻𧒎-𧓈𫋟-𫋢𬠯-𬠱𮕂-𮕇𰳝-𰳠蠐-蠙䗼-䘁𧓉-𧓰𫋣-𫋥𫟗𬠲-𬠷𮕈-𮕋𰳡-𰳣蠚-蠤蠴䘂-䘄𧓱-𧔜𫋦𫋧𬠸𮕌-𮕑𰳤蠥-蠬䘅-䘉𧔝-𧕂𫋨𬠹𮕒𰳥蠭-蠳䘊-䘌𧕃-𧕒𧕔-𧕚𬠺𮕓-𮕖蠵-蠺𧕓𧕛-𧕮𮕗𰳦蠻䘍𧕯-𧕼𬠻𮕘𮕙蠼𧕽-𧖃𧖊𮕚蠽蠾𧖄-𧖉𧖋-𧖐蠿䘎𧖑-𧖗𮕛𠑴𧖘-𧖛𫋩𧖜-𧖟𧖡-𧖦]
[radical 142=⾍虫:虫𮓰虬䖝𧈝𧈞𮓱虭-虲𧈟-𧈧𫊤𮓲𰲩虳-蚃䖞-䖠𡯥𧈨-𧈺𫊥𫊦𬟴𬟵𮓳-𮓷𰲪蚄-蚇蚉-蚪蚬䖡-䖣𧈻-𧉝𫊧-𫊫𬟶-𬟻𮓸-𮓼𰲫-𰲲蚫蚭-蛏﨡䖤-䖬𧉞-𧊎𫊬-𫊲𬟼-𬠁𮓽-𮔀𰲳-𰲷蚈蛐-蛕蛗-蛴䖭-䖵𧊏-𧊪𧊬𧊭𧊯-𧋇𫊳-𫊼𬠂-𬠇𮔁-𮔈𰲸𰲹蛖蛵-蜗蜫蝆䖶-䖾𧊫𧊮𧋈-𧋝𧋟-𧌂𧌞𫊽-𫋁𬠈-𬠍𮔉-𮔖𰲺-𰲾蜘-蜪蜬-蝅蝇-蝋䖿-䗊䗕𧋞𧌃-𧌝𧌟-𧌯𧌱-𧍑𫋂-𫋉𬠎-𬠚𮔗-𮔝𰲿-𰳉蝌-蝸蝺-螀蟡䗋-䗔䗖𧌰𧍒-𧎠𧏇𫋊-𫋎𬠛-𬠞𮔞-𮔧𰳊-𰳍蝹螁-螩蟗䗗-䗞𧎡-𧏆𧏈-𧏷𫋏-𫋕𬠟-𬠤𮔨-𮔲𰳎-𰳕螪-蟒蠁䗟-䗮𢋜𧏸-𧐯𧐱-𧑃𫋖-𫋙𬠥-𬠨𮔳-𮔺𰳖-𰳚蟓-蟖蟘-蟠蟢-蟵蠎䗯-䗳𧑄-𧒍𫋚-𫋞𬠩-𬠮𮔻-𮕁𰳛𰳜蟶-蠀蠂-蠍蠏䗴-䗻𧒎-𧓈𫋟-𫋢𬠯-𬠱𮕂-𮕇𰳝-𰳠蠐-蠙䗼-䘁𧓉-𧓰𫋣-𫋥𫟗𬠲-𬠷𮕈-𮕋𰳡-𰳣蠚-蠤蠴䘂-䘄𧓱-𧔜𫋦𫋧𬠸𮕌-𮕑𰳤蠥-蠬䘅-䘉𧔝-𧕂𫋨𬠹𮕒𰳥蠭-蠳䘊-䘌𧕃-𧕒𧕔-𧕚𮕓-𮕖蠵-蠺𧕓𧕛-𧕮𬠺𮕗𰳦蠻䘍𧕯-𧕼𬠻𮕘𮕙蠼𧕽-𧖃𧖊𮕚蠽蠾𧖄-𧖉𧖋-𧖐蠿䘎𧖑-𧖗𮕛𠑴𧖘-𧖛𫋩𧖜-𧖟𧖡-𧖦]
[radical 143=⾎血:血𮕜𧖧-𧖩𮕝衁衂䘏𧖪衃衄䘐𧖫-𧖯𫋪衅䘑𧖰-𧖲𫋫𮕞𮕟𰳧衆-衉𧖳𧖴𬠼𮕠𰳨䘒𧖵-𧖹䘓𧖺-𧖿𧗄䘔𧗀-𧗃𧗅-𧗍𫋬𰳩𧗎-𧗗衊𧗘𮕡𧗙-𧗛衋𧗜]
[radical 144=⾏行:行𬠽𧗝-𧗠衍衎𧗡-𧗥𮕢衏䘕𧗦-𧗩𬠾𬠿衐-衔𧗪𫋭衕-衘䘖𢔬𧗫-𧗱𫋮𰳪衙𧗲-𧗷𮕣𰳫-𰳭𧗸𧗺𧗻𰳮衚-衝𧗹𧗼𧗽𫋯𫟘𬡀衞-衡䘗䘘𢖅𧗾𢖋𧗿-𧘁𫟙𧘂𧘃𫋰𰳯𧘄䘙𢖡𧘅𬡁𧘆𫋱衢]
[radical 145=⾐衣:衣衤𧘇𮕤补䘚䘛𧘈-𧘌𬡂𮕥-𮕧𰳰衦-衬䘜-䘞𧘍-𧘜𬡃-𬡇𮕨衭-袇䘟-䘡𧘝-𧘼𫋲-𫋴𬡈-𬡊𮕩-𮕯𰳱袈-袮袰䘢-䘧𧘽𧘿-𧙝𫋵-𫋷𬡋-𬡐𮕰-𮕸𰳲-𰳸袯袱-裉䘨-䘮𧘾𧙞-𧙿𫋸-𫋼𬡑-𬡘𮕹-𮖀𰳹𰳺裊-裥䘯-䘲𧚀-𧚣𫋽𫟚𬡙-𬡝𮖁-𮖎𰳻-𰴀裧-褄䘳-䙂𧚤-𧚺𧚼-𧛎𫋾-𫌄𬡞-𬡤𮖏-𮖓𰴁𰴂裦褅-褝䙃-䙍𧚻𧛏-𧛷𫌅𫌆𬡥𬡦𮖔-𮖠褞-褲褴䙎-䙓𧛸-𧜜𧜨𫌇-𫌉𬡧-𬡩𮖡-𮖩𰴃-𰴅褳褵-襅襔䙔-䙝𧜝-𧜧𧜩-𧜬𧜮-𧝁𫌊-𫌏𬡪-𬡭𮖪-𮖮𰴆-𰴈襆-襓襕鿋䙞-䙣𧜭𧝂-𧝰𫌐-𫌒𬡮𬡯𮖯-𮖶襖-襢䙤䙥𧝱-𧞌𫌓𫌔𬡰𬡱𮖷-𮖺襣-襨䙦-䙩𧞍-𧞖𧞘-𧞦𫌕𬡲-𬡴𮖻𮖼襩-襮䙪𧞗𧞧-𧞱𧞳-𧞵𫌖𬡵𬡶𮖽𮖾襯-襲䙫𧞲𧞶-𧟂𬡷𰴉襳襴襽䙬䙭𧟃-𧟇𫌗襵-襷䙮䙯𧟈-𧟋𫌘𫌙襸-襼䙰𧟌-𧟏𰴊𧟐-𧟔䙱𧟕-𧟟]
@ -194,7 +194,7 @@
[radical 173=⾬雨:雨𮦄𩁶-𩁸𫕝𱁑雩-雫䨋𩁹-𩂁𫕞𬯸𮦅雬-雳䨌䨍𩂂-𩂑𫕟𫕠𬯹𬯺𮦆-𮦉雴-雾䨎𩂒-𩂡𫕡𫕢𬯻𮦊-𮦍𱁒𱁓雿-霁䨏-䨖𩂢-𩂶𫕣𫕤𮦎𮦏霂-霊䨗-䨙𩂷-𩃑𫕥𫕦𬯼𬯽𮦐-𮦓𱁔-𱁚霋-霗䨚-䨝𩃒-𩃴𫕧𫕨𬯾-𬰃𮦔-𮦛𱁛-𱁟霘-霠䨞-䨤𩃵-𩃽𩃿-𩄖𫕩𫕪𬰄-𬰇𮦜-𮦣𱁠-𱁢霡-霥䨥-䨪𩃾𩄗-𩄭𩄯-𩄺𫕫𬰈𮦤-𮦫霦-霭䨫-䨯𩄻-𩅜𫕬𫕭𬰉𬰊𮦬𱁣霮-霴䨰䨱𩅝-𩅻𫕮-𫕱𬰋-𬰍𮦭-𮦲霵-霻𩅼-𩆐𫕲𬰎-𬰐𮦳-𮦶霼-靀䨲-䨴𩆑-𩆣𫕳𫕴𬰑-𬰔𮦷-𮦻𱁤靁䨵䨶𩆤-𩆬𮦼𱁥-𱁨靂-靈䨷𩆭-𩆴𫕵𫕶𮦽𱁩靉䨸𩆵-𩆾𮦾靊𩆿-𩇇𮦿靋-靎鿨䨹𬰕𮧀𮧁𩇈𩇉靏𩇊-𩇎𫕷𬰖𮧂𩇏𱁪𩇐𩇑𱁫䨺𩇒靐𩇓𩇔䨻𱁬]
[radical 174=⾭靑:靑青𩇕𫕸靓靔𩇖𩇗靕靖𩇘-𩇛𫕹𫕺𬰗𬰘𮧃靗-静𩇜靚𱁭𱁮靛靜𩇝𩇞𫕻𩇟靝𩇠𩇡𫕼𩇢𩇣䨼𩇤𩇥]
[radical 175=⾮非:非𩇦-𩇨䨽䨾𩇩-𩇬𬰙靟䨿𩇭-𩇴𮧄𩇵𩇶𬰚𬰛𮧅𮧆𩇷𫕽𮧇靠䩀𩇸-𩇻𬰜𩇼𩇽𬰝靡𬰞䩁𩇾-𩈁𱁯𫕾𬰟𮧈𫕿𩈂]
[radical 176=⾯面:面靣𠚑𩈃-𩈅𮧉䩂-䩄𩈆-𩈌靤䩅-䩇𩈍-𩈘𫖀𬰠𮧊𮧋靥𤎂𩈙-𩈠𫖁𮧌靦䩈䩉𩈡-𩈪𫖂𮧍䩊𩈫-𩈱𬰡𮧎𩈲-𩈷𫖃𮧏𩈸-𩈺𱁰䩋𩈻-𩉀𬰢𬰣𮧐靧䩌䩍𩉁-𩉉𫖄𮧑𮧒䩎𩉊𩉋靨𩉌-𩉐䩏𩉑-𩉚]
[radical 176=⾯面:面靣𠚑𩈃-𩈅𮧉䩂-䩄𩈆-𩈌靤䩅-䩇𩈍-𩈘𫖀𬰠𮧊𮧋靥𤎂𩈙-𩈠𫖁𮧌靦䩈䩉𩈡-𩈪𫖂𮧍䩊𩈫-𩈱𬰡𮧎𩈲-𩈷𫖃𮧏𩈸-𩈺𱁰䩋𩈻-𩉀𫖄𬰢𬰣𮧐靧䩌䩍𩉁-𩉉𮧑𮧒䩎𩉊𩉋靨𩉌-𩉐䩏𩉑-𩉚]
[radical 177=⾰革:革𮧓靪𩉛𩉜靫-靱䩐-䩒𩉝-𩉠𱁱靲-靹䩓-䩘𩉡-𩉷𮧔𱁲靺-鞆䩙-䩞𩉸-𩊏𫖅𫖆𮧕-𮧗𱁳鞇-鞒䩟𩊐-𩊨𫖇𫖈𬰤𮧘𮧙𱁴𱁵鞓-鞙䩠-䩧𩊩-𩊾𬰥𮧚-𮧝𱁶鞚-鞡䩨-䩮𩊿-𩋞𬰦𮧞𱁷鞢-鞰䩯-䩵𩋟-𩌂𬰧𮧟-𮧢𱁸-𱁺鞱-鞷䩶-䩻𥌕𩌃-𩌥𮧣-𮧥𱁻𱁼鞸-鞻䩼-䩾𩌦-𩍂𫖉𮧦-𮧩鞼-鞿䩿-䪄𩍃-𩍒𫖊𬰨𬰩𮧪𮧫𱁽韀-韃䪅䪆𩍓-𩍤𫖋𮧬𮧭韄韅䪇䪈𩍥-𩍱韆-韈䪉𩍲-𩍷䪊䪋𩍸-𩍺𩍼-𩍾𱁾韉䪌䪍𩍿-𩎆䪎𩎇-𩎊韊𩎋-𩎑]
[radical 178=⾱韋:韋𮧮-𮧰𫖌韌𩎒𱁿䪏𩎓-𩎗𫖍𬰪𱂀韍韎䪐-䪓𩎘-𩎣𱂁韏-韑𩎤-𩎮𫠄𬰫-𬰭𮧱韒䪔𩎯-𩎷𱂂韓-韕䪕𩎸-𩏄𫖎韖-韚䪖-䪘𩏅-𩏎𬰮𮧲韜-韟䪙䪚𩏏-𩏗韛韠𩏘-𩏟𫖏韡韢䪛𩏠-𩏧𮧳韣䪜𩏨-𩏭䪝𩏮-𩏱𬰯韤韥𩏲𩏳𫖐𩏴𩏵𬰰𩏶-𩏻]
[radical 178'=⻙韦:韦韧𬰱𩏼𫖑𬰲𮧴𱂃韨𩏽𩏾𬰳𬰴𱂄𱂅𫖒𮧵𱂆-𱂉韩𫖓韪韫𩏿𩐀𫠅𬰵𱂊韬𫖔-𫖖𬰶𬰷𱂋𱂌𬰸]

View file

@ -1,5 +1,5 @@
# UCA_Rules_SHORT.txt
# Date: 2021-06-08, 19:47:07 GMT
# Date: 2021-08-17, 17:34:16 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html

View file

@ -30611,9 +30611,9 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1E7E5;ETHIOPIC SYLLABLE HHYE;Lo;0;L;;;;;N;;;;;
1E7E6;ETHIOPIC SYLLABLE HHYO;Lo;0;L;;;;;N;;;;;
1E7E8;ETHIOPIC SYLLABLE GURAGE HHWA;Lo;0;L;;;;;N;;;;;
1E7E9;ETHIOPIC SYLLABLE HWI;Lo;0;L;;;;;N;;;;;
1E7EA;ETHIOPIC SYLLABLE HWEE;Lo;0;L;;;;;N;;;;;
1E7EB;ETHIOPIC SYLLABLE HWE;Lo;0;L;;;;;N;;;;;
1E7E9;ETHIOPIC SYLLABLE HHWI;Lo;0;L;;;;;N;;;;;
1E7EA;ETHIOPIC SYLLABLE HHWEE;Lo;0;L;;;;;N;;;;;
1E7EB;ETHIOPIC SYLLABLE HHWE;Lo;0;L;;;;;N;;;;;
1E7ED;ETHIOPIC SYLLABLE GURAGE MWI;Lo;0;L;;;;;N;;;;;
1E7EE;ETHIOPIC SYLLABLE GURAGE MWEE;Lo;0;L;;;;;N;;;;;
1E7F0;ETHIOPIC SYLLABLE GURAGE QWI;Lo;0;L;;;;;N;;;;;

View file

@ -38,7 +38,7 @@ https://unicode-org.atlassian.net/browse/ICU-21635
* Command-line environment setup
export UNICODE_DATA=~/unidata/uni14/20210609
export UNICODE_DATA=~/unidata/uni14/20210819
export CLDR_SRC=~/cldr/uni/src
export ICU_ROOT=~/icu/uni
export ICU_SRC=$ICU_ROOT/src
@ -198,6 +198,11 @@ copying that version number into the $ICU_SRC/.bazeliskrc config file.
* run & fix ICU4C tests
- fix Unicode Tools class Segmenter to generate correct *BreakTest.txt files
- update CLDR GraphemeBreakTest.txt
cd ~/unitools/mine/Generated
cp UCD/d22d/cldr/GraphemeBreakTest-cldr.txt $CLDR_SRC/common/properties/segments/GraphemeBreakTest.txt
cp UCD/d22d/cldr/GraphemeBreakTest-cldr.html $CLDR_SRC/common/properties/segments/GraphemeBreakTest.html
cp $CLDR_SRC/common/properties/segments/GraphemeBreakTest.txt $ICU_SRC/icu4c/source/test/testdata
- Andy helps with RBBI & spoof check test failures
* collation: CLDR collation root, UCA DUCET

View file

@ -1,11 +1,11 @@
# confusables.txt
# Date: 2020-02-13, 01:38:49 GMT
# © 2020 Unicode®, Inc.
# Date: 2021-05-29, 22:09:29 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Security Mechanisms for UTS #39
# Version: 13.0.0
# Version: 14.0.0
#
# For documentation and usage, see http://www.unicode.org/reports/tr39
#
@ -593,7 +593,7 @@ FF40 ; 0027 ; MA #* ( → ' ) FULLWIDTH GRAVE ACCENT → APOSTROPHE # →‘
02B9 ; 0027 ; MA # ( ʹ → ' ) MODIFIER LETTER PRIME → APOSTROPHE #
0374 ; 0027 ; MA # ( ʹ → ' ) GREEK NUMERAL SIGN → APOSTROPHE # →′→
02C8 ; 0027 ; MA # ( ˈ → ' ) MODIFIER LETTER VERTICAL LINE → APOSTROPHE #
02CA ; 0027 ; MA # ( ˊ → ' ) MODIFIER LETTER ACUTE ACCENT → APOSTROPHE # →΄→→ʹ
02CA ; 0027 ; MA # ( ˊ → ' ) MODIFIER LETTER ACUTE ACCENT → APOSTROPHE # →ʹ→→′
02CB ; 0027 ; MA # ( ˋ → ' ) MODIFIER LETTER GRAVE ACCENT → APOSTROPHE # →`→→‘→
02F4 ; 0027 ; MA #* ( ˴ → ' ) MODIFIER LETTER MIDDLE GRAVE ACCENT → APOSTROPHE # →ˋ→→`→→‘→
02BB ; 0027 ; MA # ( ʻ → ' ) MODIFIER LETTER TURNED COMMA → APOSTROPHE # →‘→
@ -1071,7 +1071,7 @@ A714 ; 02EB ; MA #* ( ꜔ → ˫ ) MODIFIER LETTER MID LEFT-STEM TONE BAR → MO
25CB ; 00B0 ; MA #* ( ○ → ° ) WHITE CIRCLE → DEGREE SIGN # →◦→→∘→
25E6 ; 00B0 ; MA #* ( ◦ → ° ) WHITE BULLET → DEGREE SIGN # →∘→
235C ; 00B0 0332 ; MA #* ( ⍜ → °̲ ) APL FUNCTIONAL SYMBOL CIRCLE UNDERBAR → DEGREE SIGN, COMBINING LOW LINE # →○̲→
235C ; 00B0 0332 ; MA #* ( ⍜ → °̲ ) APL FUNCTIONAL SYMBOL CIRCLE UNDERBAR → DEGREE SIGN, COMBINING LOW LINE # →○̲→→∘̲→
2364 ; 00B0 0308 ; MA #* ( ⍤ → °̈ ) APL FUNCTIONAL SYMBOL JOT DIAERESIS → DEGREE SIGN, COMBINING DIAERESIS # →◦̈→→∘̈→
@ -2371,7 +2371,7 @@ A6B1 ; 2C75 ; MA # ( ꚱ → Ⱶ ) BAMUM LETTER NDAA → LATIN CAPITAL LETTER HA
A795 ; A727 ; MA # ( ꞕ → ꜧ ) LATIN SMALL LETTER H WITH PALATAL HOOK → LATIN SMALL LETTER HENG #
02DB ; 0069 ; MA #* ( ˛ → i ) OGONEK → LATIN SMALL LETTER I # →ͺ→→ι→→ι→
2373 ; 0069 ; MA #* ( → i ) APL FUNCTIONAL SYMBOL IOTA → LATIN SMALL LETTER I # →ɩ
2373 ; 0069 ; MA #* ( → i ) APL FUNCTIONAL SYMBOL IOTA → LATIN SMALL LETTER I # →ι
FF49 ; 0069 ; MA # ( → i ) FULLWIDTH LATIN SMALL LETTER I → LATIN SMALL LETTER I # →і→
2170 ; 0069 ; MA # ( → i ) SMALL ROMAN NUMERAL ONE → LATIN SMALL LETTER I #
2139 ; 0069 ; MA # ( → i ) INFORMATION SOURCE → LATIN SMALL LETTER I #

View file

@ -1,6 +1,6 @@
# Original file:
# IdnaMappingTable.txt
# Date: 2021-05-29, 15:44:58 GMT
# Date: 2021-07-10, 00:49:51 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -6524,7 +6524,8 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
12475..1247F >FFFD # NA <reserved-12475>..<reserved-1247F>
# 12480..12543valid # 8.0 CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
12544..12F8F >FFFD # NA <reserved-12544>..<reserved-12F8F>
# 12F90..12FF2valid # 14.0 CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM302
# 12F90..12FF0valid # 14.0 CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
# 12FF1..12FF2valid # 14.0 CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302
12FF3..12FFF >FFFD # NA <reserved-12FF3>..<reserved-12FFF>
# 13000..1342Evalid # 5.2 EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
1342F >FFFD # NA <reserved-1342F>
@ -7745,7 +7746,7 @@ FFFE..FFFF >FFFD # 1.1 <noncharacter-FFFE>..<noncharacte
1E300..1E7DF >FFFD # NA <reserved-1E300>..<reserved-1E7DF>
# 1E7E0..1E7E6valid # 14.0 ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
1E7E7 >FFFD # NA <reserved-1E7E7>
# 1E7E8..1E7EBvalid # 14.0 ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HWE
# 1E7E8..1E7EBvalid # 14.0 ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
1E7EC >FFFD # NA <reserved-1E7EC>
# 1E7ED..1E7EEvalid # 14.0 ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
1E7EF >FFFD # NA <reserved-1E7EF>

View file

@ -1299,7 +1299,9 @@ cp;0027;bc=ON;CI;gc=Po;lb=QU;na=APOSTROPHE;Pat_Syn;QMark;SB=CL;WB=SQ
cp;0028;bc=ON;Bidi_M;bmg=0029;bpb=0029;bpt=o;gc=Ps;lb=OP;na=LEFT PARENTHESIS;Pat_Syn;SB=CL
cp;0029;bc=ON;Bidi_M;bmg=0028;bpb=0028;bpt=c;gc=Pe;lb=CP;na=RIGHT PARENTHESIS;Pat_Syn;SB=CL
cp;002A;bc=ON;EComp;Emoji;gc=Po;na=ASTERISK;Pat_Syn
# ASCII math operator
cp;002B;bc=ES;gc=Sm;lb=PR;Math;na=PLUS SIGN;Pat_Syn
# ASCII punctuation
cp;002C;bc=CS;gc=Po;lb=IS;na=COMMA;Pat_Syn;SB=SC;Term;WB=MN
cp;002D;bc=ES;Dash;gc=Pd;Hyphen;InSC=Consonant_Placeholder;lb=HY;na=HYPHEN-MINUS;Pat_Syn;SB=SC
cp;002E;bc=CS;CI;gc=Po;lb=IS;na=FULL STOP;Pat_Syn;SB=AT;STerm;Term;WB=MB
@ -1315,12 +1317,14 @@ cp;0036;AHex;bc=EN;EComp;Emoji;gc=Nd;Hex;IDC;InSC=Number;lb=NU;na=DIGIT SIX;nt=D
cp;0037;AHex;bc=EN;EComp;Emoji;gc=Nd;Hex;IDC;InSC=Number;lb=NU;na=DIGIT SEVEN;nt=De;nv=7;SB=NU;WB=NU;XIDC
cp;0038;AHex;bc=EN;EComp;Emoji;gc=Nd;Hex;IDC;InSC=Number;lb=NU;na=DIGIT EIGHT;nt=De;nv=8;SB=NU;WB=NU;XIDC
cp;0039;AHex;bc=EN;EComp;Emoji;gc=Nd;Hex;IDC;InSC=Number;lb=NU;na=DIGIT NINE;nt=De;nv=9;SB=NU;WB=NU;XIDC
# ASCII punctuation and symbols
# ASCII punctuation
cp;003A;bc=CS;CI;gc=Po;lb=IS;na=COLON;Pat_Syn;SB=SC;Term;WB=ML
cp;003B;bc=ON;gc=Po;lb=IS;na=SEMICOLON;Pat_Syn;Term;WB=MN
# ASCII mathematical operators
cp;003C;bc=ON;Bidi_M;bmg=003E;gc=Sm;Math;na=LESS-THAN SIGN;Pat_Syn
cp;003D;bc=ON;gc=Sm;Math;na=EQUALS SIGN;Pat_Syn
cp;003E;bc=ON;Bidi_M;bmg=003C;gc=Sm;Math;na=GREATER-THAN SIGN;Pat_Syn
# ASCII punctuation
cp;003F;bc=ON;gc=Po;lb=EX;na=QUESTION MARK;Pat_Syn;SB=ST;STerm;Term
cp;0040;bc=ON;gc=Po;na=COMMERCIAL AT;Pat_Syn
# Uppercase Latin alphabet
@ -22849,13 +22853,13 @@ cp;10766;na=LINEAR A SIGN A806
cp;10767;na=LINEAR A SIGN A807
unassigned;10768..1077F
block;10780..107BF;age=14.0;Alpha;blk=Latin_Ext_F;CI;CWKCF;Dia;dt=Sup;gc=Lm;Gr_Base;IDC;IDS;lb=AL;NFKC_QC=N;NFKD_QC=N;SB=LE;sc=Latn;WB=LE;XIDC;XIDS
block;10780..107BF;age=14.0;Alpha;blk=Latin_Ext_F;Cased;CI;CWKCF;Dia;dt=Sup;gc=Lm;Gr_Base;IDC;IDS;lb=AL;Lower;NFKC_QC=N;NFKD_QC=N;SB=LO;sc=Latn;WB=LE;XIDC;XIDS
# 10780..107BF Latin Extended-F
# Modifier letter for VoQS
cp;10780;-CWKCF;dt=None;na=MODIFIER LETTER SMALL CAPITAL AA;NFKC_QC=Y;NFKD_QC=Y
# Modifier letters for IPA
cp;10781;dm=02D0;Ext;na=MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON;NFKC_CF=02D0
cp;10782;dm=02D1;Ext;na=MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON;NFKC_CF=02D1
cp;10781;-Cased;dm=02D0;Ext;-Lower;na=MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON;NFKC_CF=02D0;SB=LE
cp;10782;-Cased;dm=02D1;Ext;-Lower;na=MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON;NFKC_CF=02D1;SB=LE
cp;10783;dm=00E6;na=MODIFIER LETTER SMALL AE;NFKC_CF=00E6
cp;10784;dm=0299;na=MODIFIER LETTER SMALL CAPITAL B;NFKC_CF=0299
cp;10785;dm=0253;na=MODIFIER LETTER SMALL B WITH HOOK;NFKC_CF=0253
@ -25705,8 +25709,8 @@ cp;1171D;bc=NSM;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;InPC=Bottom;InSC=Consonant_Media
cp;1171E;bc=NSM;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;InPC=Top_And_Bottom_And_Left;InSC=Consonant_Medial;jt=T;na=AHOM CONSONANT SIGN MEDIAL RA;SB=EX;WB=Extend
cp;1171F;bc=NSM;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;InPC=Top;InSC=Consonant_Medial;jt=T;na=AHOM CONSONANT SIGN MEDIAL LIGATING RA;SB=EX;WB=Extend
# Vowel signs
cp;11720;gc=Mc;GCB=SM;InPC=Right;InSC=Vowel_Dependent;na=AHOM VOWEL SIGN A;SB=EX;WB=Extend
cp;11721;gc=Mc;GCB=SM;InPC=Right;InSC=Vowel_Dependent;na=AHOM VOWEL SIGN AA;SB=EX;WB=Extend
cp;11720;gc=Mc;InPC=Right;InSC=Vowel_Dependent;na=AHOM VOWEL SIGN A;SB=EX;WB=Extend
cp;11721;gc=Mc;InPC=Right;InSC=Vowel_Dependent;na=AHOM VOWEL SIGN AA;SB=EX;WB=Extend
cp;11722;bc=NSM;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;InPC=Top;InSC=Vowel_Dependent;jt=T;na=AHOM VOWEL SIGN I;SB=EX;WB=Extend
cp;11723;bc=NSM;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;InPC=Top;InSC=Vowel_Dependent;jt=T;na=AHOM VOWEL SIGN II;SB=EX;WB=Extend
cp;11724;bc=NSM;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;InPC=Bottom;InSC=Vowel_Dependent;jt=T;na=AHOM VOWEL SIGN U;SB=EX;WB=Extend
@ -36040,7 +36044,7 @@ cp;1DAAF;bc=NSM;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;IDC;jt=T;lb=CM;na=SIGNWRITING RO
block;1DF00..1DFFF;age=14.0;Alpha;blk=Latin_Ext_G;Cased;gc=Ll;Gr_Base;IDC;IDS;lb=AL;Lower;SB=LO;sc=Latn;WB=LE;XIDC;XIDS
# 1DF00..1DFFF Latin Extended-G
# IPA letters for disordered speech
# Extended IPA for disordered speech
cp;1DF00;na=LATIN SMALL LETTER FENG DIGRAPH WITH TRILL
cp;1DF01;na=LATIN SMALL LETTER REVERSED SCRIPT G
cp;1DF02;na=LATIN LETTER SMALL CAPITAL TURNED G
@ -36329,9 +36333,9 @@ cp;1E7E5;na=ETHIOPIC SYLLABLE HHYE
cp;1E7E6;na=ETHIOPIC SYLLABLE HHYO
unassigned;1E7E7
cp;1E7E8;na=ETHIOPIC SYLLABLE GURAGE HHWA
cp;1E7E9;na=ETHIOPIC SYLLABLE HWI
cp;1E7EA;na=ETHIOPIC SYLLABLE HWEE
cp;1E7EB;na=ETHIOPIC SYLLABLE HWE
cp;1E7E9;na=ETHIOPIC SYLLABLE HHWI
cp;1E7EA;na=ETHIOPIC SYLLABLE HHWEE
cp;1E7EB;na=ETHIOPIC SYLLABLE HHWE
unassigned;1E7EC
cp;1E7ED;na=ETHIOPIC SYLLABLE GURAGE MWI
cp;1E7EE;na=ETHIOPIC SYLLABLE GURAGE MWEE

View file

@ -120,22 +120,22 @@ void U_CALLCONV initializeStatics(UErrorCode &status) {
u"\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0"
u"\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-"
u"\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-\\u138F\\u1780-"
u"\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2\\u17D7\\u17DC"
u"\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF\\u1E00-\\u1E99\\u1E9E\\u1EA0-"
u"\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-"
u"\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70\\u1F72\\u1F74\\u1F76\\u1F78"
u"\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA\\u1FBC\\u1FC2-\\u1FC4\\u1FC6-"
u"\\u1FC8\\u1FCA\\u1FCC\\u1FD0-\\u1FD2\\u1FD6-\\u1FDA\\u1FE0-\\u1FE2\\u1FE4-"
u"\\u1FEA\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FF8\\u1FFA\\u1FFC\\u2D27\\u2D2D"
u"\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE"
u"\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u3005-\\u3007"
u"\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E\\u30A1-\\u30FA\\u30FC-\\u30FE"
u"\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-\\u4DBF\\u4E00-\\u9FFF\\uA67F"
u"\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793\\uA7AA\\uA7AE\\uA7B8\\uA7B9"
u"\\uA7C0-\\uA7CA\\uA7D0\\uA7D1\\uA7D3\\uA7D5-\\uA7D9\\uA9E7-\\uA9FE\\uAA60-"
u"\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-"
u"\\uAB26\\uAB28-\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F\\uFA11"
u"\\uFA13\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301"
u"\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CD\\u17D0\\u17D2\\u17D7"
u"\\u17DC\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF\\u1E00-\\u1E99\\u1E9E"
u"\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D"
u"\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70\\u1F72\\u1F74\\u1F76"
u"\\u1F78\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA\\u1FBC\\u1FC2-\\u1FC4"
u"\\u1FC6-\\u1FC8\\u1FCA\\u1FCC\\u1FD0-\\u1FD2\\u1FD6-\\u1FDA\\u1FE0-\\u1FE2"
u"\\u1FE4-\\u1FEA\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FF8\\u1FFA\\u1FFC\\u2D27"
u"\\u2D2D\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-"
u"\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u3005-"
u"\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E\\u30A1-\\u30FA\\u30FC-"
u"\\u30FE\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-\\u4DBF\\u4E00-\\u9FFF"
u"\\uA67F\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793\\uA7AA\\uA7AE\\uA7B8"
u"\\uA7B9\\uA7C0-\\uA7CA\\uA7D0\\uA7D1\\uA7D3\\uA7D5-\\uA7D9\\uA9E7-\\uA9FE"
u"\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16"
u"\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F"
u"\\uFA11\\uFA13\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301"
u"\\U00011303\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B11F-"
u"\\U0001B122\\U0001B150-\\U0001B152\\U0001B164-\\U0001B167\\U0001DF00-"
u"\\U0001DF1E\\U0001E7E0-\\U0001E7E6\\U0001E7E8-\\U0001E7EB\\U0001E7ED"

View file

@ -2621,6 +2621,7 @@ private:
UnicodeSet *fZWJ;
UnicodeSet *fOP30;
UnicodeSet *fCP30;
UnicodeSet *fExtPictUnassigned;
BreakIterator *fCharBI;
const UnicodeString *fText;
@ -2689,6 +2690,7 @@ RBBILineMonkey::RBBILineMonkey() :
fZWJ = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ZWJ}]"), status);
fOP30 = new UnicodeSet(u"[\\p{Line_break=OP}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]", status);
fCP30 = new UnicodeSet(u"[\\p{Line_break=CP}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]", status);
fExtPictUnassigned = new UnicodeSet(u"[\\p{Extended_Pictographic}&\\p{Cn}]", status);
if (U_FAILURE(status)) {
deferredStatus = status;
@ -2740,7 +2742,6 @@ RBBILineMonkey::RBBILineMonkey() :
fSets->addElementX(fAL, status); classNames.push_back("fAL");
fSets->addElementX(fHL, status); classNames.push_back("fHL");
fSets->addElementX(fID, status); classNames.push_back("fID");
fSets->addElementX(fWJ, status); classNames.push_back("fWJ");
fSets->addElementX(fRI, status); classNames.push_back("fRI");
fSets->addElementX(fSG, status); classNames.push_back("fSG");
fSets->addElementX(fEB, status); classNames.push_back("fEB");
@ -2749,6 +2750,7 @@ RBBILineMonkey::RBBILineMonkey() :
// TODO: fOP30 & fCP30 overlap with plain fOP. Probably OK, but fOP/CP chars will be over-represented.
fSets->addElementX(fOP30, status); classNames.push_back("fOP30");
fSets->addElementX(fCP30, status); classNames.push_back("fCP30");
fSets->addElementX(fExtPictUnassigned, status); classNames.push_back("fExtPictUnassigned");
const char *rules =
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})(\\p{Line_Break=CM}|\\u200d)*)?"
@ -3282,11 +3284,17 @@ int32_t RBBILineMonkey::next(int32_t startPos) {
continue;
}
// LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
if (fEB->contains(prevChar) && fEM->contains(thisChar)) {
setAppliedRule(pos, "LB30b Emoji Base x Emoji Modifier");
continue;
}
if (fExtPictUnassigned->contains(prevChar) && fEM->contains(thisChar)) {
setAppliedRule(pos, "LB30b [\\p{Extended_Pictographic}&\\p{Cn}] × EM");
continue;
}
setAppliedRule(pos, "LB 31 Break everywhere else");
break;
}
@ -3348,6 +3356,7 @@ RBBILineMonkey::~RBBILineMonkey() {
delete fZWJ;
delete fOP30;
delete fCP30;
delete fExtPictUnassigned;
delete fCharBI;
delete fNumberMatcher;

View file

@ -1,5 +1,5 @@
# CollationTest_CLDR_NON_IGNORABLE_SHORT.txt
# Date: 2021-06-08, 19:47:15 GMT
# Date: 2021-08-17, 17:34:25 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html

View file

@ -1,5 +1,5 @@
# CollationTest_CLDR_SHIFTED_SHORT.txt
# Date: 2021-06-08, 19:47:16 GMT
# Date: 2021-08-17, 17:34:28 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html

View file

@ -1,5 +1,5 @@
# GraphemeBreakTest-cldr-14.0.0.txt
# Date: 2021-06-08, 18:13:31 GMT
# Date: 2021-08-17, 04:43:19 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html

View file

@ -1,5 +1,5 @@
# IdnaTestV2.txt
# Date: 2021-05-29, 21:48:17 GMT
# Date: 2021-08-17, 19:34:01 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -16,7 +16,7 @@
# This file is in UTF-8, where characters may be escaped using the \uXXXX or \x{XXXX}
# convention where they could otherwise have a confusing display.
# These characters include control codes and combining marks.
#
#
# Columns (c1, c2,...) are separated by semicolons.
# Leading and trailing spaces and tabs in each column are ignored.
# Comments are indicated with hash marks.
@ -40,21 +40,21 @@
# A blank value means the same as the toAsciiNStatus value.
# An explicit [] means no errors.
#
# The line comments currently show visible characters that have been escaped.
#
# The line comments currently show visible characters that have been escaped.
#
# CONFORMANCE:
#
# To test for conformance to UTS #46, an implementation will perform the toUnicode, toAsciiN, and
# toAsciiT operations on the source string, then verify the resulting strings and relevant status
# toAsciiT operations on the source string, then verify the resulting strings and relevant status
# values.
#
# If the implementation converts illegal code points into U+FFFD (as per
# http://www.unicode.org/reports/tr46/proposed.html#Processing) then the string comparisons need to
# account for that by treating U+FFFD in the actual value as a wildcard when comparing to the
# https://www.unicode.org/reports/tr46/#Processing) then the string comparisons need to
# account for that by treating U+FFFD in the actual value as a wildcard when comparing to the
# expected value in the test file.
#
# A status in toUnicode, toAsciiN or toAsciiT is indicated by a value in square brackets,
# such as "[B5 B6]". In such a case, the contents is a list of status codes based on the step
# such as "[B5 B6]". In such a case, the contents is a list of status codes based on the step
# numbers in UTS #46 and IDNA2008, with the following formats.
#
# Pn for Section 4 Processing step n
@ -65,7 +65,7 @@
# Cn for ContextJ (in IDNA2008)
# Xn for toUnicode issues (see below)
#
# Thus C1 = Appendix A.1. ZERO WIDTH NON-JOINER, and C2 = Appendix A.2. ZERO WIDTH JOINER.
# Thus C1 = Appendix A.1. ZERO WIDTH NON-JOINER, and C2 = Appendix A.2. ZERO WIDTH JOINER.
# (The CONTEXTO tests are optional for client software, and not tested here.)
#
# Implementations that allow values of particular input flags to be false would ignore

View file

@ -1,5 +1,5 @@
# LineBreakTest-14.0.0.txt
# Date: 2021-03-08, 06:22:32 GMT
# Date: 2021-08-20, 21:08:45 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -1846,9 +1846,9 @@
× AC00 × 0020 ÷ 2329 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP) ÷ [18.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× AC00 × 0308 ÷ 2329 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× AC00 × 0308 × 0020 ÷ 2329 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× AC00 × 0025 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [27.02] PERCENT SIGN (PO) ÷ [0.3]
× AC00 × 0025 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [27.01] PERCENT SIGN (PO) ÷ [0.3]
× AC00 × 0020 ÷ 0025 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP) ÷ [18.0] PERCENT SIGN (PO) ÷ [0.3]
× AC00 × 0308 × 0025 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.02] PERCENT SIGN (PO) ÷ [0.3]
× AC00 × 0308 × 0025 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.01] PERCENT SIGN (PO) ÷ [0.3]
× AC00 × 0308 × 0020 ÷ 0025 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] PERCENT SIGN (PO) ÷ [0.3]
× AC00 ÷ 0024 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) ÷ [999.0] DOLLAR SIGN (PR) ÷ [0.3]
× AC00 × 0020 ÷ 0024 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP) ÷ [18.0] DOLLAR SIGN (PR) ÷ [0.3]
@ -2018,9 +2018,9 @@
× AC01 × 0020 ÷ 2329 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [7.01] SPACE (SP) ÷ [18.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× AC01 × 0308 ÷ 2329 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× AC01 × 0308 × 0020 ÷ 2329 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× AC01 × 0025 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [27.02] PERCENT SIGN (PO) ÷ [0.3]
× AC01 × 0025 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [27.01] PERCENT SIGN (PO) ÷ [0.3]
× AC01 × 0020 ÷ 0025 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [7.01] SPACE (SP) ÷ [18.0] PERCENT SIGN (PO) ÷ [0.3]
× AC01 × 0308 × 0025 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.02] PERCENT SIGN (PO) ÷ [0.3]
× AC01 × 0308 × 0025 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.01] PERCENT SIGN (PO) ÷ [0.3]
× AC01 × 0308 × 0020 ÷ 0025 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] PERCENT SIGN (PO) ÷ [0.3]
× AC01 ÷ 0024 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) ÷ [999.0] DOLLAR SIGN (PR) ÷ [0.3]
× AC01 × 0020 ÷ 0024 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [7.01] SPACE (SP) ÷ [18.0] DOLLAR SIGN (PR) ÷ [0.3]
@ -3050,9 +3050,9 @@
× 1100 × 0020 ÷ 2329 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP) ÷ [18.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× 1100 × 0308 ÷ 2329 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× 1100 × 0308 × 0020 ÷ 2329 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× 1100 × 0025 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [27.02] PERCENT SIGN (PO) ÷ [0.3]
× 1100 × 0025 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [27.01] PERCENT SIGN (PO) ÷ [0.3]
× 1100 × 0020 ÷ 0025 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP) ÷ [18.0] PERCENT SIGN (PO) ÷ [0.3]
× 1100 × 0308 × 0025 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.02] PERCENT SIGN (PO) ÷ [0.3]
× 1100 × 0308 × 0025 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.01] PERCENT SIGN (PO) ÷ [0.3]
× 1100 × 0308 × 0020 ÷ 0025 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] PERCENT SIGN (PO) ÷ [0.3]
× 1100 ÷ 0024 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) ÷ [999.0] DOLLAR SIGN (PR) ÷ [0.3]
× 1100 × 0020 ÷ 0024 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP) ÷ [18.0] DOLLAR SIGN (PR) ÷ [0.3]
@ -3222,9 +3222,9 @@
× 11A8 × 0020 ÷ 2329 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT) × [7.01] SPACE (SP) ÷ [18.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× 11A8 × 0308 ÷ 2329 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× 11A8 × 0308 × 0020 ÷ 2329 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× 11A8 × 0025 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT) × [27.02] PERCENT SIGN (PO) ÷ [0.3]
× 11A8 × 0025 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT) × [27.01] PERCENT SIGN (PO) ÷ [0.3]
× 11A8 × 0020 ÷ 0025 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT) × [7.01] SPACE (SP) ÷ [18.0] PERCENT SIGN (PO) ÷ [0.3]
× 11A8 × 0308 × 0025 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.02] PERCENT SIGN (PO) ÷ [0.3]
× 11A8 × 0308 × 0025 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.01] PERCENT SIGN (PO) ÷ [0.3]
× 11A8 × 0308 × 0020 ÷ 0025 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] PERCENT SIGN (PO) ÷ [0.3]
× 11A8 ÷ 0024 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT) ÷ [999.0] DOLLAR SIGN (PR) ÷ [0.3]
× 11A8 × 0020 ÷ 0024 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT) × [7.01] SPACE (SP) ÷ [18.0] DOLLAR SIGN (PR) ÷ [0.3]
@ -3394,9 +3394,9 @@
× 1160 × 0020 ÷ 2329 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [7.01] SPACE (SP) ÷ [18.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× 1160 × 0308 ÷ 2329 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× 1160 × 0308 × 0020 ÷ 2329 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]
× 1160 × 0025 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [27.02] PERCENT SIGN (PO) ÷ [0.3]
× 1160 × 0025 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [27.01] PERCENT SIGN (PO) ÷ [0.3]
× 1160 × 0020 ÷ 0025 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [7.01] SPACE (SP) ÷ [18.0] PERCENT SIGN (PO) ÷ [0.3]
× 1160 × 0308 × 0025 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.02] PERCENT SIGN (PO) ÷ [0.3]
× 1160 × 0308 × 0025 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.01] PERCENT SIGN (PO) ÷ [0.3]
× 1160 × 0308 × 0020 ÷ 0025 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] PERCENT SIGN (PO) ÷ [0.3]
× 1160 ÷ 0024 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) ÷ [999.0] DOLLAR SIGN (PR) ÷ [0.3]
× 1160 × 0020 ÷ 0024 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [7.01] SPACE (SP) ÷ [18.0] DOLLAR SIGN (PR) ÷ [0.3]
@ -4538,13 +4538,13 @@
× 0024 × 0020 ÷ 00A0 ÷ # × [0.3] DOLLAR SIGN (PR) × [7.01] SPACE (SP) ÷ [18.0] NO-BREAK SPACE (GL) ÷ [0.3]
× 0024 × 0308 × 00A0 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [12.2] NO-BREAK SPACE (GL) ÷ [0.3]
× 0024 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] NO-BREAK SPACE (GL) ÷ [0.3]
× 0024 × AC00 ÷ # × [0.3] DOLLAR SIGN (PR) × [27.03] HANGUL SYLLABLE GA (H2) ÷ [0.3]
× 0024 × AC00 ÷ # × [0.3] DOLLAR SIGN (PR) × [27.02] HANGUL SYLLABLE GA (H2) ÷ [0.3]
× 0024 × 0020 ÷ AC00 ÷ # × [0.3] DOLLAR SIGN (PR) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3]
× 0024 × 0308 × AC00 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.03] HANGUL SYLLABLE GA (H2) ÷ [0.3]
× 0024 × 0308 × AC00 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.02] HANGUL SYLLABLE GA (H2) ÷ [0.3]
× 0024 × 0308 × 0020 ÷ AC00 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3]
× 0024 × AC01 ÷ # × [0.3] DOLLAR SIGN (PR) × [27.03] HANGUL SYLLABLE GAG (H3) ÷ [0.3]
× 0024 × AC01 ÷ # × [0.3] DOLLAR SIGN (PR) × [27.02] HANGUL SYLLABLE GAG (H3) ÷ [0.3]
× 0024 × 0020 ÷ AC01 ÷ # × [0.3] DOLLAR SIGN (PR) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE GAG (H3) ÷ [0.3]
× 0024 × 0308 × AC01 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.03] HANGUL SYLLABLE GAG (H3) ÷ [0.3]
× 0024 × 0308 × AC01 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.02] HANGUL SYLLABLE GAG (H3) ÷ [0.3]
× 0024 × 0308 × 0020 ÷ AC01 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE GAG (H3) ÷ [0.3]
× 0024 × 05D0 ÷ # × [0.3] DOLLAR SIGN (PR) × [24.02] HEBREW LETTER ALEF (HL) ÷ [0.3]
× 0024 × 0020 ÷ 05D0 ÷ # × [0.3] DOLLAR SIGN (PR) × [7.01] SPACE (SP) ÷ [18.0] HEBREW LETTER ALEF (HL) ÷ [0.3]
@ -4566,17 +4566,17 @@
× 0024 × 0020 × 002C ÷ # × [0.3] DOLLAR SIGN (PR) × [7.01] SPACE (SP) × [13.02] COMMA (IS) ÷ [0.3]
× 0024 × 0308 × 002C ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [13.03] COMMA (IS) ÷ [0.3]
× 0024 × 0308 × 0020 × 002C ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) × [13.02] COMMA (IS) ÷ [0.3]
× 0024 × 1100 ÷ # × [0.3] DOLLAR SIGN (PR) × [27.03] HANGUL CHOSEONG KIYEOK (JL) ÷ [0.3]
× 0024 × 1100 ÷ # × [0.3] DOLLAR SIGN (PR) × [27.02] HANGUL CHOSEONG KIYEOK (JL) ÷ [0.3]
× 0024 × 0020 ÷ 1100 ÷ # × [0.3] DOLLAR SIGN (PR) × [7.01] SPACE (SP) ÷ [18.0] HANGUL CHOSEONG KIYEOK (JL) ÷ [0.3]
× 0024 × 0308 × 1100 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.03] HANGUL CHOSEONG KIYEOK (JL) ÷ [0.3]
× 0024 × 0308 × 1100 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.02] HANGUL CHOSEONG KIYEOK (JL) ÷ [0.3]
× 0024 × 0308 × 0020 ÷ 1100 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] HANGUL CHOSEONG KIYEOK (JL) ÷ [0.3]
× 0024 × 11A8 ÷ # × [0.3] DOLLAR SIGN (PR) × [27.03] HANGUL JONGSEONG KIYEOK (JT) ÷ [0.3]
× 0024 × 11A8 ÷ # × [0.3] DOLLAR SIGN (PR) × [27.02] HANGUL JONGSEONG KIYEOK (JT) ÷ [0.3]
× 0024 × 0020 ÷ 11A8 ÷ # × [0.3] DOLLAR SIGN (PR) × [7.01] SPACE (SP) ÷ [18.0] HANGUL JONGSEONG KIYEOK (JT) ÷ [0.3]
× 0024 × 0308 × 11A8 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.03] HANGUL JONGSEONG KIYEOK (JT) ÷ [0.3]
× 0024 × 0308 × 11A8 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.02] HANGUL JONGSEONG KIYEOK (JT) ÷ [0.3]
× 0024 × 0308 × 0020 ÷ 11A8 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] HANGUL JONGSEONG KIYEOK (JT) ÷ [0.3]
× 0024 × 1160 ÷ # × [0.3] DOLLAR SIGN (PR) × [27.03] HANGUL JUNGSEONG FILLER (JV) ÷ [0.3]
× 0024 × 1160 ÷ # × [0.3] DOLLAR SIGN (PR) × [27.02] HANGUL JUNGSEONG FILLER (JV) ÷ [0.3]
× 0024 × 0020 ÷ 1160 ÷ # × [0.3] DOLLAR SIGN (PR) × [7.01] SPACE (SP) ÷ [18.0] HANGUL JUNGSEONG FILLER (JV) ÷ [0.3]
× 0024 × 0308 × 1160 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.03] HANGUL JUNGSEONG FILLER (JV) ÷ [0.3]
× 0024 × 0308 × 1160 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [27.02] HANGUL JUNGSEONG FILLER (JV) ÷ [0.3]
× 0024 × 0308 × 0020 ÷ 1160 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] HANGUL JUNGSEONG FILLER (JV) ÷ [0.3]
× 0024 × 000A ÷ # × [0.3] DOLLAR SIGN (PR) × [6.0] <LINE FEED (LF)> (LF) ÷ [0.3]
× 0024 × 0020 × 000A ÷ # × [0.3] DOLLAR SIGN (PR) × [7.01] SPACE (SP) × [6.0] <LINE FEED (LF)> (LF) ÷ [0.3]
@ -5838,9 +5838,9 @@
× 261D × 0020 ÷ 261D ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [7.01] SPACE (SP) ÷ [18.0] WHITE UP POINTING INDEX (EB) ÷ [0.3]
× 261D × 0308 ÷ 261D ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] WHITE UP POINTING INDEX (EB) ÷ [0.3]
× 261D × 0308 × 0020 ÷ 261D ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] WHITE UP POINTING INDEX (EB) ÷ [0.3]
× 261D × 1F3FB ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [30.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) ÷ [0.3]
× 261D × 1F3FB ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [30.21] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) ÷ [0.3]
× 261D × 0020 ÷ 1F3FB ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [7.01] SPACE (SP) ÷ [18.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) ÷ [0.3]
× 261D × 0308 × 1F3FB ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [9.0] COMBINING DIAERESIS (CM1_CM) × [30.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) ÷ [0.3]
× 261D × 0308 × 1F3FB ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [9.0] COMBINING DIAERESIS (CM1_CM) × [30.21] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) ÷ [0.3]
× 261D × 0308 × 0020 ÷ 1F3FB ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [9.0] COMBINING DIAERESIS (CM1_CM) × [7.01] SPACE (SP) ÷ [18.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) ÷ [0.3]
× 261D × 0029 ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [13.02] RIGHT PARENTHESIS (CP_CP30) ÷ [0.3]
× 261D × 0020 × 0029 ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [7.01] SPACE (SP) × [13.02] RIGHT PARENTHESIS (CP_CP30) ÷ [0.3]
@ -7452,9 +7452,9 @@
× 1160 × 1160 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [26.02] HANGUL JUNGSEONG FILLER (JV) ÷ [0.3]
× 11A8 × 11A8 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT) × [26.03] HANGUL JONGSEONG KIYEOK (JT) ÷ [0.3]
× 1160 × 2024 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [22.0] ONE DOT LEADER (IN) ÷ [0.3]
× 1160 × 0025 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [27.02] PERCENT SIGN (PO) ÷ [0.3]
× 0024 × 1160 ÷ # × [0.3] DOLLAR SIGN (PR) × [27.03] HANGUL JUNGSEONG FILLER (JV) ÷ [0.3]
× 261D × 1F3FB ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [30.2] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) ÷ [0.3]
× 1160 × 0025 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV) × [27.01] PERCENT SIGN (PO) ÷ [0.3]
× 0024 × 1160 ÷ # × [0.3] DOLLAR SIGN (PR) × [27.02] HANGUL JUNGSEONG FILLER (JV) ÷ [0.3]
× 261D × 1F3FB ÷ # × [0.3] WHITE UP POINTING INDEX (EB) × [30.21] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) ÷ [0.3]
× 0066 × 0069 × 006E × 0061 × 006C ÷ # × [0.3] LATIN SMALL LETTER F (AL) × [28.0] LATIN SMALL LETTER I (AL) × [28.0] LATIN SMALL LETTER N (AL) × [28.0] LATIN SMALL LETTER A (AL) × [28.0] LATIN SMALL LETTER L (AL) ÷ [0.3]
× 0063 × 0061 × 006E × 0027 × 0074 ÷ # × [0.3] LATIN SMALL LETTER C (AL) × [28.0] LATIN SMALL LETTER A (AL) × [28.0] LATIN SMALL LETTER N (AL) × [19.01] APOSTROPHE (QU) × [19.02] LATIN SMALL LETTER T (AL) ÷ [0.3]
× 0063 × 0061 × 006E × 2019 × 0074 ÷ # × [0.3] LATIN SMALL LETTER C (AL) × [28.0] LATIN SMALL LETTER A (AL) × [28.0] LATIN SMALL LETTER N (AL) × [19.01] RIGHT SINGLE QUOTATION MARK (QU) × [19.02] LATIN SMALL LETTER T (AL) ÷ [0.3]
@ -7678,7 +7678,9 @@
× 1F1F7 × 1F1FA ÷ 1F1F8 × 1F1EA ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER R (RI) × [30.11] REGIONAL INDICATOR SYMBOL LETTER U (RI) ÷ [30.13] REGIONAL INDICATOR SYMBOL LETTER S (RI) × [30.11] REGIONAL INDICATOR SYMBOL LETTER E (RI) ÷ [0.3]
× 1F1F7 × 1F1FA × 200B ÷ 1F1F8 × 1F1EA ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER R (RI) × [30.11] REGIONAL INDICATOR SYMBOL LETTER U (RI) × [7.02] ZERO WIDTH SPACE (ZW) ÷ [8.0] REGIONAL INDICATOR SYMBOL LETTER S (RI) × [30.12] REGIONAL INDICATOR SYMBOL LETTER E (RI) ÷ [0.3]
× 05D0 × 002D × 05D0 ÷ # × [0.3] HEBREW LETTER ALEF (HL) × [21.02] HYPHEN-MINUS (HY) × [21.1] HEBREW LETTER ALEF (HL) ÷ [0.3]
× 1F02C × 1F3FF ÷ # × [0.3] <reserved-1F02C> (Other) × [30.22] EMOJI MODIFIER FITZPATRICK TYPE-6 (EM) ÷ [0.3]
× 00A9 ÷ 1F3FF ÷ # × [0.3] COPYRIGHT SIGN (AL) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (EM) ÷ [0.3]
#
# Lines: 7652
# Lines: 7654
#
# EOF

View file

@ -75,6 +75,9 @@ ZWJ = [:LineBreak = ZWJ:];
OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
# The redundant-looking inner brackets are required for the current parser in the test code.
ExtPictUnassigned = [[\p{Extended_Pictographic}]&[\p{Cn}]];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
@ -213,8 +216,9 @@ LB30a.1: RI CM* RI ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA
LB30a.2: RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
LB30a.3: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;
# LB30b Do not break between Emoji Base and Emoji Modifier
LB30b: EB CM* EM;
# LB30b Do not break between Emoji Base (or potential emoji) and Emoji Modifier
LB30b.1: EB CM* EM;
LB30b.2: ExtPictUnassigned CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks

View file

@ -75,6 +75,9 @@ CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
# The redundant-looking inner brackets are required for the current parser in the test code.
ExtPictUnassigned = [[\p{Extended_Pictographic}]&[\p{Cn}]];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
@ -213,8 +216,9 @@ LB30a.1: RI CM* RI ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA
LB30a.2: RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
LB30a.3: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;
# LB30b Do not break between Emoji Base and Emoji Modifier
LB30b: EB CM* EM;
# LB30b Do not break between Emoji Base (or potential emoji) and Emoji Modifier
LB30b.1: EB CM* EM;
LB30b.2: ExtPictUnassigned CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks

View file

@ -76,6 +76,9 @@ ZWJ = [:LineBreak = ZWJ:];
OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
# The redundant-looking inner brackets are required for the current parser in the test code.
ExtPictUnassigned = [[\p{Extended_Pictographic}]&[\p{Cn}]];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
@ -215,8 +218,9 @@ LB30a.1: RI CM* RI ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA
LB30a.2: RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
LB30a.3: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;
# LB30b Do not break between Emoji Base and Emoji Modifier
LB30b: EB CM* EM;
# LB30b Do not break between Emoji Base (or potential emoji) and Emoji Modifier
LB30b.1: EB CM* EM;
LB30b.2: ExtPictUnassigned CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks

View file

@ -93,6 +93,9 @@ ZWJ = [:LineBreak = ZWJ:];
OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
# The redundant-looking inner brackets are required for the current parser in the test code.
ExtPictUnassigned = [[\p{Extended_Pictographic}]&[\p{Cn}]];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
@ -237,8 +240,9 @@ LB30a.1: RI CM* RI ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA
LB30a.2: RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
LB30a.3: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;
# LB30b Do not break between Emoji Base and Emoji Modifier
LB30b: EB CM* EM;
# LB30b Do not break between Emoji Base (or potential emoji) and Emoji Modifier
LB30b.1: EB CM* EM;
LB30b.2: ExtPictUnassigned CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks

View file

@ -77,6 +77,9 @@ ZWJ = [:LineBreak = ZWJ:];
OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
# The redundant-looking inner brackets are required for the current parser in the test code.
ExtPictUnassigned = [[\p{Extended_Pictographic}]&[\p{Cn}]];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
@ -215,8 +218,9 @@ LB30a.1: RI CM* RI ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA
LB30a.2: RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
LB30a.3: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;
# LB30b Do not break between Emoji Base and Emoji Modifier
LB30b: EB CM* EM;
# LB30b Do not break between Emoji Base (or potential emoji) and Emoji Modifier
LB30b.1: EB CM* EM;
LB30b.2: ExtPictUnassigned CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks

View file

@ -80,6 +80,9 @@ ZWJ = [:LineBreak = ZWJ:];
OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
# The redundant-looking inner brackets are required for the current parser in the test code.
ExtPictUnassigned = [[\p{Extended_Pictographic}]&[\p{Cn}]];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
@ -223,8 +226,9 @@ LB30a.1: RI CM* RI ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA
LB30a.2: RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
LB30a.3: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;
# LB30b Do not break between Emoji Base and Emoji Modifier
LB30b: EB CM* EM;
# LB30b Do not break between Emoji Base (or potential emoji) and Emoji Modifier
LB30b.1: EB CM* EM;
LB30b.2: ExtPictUnassigned CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks

View file

@ -335,22 +335,22 @@ public class SpoofChecker {
+ "\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0"
+ "\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-"
+ "\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-\\u138F\\u1780-"
+ "\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2\\u17D7\\u17DC"
+ "\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF\\u1E00-\\u1E99\\u1E9E\\u1EA0-"
+ "\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-"
+ "\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70\\u1F72\\u1F74\\u1F76\\u1F78"
+ "\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA\\u1FBC\\u1FC2-\\u1FC4\\u1FC6-"
+ "\\u1FC8\\u1FCA\\u1FCC\\u1FD0-\\u1FD2\\u1FD6-\\u1FDA\\u1FE0-\\u1FE2\\u1FE4-"
+ "\\u1FEA\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FF8\\u1FFA\\u1FFC\\u2D27\\u2D2D"
+ "\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE"
+ "\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u3005-\\u3007"
+ "\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E\\u30A1-\\u30FA\\u30FC-\\u30FE"
+ "\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-\\u4DBF\\u4E00-\\u9FFF\\uA67F"
+ "\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793\\uA7AA\\uA7AE\\uA7B8\\uA7B9"
+ "\\uA7C0-\\uA7CA\\uA7D0\\uA7D1\\uA7D3\\uA7D5-\\uA7D9\\uA9E7-\\uA9FE\\uAA60-"
+ "\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-"
+ "\\uAB26\\uAB28-\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F\\uFA11"
+ "\\uFA13\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301"
+ "\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CD\\u17D0\\u17D2\\u17D7"
+ "\\u17DC\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF\\u1E00-\\u1E99\\u1E9E"
+ "\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D"
+ "\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70\\u1F72\\u1F74\\u1F76"
+ "\\u1F78\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA\\u1FBC\\u1FC2-\\u1FC4"
+ "\\u1FC6-\\u1FC8\\u1FCA\\u1FCC\\u1FD0-\\u1FD2\\u1FD6-\\u1FDA\\u1FE0-\\u1FE2"
+ "\\u1FE4-\\u1FEA\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FF8\\u1FFA\\u1FFC\\u2D27"
+ "\\u2D2D\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-"
+ "\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u3005-"
+ "\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E\\u30A1-\\u30FA\\u30FC-"
+ "\\u30FE\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-\\u4DBF\\u4E00-\\u9FFF"
+ "\\uA67F\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793\\uA7AA\\uA7AE\\uA7B8"
+ "\\uA7B9\\uA7C0-\\uA7CA\\uA7D0\\uA7D1\\uA7D3\\uA7D5-\\uA7D9\\uA9E7-\\uA9FE"
+ "\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16"
+ "\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F"
+ "\\uFA11\\uFA13\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301"
+ "\\U00011303\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B11F-"
+ "\\U0001B122\\U0001B150-\\U0001B152\\U0001B164-\\U0001B167\\U0001DF00-"
+ "\\U0001DF1E\\U0001E7E0-\\U0001E7E6\\U0001E7E8-\\U0001E7EB\\U0001E7ED"

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c204ac8ff2fa17bd37859dc9d19fcc0c1f0f350540fb0d9489c632fad4e9c7a5
size 13620342
oid sha256:16109c3147304d51cb8c81783606c644b83a8673ecfdefb9fb6ab05f20b23d39
size 13621236

View file

@ -1,5 +1,5 @@
# CollationTest_CLDR_NON_IGNORABLE_SHORT.txt
# Date: 2021-06-08, 19:47:15 GMT
# Date: 2021-08-17, 17:34:25 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html

View file

@ -1,5 +1,5 @@
# CollationTest_CLDR_SHIFTED_SHORT.txt
# Date: 2021-06-08, 19:47:16 GMT
# Date: 2021-08-17, 17:34:28 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html

View file

@ -1,10 +1,10 @@
# CompositionExclusions-14.0.0.txt
# Date: 2021-03-16, 21:31:00 GMT [KW, LI]
# Date: 2021-03-30, 23:59:00 GMT [KW, LI]
# © 2021 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see https://www.unicode.org/reports/tr44/
#
# This file lists the characters for the Composition Exclusion Table
# defined in UAX #15, Unicode Normalization Forms.
@ -13,7 +13,7 @@
# Unicode Character Database.
#
# For more information, see
# http://www.unicode.org/unicode/reports/tr15/#Primary_Exclusion_List_Table
# https://www.unicode.org/reports/tr15/#Primary_Exclusion_List_Table
#
# For a full derivation of composition exclusions, see the derived property
# Full_Composition_Exclusion in DerivedNormalizationProps.txt

View file

@ -1,5 +1,5 @@
# IdnaTestV2.txt
# Date: 2021-05-29, 21:48:17 GMT
# Date: 2021-08-17, 19:34:01 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -16,7 +16,7 @@
# This file is in UTF-8, where characters may be escaped using the \uXXXX or \x{XXXX}
# convention where they could otherwise have a confusing display.
# These characters include control codes and combining marks.
#
#
# Columns (c1, c2,...) are separated by semicolons.
# Leading and trailing spaces and tabs in each column are ignored.
# Comments are indicated with hash marks.
@ -40,21 +40,21 @@
# A blank value means the same as the toAsciiNStatus value.
# An explicit [] means no errors.
#
# The line comments currently show visible characters that have been escaped.
#
# The line comments currently show visible characters that have been escaped.
#
# CONFORMANCE:
#
# To test for conformance to UTS #46, an implementation will perform the toUnicode, toAsciiN, and
# toAsciiT operations on the source string, then verify the resulting strings and relevant status
# toAsciiT operations on the source string, then verify the resulting strings and relevant status
# values.
#
# If the implementation converts illegal code points into U+FFFD (as per
# http://www.unicode.org/reports/tr46/proposed.html#Processing) then the string comparisons need to
# account for that by treating U+FFFD in the actual value as a wildcard when comparing to the
# https://www.unicode.org/reports/tr46/#Processing) then the string comparisons need to
# account for that by treating U+FFFD in the actual value as a wildcard when comparing to the
# expected value in the test file.
#
# A status in toUnicode, toAsciiN or toAsciiT is indicated by a value in square brackets,
# such as "[B5 B6]". In such a case, the contents is a list of status codes based on the step
# such as "[B5 B6]". In such a case, the contents is a list of status codes based on the step
# numbers in UTS #46 and IDNA2008, with the following formats.
#
# Pn for Section 4 Processing step n
@ -65,7 +65,7 @@
# Cn for ContextJ (in IDNA2008)
# Xn for toUnicode issues (see below)
#
# Thus C1 = Appendix A.1. ZERO WIDTH NON-JOINER, and C2 = Appendix A.2. ZERO WIDTH JOINER.
# Thus C1 = Appendix A.1. ZERO WIDTH NON-JOINER, and C2 = Appendix A.2. ZERO WIDTH JOINER.
# (The CONTEXTO tests are optional for client software, and not tested here.)
#
# Implementations that allow values of particular input flags to be false would ignore

View file

@ -30611,9 +30611,9 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1E7E5;ETHIOPIC SYLLABLE HHYE;Lo;0;L;;;;;N;;;;;
1E7E6;ETHIOPIC SYLLABLE HHYO;Lo;0;L;;;;;N;;;;;
1E7E8;ETHIOPIC SYLLABLE GURAGE HHWA;Lo;0;L;;;;;N;;;;;
1E7E9;ETHIOPIC SYLLABLE HWI;Lo;0;L;;;;;N;;;;;
1E7EA;ETHIOPIC SYLLABLE HWEE;Lo;0;L;;;;;N;;;;;
1E7EB;ETHIOPIC SYLLABLE HWE;Lo;0;L;;;;;N;;;;;
1E7E9;ETHIOPIC SYLLABLE HHWI;Lo;0;L;;;;;N;;;;;
1E7EA;ETHIOPIC SYLLABLE HHWEE;Lo;0;L;;;;;N;;;;;
1E7EB;ETHIOPIC SYLLABLE HHWE;Lo;0;L;;;;;N;;;;;
1E7ED;ETHIOPIC SYLLABLE GURAGE MWI;Lo;0;L;;;;;N;;;;;
1E7EE;ETHIOPIC SYLLABLE GURAGE MWEE;Lo;0;L;;;;;N;;;;;
1E7F0;ETHIOPIC SYLLABLE GURAGE QWI;Lo;0;L;;;;;N;;;;;

View file

@ -1,11 +1,11 @@
# confusables.txt
# Date: 2020-02-13, 01:38:49 GMT
# © 2020 Unicode®, Inc.
# Date: 2021-05-29, 22:09:29 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Security Mechanisms for UTS #39
# Version: 13.0.0
# Version: 14.0.0
#
# For documentation and usage, see http://www.unicode.org/reports/tr39
#
@ -593,7 +593,7 @@ FF40 ; 0027 ; MA #* ( → ' ) FULLWIDTH GRAVE ACCENT → APOSTROPHE # →‘
02B9 ; 0027 ; MA # ( ʹ → ' ) MODIFIER LETTER PRIME → APOSTROPHE #
0374 ; 0027 ; MA # ( ʹ → ' ) GREEK NUMERAL SIGN → APOSTROPHE # →′→
02C8 ; 0027 ; MA # ( ˈ → ' ) MODIFIER LETTER VERTICAL LINE → APOSTROPHE #
02CA ; 0027 ; MA # ( ˊ → ' ) MODIFIER LETTER ACUTE ACCENT → APOSTROPHE # →΄→→ʹ
02CA ; 0027 ; MA # ( ˊ → ' ) MODIFIER LETTER ACUTE ACCENT → APOSTROPHE # →ʹ→→′
02CB ; 0027 ; MA # ( ˋ → ' ) MODIFIER LETTER GRAVE ACCENT → APOSTROPHE # →`→→‘→
02F4 ; 0027 ; MA #* ( ˴ → ' ) MODIFIER LETTER MIDDLE GRAVE ACCENT → APOSTROPHE # →ˋ→→`→→‘→
02BB ; 0027 ; MA # ( ʻ → ' ) MODIFIER LETTER TURNED COMMA → APOSTROPHE # →‘→
@ -1071,7 +1071,7 @@ A714 ; 02EB ; MA #* ( ꜔ → ˫ ) MODIFIER LETTER MID LEFT-STEM TONE BAR → MO
25CB ; 00B0 ; MA #* ( ○ → ° ) WHITE CIRCLE → DEGREE SIGN # →◦→→∘→
25E6 ; 00B0 ; MA #* ( ◦ → ° ) WHITE BULLET → DEGREE SIGN # →∘→
235C ; 00B0 0332 ; MA #* ( ⍜ → °̲ ) APL FUNCTIONAL SYMBOL CIRCLE UNDERBAR → DEGREE SIGN, COMBINING LOW LINE # →○̲→
235C ; 00B0 0332 ; MA #* ( ⍜ → °̲ ) APL FUNCTIONAL SYMBOL CIRCLE UNDERBAR → DEGREE SIGN, COMBINING LOW LINE # →○̲→→∘̲→
2364 ; 00B0 0308 ; MA #* ( ⍤ → °̈ ) APL FUNCTIONAL SYMBOL JOT DIAERESIS → DEGREE SIGN, COMBINING DIAERESIS # →◦̈→→∘̈→
@ -2371,7 +2371,7 @@ A6B1 ; 2C75 ; MA # ( ꚱ → Ⱶ ) BAMUM LETTER NDAA → LATIN CAPITAL LETTER HA
A795 ; A727 ; MA # ( ꞕ → ꜧ ) LATIN SMALL LETTER H WITH PALATAL HOOK → LATIN SMALL LETTER HENG #
02DB ; 0069 ; MA #* ( ˛ → i ) OGONEK → LATIN SMALL LETTER I # →ͺ→→ι→→ι→
2373 ; 0069 ; MA #* ( → i ) APL FUNCTIONAL SYMBOL IOTA → LATIN SMALL LETTER I # →ɩ
2373 ; 0069 ; MA #* ( → i ) APL FUNCTIONAL SYMBOL IOTA → LATIN SMALL LETTER I # →ι
FF49 ; 0069 ; MA # ( → i ) FULLWIDTH LATIN SMALL LETTER I → LATIN SMALL LETTER I # →і→
2170 ; 0069 ; MA # ( → i ) SMALL ROMAN NUMERAL ONE → LATIN SMALL LETTER I #
2139 ; 0069 ; MA # ( → i ) INFORMATION SOURCE → LATIN SMALL LETTER I #

View file

@ -681,61 +681,11 @@ public class RBBITestMonkey extends TestFmwk {
// Order matches that of Unicode UAX 14, Table 1, which makes it a little easier
// to verify that they are all accounted for.
UnicodeSet fBK;
UnicodeSet fCR;
UnicodeSet fLF;
UnicodeSet fCM;
UnicodeSet fNL;
UnicodeSet fSG;
UnicodeSet fWJ;
UnicodeSet fZW;
UnicodeSet fGL;
UnicodeSet fSP;
UnicodeSet fB2;
UnicodeSet fBA;
UnicodeSet fBB;
UnicodeSet fHH;
UnicodeSet fHY;
UnicodeSet fCB;
UnicodeSet fCL;
UnicodeSet fCP;
UnicodeSet fEX;
UnicodeSet fIN;
UnicodeSet fNS;
UnicodeSet fOP;
UnicodeSet fQU;
UnicodeSet fIS;
UnicodeSet fNU;
UnicodeSet fPO;
UnicodeSet fPR;
UnicodeSet fSY;
UnicodeSet fAI;
UnicodeSet fAL;
UnicodeSet fCJ;
UnicodeSet fH2;
UnicodeSet fH3;
UnicodeSet fHL;
UnicodeSet fID;
UnicodeSet fJL;
UnicodeSet fJV;
UnicodeSet fJT;
UnicodeSet fRI;
UnicodeSet fXX;
UnicodeSet fEB;
UnicodeSet fEM;
UnicodeSet fZWJ;
UnicodeSet fOP30;
UnicodeSet fCP30;
StringBuffer fText;
int fOrigPositions;
// XUnicodeSet is like UnicodeSet, except that the method contains(int codePoint) does not
// throw exceptions on out-of-range codePoints. This matches ICU4C behavior.
// The LineMonkey test (ported from ICU4C) relies on this behavior: it uses a value of -1
// to represent a non-codepoint that is not included in any of the property sets.
// This happens for rule 30a.
class XUnicodeSet extends UnicodeSet {
XUnicodeSet(String pattern) { super(pattern); }
XUnicodeSet() { super(); }
@ -746,6 +696,62 @@ public class RBBITestMonkey extends TestFmwk {
}
}
// Declare these variables as XUnicodeSet, not merely as UnicodeSet,
// so that when we copy a new declaration from C++ (where only UnicodeSet exists),
// the missing 'X' prefix is visible;
// and when the prefix is there and we copy a new initializer we get a compiler error.
// (Otherwise we rely on the caller catching the IAE from using codePoint=-1
// and failing with a message that tells us what to do.)
XUnicodeSet fBK;
XUnicodeSet fCR;
XUnicodeSet fLF;
XUnicodeSet fCM;
XUnicodeSet fNL;
XUnicodeSet fSG;
XUnicodeSet fWJ;
XUnicodeSet fZW;
XUnicodeSet fGL;
XUnicodeSet fSP;
XUnicodeSet fB2;
XUnicodeSet fBA;
XUnicodeSet fBB;
XUnicodeSet fHH;
XUnicodeSet fHY;
XUnicodeSet fCB;
XUnicodeSet fCL;
XUnicodeSet fCP;
XUnicodeSet fEX;
XUnicodeSet fIN;
XUnicodeSet fNS;
XUnicodeSet fOP;
XUnicodeSet fQU;
XUnicodeSet fIS;
XUnicodeSet fNU;
XUnicodeSet fPO;
XUnicodeSet fPR;
XUnicodeSet fSY;
XUnicodeSet fAI;
XUnicodeSet fAL;
XUnicodeSet fCJ;
XUnicodeSet fH2;
XUnicodeSet fH3;
XUnicodeSet fHL;
XUnicodeSet fID;
XUnicodeSet fJL;
XUnicodeSet fJV;
XUnicodeSet fJT;
XUnicodeSet fRI;
XUnicodeSet fXX;
XUnicodeSet fEB;
XUnicodeSet fEM;
XUnicodeSet fZWJ;
XUnicodeSet fOP30;
XUnicodeSet fCP30;
XUnicodeSet fExtPictUnassigned;
StringBuffer fText;
int fOrigPositions;
RBBILineMonkey()
{
fCharProperty = UProperty.LINE_BREAK;
@ -795,6 +801,7 @@ public class RBBITestMonkey extends TestFmwk {
fZWJ = new XUnicodeSet("[\\p{Line_break=ZWJ}]");
fOP30 = new XUnicodeSet("[\\p{Line_break=OP}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]");
fCP30 = new XUnicodeSet("[\\p{Line_break=CP}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]");
fExtPictUnassigned = new XUnicodeSet("[\\p{Extended_Pictographic}&\\p{Cn}]");
// Remove dictionary characters.
// The monkey test reference implementation of line break does not replicate the dictionary behavior,
@ -847,7 +854,6 @@ public class RBBITestMonkey extends TestFmwk {
fSets.add(fH3); fClassNames.add("H3");
fSets.add(fHL); fClassNames.add("HL");
fSets.add(fID); fClassNames.add("ID");
fSets.add(fWJ); fClassNames.add("WJ");
fSets.add(fRI); fClassNames.add("RI");
fSets.add(fSG); fClassNames.add("SG");
fSets.add(fEB); fClassNames.add("EB");
@ -856,6 +862,7 @@ public class RBBITestMonkey extends TestFmwk {
// TODO: fOP30 & fCP30 overlap with plain fOP. Probably OK, but fOP/CP chars will be over-represented.
fSets.add(fOP30); fClassNames.add("OP30");
fSets.add(fCP30); fClassNames.add("CP30");
fSets.add(fExtPictUnassigned); fClassNames.add("fExtPictUnassigned");
}
@Override
@ -1308,10 +1315,17 @@ public class RBBITestMonkey extends TestFmwk {
continue;
}
// LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
if (fEB.contains(prevChar) && fEM.contains(thisChar)) {
setAppliedRule(pos, "LB 30b Emoji Base x Emoji Modifier");
continue;
}
if (fExtPictUnassigned.contains(prevChar) && fEM.contains(thisChar)) {
setAppliedRule(pos, "LB30b [\\p{Extended_Pictographic}&\\p{Cn}] × EM");
continue;
}
// LB 31 Break everywhere else
setAppliedRule(pos, "LB 31 Break everywhere else");
break;
@ -2254,7 +2268,18 @@ public class RBBITestMonkey extends TestFmwk {
logln("Line Break Monkey Test");
RBBILineMonkey m = new RBBILineMonkey();
BreakIterator bi = BreakIterator.getLineInstance(Locale.US);
RunMonkey(bi, m, "line", seed, loopCount);
try {
    RunMonkey(bi, m, "line", seed, loopCount);
} catch (IllegalArgumentException e) {
    // Compare with the constant on the left: getMessage() returns null for an
    // IllegalArgumentException created without a detail message, and calling
    // equals() on that null would replace the real failure with a NullPointerException.
    if ("Invalid code point U+-000001".equals(e.getMessage())) {
        // Looks like you used class UnicodeSet instead of class XUnicodeSet
        // (note the leading 'X').
        // See the comment before the definition of class XUnicodeSet.
        errln("Probable program error: use XUnicodeSet in RBBILineMonkey code");
    } else {
        // Any other IllegalArgumentException is unrelated; let it propagate unchanged.
        throw e;
    }
}
}
@Test
@ -2312,7 +2337,18 @@ public class RBBITestMonkey extends TestFmwk {
BreakIterator bi = BreakIterator.getLineInstance(Locale.US);
String rules = bi.toString();
BreakIterator rtbi = new RuleBasedBreakIterator(rules);
RunMonkey(rtbi, m, "line", seed, loopCount);
try {
    RunMonkey(rtbi, m, "line", seed, loopCount);
} catch (IllegalArgumentException e) {
    // Compare with the constant on the left: getMessage() returns null for an
    // IllegalArgumentException created without a detail message, and calling
    // equals() on that null would replace the real failure with a NullPointerException.
    if ("Invalid code point U+-000001".equals(e.getMessage())) {
        // Looks like you used class UnicodeSet instead of class XUnicodeSet
        // (note the leading 'X').
        // See the comment before the definition of class XUnicodeSet.
        errln("Probable program error: use XUnicodeSet in RBBILineMonkey code");
    } else {
        // Any other IllegalArgumentException is unrelated; let it propagate unchanged.
        throw e;
    }
}
}
@Test

View file

@ -6,7 +6,14 @@
# file: line.txt
#
# Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
# Rules derived from Unicode Standard Annex #14 for Unicode 14.0.
# Rules derived from Unicode Standard Annex #14 for Unicode 14.0,
# with the following modification:
#
# Boundaries between hyphens and following letters are suppressed when
# there is a boundary preceding the hyphen. See rule 20.9.
#
# This corresponds to CSS line-break=strict (BCP47 -u-lb-strict).
# It sets characters of class CJ to behave like NS.
#
# Note: Rule syntax and the monkey test itself are still a work in progress.
# They are expected to change with review and the addition of support for rule tailoring.
@ -68,6 +75,9 @@ ZWJ = [:LineBreak = ZWJ:];
OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
# The redundant-looking inner brackets are required for the current parser in the test code.
ExtPictUnassigned = [[\p{Extended_Pictographic}]&[\p{Cn}]];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
@ -172,7 +182,7 @@ LB21.2: BB CM* [^CM CB];
LB21b: SY CM* HL;
LB22.2: . CM* IN;
LB22: . CM* IN;
LB23.1: (AL | HL | CM) CM* NU;
LB23.2: NU CM* (AL | HL);
@ -206,8 +216,9 @@ LB30a.1: RI CM* RI ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA
LB30a.2: RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
LB30a.3: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;
# LB30b Do not break between Emoji Base and Emoji Modifier
LB30b: EB CM* EM;
# LB30b Do not break between Emoji Base (or potential emoji) and Emoji Modifier
LB30b.1: EB CM* EM;
LB30b.2: ExtPictUnassigned CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks

View file

@ -75,6 +75,9 @@ CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
# The redundant-looking inner brackets are required for the current parser in the test code.
ExtPictUnassigned = [[\p{Extended_Pictographic}]&[\p{Cn}]];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
@ -213,8 +216,9 @@ LB30a.1: RI CM* RI ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA
LB30a.2: RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
LB30a.3: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;
# LB30b Do not break between Emoji Base and Emoji Modifier
LB30b: EB CM* EM;
# LB30b Do not break between Emoji Base (or potential emoji) and Emoji Modifier
LB30b.1: EB CM* EM;
LB30b.2: ExtPictUnassigned CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks

View file

@ -76,6 +76,9 @@ ZWJ = [:LineBreak = ZWJ:];
OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
# The redundant-looking inner brackets are required for the current parser in the test code.
ExtPictUnassigned = [[\p{Extended_Pictographic}]&[\p{Cn}]];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
@ -215,8 +218,9 @@ LB30a.1: RI CM* RI ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA
LB30a.2: RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
LB30a.3: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;
# LB30b Do not break between Emoji Base and Emoji Modifier
LB30b: EB CM* EM;
# LB30b Do not break between Emoji Base (or potential emoji) and Emoji Modifier
LB30b.1: EB CM* EM;
LB30b.2: ExtPictUnassigned CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks

View file

@ -93,6 +93,9 @@ ZWJ = [:LineBreak = ZWJ:];
OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
# The redundant-looking inner brackets are required for the current parser in the test code.
ExtPictUnassigned = [[\p{Extended_Pictographic}]&[\p{Cn}]];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
@ -237,8 +240,9 @@ LB30a.1: RI CM* RI ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA
LB30a.2: RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
LB30a.3: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;
# LB30b Do not break between Emoji Base and Emoji Modifier
LB30b: EB CM* EM;
# LB30b Do not break between Emoji Base (or potential emoji) and Emoji Modifier
LB30b.1: EB CM* EM;
LB30b.2: ExtPictUnassigned CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks

View file

@ -77,6 +77,9 @@ ZWJ = [:LineBreak = ZWJ:];
OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
# The redundant-looking inner brackets are required for the current parser in the test code.
ExtPictUnassigned = [[\p{Extended_Pictographic}]&[\p{Cn}]];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
@ -215,8 +218,9 @@ LB30a.1: RI CM* RI ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA
LB30a.2: RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
LB30a.3: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;
# LB30b Do not break between Emoji Base and Emoji Modifier
LB30b: EB CM* EM;
# LB30b Do not break between Emoji Base (or potential emoji) and Emoji Modifier
LB30b.1: EB CM* EM;
LB30b.2: ExtPictUnassigned CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks

View file

@ -80,6 +80,9 @@ ZWJ = [:LineBreak = ZWJ:];
OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
# The redundant-looking inner brackets are required for the current parser in the test code.
ExtPictUnassigned = [[\p{Extended_Pictographic}]&[\p{Cn}]];
# LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
AL = [AL AI SG XX ];
dictionary = SA;
@ -223,8 +226,9 @@ LB30a.1: RI CM* RI ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA
LB30a.2: RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
LB30a.3: RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;
# LB30b Do not break between Emoji Base and Emoji Modifier
LB30b: EB CM* EM;
# LB30b Do not break between Emoji Base (or potential emoji) and Emoji Modifier
LB30b.1: EB CM* EM;
LB30b.2: ExtPictUnassigned CM* EM;
# LB31 Break Everywhere Else.
# Include combining marks

View file

@ -1,4 +1,4 @@
#!/usr/bin/python -B
#!/usr/bin/python3 -B
# -*- coding: utf-8 -*-
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
@ -1033,7 +1033,7 @@ def CompactBlock(b, i):
# For two values with the same savings, pick the one that compares lower,
# to make this deterministic (avoid flip-flopping).
if (savings > max_savings or
(savings == max_savings and value < max_value)):
(savings > 0 and savings == max_savings and value < max_value)):
max_value = value
max_count = count
max_savings = savings