mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 17:01:16 +00:00
42 lines
1.4 KiB
Text
42 lines
1.4 KiB
Text
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/
#
# File: Han_Spacedhan.txt
# Generated from CLDR
#

# Only intended for internal use.
# Make sure Han characters are normalized, including characters whose decompositions contain them.
# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:Ideographic:]-[:sc=Han:]
# where XXX is the resolved [:Ideographic:][:sc=Han:]. It needs updating with each Unicode release!
# Pass 1: run NFKC, restricted by the leading filter set to the explicitly
# listed characters plus everything that is [:Ideographic:] or [:sc=Han:].
# Characters outside the filter are left untouched by this pass.
:: [[、。々《-』〜・㆒-㆟㈠-㉇㊀-㊰㋀-㋋ ㍘-㍰㍻-㍿㏠-㏾🈐-🈒🈔-🈺🉀-🉈🉐🉑][:Ideographic:][:sc=Han:]] nfkc;
# Pass 2: convert fullwidth characters to their halfwidth counterparts before
# the punctuation and spacing rules below run.
:: fullwidth-halfwidth;
# Forward-only (→) mappings from CJK punctuation to Western look-alikes.
#
# Marks that also exist in the Halfwidth Forms block are listed twice: once as
# the regular CJK code point and once as the halfwidth code point. The
# halfwidth forms are written as \u escapes so that the two rules of a pair
# stay distinct even if this file is run through compatibility normalization
# (which folds U+FF61..U+FF65 onto the CJK code points).
# NOTE(review): in the copy under review both rules of each pair were
# textually identical, which makes the second rule unreachable; the halfwidth
# code points below are the presumed originals -- confirm against the CLDR
# source before regenerating.

# Full stop and comma.
。 → '.';        # U+3002 IDEOGRAPHIC FULL STOP
\uFF61 → '.';    # U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP
、 → ',';        # U+3001 IDEOGRAPHIC COMMA
\uFF64 → ',';    # U+FF64 HALFWIDTH IDEOGRAPHIC COMMA

# Angle brackets become guillemets.
《 → '«';
》 → '»';
〈 → '‹';
〉 → '›';

# Corner brackets become curly quotes (single for plain, double for white).
「 → '‘';        # U+300C LEFT CORNER BRACKET
」 → '’';        # U+300D RIGHT CORNER BRACKET
\uFF62 → '‘';    # U+FF62 HALFWIDTH LEFT CORNER BRACKET
\uFF63 → '’';    # U+FF63 HALFWIDTH RIGHT CORNER BRACKET
『 → '“';
』 → '”';

# Middle dot becomes U+2027 HYPHENATION POINT.
・ → '‧';        # U+30FB KATAKANA MIDDLE DOT
\uFF65 → '‧';    # U+FF65 HALFWIDTH KATAKANA MIDDLE DOT

# Iteration mark (U+3005) becomes U+24F6 DOUBLE CIRCLED DIGIT TWO.
々 → '⓶';

# Wave dash (U+301C) becomes ASCII tilde.
〜 → '~';
# $terminalPunct: punctuation that closes a phrase.
#   - ASCII . , : ; ? !            (backslash-escaped)
#   - their fullwidth forms         U+FF0E U+FF0C U+FF1A U+FF1F U+FF01 U+FF1B
#   - CJK full stop and comma, plus their halfwidth forms U+FF61 U+FF64
#   - every closing (Pe) and final-quote (Pf) punctuation character
# The non-ASCII members are written as \u escapes so they cannot be folded
# onto the ASCII members by compatibility normalization.
# NOTE(review): the copy under review listed the ASCII characters a second
# time, unescaped, where the fullwidth forms presumably stood; confirm the
# exact membership against the CLDR source. Set membership is a superset of
# what was there, so nothing previously matched is lost.
$terminalPunct = [\.\,\:\;\?\!\uFF0E\uFF0C\uFF1A\uFF1F\uFF01\uFF1B。、\uFF61\uFF64[:Pe:][:Pf:]];
# $initialPunct: punctuation that opens a phrase -- every opening (Ps) and
# initial-quote (Pi) punctuation character.
$initialPunct = [[:Ps:][:Pi:]];
# Forward direction (→): insert a single space
#   1. after an ideograph or terminal punctuation when a letter follows, and
#   2. after a letter (plus any combining marks) when an ideograph or initial
#      punctuation follows.
# The empty braces {} mark a zero-length match: the text on either side is
# context only, so the rule inserts the space without consuming anything.
[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ;

# Reverse direction only (←): remove the space that sits between an ideograph
# and an adjacent letter. The braces enclose the space being replaced (by
# nothing); the surrounding sets are context.
← [:Ideographic:] { ' ' } [:Letter:] ;
← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;