ICU-45 add word tag value for Ideographics

X-SVN-Rev: 9315
This commit is contained in:
Andy Heninger 2002-07-24 19:10:18 +00:00
parent 8714fcad68
commit 0bc2ccb78a

View file

@ -118,13 +118,12 @@ $Hangul_Sequence = ((($L+ $LV?) | ($L* $LV)) $V* $T* ) | ($L* $LVT $T*);
####################################################################################
#
# Word Break Rules. Definitions and Rules specific to word break begin
# Here. Preceding stuff is copied from line or char break rules.
# Here. Preceding definitions are copied from line or char break rules.
#
####################################################################################
# Character sets for the word break rules. $Ideographic, $Hiragana and
# $Katakana are defined outside this excerpt.
#   $LineBreak - union of the $Ideographic, $Hiragana and $Katakana sets.
#   $Letter    - general categories L (letters) and Sk (modifier symbols),
#                intersected with the complement of $LineBreak.
#   $MidLetter - U+0027 APOSTROPHE, U+2019 RIGHT SINGLE QUOTATION MARK,
#                U+003A COLON, U+0029 RIGHT PARENTHESIS, U+00AD SOFT HYPHEN,
#                U+05F3 HEBREW PUNCTUATION GERESH,
#                U+05F4 HEBREW PUNCTUATION GERSHAYIM.
# NOTE(review): U+0029 (right parenthesis) is an unusual member of a
#   mid-letter set; possibly a typo for another code point (e.g. U+2027
#   HYPHENATION POINT) - confirm against UAX #29 before changing.
# NOTE(review): the three definitions appear twice in this view; this looks
#   like the old and new sides of a whitespace-only diff hunk rather than an
#   intentional redefinition - confirm against the real file.
$LineBreak = [$Ideographic $Hiragana $Katakana];
$Letter = [[[:L:] [:Sk:]] & [^$LineBreak]];
$MidLetter = [\u0027 \u2019 \u003a \u0029 \u00ad \u05f3 \u05f4];
$LineBreak = [$Ideographic $Hiragana $Katakana];
$Letter = [[[:L:] [:Sk:]] & [^$LineBreak]];
$MidLetter = [\u0027 \u2019 \u003a \u0029 \u00ad \u05f3 \u05f4];
#
# LetterEx - extended letter, includes combining chars, CGJ sequences, Hangul sequences.
@ -163,11 +162,16 @@ $MidLetNum* ($LetterEx | $MidLetterSequence) $MidLetNum* {200};
#
# Hiragana and KataKana
# Hiragana and Katakana
#
# One or more Hiragana characters, each optionally followed by $Extend
# characters, match as a single unit with tag value 300.
($Hiragana $Extend*)+ {300};
# Same shape for Katakana; it shares tag value 300 with the Hiragana rule.
($Katakana $Extend*)+ {300};
#
# Ideographic Characters. Stand by themselves as words.
# Each match is exactly one $Ideographic character plus any trailing $Extend
# characters, reported with its own tag value, 400.
#
$Ideographic $Extend* {400};
#
# Everything Else.
#