From 0bc2ccb78a0b3255634be137862a831c506cada8 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Wed, 24 Jul 2002 19:10:18 +0000 Subject: [PATCH] ICU-45 add word tag value for Ideographics X-SVN-Rev: 9315 --- icu4c/source/data/brkitr/word.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/icu4c/source/data/brkitr/word.txt b/icu4c/source/data/brkitr/word.txt index 1a29a8bb051..dbb900b017f 100644 --- a/icu4c/source/data/brkitr/word.txt +++ b/icu4c/source/data/brkitr/word.txt @@ -118,13 +118,12 @@ $Hangul_Sequence = ((($L+ $LV?) | ($L* $LV)) $V* $T* ) | ($L* $LVT $T*); #################################################################################### # # Word Break Rules. Definitions and Rules specific to word break begin -# Here. Preceding stuff is copied from line or char break rules. +# Here. Preceding definitions are copied from line or char break rules. # #################################################################################### -$LineBreak = [$Ideographic $Hiragana $Katakana]; -$Letter = [[[:L:] [:Sk:]] & [^$LineBreak]]; -$MidLetter = [\u0027 \u2019 \u003a \u0029 \u00ad \u05f3 \u05f4]; - +$LineBreak = [$Ideographic $Hiragana $Katakana]; +$Letter = [[[:L:] [:Sk:]] & [^$LineBreak]]; +$MidLetter = [\u0027 \u2019 \u003a \u0029 \u00ad \u05f3 \u05f4]; # # LetterEx - extended letter, includes combining chars, CGJ sequences, Hangul sequences. @@ -163,11 +162,16 @@ $MidLetNum* ($LetterEx | $MidLetterSequence) $MidLetNum* {200}; # -# Hiragana and KataKana +# Hiragana and Katakana # ($Hiragana $Extend*)+ {300}; ($Katakana $Extend*)+ {300}; +# +# Ideographic Characters. Stand by themselves as words. +# +$Ideographic $Extend* {400}; + # # Everything Else. #