mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-9954 RBBI forward iteration performance improvement. Ensure rules always make some progress; keep the engine out of the fail-safe fallback path for rules that do not advance.
X-SVN-Rev: 40461
This commit is contained in:
parent
75495acb8f
commit
e220fe9dd6
11 changed files with 40 additions and 0 deletions
|
@ -78,6 +78,9 @@ $Prepend [^$Control $CR $LF];
|
|||
^$Prepend* $Regional_Indicator $Regional_Indicator / $Regional_Indicator;
|
||||
^$Prepend* $Regional_Indicator $Regional_Indicator;
|
||||
|
||||
# GB 999 Match a single code point if no other rule applies.
|
||||
.;
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
!!safe_reverse;
|
||||
|
|
|
@ -335,6 +335,9 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
|
|||
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
|
||||
$EB $CM* $EM;
|
||||
|
||||
# LB 31 Break everywhere else.
|
||||
# Match a single code point if no other rule applies.
|
||||
.;
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
|
|
|
@ -344,6 +344,10 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
|
|||
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
|
||||
$EB $CM* $EM;
|
||||
|
||||
# LB 31 Break everywhere else.
|
||||
# Match a single code point if no other rule applies.
|
||||
.;
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
!!safe_reverse;
|
||||
|
|
|
@ -347,6 +347,9 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
|
|||
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
|
||||
$EB $CM* $EM;
|
||||
|
||||
# LB 31 Break everywhere else.
|
||||
# Match a single code point if no other rule applies.
|
||||
.;
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
|
|
|
@ -361,6 +361,10 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
|
|||
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
|
||||
$EB $CM* $EM;
|
||||
|
||||
# LB 31 Break everywhere else.
|
||||
# Match a single code point if no other rule applies.
|
||||
.;
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
!!safe_reverse;
|
||||
|
|
|
@ -346,6 +346,9 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
|
|||
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
|
||||
$EB $CM* $EM;
|
||||
|
||||
# LB 31 Break everywhere else.
|
||||
# Match a single code point if no other rule applies.
|
||||
.;
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
|
|
|
@ -339,6 +339,10 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
|
|||
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
|
||||
$EB $CM* $EM;
|
||||
|
||||
# LB 31 Break everywhere else.
|
||||
# Match a single code point if no other rule applies.
|
||||
.;
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
!!safe_reverse;
|
||||
|
|
|
@ -345,6 +345,10 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
|
|||
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
|
||||
$EB $CM* $EM;
|
||||
|
||||
# LB 31 Break everywhere else.
|
||||
# Match a single code point if no other rule applies.
|
||||
.;
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
!!safe_reverse;
|
||||
|
|
|
@ -342,6 +342,10 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
|
|||
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
|
||||
$EB $CM* $EM;
|
||||
|
||||
# LB 31 Break everywhere else.
|
||||
# Match a single code point if no other rule applies.
|
||||
.;
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
!!safe_reverse;
|
||||
|
|
|
@ -194,6 +194,10 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b)
|
|||
$HangulSyllable $HangulSyllable {200};
|
||||
$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
|
||||
|
||||
# Rule 999
|
||||
# Match a single code point if no other rule applies.
|
||||
.;
|
||||
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
|
|
|
@ -194,6 +194,10 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b)
|
|||
$HangulSyllable $HangulSyllable {200};
|
||||
$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
|
||||
|
||||
# Rule 999
|
||||
# Match a single code point if no other rule applies.
|
||||
.;
|
||||
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue