ICU-9954 RBBI forward iteration performance improvement. Ensure rules always make some progress; keep the engine out of the fail-safe fallback path for rules that do not advance.

X-SVN-Rev: 40461
This commit is contained in:
Andy Heninger 2017-09-26 20:16:03 +00:00
parent 75495acb8f
commit e220fe9dd6
11 changed files with 40 additions and 0 deletions

View file

@@ -78,6 +78,9 @@ $Prepend [^$Control $CR $LF];
^$Prepend* $Regional_Indicator $Regional_Indicator / $Regional_Indicator;
^$Prepend* $Regional_Indicator $Regional_Indicator;
# GB 999 Match a single code point if no other rule applies.
.;
## -------------------------------------------------
!!safe_reverse;

View file

@@ -335,6 +335,9 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
$EB $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.
.;
## -------------------------------------------------

View file

@@ -344,6 +344,10 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
$EB $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.
.;
## -------------------------------------------------
!!safe_reverse;

View file

@@ -347,6 +347,9 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
$EB $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.
.;
## -------------------------------------------------

View file

@@ -361,6 +361,10 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
$EB $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.
.;
## -------------------------------------------------
!!safe_reverse;

View file

@@ -346,6 +346,9 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
$EB $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.
.;
## -------------------------------------------------

View file

@@ -339,6 +339,10 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
$EB $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.
.;
## -------------------------------------------------
!!safe_reverse;

View file

@@ -345,6 +345,10 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
$EB $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.
.;
## -------------------------------------------------
!!safe_reverse;

View file

@@ -342,6 +342,10 @@ $RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
# LB 30b Do not break between an Emoji Base and an Emoji Modifier
$EB $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.
.;
## -------------------------------------------------
!!safe_reverse;

View file

@@ -194,6 +194,10 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b)
$HangulSyllable $HangulSyllable {200};
$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
# Rule 999
# Match a single code point if no other rule applies.
.;
## -------------------------------------------------

View file

@@ -194,6 +194,10 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b)
$HangulSyllable $HangulSyllable {200};
$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
# Rule 999
# Match a single code point if no other rule applies.
.;
## -------------------------------------------------