mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-22707 UTC-179-A102 Consider using a macro throughout the rules for [\p{ea=F}\p{ea=W}\p{ea=H}].
This commit is contained in:
parent
20fdebcb35
commit
782d5cc339
1 changed file with 10 additions and 15 deletions
|
@@ -74,12 +74,7 @@ $XX = [:LineBreak = Unknown:];
|
|||
$ZW = [:LineBreak = ZWSpace:];
|
||||
$ZWJ = [:LineBreak = ZWJ:];
|
||||
|
||||
# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
|
||||
# without a formal name. Because ICU rules require multiple uses of the expressions,
|
||||
# give them a single definition with a name
|
||||
|
||||
$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
|
||||
$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
|
||||
$EastAsian = [\p{ea=F}\p{ea=W}\p{ea=H}];
|
||||
|
||||
$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];
|
||||
|
||||
|
@@ -122,7 +117,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM]; # Bases that can't take CMs
|
|||
# AL_FOLLOW set of chars that can unconditionally follow an AL
|
||||
# Needed in rules where stand-alone $CM s are treated as AL.
|
||||
#
|
||||
$AL_FOLLOW = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
|
||||
$AL_FOLLOW = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL [$OP - $EastAsian] $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
|
||||
|
||||
|
||||
#
|
||||
|
@@ -284,12 +279,12 @@ $LB18Breaks = [$LB8Breaks $SP];
|
|||
$LB18NonBreaks $CM* $QU;
|
||||
^$CM+ $QU;
|
||||
|
||||
[$LB18NonBreaks & [\p{ea=F}\p{ea=W}\p{ea=H}] - [$OP $GL]] / [\p{Pi} & $QU] $CM* [ [\p{ea=F}\p{ea=W}\p{ea=H}] - $CM];
|
||||
[$LB18NonBreaks & [\p{ea=F}\p{ea=W}\p{ea=H}] - [$OP $GL]] $CM* $CMX / [\p{Pi} & $QU] $CM* [ [\p{ea=F}\p{ea=W}\p{ea=H}] - $CM];
|
||||
[$LB18NonBreaks & $EastAsian - [$OP $GL]] / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
|
||||
[$LB18NonBreaks & $EastAsian - [$OP $GL]] $CM* $CMX / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
|
||||
|
||||
$QU $CM* .;
|
||||
[$LB18NonBreaks & [\p{ea=F}\p{ea=W}\p{ea=H}]] $CM* [\p{Pf} & $QU] / [ [\p{ea=F}\p{ea=W}\p{ea=H}] - [$NS $BA $EX $CL $IN $IS $GL $CM]];
|
||||
[$LB18NonBreaks & [\p{ea=F}\p{ea=W}\p{ea=H}]] $CM* [\p{Pf} & $QU] $CM* $CMX / [ [\p{ea=F}\p{ea=W}\p{ea=H}] - [$NS $BA $EX $CL $IN $IS $GL $CM]];
|
||||
[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU] / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];
|
||||
[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU] $CM* $CMX / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];
|
||||
|
||||
# LB 20
|
||||
# <break> $CB
|
||||
|
@@ -329,7 +324,7 @@ $BB $CM* $LB20NonBreaks;
|
|||
# LB 21a Do not break after the hyphen in Hebrew + Hyphen + non-Hebrew
|
||||
# HL (HY | [BA - $EastAsian]) x [^HL]
|
||||
#
|
||||
$HL $CM* ($HY | [ $BA - [\p{ea=F}\p{ea=W}\p{ea=H}] ] ) $CM* [^$CB $HL]?;
|
||||
$HL $CM* ($HY | [ $BA - $EastAsian ] ) $CM* [^$CB $HL]?;
|
||||
|
||||
# LB 21b (forward) Don't break between SY and HL
|
||||
# (break between HL and SY already disallowed by LB 13 above)
|
||||
|
@@ -389,9 +384,9 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
|
|||
$IS $CM* ($ALPlus | $HL);
|
||||
|
||||
# LB 30
|
||||
($ALPlus | $HL | $NU) $CM* $OP30;
|
||||
^$CM+ $OP30; # The $CM+ is from rule 10, an unattached CM is treated as AL.
|
||||
$CP30 $CM* ($ALPlus | $HL | $NU);
|
||||
($ALPlus | $HL | $NU) $CM* [$OP - $EastAsian];
|
||||
^$CM+ [$OP - $EastAsian]; # The $CM+ is from rule 10, an unattached CM is treated as AL.
|
||||
[$CP - $EastAsian] $CM* ($ALPlus | $HL | $NU);
|
||||
|
||||
# LB 30a Do not break between regional indicators. Break after pairs of them.
|
||||
# Tricky interaction with LB8a: ZWJ x . together with ZWJ acting like a CM.
|
||||
|
|
Loading…
Add table
Reference in a new issue