ICU-22707 Patch tailored rules (manually for hunks 1 and 6 on loose(_phrase)?_cj)

2025-04-10 07:39:16 +00:00 · 2024-07-15 18:03:56 +02:00 · 2024-07-15 18:03:56 +02:00 · 3a004d400f
commit 3a004d400f
parent 782d5cc339
8 changed files with 288 additions and 152 deletions
--- a/icu4c/source/data/brkitr/rules/line_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_cj.txt
@ -75,12 +75,7 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

-# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
-# without a formal name. Because ICU rules require multiple uses of the expressions,
-# give them a single definition with a name
-
-$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
-$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$EastAsian = [\p{ea=F}\p{ea=W}\p{ea=H}];

 $ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];

@ -123,7 +118,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL [$OP - $EastAsian] $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];


 #
@ -252,6 +247,7 @@ $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

 #
 # LB 15d Do not break before numeric separators (IS), even after spaces.
+# SP IS QU is handled below as part of LB 19.

 [$LB8NonBreaks - $SP] $IS;
 $SP $IS $CM* [$CanFollowIS {eof}];
@ -274,13 +270,22 @@ $LB18NonBreaks = [$LB8NonBreaks - [$SP]];
 $LB18Breaks    = [$LB8Breaks $SP];


-# LB 19
-#         x QU
+# LB 19 and LB 19a.
+# Instead of implementing both as keep-together rules as in UAX #14, we have an
+# East_Asian_Width- and General_Category-insensitive keep-together rule
+# equivalent to the old LB19 × QU and QU ×, and then we poke holes into it based
+# on context.  This avoids having to do manual chaining over multiple characters
+# with many other rules over multiple characters, as a keep-together LB19a would
+# overlap in context with at least LB14, LB15a, LB15b, LB15d, LB30a, and itself.
 $LB18NonBreaks $CM* $QU;
 ^$CM+               $QU;

-#         QU  x
+[$LB18NonBreaks & $EastAsian - [$OP $GL]]           / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+[$LB18NonBreaks & $EastAsian - [$OP $GL]] $CM* $CMX / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+
 $QU $CM* .;
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU]           / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU] $CM* $CMX / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];

 # LB 20
 #        <break>  $CB
@ -288,11 +293,23 @@ $QU $CM* .;
 #
 $LB20NonBreaks = [$LB18NonBreaks - $CB];

-# LB 20.09    Don't break between Hyphens and Letters when there is a break preceding the hyphen.
-#             Originally added as a Finnish tailoring, now promoted to default ICU behavior.
-#             Note: this is not default UAX-14 behaviour. See issue ICU-8151.
+# LB 20a      Don't break between Hyphens and Letters when there is a break preceding the hyphen.
+#             Originally added as a Finnish tailoring, promoted to default ICU behavior (ICU-8151),
+#             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
+$GL ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking CB from LB8a:
+$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB14:
+$OP $CM* $SP+ ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking SP from LB15a:
+($OP $CM* $SP+ | [$OP $QU $GL] $CM*) ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB15a following LB15b:
+$LB8NonBreaks [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+$CAN_CM $CM*  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^$CM+  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;

 # LB 21        x   (BA | HY | NS)
 #           BB x
@ -305,10 +322,10 @@ $LB20NonBreaks $CM* ($BA | $HY | $NS);
 $BB $CM* [^$CB];                                  #  $BB  x
 $BB $CM* $LB20NonBreaks;

-# LB 21a Don't break after Hebrew + Hyphen
-#   HL (HY | BA) x
+# LB 21a Do not break after the hyphen in Hebrew + Hyphen + non-Hebrew
+#   HL (HY | [ BA - $EastAsian ]) x [^HL]
 #
-$HL $CM* ($HY | $BA) $CM* [^$CB]?;
+$HL $CM* ($HY | [ $BA - $EastAsian ] ) $CM* [^$CB $HL]?;

 # LB 21b (forward) Don't break between SY and HL
 # (break between HL and SY already disallowed by LB 13 above)
@ -368,9 +385,9 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP30;
-^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP30 $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* [$OP - $EastAsian];
+^$CM+ [$OP - $EastAsian];         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+[$CP - $EastAsian] $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
--- a/icu4c/source/data/brkitr/rules/line_loose.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose.txt
@ -81,12 +81,7 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

-# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
-# without a formal name. Because ICU rules require multiple uses of the expressions,
-# give them a single definition with a name
-
-$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
-$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$EastAsian = [\p{ea=F}\p{ea=W}\p{ea=H}];

 $ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];

@ -129,7 +124,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL [$OP - $EastAsian] $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];


 #
@ -258,6 +253,7 @@ $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

 #
 # LB 15d Do not break before numeric separators (IS), even after spaces.
+# SP IS QU is handled below as part of LB 19.

 [$LB8NonBreaks - $SP] $IS;
 $SP $IS $CM* [$CanFollowIS {eof}];
@ -282,13 +278,22 @@ $LB18NonBreaks = [$LB8NonBreaks - [$SP]];
 $LB18Breaks    = [$LB8Breaks $SP];


-# LB 19
-#         x QU
+# LB 19 and LB 19a.
+# Instead of implementing both as keep-together rules as in UAX #14, we have an
+# East_Asian_Width- and General_Category-insensitive keep-together rule
+# equivalent to the old LB19 × QU and QU ×, and then we poke holes into it based
+# on context.  This avoids having to do manual chaining over multiple characters
+# with many other rules over multiple characters, as a keep-together LB19a would
+# overlap in context with at least LB14, LB15a, LB15b, LB15d, LB30a, and itself.
 $LB18NonBreaks $CM* $QU;
 ^$CM+               $QU;

-#         QU  x
+[$LB18NonBreaks & $EastAsian - [$OP $GL]]           / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+[$LB18NonBreaks & $EastAsian - [$OP $GL]] $CM* $CMX / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+
 $QU $CM* .;
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU]           / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU] $CM* $CMX / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];

 # LB 20
 #        <break>  $CB
@ -296,11 +301,23 @@ $QU $CM* .;
 #
 $LB20NonBreaks = [$LB18NonBreaks - $CB];

-# LB 20.09    Don't break between Hyphens and Letters when there is a break preceding the hyphen.
-#             Originally added as a Finnish tailoring, now promoted to default ICU behavior.
-#             Note: this is not default UAX-14 behaviour. See issue ICU-8151.
+# LB 20a      Don't break between Hyphens and Letters when there is a break preceding the hyphen.
+#             Originally added as a Finnish tailoring, promoted to default ICU behavior (ICU-8151),
+#             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
+$GL ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking CB from LB8a:
+$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB14:
+$OP $CM* $SP+ ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking SP from LB15a:
+($OP $CM* $SP+ | [$OP $QU $GL] $CM*) ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB15a following LB15b:
+$LB8NonBreaks [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+$CAN_CM $CM*  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^$CM+  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;

 # LB 21        x   (BA | HY | NS)
 #           BB x
@ -314,10 +331,10 @@ $LB20NonBreaks $CM* ($BA | $HY | $NS);
 $BB $CM* [^$CB];                                  #  $BB  x
 $BB $CM* $LB20NonBreaks;

-# LB 21a Don't break after Hebrew + Hyphen
-#   HL (HY | BA) x
+# LB 21a Do not break after the hyphen in Hebrew + Hyphen + non-Hebrew
+#   HL (HY | [ BA - $EastAsian ]) x [^HL]
 #
-$HL $CM* ($HY | $BA) $CM* [^$CB]?;
+$HL $CM* ($HY | [ $BA - $EastAsian ] ) $CM* [^$CB $HL]?;

 # LB 21b (forward) Don't break between SY and HL
 # (break between HL and SY already disallowed by LB 13 above)
@ -378,9 +395,9 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP30;
-^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP30 $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* [$OP - $EastAsian];
+^$CM+ [$OP - $EastAsian];         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+[$CP - $EastAsian] $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
--- a/icu4c/source/data/brkitr/rules/line_loose_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose_cj.txt
@ -93,12 +93,7 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

-# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
-# without a formal name. Because ICU rules require multiple uses of the expressions,
-# give them a single definition with a name
-
-$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
-$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$EastAsian = [\p{ea=F}\p{ea=W}\p{ea=H}];

 $ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];

@ -141,7 +136,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $BAX $HY $NS $IN $NU $PR $PO $POX $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL [$OP - $EastAsian] $QU $BA $BAX $HY $NS $IN $NU $PR $PO $POX $ALPlus];


 #
@ -270,6 +265,7 @@ $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

 #
 # LB 15d Do not break before numeric separators (IS), even after spaces.
+# SP IS QU is handled below as part of LB 19.

 [$LB8NonBreaks - $SP] $IS;
 $SP $IS $CM* [$CanFollowIS {eof}];
@ -294,13 +290,22 @@ $LB18NonBreaks = [$LB8NonBreaks - [$SP]];
 $LB18Breaks    = [$LB8Breaks $SP];


-# LB 19
-#         x QU
+# LB 19 and LB 19a.
+# Instead of implementing both as keep-together rules as in UAX #14, we have an
+# East_Asian_Width- and General_Category-insensitive keep-together rule
+# equivalent to the old LB19 × QU and QU ×, and then we poke holes into it based
+# on context.  This avoids having to do manual chaining over multiple characters
+# with many other rules over multiple characters, as a keep-together LB19a would
+# overlap in context with at least LB14, LB15a, LB15b, LB15d, LB30a, and itself.
 $LB18NonBreaks $CM* $QU;
 ^$CM+               $QU;

-#         QU  x
+[$LB18NonBreaks & $EastAsian - [$OP $GL]]           / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+[$LB18NonBreaks & $EastAsian - [$OP $GL]] $CM* $CMX / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+
 $QU $CM* .;
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU]           / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU] $CM* $CMX / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];

 # LB 20
 #        <break>  $CB
@ -308,11 +313,23 @@ $QU $CM* .;
 #
 $LB20NonBreaks = [$LB18NonBreaks - $CB];

-# LB 20.09    Don't break between Hyphens and Letters when there is a break preceding the hyphen.
-#             Originally added as a Finnish tailoring, now promoted to default ICU behavior.
-#             Note: this is not default UAX-14 behaviour. See issue ICU-8151.
+# LB 20a      Don't break between Hyphens and Letters when there is a break preceding the hyphen.
+#             Originally added as a Finnish tailoring, promoted to default ICU behavior (ICU-8151),
+#             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
+$GL ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking CB from LB8a:
+$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB14:
+$OP $CM* $SP+ ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking SP from LB15a:
+($OP $CM* $SP+ | [$OP $QU $GL] $CM*) ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB15a following LB15b:
+$LB8NonBreaks [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+$CAN_CM $CM*  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^$CM+  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;

 # LB 21        x   (BA | HY | NS)
 #           BB x
@ -328,10 +345,10 @@ $ID $CM* ($BA | $HY | $NS);
 $BB $CM* [^$CB];                                  #  $BB  x
 $BB $CM* $LB20NonBreaks;

-# LB 21a Don't break after Hebrew + Hyphen
-#   HL (HY | BA) x
+# LB 21a Do not break after the hyphen in Hebrew + Hyphen + non-Hebrew
+#   HL (HY | [ BA - $EastAsian ] | BAX) x [^HL]
 #
-$HL $CM* ($HY | $BA | $BAX) $CM* [^$CB]?;
+$HL $CM* ($HY | [ $BA - $EastAsian ] | $BAX ) $CM* [^$CB $HL]?;

 # LB 21b (forward) Don't break between SY and HL
 # (break between HL and SY already disallowed by LB 13 above)
@ -396,9 +413,9 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP30;
-^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP30 $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* [$OP - $EastAsian];
+^$CM+ [$OP - $EastAsian];         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+[$CP - $EastAsian] $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
--- a/icu4c/source/data/brkitr/rules/line_loose_phrase_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose_phrase_cj.txt
@ -95,12 +95,7 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

-# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
-# without a formal name. Because ICU rules require multiple uses of the expressions,
-# give them a single definition with a name
-
-$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
-$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$EastAsian = [\p{ea=F}\p{ea=W}\p{ea=H}];

 $ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];

@ -154,7 +149,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $BAX $HY $NS $IN $NU $PR $PO $POX $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL [$OP - $EastAsian] $QU $BA $BAX $HY $NS $IN $NU $PR $PO $POX $ALPlus];


 #
@ -283,6 +278,7 @@ $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

 #
 # LB 15d Do not break before numeric separators (IS), even after spaces.
+# SP IS QU is handled below as part of LB 19.

 [$LB8NonBreaks - $SP] $IS;
 $SP $IS $CM* [$CanFollowIS {eof}];
@ -307,13 +303,22 @@ $LB18NonBreaks = [$LB8NonBreaks - [$SP]];
 $LB18Breaks    = [$LB8Breaks $SP];


-# LB 19
-#         x QU
+# LB 19 and LB 19a.
+# Instead of implementing both as keep-together rules as in UAX #14, we have an
+# East_Asian_Width- and General_Category-insensitive keep-together rule
+# equivalent to the old LB19 × QU and QU ×, and then we poke holes into it based
+# on context.  This avoids having to do manual chaining over multiple characters
+# with many other rules over multiple characters, as a keep-together LB19a would
+# overlap in context with at least LB14, LB15a, LB15b, LB15d, LB30a, and itself.
 $LB18NonBreaks $CM* $QU;
 ^$CM+               $QU;

-#         QU  x
+[$LB18NonBreaks & $EastAsian - [$OP $GL]]           / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+[$LB18NonBreaks & $EastAsian - [$OP $GL]] $CM* $CMX / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+
 $QU $CM* .;
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU]           / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU] $CM* $CMX / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];

 # LB 20
 #        <break>  $CB
@ -321,11 +326,23 @@ $QU $CM* .;
 #
 $LB20NonBreaks = [$LB18NonBreaks - $CB];

-# LB 20.09    Don't break between Hyphens and Letters when there is a break preceding the hyphen.
-#             Originally added as a Finnish tailoring, now promoted to default ICU behavior.
-#             Note: this is not default UAX-14 behaviour. See issue ICU-8151.
+# LB 20a      Don't break between Hyphens and Letters when there is a break preceding the hyphen.
+#             Originally added as a Finnish tailoring, promoted to default ICU behavior (ICU-8151),
+#             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
+$GL ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking CB from LB8a:
+$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB14:
+$OP $CM* $SP+ ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking SP from LB15a:
+($OP $CM* $SP+ | [$OP $QU $GL] $CM*) ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB15a following LB15b:
+$LB8NonBreaks [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+$CAN_CM $CM*  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^$CM+  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;

 # LB 21        x   (BA | HY | NS)
 #           BB x
@ -341,10 +358,10 @@ $ID $CM* ($BA | $HY | $NS);
 $BB $CM* [^$CB];                                  #  $BB  x
 $BB $CM* $LB20NonBreaks;

-# LB 21a Don't break after Hebrew + Hyphen
-#   HL (HY | BA) x
+# LB 21a Do not break after the hyphen in Hebrew + Hyphen + non-Hebrew
+#   HL (HY | [ BA - $EastAsian ] | BAX) x [^HL]
 #
-$HL $CM* ($HY | $BA | $BAX) $CM* [^$CB]?;
+$HL $CM* ($HY | [ $BA - $EastAsian ] | $BAX ) $CM* [^$CB $HL]?;

 # LB 21b (forward) Don't break between SY and HL
 # (break between HL and SY already disallowed by LB 13 above)
@ -409,9 +426,9 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP30;
-^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP30 $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* [$OP - $EastAsian];
+^$CM+ [$OP - $EastAsian];         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+[$CP - $EastAsian] $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
--- a/icu4c/source/data/brkitr/rules/line_normal.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal.txt
@ -76,12 +76,7 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

-# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
-# without a formal name. Because ICU rules require multiple uses of the expressions,
-# give them a single definition with a name
-
-$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
-$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$EastAsian = [\p{ea=F}\p{ea=W}\p{ea=H}];

 $ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];

@ -124,7 +119,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL [$OP - $EastAsian] $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];


 #
@ -253,6 +248,7 @@ $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

 #
 # LB 15d Do not break before numeric separators (IS), even after spaces.
+# SP IS QU is handled below as part of LB 19.

 [$LB8NonBreaks - $SP] $IS;
 $SP $IS $CM* [$CanFollowIS {eof}];
@ -275,13 +271,22 @@ $LB18NonBreaks = [$LB8NonBreaks - [$SP]];
 $LB18Breaks    = [$LB8Breaks $SP];


-# LB 19
-#         x QU
+# LB 19 and LB 19a.
+# Instead of implementing both as keep-together rules as in UAX #14, we have an
+# East_Asian_Width- and General_Category-insensitive keep-together rule
+# equivalent to the old LB19 × QU and QU ×, and then we poke holes into it based
+# on context.  This avoids having to do manual chaining over multiple characters
+# with many other rules over multiple characters, as a keep-together LB19a would
+# overlap in context with at least LB14, LB15a, LB15b, LB15d, LB30a, and itself.
 $LB18NonBreaks $CM* $QU;
 ^$CM+               $QU;

-#         QU  x
+[$LB18NonBreaks & $EastAsian - [$OP $GL]]           / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+[$LB18NonBreaks & $EastAsian - [$OP $GL]] $CM* $CMX / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+
 $QU $CM* .;
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU]           / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU] $CM* $CMX / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];

 # LB 20
 #        <break>  $CB
@ -289,11 +294,23 @@ $QU $CM* .;
 #
 $LB20NonBreaks = [$LB18NonBreaks - $CB];

-# LB 20.09    Don't break between Hyphens and Letters when there is a break preceding the hyphen.
-#             Originally added as a Finnish tailoring, now promoted to default ICU behavior.
-#             Note: this is not default UAX-14 behaviour. See issue ICU-8151.
+# LB 20a      Don't break between Hyphens and Letters when there is a break preceding the hyphen.
+#             Originally added as a Finnish tailoring, promoted to default ICU behavior (ICU-8151),
+#             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
+$GL ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking CB from LB8a:
+$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB14:
+$OP $CM* $SP+ ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking SP from LB15a:
+($OP $CM* $SP+ | [$OP $QU $GL] $CM*) ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB15a following LB15b:
+$LB8NonBreaks [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+$CAN_CM $CM*  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^$CM+  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;

 # LB 21        x   (BA | HY | NS)
 #           BB x
@ -306,10 +323,10 @@ $LB20NonBreaks $CM* ($BA | $HY | $NS);
 $BB $CM* [^$CB];                                  #  $BB  x
 $BB $CM* $LB20NonBreaks;

-# LB 21a Don't break after Hebrew + Hyphen
-#   HL (HY | BA) x
+# LB 21a Do not break after the hyphen in Hebrew + Hyphen + non-Hebrew
+#   HL (HY | [ BA - $EastAsian ]) x [^HL]
 #
-$HL $CM* ($HY | $BA) $CM* [^$CB]?;
+$HL $CM* ($HY | [ $BA - $EastAsian ] ) $CM* [^$CB $HL]?;

 # LB 21b (forward) Don't break between SY and HL
 # (break between HL and SY already disallowed by LB 13 above)
@ -369,9 +386,9 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP30;
-^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP30 $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* [$OP - $EastAsian];
+^$CM+ [$OP - $EastAsian];         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+[$CP - $EastAsian] $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
--- a/icu4c/source/data/brkitr/rules/line_normal_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal_cj.txt
@ -79,12 +79,7 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

-# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
-# without a formal name. Because ICU rules require multiple uses of the expressions,
-# give them a single definition with a name
-
-$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
-$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$EastAsian = [\p{ea=F}\p{ea=W}\p{ea=H}];

 $ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];

@ -127,7 +122,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL [$OP - $EastAsian] $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];


 #
@ -256,6 +251,7 @@ $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

 #
 # LB 15d Do not break before numeric separators (IS), even after spaces.
+# SP IS QU is handled below as part of LB 19.

 [$LB8NonBreaks - $SP] $IS;
 $SP $IS $CM* [$CanFollowIS {eof}];
@ -280,13 +276,22 @@ $LB18NonBreaks = [$LB8NonBreaks - [$SP]];
 $LB18Breaks    = [$LB8Breaks $SP];


-# LB 19
-#         x QU
+# LB 19 and LB 19a.
+# Instead of implementing both as keep-together rules as in UAX #14, we have an
+# East_Asian_Width- and General_Category-insensitive keep-together rule
+# equivalent to the old LB19 × QU and QU ×, and then we poke holes into it based
+# on context.  This avoids having to do manual chaining over multiple characters
+# with many other rules over multiple characters, as a keep-together LB19a would
+# overlap in context with at least LB14, LB15a, LB15b, LB15d, LB30a, and itself.
 $LB18NonBreaks $CM* $QU;
 ^$CM+               $QU;

-#         QU  x
+[$LB18NonBreaks & $EastAsian - [$OP $GL]]           / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+[$LB18NonBreaks & $EastAsian - [$OP $GL]] $CM* $CMX / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+
 $QU $CM* .;
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU]           / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU] $CM* $CMX / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];

 # LB 20
 #        <break>  $CB
@ -294,11 +299,23 @@ $QU $CM* .;
 #
 $LB20NonBreaks = [$LB18NonBreaks - $CB];

-# LB 20.09    Don't break between Hyphens and Letters when there is a break preceding the hyphen.
-#             Originally added as a Finnish tailoring, now promoted to default ICU behavior.
-#             Note: this is not default UAX-14 behaviour. See issue ICU-8151.
+# LB 20a      Don't break between Hyphens and Letters when there is a break preceding the hyphen.
+#             Originally added as a Finnish tailoring, promoted to default ICU behavior (ICU-8151),
+#             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
+$GL ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking CB from LB8a:
+$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB14:
+$OP $CM* $SP+ ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking SP from LB15a:
+($OP $CM* $SP+ | [$OP $QU $GL] $CM*) ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB15a following LB15b:
+$LB8NonBreaks [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+$CAN_CM $CM*  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^$CM+  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;

 # LB 21        x   (BA | HY | NS)
 #           BB x
@ -312,10 +329,10 @@ $LB20NonBreaks $CM* ($BA | $HY | $NS);
 $BB $CM* [^$CB];                                  #  $BB  x
 $BB $CM* $LB20NonBreaks;

-# LB 21a Don't break after Hebrew + Hyphen
-#   HL (HY | BA) x
+# LB 21a Do not break after the hyphen in Hebrew + Hyphen + non-Hebrew
+#   HL (HY | [ BA - $EastAsian ]) x [^HL]
 #
-$HL $CM* ($HY | $BA) $CM* [^$CB]?;
+$HL $CM* ($HY | [ $BA - $EastAsian ] ) $CM* [^$CB $HL]?;

 # LB 21b (forward) Don't break between SY and HL
 # (break between HL and SY already disallowed by LB 13 above)
@ -375,9 +392,9 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP30;
-^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP30 $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* [$OP - $EastAsian];
+^$CM+ [$OP - $EastAsian];         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+[$CP - $EastAsian] $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
--- a/icu4c/source/data/brkitr/rules/line_normal_phrase_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal_phrase_cj.txt
@ -81,12 +81,7 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

-# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
-# without a formal name. Because ICU rules require multiple uses of the expressions,
-# give them a single definition with a name
-
-$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
-$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$EastAsian = [\p{ea=F}\p{ea=W}\p{ea=H}];

 $ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];

@ -140,7 +135,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL [$OP - $EastAsian] $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];


 #
@ -269,6 +264,7 @@ $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

 #
 # LB 15d Do not break before numeric separators (IS), even after spaces.
+# SP IS QU is handled below as part of LB 19.

 [$LB8NonBreaks - $SP] $IS;
 $SP $IS $CM* [$CanFollowIS {eof}];
@ -293,13 +289,22 @@ $LB18NonBreaks = [$LB8NonBreaks - [$SP]];
 $LB18Breaks    = [$LB8Breaks $SP];


-# LB 19
-#         x QU
+# LB 19 and LB 19a.
+# Instead of implementing both as keep-together rules as in UAX #14, we have an
+# East_Asian_Width and General_Category-insensitive keep-together rule
+# equivalent to the old LB19 × QU and QU ×, and then we poke holes into it based
+# on context.  This avoids having to do manual chaining over multiple characters
+# with many other rules over multiple characters, as a keep-together LB19a would
+# overlap in context with at least LB14, LB15a, LB15b, LB15d, LB30a, and itself.
 $LB18NonBreaks $CM* $QU;
 ^$CM+               $QU;

-#         QU  x
+[$LB18NonBreaks & $EastAsian - [$OP $GL]]           / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+[$LB18NonBreaks & $EastAsian - [$OP $GL]] $CM* $CMX / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+
 $QU $CM* .;
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU]           / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU] $CM* $CMX / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];

 # LB 20
 #        <break>  $CB
@ -307,11 +312,23 @@ $QU $CM* .;
 #
 $LB20NonBreaks = [$LB18NonBreaks - $CB];

-# LB 20.09    Don't break between Hyphens and Letters when there is a break preceding the hyphen.
-#             Originally added as a Finnish tailoring, now promoted to default ICU behavior.
-#             Note: this is not default UAX-14 behaviour. See issue ICU-8151.
+# LB 20a      Don't break between Hyphens and Letters when there is a break preceding the hyphen.
+#             Originally added as a Finnish tailoring, promoted to default ICU behavior (ICU-8151),
+#             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
+$GL ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking CB from LB8a:
+$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB14:
+$OP $CM* $SP+ ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking SP from LB15a:
+($OP $CM* $SP+ | [$OP $QU $GL] $CM*) ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB15a following LB15b:
+$LB8NonBreaks [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+$CAN_CM $CM*  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^$CM+  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;

 # LB 21        x   (BA | HY | NS)
 #           BB x
@ -325,10 +342,10 @@ $LB20NonBreaks $CM* ($BA | $HY | $NS);
 $BB $CM* [^$CB];                                  #  $BB  x
 $BB $CM* $LB20NonBreaks;

-# LB 21a Don't break after Hebrew + Hyphen
-#   HL (HY | BA) x
+# LB 21a Do not break after the hyphen in Hebrew + Hyphen + non-Hebrew
+#   HL (HY | [BA - $EastAsian]) x [^HL]
 #
-$HL $CM* ($HY | $BA) $CM* [^$CB]?;
+$HL $CM* ($HY | [ $BA - $EastAsian ] ) $CM* [^$CB $HL]?;

 # LB 21b (forward) Don't break between SY and HL
 # (break between HL and SY already disallowed by LB 13 above)
@ -388,9 +405,9 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP30;
-^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP30 $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* [$OP - $EastAsian];
+^$CM+ [$OP - $EastAsian];         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+[$CP - $EastAsian] $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
--- a/icu4c/source/data/brkitr/rules/line_phrase_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_phrase_cj.txt
@ -76,12 +76,7 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

-# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
-# without a formal name. Because ICU rules require multiple uses of the expressions,
-# give them a single definition with a name
-
-$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
-$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$EastAsian = [\p{ea=F}\p{ea=W}\p{ea=H}];

 $ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];

@ -135,7 +130,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL [$OP - $EastAsian] $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];


 #
@ -264,6 +259,7 @@ $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

 #
 # LB 15d Do not break before numeric separators (IS), even after spaces.
+# SP IS QU is handled below as part of LB 19.

 [$LB8NonBreaks - $SP] $IS;
 $SP $IS $CM* [$CanFollowIS {eof}];
@ -286,13 +282,22 @@ $LB18NonBreaks = [$LB8NonBreaks - [$SP]];
 $LB18Breaks    = [$LB8Breaks $SP];


-# LB 19
-#         x QU
+# LB 19 and LB 19a.
+# Instead of implementing both as keep-together rules as in UAX #14, we have an
+# East_Asian_Width and General_Category-insensitive keep-together rule
+# equivalent to the old LB19 × QU and QU ×, and then we poke holes into it based
+# on context.  This avoids having to do manual chaining over multiple characters
+# with many other rules over multiple characters, as a keep-together LB19a would
+# overlap in context with at least LB14, LB15a, LB15b, LB15d, LB30a, and itself.
 $LB18NonBreaks $CM* $QU;
 ^$CM+               $QU;

-#         QU  x
+[$LB18NonBreaks & $EastAsian - [$OP $GL]]           / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+[$LB18NonBreaks & $EastAsian - [$OP $GL]] $CM* $CMX / [\p{Pi} & $QU] $CM* [ $EastAsian - $CM];
+
 $QU $CM* .;
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU]           / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];
+[$LB18NonBreaks & $EastAsian] $CM* [\p{Pf} & $QU] $CM* $CMX / [ $EastAsian - [$NS $BA $EX $CL $IN $IS $GL $CM]];

 # LB 20
 #        <break>  $CB
@ -300,11 +305,23 @@ $QU $CM* .;
 #
 $LB20NonBreaks = [$LB18NonBreaks - $CB];

-# LB 20.09    Don't break between Hyphens and Letters when there is a break preceding the hyphen.
-#             Originally added as a Finnish tailoring, now promoted to default ICU behavior.
-#             Note: this is not default UAX-14 behaviour. See issue ICU-8151.
+# LB 20a      Don't break between Hyphens and Letters when there is a break preceding the hyphen.
+#             Originally added as a Finnish tailoring, promoted to default ICU behavior (ICU-8151),
+#             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
+$GL ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking CB from LB8a:
+$CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB14:
+$OP $CM* $SP+ ($HY | $HH) $CM* $ALPlus; 
+# Non-breaking SP from LB15a:
+($OP $CM* $SP+ | [$OP $QU $GL] $CM*) ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+# Non-breaking SP from LB15a following LB15b:
+$LB8NonBreaks [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+$CAN_CM $CM*  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;
+^$CM+  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP ($HY | $HH) $CM* $ALPlus;

 # LB 21        x   (BA | HY | NS)
 #           BB x
@ -317,10 +334,10 @@ $LB20NonBreaks $CM* ($BA | $HY | $NS);
 $BB $CM* [^$CB];                                  #  $BB  x
 $BB $CM* $LB20NonBreaks;

-# LB 21a Don't break after Hebrew + Hyphen
-#   HL (HY | BA) x
+# LB 21a Do not break after the hyphen in Hebrew + Hyphen + non-Hebrew
+#   HL (HY | [BA - $EastAsian]) x [^HL]
 #
-$HL $CM* ($HY | $BA) $CM* [^$CB]?;
+$HL $CM* ($HY | [ $BA - $EastAsian ] ) $CM* [^$CB $HL]?;

 # LB 21b (forward) Don't break between SY and HL
 # (break between HL and SY already disallowed by LB 13 above)
@ -380,9 +397,9 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP30;
-^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP30 $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* [$OP - $EastAsian];
+^$CM+ [$OP - $EastAsian];         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+[$CP - $EastAsian] $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.