diff --git a/docs/userguide/boundaryanalysis/break-rules.md b/docs/userguide/boundaryanalysis/break-rules.md index 678774101e1..bfd756cbb6b 100644 --- a/docs/userguide/boundaryanalysis/break-rules.md +++ b/docs/userguide/boundaryanalysis/break-rules.md @@ -206,15 +206,6 @@ Chaining into a rule can be dis-allowed by beginning that rule with a '`^`'. Rul so marked can begin a match after a preceding boundary or at the start of text, but cannot extend a match via chaining from another rule. -~~The !!LBCMNoChain; statement modifies chaining behavior by preventing chaining -from one rule to another from occurring on any character whose Line Break -property is Combining Mark. This option is subject to change or removal, and -should not be used in general. Within ICU, it is used only with the line break -rules. We hope to replace it with something more general.~~ - -> :point_right: **Note**: `!!LBCMNoChain` is deprecated, and will be removed -> completely from a future version of ICU. - ## Rule Status Values Break rules can be tagged with a number, which is called the *rule status*. @@ -293,7 +284,6 @@ See, for example, this snippet from the [line break rules](https://github.com/un | ~~`!!reverse`~~ | ~~*[deprecated]* The rules that follow are for reverse iteration. No longer needed; any rules in a Reverse rule section are ignored.~~ | | ~~`!!safe_forward`~~ | ~~*[deprecated]* The rules that follow are for safe forward iteration. No longer needed; any rules in such a section are ignored.~~ | | ~~`!!safe_reverse`~~ | ~~*[deprecated]* The rules that follow are for safe reverse iteration. No longer needed; any rules in such a section are ignored.~~ | -| ~~`!!LBCMNoChain`~~ | ~~*[deprecated]* Disable chaining when the overlap character matches `\p{Line_Break=Combining_Mark}`~~ | ## Rule Syntax @@ -377,22 +367,17 @@ Here is the syntax for the boundary rules. (The EBNF Syntax is given below.) exclamation `!`. This syntax is deprecated, and will be removed from a future version of ICU. -2. `!!LBCMNoChain` was a global option that specified that characters with the - line break property of "Combining Character" would not participate in rule - chaining. This option was always considered internal, is deprecated and will - be removed from a future version of ICU. - -3. Naked rule characters. Plain text, in the context of a rule, is treated as +2. Naked rule characters. Plain text, in the context of a rule, is treated as literal text to be matched, much like normal regular expressions. This turns out to be very error prone, has been the source of bugs in released versions of ICU, and is not useful in implementing normal text boundary rules. A future version will reject literal text that is not escaped. -4. Exact reverse rules and safe forward rules: planned changes to the break +3. Exact reverse rules and safe forward rules: planned changes to the break engine implementation will remove the need for exact reverse rules and safe forward rules. -5. `{bof}` and `{eof}`, appearing within `[`sets`]`, match the beginning or ending of +4. `{bof}` and `{eof}`, appearing within `[`sets`]`, match the beginning or ending of the input text, respectively. This is an internal (not documented) feature that will probably be removed in a future version of ICU. They are currently used by the standard rules for word, line and sentence breaking. An diff --git a/icu4c/source/common/rbbirb.cpp b/icu4c/source/common/rbbirb.cpp index 7177254ec4d..92cccc1a339 100644 --- a/icu4c/source/common/rbbirb.cpp +++ b/icu4c/source/common/rbbirb.cpp @@ -66,7 +66,6 @@ RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules, fForwardTable = nullptr; fRuleStatusVals = nullptr; fChainRules = false; - fLBCMNoChain = false; fLookAheadHardBreak = false; fUSetNodes = nullptr; fRuleStatusVals = nullptr; diff --git a/icu4c/source/common/rbbirb.h b/icu4c/source/common/rbbirb.h index d983a184b64..96d3aa643dd 100644 --- a/icu4c/source/common/rbbirb.h +++ b/icu4c/source/common/rbbirb.h @@ -159,9 +159,6 @@ public: UBool fChainRules; // True for chained Unicode TR style rules. // False for traditional regexp rules. - UBool fLBCMNoChain; // True: suppress chaining of rules on - // chars with LineBreak property == CM. - UBool fLookAheadHardBreak; // True: Look ahead matches cause an // immediate break, no continuing for the // longest match. diff --git a/icu4c/source/common/rbbiscan.cpp b/icu4c/source/common/rbbiscan.cpp index 455ace78b80..844b0639099 100644 --- a/icu4c/source/common/rbbiscan.cpp +++ b/icu4c/source/common/rbbiscan.cpp @@ -547,8 +547,6 @@ UBool RBBIRuleScanner::doParseActions(int32_t action) UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart); if (opt == UNICODE_STRING("chain", 5)) { fRB->fChainRules = true; - } else if (opt == UNICODE_STRING("LBCMNoChain", 11)) { - fRB->fLBCMNoChain = true; } else if (opt == UNICODE_STRING("forward", 7)) { fRB->fDefaultTree = &fRB->fForwardTree; } else if (opt == UNICODE_STRING("reverse", 7)) { diff --git a/icu4c/source/common/rbbitblb.cpp b/icu4c/source/common/rbbitblb.cpp index 0c2bcff4e51..8b40136fc7c 100644 --- a/icu4c/source/common/rbbitblb.cpp +++ b/icu4c/source/common/rbbitblb.cpp @@ -458,21 +458,6 @@ void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree, RBBINode *endMarkNod // We've got a node that can end a match. - // !!LBCMNoChain implementation: If this node's val correspond to - // the Line Break $CM char class, don't chain from it. - // TODO: Remove this. !!LBCMNoChain is deprecated, and is not used - // by any of the standard ICU rules. - if (fRB->fLBCMNoChain) { - UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal); - if (c != -1) { - // c == -1 occurs with sets containing only the {eof} marker string. - ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_LINE_BREAK); - if (cLBProp == U_LB_COMBINING_MARK) { - continue; - } - } - } - // Now iterate over the nodes that can start a match, looking for ones // with the same char class as our ending node. RBBINode *startNode; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java index 9ef2e749b87..f29430f04f8 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java @@ -53,9 +53,6 @@ class RBBIRuleBuilder { boolean fChainRules; // True for chained Unicode TR style rules. // False for traditional regexp rules. - boolean fLBCMNoChain; // True: suppress chaining of rules on - // chars with LineBreak property == CM. - boolean fLookAheadHardBreak; // True: Look ahead matches cause an // immediate break, no continuing for the // longest match. diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java index 9249ba86edc..bc114b2d2d1 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java @@ -436,8 +436,6 @@ class RBBIRuleScanner { String opt = fRB.fRules.substring(fOptionStart, fScanIndex); if (opt.equals("chain")) { fRB.fChainRules = true; - } else if (opt.equals("LBCMNoChain")) { - fRB.fLBCMNoChain = true; } else if (opt.equals("forward")) { fRB.fDefaultTree = RBBIRuleBuilder.fForwardTree; } else if (opt.equals("reverse")) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java index 8e934547026..e6aa822c92a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java @@ -441,22 +441,6 @@ class RBBITableBuilder { // We've got a node that can end a match. - // !!LBCMNoChain implementation: If this node's val correspond to - // the Line Break $CM char class, don't chain from it. - // TODO: Remove this. !!LBCMNoChain is deprecated, and is not used - // by any of the standard ICU rules. - if (fRB.fLBCMNoChain) { - int c = this.fRB.fSetBuilder.getFirstChar(endNode.fVal); - if (c != -1) { - // c == -1 occurs with sets containing only the {eof} marker string. - int cLBProp = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK); - if (cLBProp == UCharacter.LineBreak.COMBINING_MARK) { - continue; - } - } - } - - // Now iterate over the nodes that can start a match, looking for ones // with the same char class as our ending node. for (RBBINode startNode : matchStartNodes) {