From 4da7ffaa3648386704e1e4c6cbd1447bbac1678f Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Fri, 8 Dec 2023 12:41:52 -0800 Subject: [PATCH] ICU-22580 Address infinity loop in RBBI ICU-22580 Fix tests --- icu4c/source/common/rbbiscan.cpp | 3 ++ icu4c/source/test/intltest/rbbiapts.cpp | 30 ++++++++++++++++++ icu4c/source/test/intltest/rbbiapts.h | 1 + .../com/ibm/icu/text/RBBIRuleScanner.java | 5 ++- .../ibm/icu/dev/test/rbbi/RBBIAPITest.java | 31 +++++++++++++++++++ 5 files changed, 69 insertions(+), 1 deletion(-) diff --git a/icu4c/source/common/rbbiscan.cpp b/icu4c/source/common/rbbiscan.cpp index 336cd943702..df75212a191 100644 --- a/icu4c/source/common/rbbiscan.cpp +++ b/icu4c/source/common/rbbiscan.cpp @@ -934,6 +934,9 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) { } } + if (c.fChar == (UChar32)-1) { // presumably the end-of-rules sentinel (TODO confirm); return before quote mode can mark it escaped + return; + } if (fQuoteMode) { c.fEscaped = true; } diff --git a/icu4c/source/test/intltest/rbbiapts.cpp b/icu4c/source/test/intltest/rbbiapts.cpp index d6d1a890d49..9013f18bb7d 100644 --- a/icu4c/source/test/intltest/rbbiapts.cpp +++ b/icu4c/source/test/intltest/rbbiapts.cpp @@ -881,6 +881,35 @@ void RBBIAPITest::TestBug2190() { delete bi; } +void RBBIAPITest::TestBug22580() { + UParseError parseError; + // Test that a rule consisting of a single ' does not cause an infinite loop + { + UnicodeString rulesString = u"'"; + UErrorCode status=U_ZERO_ERROR; + RuleBasedBreakIterator(rulesString, parseError, status); + } + if (quick) { + return; + } + // Test that any rule of 1 or 2 printable ASCII chars (' ' through '~') does not cause an infinite loop. 
+ // Runs only in exhaustive mode (skipped above when quick is set). + for (char16_t u1 = u' '; u1 <= u'~'; u1++) { + { + UnicodeString rule = u1; + UErrorCode status=U_ZERO_ERROR; + RuleBasedBreakIterator bi (rule, parseError, status); + } + for (char16_t u2 = u' '; u2 <= u'~'; u2++) { + { + UnicodeString rule; + rule.append(u1).append(u2); + UErrorCode status=U_ZERO_ERROR; + RuleBasedBreakIterator bi (rule, parseError, status); + } + } + } +} void RBBIAPITest::TestRegistration() { #if !UCONFIG_NO_SERVICE @@ -1428,6 +1457,7 @@ void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, TESTCASE_AUTO(TestQuoteGrouping); TESTCASE_AUTO(TestRuleStatusVec); TESTCASE_AUTO(TestBug2190); + TESTCASE_AUTO(TestBug22580); #if !UCONFIG_NO_FILE_IO TESTCASE_AUTO(TestRegistration); TESTCASE_AUTO(TestBoilerPlate); diff --git a/icu4c/source/test/intltest/rbbiapts.h b/icu4c/source/test/intltest/rbbiapts.h index d65a2bc5219..dd0613817ac 100644 --- a/icu4c/source/test/intltest/rbbiapts.h +++ b/icu4c/source/test/intltest/rbbiapts.h @@ -83,6 +83,7 @@ public: void TestRuleStatusVec(); void TestBug2190(); + void TestBug22580(); void TestBoilerPlate(); diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/RBBIRuleScanner.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/RBBIRuleScanner.java index bc114b2d2d1..0b90431a3ab 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/RBBIRuleScanner.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/RBBIRuleScanner.java @@ -770,7 +770,7 @@ class RBBIRuleScanner { // These are recognized in all contexts, whether in quoted text or not. // if (c.fChar == '\'') { - if (UTF16.charAt(fRB.fRules, fNextIndex) == '\'') { + if (fNextIndex < fRB.fRules.length() && UTF16.charAt(fRB.fRules, fNextIndex) == '\'') { c.fChar = nextCharLL(); // get nextChar officially so character counts c.fEscaped = true; // stay correct. 
} else { @@ -787,6 +787,9 @@ class RBBIRuleScanner { return; } } + if (c.fChar == -1) { // presumably the end-of-rules sentinel (TODO confirm); return before quote mode can mark it escaped + return; + } if (fQuoteMode) { c.fEscaped = true; diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBIAPITest.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBIAPITest.java index 8064bf841fa..3fdd1449d7a 100644 --- a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBIAPITest.java +++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBIAPITest.java @@ -485,4 +485,35 @@ public class RBBIAPITest extends CoreTestFmwk { bi.setText("Here is some Text"); TestFmwk.assertEquals("Title instance break iterator not correctly instantiated", bi.first(), 0); } + + @Test + public void TestBug22580() { + try { + RuleBasedBreakIterator bi = new RuleBasedBreakIterator("'"); + } catch(IllegalArgumentException e) { + // Ignored: a rejected rule is fine; this test only checks that construction terminates. + } + boolean quick = TestFmwk.getExhaustiveness() <= 5; + if (quick) { + return; + } + // Test that any rule of 1 or 2 printable ASCII chars (' ' through '~', inclusive) does not cause an infinite loop. + // Runs only in exhaustive mode (skipped above when quick is true). + for (char u1 = ' '; u1 <= '~'; u1++) { + try { + char array[] = {u1}; + RuleBasedBreakIterator bi = new RuleBasedBreakIterator(new String(array)); + } catch(IllegalArgumentException e) { + // Ignored: a rejected rule is fine; this test only checks that construction terminates. + } + for (char u2 = ' '; u2 <= '~'; u2++) { + try { + char array[] = {u1, u2}; + RuleBasedBreakIterator bi = new RuleBasedBreakIterator(new String(array)); + } catch(IllegalArgumentException e) { + // Ignored: a rejected rule is fine; this test only checks that construction terminates. + } + } + } + } }