From 9846ee3b54f351dfa4667860ebbf0ea68cd47cee Mon Sep 17 00:00:00 2001
From: Andy Heninger
Date: Wed, 18 Jan 2017 19:42:33 +0000
Subject: [PATCH] ICU-12918 Dictionary Break Iterator Assertion Failure

X-SVN-Rev: 39573
---
 Review note (text between the three-dash line and the first diff is not
 part of the commit message): TestBug12918 now records each boundary in
 lastPos, so its "pos <= lastPos" check really verifies that boundaries
 are strictly increasing. Previously lastPos stayed at -1 and the check
 could never fire. Hunk/diffstat counts were adjusted for the one added
 line; the blob ids on the index lines were not regenerated.

 icu4c/source/common/dictbe.cpp         | 19 ++++++++++++++++---
 icu4c/source/test/intltest/rbbitst.cpp | 22 ++++++++++++++++++++++
 icu4c/source/test/intltest/rbbitst.h   |  1 +
 3 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/icu4c/source/common/dictbe.cpp b/icu4c/source/common/dictbe.cpp
index 924f09bd5dd..93cb57cd4dc 100644
--- a/icu4c/source/common/dictbe.cpp
+++ b/icu4c/source/common/dictbe.cpp
@@ -1385,12 +1385,25 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
     // Now that we're done, convert positions in t_boundary[] (indices in
     // the normalized input string) back to indices in the original input UText
     // while reversing t_boundary and pushing values to foundBreaks.
+    int32_t prevCPPos = -1;
+    int32_t prevUTextPos = -1;
     for (int32_t i = numBreaks-1; i >= 0; i--) {
         int32_t cpPos = t_boundary.elementAti(i);
+        U_ASSERT(cpPos > prevCPPos);
         int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart;
-        // Boundaries are added to foundBreaks output in ascending order.
-        U_ASSERT(foundBreaks.size() == 0 ||foundBreaks.peeki() < utextPos);
-        foundBreaks.push(utextPos, status);
+        U_ASSERT(utextPos >= prevUTextPos);
+        if (utextPos > prevUTextPos) {
+            // Boundaries are added to foundBreaks output in ascending order.
+            U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
+            foundBreaks.push(utextPos, status);
+        } else {
+            // Normalization expanded the input text, the dictionary found a boundary
+            // within the expansion, giving two boundaries with the same index in the
+            // original text. Ignore the second. See ticket #12918.
+            --numBreaks;
+        }
+        prevCPPos = cpPos;
+        prevUTextPos = utextPos;
     }
 
     // inString goes out of scope
diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp
index a3102c12d84..96a2b7a9edd 100644
--- a/icu4c/source/test/intltest/rbbitst.cpp
+++ b/icu4c/source/test/intltest/rbbitst.cpp
@@ -103,6 +103,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
     TESTCASE_AUTO(TestBug5532);
     TESTCASE_AUTO(TestBug7547);
     TESTCASE_AUTO(TestBug12797);
+    TESTCASE_AUTO(TestBug12918);
     TESTCASE_AUTO_END;
 }
 
@@ -4652,6 +4653,27 @@ void RBBITest::TestBug12797() {
     }
 }
 
+void RBBITest::TestBug12918() {
+    // This test triggers an assertion failure in dictbe.cpp
+    const UChar *crasherString = u"\u3325\u4a16";
+    UErrorCode status = U_ZERO_ERROR;
+    UBreakIterator* iter = ubrk_open(UBRK_WORD, NULL, crasherString, -1, &status);
+    if (U_FAILURE(status)) {
+        errln("%s:%d status = %s", __FILE__, __LINE__, u_errorName(status));
+        return;
+    }
+    ubrk_first(iter);
+    int32_t pos = 0;
+    int32_t lastPos = -1;
+    while((pos = ubrk_next(iter)) != UBRK_DONE) {
+        if (pos <= lastPos) {
+            errln("%s:%d (pos, lastPos) = (%d, %d)", __FILE__, __LINE__, pos, lastPos);
+            break;
+        }
+        lastPos = pos;
+    }
+    ubrk_close(iter);
+}
 
 //
 // TestDebug - A place-holder test for debugging purposes.
diff --git a/icu4c/source/test/intltest/rbbitst.h b/icu4c/source/test/intltest/rbbitst.h
index 6b2c2f0eb72..91c3d99f633 100644
--- a/icu4c/source/test/intltest/rbbitst.h
+++ b/icu4c/source/test/intltest/rbbitst.h
@@ -76,6 +76,7 @@ public:
     void TestBug9983();
     void TestBug7547();
     void TestBug12797();
+    void TestBug12918();
 
     void TestDebug();
     void TestProperties();