diff --git a/icu4c/source/common/dictbe.cpp b/icu4c/source/common/dictbe.cpp index 36a35c411a9..6b6d4297ad4 100644 --- a/icu4c/source/common/dictbe.cpp +++ b/icu4c/source/common/dictbe.cpp @@ -1361,6 +1361,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, // while reversing t_boundary and pushing values to foundBreaks. int32_t prevCPPos = -1; int32_t prevUTextPos = -1; + int correctedNumBreaks = 0; for (int32_t i = numBreaks-1; i >= 0; i--) { int32_t cpPos = t_boundary.elementAti(i); U_ASSERT(cpPos > prevCPPos); @@ -1369,7 +1370,10 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, if (utextPos > prevUTextPos) { // Boundaries are added to foundBreaks output in ascending order. U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos); - foundBreaks.push(utextPos, status); + if (!(foundBreaks.contains(utextPos) || utextPos == rangeStart)) { + foundBreaks.push(utextPos, status); + correctedNumBreaks++; + } } else { // Normalization expanded the input text, the dictionary found a boundary // within the expansion, giving two boundaries with the same index in the @@ -1381,9 +1385,14 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, } (void)prevCPPos; // suppress compiler warnings about unused variable + if (!foundBreaks.isEmpty() && foundBreaks.peeki() == rangeEnd) { + foundBreaks.popi(); + correctedNumBreaks--; + } + // inString goes out of scope // inputMap goes out of scope - return numBreaks; + return correctedNumBreaks; } #endif diff --git a/icu4c/source/test/testdata/rbbitst.txt b/icu4c/source/test/testdata/rbbitst.txt index 7fb30c9e8e3..1948360277d 100644 --- a/icu4c/source/test/testdata/rbbitst.txt +++ b/icu4c/source/test/testdata/rbbitst.txt @@ -796,6 +796,9 @@ •ジョージア<400> • + +•[<0>携帯<400>電話<400>]<0>お金<400>が<400>かかる<400>ん<400>です<400>。<0> + # Test for #11723 •アレルギー性<400>結膜炎<400> diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/rbbitst.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/rbbitst.txt index 7fb30c9e8e3..1948360277d 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/rbbitst.txt +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/rbbitst.txt @@ -796,6 +796,9 @@ •ジョージア<400> • + +•[<0>携帯<400>電話<400>]<0>お金<400>が<400>かかる<400>ん<400>です<400>。<0> + # Test for #11723 •アレルギー性<400>結膜炎<400>