ICU-21878 Sync icu4j's CjkBreakEngine to icu4c's

See #1953
This commit is contained in:
allenwtsu 2021-12-22 15:50:44 +00:00 committed by Frank Yung-Fong Tang
parent 904cf62457
commit 08c3f99c08
3 changed files with 17 additions and 2 deletions

View file

@@ -1361,6 +1361,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
// while reversing t_boundary and pushing values to foundBreaks.
int32_t prevCPPos = -1;
int32_t prevUTextPos = -1;
int correctedNumBreaks = 0;
for (int32_t i = numBreaks-1; i >= 0; i--) {
int32_t cpPos = t_boundary.elementAti(i);
U_ASSERT(cpPos > prevCPPos);
@@ -1369,7 +1370,10 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
if (utextPos > prevUTextPos) {
// Boundaries are added to foundBreaks output in ascending order.
U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
foundBreaks.push(utextPos, status);
if (!(foundBreaks.contains(utextPos) || utextPos == rangeStart)) {
foundBreaks.push(utextPos, status);
correctedNumBreaks++;
}
} else {
// Normalization expanded the input text, the dictionary found a boundary
// within the expansion, giving two boundaries with the same index in the
@@ -1381,9 +1385,14 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
}
(void)prevCPPos; // suppress compiler warnings about unused variable
if (!foundBreaks.isEmpty() && foundBreaks.peeki() == rangeEnd) {
foundBreaks.popi();
correctedNumBreaks--;
}
// inString goes out of scope
// inputMap goes out of scope
return numBreaks;
return correctedNumBreaks;
}
#endif

View file

@@ -796,6 +796,9 @@
<word>
<data>•ジョージア<400> •</data>
<word>
<data>•[<0>携帯<400>電話<400><0>お金<400>が<400>かかる<400>ん<400>です<400>。<0></data>
# Test for #11723
<word>
<data>•アレルギー性<400>結膜炎<400></data>

View file

@@ -796,6 +796,9 @@
<word>
<data>•ジョージア<400> •</data>
<word>
<data>•[<0>携帯<400>電話<400><0>お金<400>が<400>かかる<400>ん<400>です<400>。<0></data>
# Test for #11723
<word>
<data>•アレルギー性<400>結膜炎<400></data>