mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-11999 BreakIterator, UnhandledBreakEngine consuming too many characters.
X-SVN-Rev: 38669
This commit is contained in:
parent
1f2813e7fa
commit
b5be040dd0
2 changed files with 26 additions and 2 deletions
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2014, International Business Machines Corporation and *
|
||||
* Copyright (C) 2016, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -10,6 +10,7 @@ import static com.ibm.icu.impl.CharacterIteration.DONE32;
|
|||
|
||||
import java.text.CharacterIterator;
|
||||
|
||||
import com.ibm.icu.impl.CharacterIteration;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
|
||||
|
@ -30,7 +31,20 @@ final class UnhandledBreakEngine implements LanguageBreakEngine {
|
|||
|
||||
public int findBreaks(CharacterIterator text, int startPos, int endPos,
|
||||
boolean reverse, int breakType, DictionaryBreakEngine.DequeI foundBreaks) {
|
||||
text.setIndex(endPos);
|
||||
if (breakType >= 0 && breakType < fHandled.length) {
|
||||
int c = CharacterIteration.current32(text);
|
||||
if (reverse) {
|
||||
while (text.getIndex() > startPos && fHandled[breakType].contains(c)) {
|
||||
CharacterIteration.previous32(text);
|
||||
c = CharacterIteration.current32(text);
|
||||
}
|
||||
} else {
|
||||
while (text.getIndex() < endPos && fHandled[breakType].contains(c)) {
|
||||
CharacterIteration.next32(text);
|
||||
c = CharacterIteration.current32(text);
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -274,6 +274,16 @@
|
|||
<word>
|
||||
<data>•ジョージア<400> •</data>
|
||||
|
||||
# Ticket #11999
|
||||
# Unhandled Break Engine was consuming all characters, not just the unhandled ones.
|
||||
# \U00011700 is AHOM LETTER KA. There is no dictionary for AHOM, triggering the unhandled engine,
|
||||
# which then incorrectly also consumed the following Japanese text. (ICU4J only)
|
||||
<word>
|
||||
<locale en>
|
||||
<data>•ロ<400>から<400>売却<400>完了<400>時<400>の<400>時価<400>が<400>提示<400>さ<400>れ<400>て<400>いる<400></data>
|
||||
<data>•\U00011700<200>ロ<400>から<400>売却<400>完了<400>時<400>の<400>時価<400>が<400>提示<400>さ<400>れ<400>て<400>いる<400></data>
|
||||
|
||||
|
||||
########################################################################################
|
||||
#
|
||||
#
|
||||
|
|
Loading…
Add table
Reference in a new issue