mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
parent
32d279a437
commit
a67bb90150
4 changed files with 20 additions and 11 deletions
|
@ -1360,16 +1360,18 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
|||
int32_t prevIdx = numCodePts;
|
||||
|
||||
int32_t codeUnitIdx = -1;
|
||||
int32_t prevCodeUnitIdx = -1;
|
||||
int32_t length = -1;
|
||||
for (int32_t i = prev.elementAti(numCodePts); i > 0; i = prev.elementAti(i)) {
|
||||
codeUnitIdx = inString.moveIndex32(0, i);
|
||||
prevCodeUnitIdx = inString.moveIndex32(0, prevIdx);
|
||||
// Calculate the length by using the code unit.
|
||||
length = inString.moveIndex32(0, prevIdx) - codeUnitIdx;
|
||||
length = prevCodeUnitIdx - codeUnitIdx;
|
||||
prevIdx = i;
|
||||
// Keep the breakpoint if the pattern is not in the fSkipSet and continuous Katakana
|
||||
// characters don't occur.
|
||||
if (!fSkipSet.containsKey(inString.tempSubString(codeUnitIdx, length))
|
||||
&& (!isKatakana(inString.char32At(codeUnitIdx -1))
|
||||
&& (!isKatakana(inString.char32At(inString.moveIndex32(codeUnitIdx, -1)))
|
||||
|| !isKatakana(inString.char32At(codeUnitIdx)))) {
|
||||
t_boundary.addElement(i, status);
|
||||
numBreaks++;
|
||||
|
|
4
icu4c/source/test/testdata/rbbitst.txt
vendored
4
icu4c/source/test/testdata/rbbitst.txt
vendored
|
@ -1898,7 +1898,9 @@ Bangkok)•</data>
|
|||
<data>•\uff3b\u643a\u5e2f•\u96fb\u8a71\uff3d•\u6b63\u3057\u3044•\u9078\u629e•</data>
|
||||
#純金製百人一首にサッカーボール -> 純金•製•百人一首に•サッカーボール
|
||||
<data>•\u7D14\u91D1•\u88FD•\u767E\u4EBA\u4E00\u9996\u306B•\u30B5\u30C3\u30AB\u30FC\u30DC\u30FC\u30EB•</data>
|
||||
|
||||
#Kana supplement: 𛁈(U+1B048) -> \uD82C\uDC48, 𛀸(U+1B038) -> \uD82C\uDC38, 𛀙(U+1B019) -> \uD82C\uDC19
|
||||
#𛁈る𛀸(しるこ)、あ𛀙よろし(あかよろし) -> 𛁈る𛀸•(しるこ)、•あ𛀙よろし•(あ•かよろし)
|
||||
<data>•\uD82C\uDC48\u308B\uD82C\uDC38•\uFF08\u3057\u308B\u3053\uFF09\u3001•\u3042\uD82C\uDC19\u3088\u308D\u3057•\uFF08\u3042•\u304B\u3088\u308D\u3057\uFF09•</data>
|
||||
|
||||
####################################################################################
|
||||
#
|
||||
|
|
|
@ -11,6 +11,7 @@ package com.ibm.icu.impl.breakiter;
|
|||
import static com.ibm.icu.impl.CharacterIteration.DONE32;
|
||||
import static com.ibm.icu.impl.CharacterIteration.current32;
|
||||
import static com.ibm.icu.impl.CharacterIteration.next32;
|
||||
import static com.ibm.icu.impl.CharacterIteration.previous32;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.CharacterIterator;
|
||||
|
@ -240,17 +241,18 @@ public class CjkBreakEngine extends DictionaryBreakEngine {
|
|||
t_boundary[numBreaks] = numCodePts;
|
||||
numBreaks++;
|
||||
int prevIdx = numCodePts;
|
||||
int codeUnitIdx = 0, length = 0;
|
||||
int codeUnitIdx = 0, prevCodeUnitIdx = 0, length = 0;
|
||||
for (int i = prev[numCodePts]; i > 0; i = prev[i]) {
|
||||
codeUnitIdx = prenormstr.offsetByCodePoints(0, i);
|
||||
length = prevIdx - i;
|
||||
prevCodeUnitIdx = prenormstr.offsetByCodePoints(0, prevIdx);
|
||||
length = prevCodeUnitIdx - codeUnitIdx;
|
||||
prevIdx = i;
|
||||
String pattern = getPatternFromText(text, s, codeUnitIdx, length);
|
||||
// Keep the breakpoint if the pattern is not in the fSkipSet and continuous Katakana
|
||||
// characters don't occur.
|
||||
text.setIndex(codeUnitIdx - 1);
|
||||
text.setIndex(codeUnitIdx);
|
||||
if (!fSkipSet.contains(pattern)
|
||||
&& (!isKatakana(current32(text)) || !isKatakana(next32(text)))) {
|
||||
&& (!isKatakana(current32(text)) || !isKatakana(previous32(text)))) {
|
||||
t_boundary[numBreaks] = i;
|
||||
numBreaks++;
|
||||
}
|
||||
|
@ -308,11 +310,11 @@ public class CjkBreakEngine extends DictionaryBreakEngine {
|
|||
private String getPatternFromText(CharacterIterator text, StringBuffer sb, int start,
|
||||
int length) {
|
||||
sb.setLength(0);
|
||||
if(length > 0) {
|
||||
if (length > 0) {
|
||||
text.setIndex(start);
|
||||
sb.appendCodePoint(current32(text));
|
||||
for (int j = 1; j < length; j++) {
|
||||
sb.appendCodePoint(next32(text));
|
||||
sb.append(text.current());
|
||||
for (int i = 1; i < length; i++) {
|
||||
sb.append(text.next());
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
|
|
|
@ -1898,6 +1898,9 @@ Bangkok)•</data>
|
|||
<data>•\uff3b\u643a\u5e2f•\u96fb\u8a71\uff3d•\u6b63\u3057\u3044•\u9078\u629e•</data>
|
||||
#純金製百人一首にサッカーボール -> 純金•製•百人一首に•サッカーボール
|
||||
<data>•\u7D14\u91D1•\u88FD•\u767E\u4EBA\u4E00\u9996\u306B•\u30B5\u30C3\u30AB\u30FC\u30DC\u30FC\u30EB•</data>
|
||||
#Kana supplement: 𛁈(U+1B048) -> \uD82C\uDC48, 𛀸(U+1B038) -> \uD82C\uDC38, 𛀙(U+1B019) -> \uD82C\uDC19
|
||||
#𛁈る𛀸(しるこ)、あ𛀙よろし(あかよろし) -> 𛁈る𛀸•(しるこ)、•あ𛀙よろし•(あ•かよろし)
|
||||
<data>•\uD82C\uDC48\u308B\uD82C\uDC38•\uFF08\u3057\u308B\u3053\uFF09\u3001•\u3042\uD82C\uDC19\u3088\u308D\u3057•\uFF08\u3042•\u304B\u3088\u308D\u3057\uFF09•</data>
|
||||
|
||||
|
||||
####################################################################################
|
||||
|
|
Loading…
Add table
Reference in a new issue