ICU-21699 Concatenate Katakana chars

See #1962
This commit is contained in:
allensu05 2022-01-19 17:25:45 +00:00 committed by Frank Yung-Fong Tang
parent 470e44c551
commit c9fae4bda4
2 changed files with 8 additions and 3 deletions

View file

@ -1366,8 +1366,11 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
// Calculate the length by using the code unit.
length = inString.moveIndex32(0, prevIdx) - codeUnitIdx;
prevIdx = i;
// Skip the breakpoint if it belongs to the particle or Hiragana.
if (!fSkipSet.containsKey(inString.tempSubString(codeUnitIdx, length))) {
// Keep the breakpoint only if the pattern is not in fSkipSet and the characters at
// codeUnitIdx - 1 and codeUnitIdx are not both Katakana.
if (!fSkipSet.containsKey(inString.tempSubString(codeUnitIdx, length))
&& (!isKatakana(inString.char32At(codeUnitIdx -1))
|| !isKatakana(inString.char32At(codeUnitIdx)))) {
t_boundary.addElement(i, status);
numBreaks++;
}

View file

@ -1892,10 +1892,12 @@ Bangkok)•</data>
<data>•\uff19\u6708\u306b•\u6771\u4eac\u304b\u3089•\u53cb\u9054\u304c•\u904a\u3073\u306b•\u6765\u305f•</data>
#る文字「そうだ、京都」-> る•文字•「そうだ、•京都」•
<data>•\u308b•\u6587\u5b57•\u300c\u305d\u3046\u3060\u3001•\u4eac\u90fd\u300d•</data>
#乗車率90%程度だろうか -> 乗車•率•90%•程度だ•ろうか•
#乗車率90%程度だろうか。 -> 乗車•率•90%•程度だ•ろうか。
<data>•\u4e57\u8eca•\u7387•\uff19\uff10\uff05•\u7a0b\u5ea6\u3060•\u308d\u3046\u304b\u3002•</data>
#[携帯電話]正しい選択 -> [携帯•電話]•正しい•選択•
<data>•\uff3b\u643a\u5e2f•\u96fb\u8a71\uff3d•\u6b63\u3057\u3044•\u9078\u629e•</data>
#純金製百人一首にサッカーボール -> 純金•製•百人一首に•サッカーボール
<data>•\u7D14\u91D1•\u88FD•\u767E\u4EBA\u4E00\u9996\u306B•\u30B5\u30C3\u30AB\u30FC\u30DC\u30FC\u30EB•</data>
####################################################################################