diff --git a/icu4c/source/common/dictbe.cpp b/icu4c/source/common/dictbe.cpp index 64b4fbf6391..c0af19ef171 100644 --- a/icu4c/source/common/dictbe.cpp +++ b/icu4c/source/common/dictbe.cpp @@ -1366,8 +1366,11 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, // Calculate the length by using the code unit. length = inString.moveIndex32(0, prevIdx) - codeUnitIdx; prevIdx = i; - // Skip the breakpoint if it belongs to the particle or Hiragana. - if (!fSkipSet.containsKey(inString.tempSubString(codeUnitIdx, length))) { + // Keep the breakpoint unless the pattern is in fSkipSet or the break would fall + // between two consecutive Katakana characters. + if (!fSkipSet.containsKey(inString.tempSubString(codeUnitIdx, length)) + && (!isKatakana(inString.char32At(codeUnitIdx -1)) + || !isKatakana(inString.char32At(codeUnitIdx)))) { t_boundary.addElement(i, status); numBreaks++; } diff --git a/icu4c/source/test/testdata/rbbitst.txt b/icu4c/source/test/testdata/rbbitst.txt index 54c612da22c..1be45e9f3c5 100644 --- a/icu4c/source/test/testdata/rbbitst.txt +++ b/icu4c/source/test/testdata/rbbitst.txt @@ -1892,10 +1892,12 @@ Bangkok)• •\uff19\u6708\u306b•\u6771\u4eac\u304b\u3089•\u53cb\u9054\u304c•\u904a\u3073\u306b•\u6765\u305f• #る文字「そうだ、京都」-> る•文字•「そうだ、•京都」• •\u308b•\u6587\u5b57•\u300c\u305d\u3046\u3060\u3001•\u4eac\u90fd\u300d• -#乗車率90%程度だろうか -> 乗車•率•90%•程度だ•ろうか• +#乗車率90%程度だろうか。 -> 乗車•率•90%•程度だ•ろうか。• •\u4e57\u8eca•\u7387•\uff19\uff10\uff05•\u7a0b\u5ea6\u3060•\u308d\u3046\u304b\u3002• #[携帯電話]正しい選択 -> [携帯•電話]•正しい•選択• •\uff3b\u643a\u5e2f•\u96fb\u8a71\uff3d•\u6b63\u3057\u3044•\u9078\u629e• +#純金製百人一首にサッカーボール -> 純金•製•百人一首に•サッカーボール• +•\u7D14\u91D1•\u88FD•\u767E\u4EBA\u4E00\u9996\u306B•\u30B5\u30C3\u30AB\u30FC\u30DC\u30FC\u30EB• ####################################################################################