diff --git a/icu4c/source/data/brkitr/rules/word.txt b/icu4c/source/data/brkitr/rules/word.txt index 3066922be1d..3027574d25d 100644 --- a/icu4c/source/data/brkitr/rules/word.txt +++ b/icu4c/source/data/brkitr/rules/word.txt @@ -44,7 +44,7 @@ $Double_Quote = [\p{Word_Break = Double_Quote}]; $MidNumLet = [\p{Word_Break = MidNumLet}]; $MidLetter = [\p{Word_Break = MidLetter}]; $MidNum = [\p{Word_Break = MidNum}]; -$Numeric = [\p{Word_Break = Numeric}]; +$Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079 $ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; $WSegSpace = [\p{Word_Break = WSegSpace}]; $Extended_Pict = [:ExtPict:]; diff --git a/icu4c/source/data/brkitr/rules/word_POSIX.txt b/icu4c/source/data/brkitr/rules/word_POSIX.txt index 79126931ead..bcf127a42aa 100644 --- a/icu4c/source/data/brkitr/rules/word_POSIX.txt +++ b/icu4c/source/data/brkitr/rules/word_POSIX.txt @@ -44,7 +44,7 @@ $Double_Quote = [\p{Word_Break = Double_Quote}]; $MidNumLet = [\p{Word_Break = MidNumLet} - [.]]; $MidLetter = [\p{Word_Break = MidLetter} - [\:]]; $MidNum = [\p{Word_Break = MidNum} [.]]; -$Numeric = [\p{Word_Break = Numeric}]; +$Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079 $ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; $WSegSpace = [\p{Word_Break = WSegSpace}]; $Extended_Pict = [:ExtPict:]; diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index 5467a96b499..3d1c0a8340d 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -1872,7 +1872,7 @@ RBBIWordMonkey::RBBIWordMonkey() fMidNumLetSet = new UnicodeSet(u"[\\p{Word_Break = MidNumLet}]", status); fMidLetterSet = new UnicodeSet(u"[\\p{Word_Break = MidLetter}]", status); fMidNumSet = new UnicodeSet(u"[\\p{Word_Break = MidNum}]", status); - fNumericSet = new UnicodeSet(u"[\\p{Word_Break = Numeric}]", status); + fNumericSet = new UnicodeSet(u"[[\\p{Word_Break = Numeric}][\\uff10-\\uff19]]", status); fFormatSet = new UnicodeSet(u"[\\p{Word_Break = Format}]", status); fExtendNumLetSet = new UnicodeSet(u"[\\p{Word_Break = ExtendNumLet}]", status); fExtendSet = new UnicodeSet(u"[\\p{Word_Break = Extend}]", status); diff --git a/icu4c/source/test/testdata/break_rules/word.txt b/icu4c/source/test/testdata/break_rules/word.txt index fc7bc9b1886..9b3e527ee72 100644 --- a/icu4c/source/test/testdata/break_rules/word.txt +++ b/icu4c/source/test/testdata/break_rules/word.txt @@ -30,7 +30,7 @@ Double_Quote = [\p{Word_Break = Double_Quote}]; MidNumLet = [\p{Word_Break = MidNumLet}]; MidLetter = [\p{Word_Break = MidLetter}]; MidNum = [\p{Word_Break = MidNum}]; -Numeric = [\p{Word_Break = Numeric}]; +Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079; ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; WSegSpace = [\p{Word_Break = WSegSpace}]; Extended_Pict = [:ExtPict:]; diff --git a/icu4c/source/test/testdata/break_rules/word_POSIX.txt b/icu4c/source/test/testdata/break_rules/word_POSIX.txt index 10efc32d210..04bcb321ae9 100644 --- a/icu4c/source/test/testdata/break_rules/word_POSIX.txt +++ b/icu4c/source/test/testdata/break_rules/word_POSIX.txt @@ -29,7 +29,7 @@ Double_Quote = [\p{Word_Break = Double_Quote}]; MidNumLet = [\p{Word_Break = MidNumLet} - [.]]; MidLetter = [\p{Word_Break = MidLetter} - [\:]]; MidNum = [\p{Word_Break = MidNum} [.]]; -Numeric = [\p{Word_Break = Numeric}]; +Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079; ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; WSegSpace = [\p{Word_Break = WSegSpace}]; Extended_Pict = [:ExtPict:]; diff --git a/icu4c/source/test/testdata/rbbitst.txt b/icu4c/source/test/testdata/rbbitst.txt index 63ba172233d..e9f2a32099a 100644 --- a/icu4c/source/test/testdata/rbbitst.txt +++ b/icu4c/source/test/testdata/rbbitst.txt @@ -240,8 +240,7 @@ •aa\N{COMBINING GRAVE ACCENT}a<200> • # fullwidth numeric, midletter characters etc should be treated like their halfwidth counterparts -# •ISN'T<200> •19<100>日<400> -# why was this added with the dbbi stuff? +•ISN'T<200> •19<100>日<400> # to test for bug #4098467 # What follows is a string of Korean characters (I found it in the Yellow Pages diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar index f34485f69f0..1e5b5cb6067 100644 --- a/icu4j/main/shared/data/icudata.jar +++ b/icu4j/main/shared/data/icudata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70f6389c315809aac32ac3145b5fe813e59de7b8ac13f4a8ead60f58c443cca5 -size 12697817 +oid sha256:1b8bb0208f9fd791029d55f17dd9722d7b4062f5478e55c28722cc7188435507 +size 12690372 diff --git a/icu4j/main/shared/data/icutzdata.jar b/icu4j/main/shared/data/icutzdata.jar index 1f0a57452ec..829821222ec 100644 --- a/icu4j/main/shared/data/icutzdata.jar +++ b/icu4j/main/shared/data/icutzdata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f810eeeeb46325f0b314b2ea0d971ee7c3c850e6160660651aff0be8aaaa27f7 +oid sha256:99a35b2f985a8a281b8474cc074195d23bdc6757ea561d082b5fb94a2d749cb2 size 92787 diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java index 7b5803264c8..9a690f9375d 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java @@ -330,7 +330,7 @@ public class RBBITestMonkey extends TestFmwk { fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]"); fMidLetterSet = new UnicodeSet("[\\p{Word_Break = MidLetter}]"); fMidNumSet = new UnicodeSet("[\\p{Word_Break = MidNum}]"); - fNumericSet = new UnicodeSet("[\\p{Word_Break = Numeric}]"); + fNumericSet = new UnicodeSet("[[\\p{Word_Break = Numeric}][\\uFF10-\\uff19]]"); fFormatSet = new UnicodeSet("[\\p{Word_Break = Format}]"); fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]"); fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}]"); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word.txt index fc7bc9b1886..9b3e527ee72 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word.txt +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word.txt @@ -30,7 +30,7 @@ Double_Quote = [\p{Word_Break = Double_Quote}]; MidNumLet = [\p{Word_Break = MidNumLet}]; MidLetter = [\p{Word_Break = MidLetter}]; MidNum = [\p{Word_Break = MidNum}]; -Numeric = [\p{Word_Break = Numeric}]; +Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079; ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; WSegSpace = [\p{Word_Break = WSegSpace}]; Extended_Pict = [:ExtPict:]; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word_POSIX.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word_POSIX.txt index 10efc32d210..04bcb321ae9 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word_POSIX.txt +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word_POSIX.txt @@ -29,7 +29,7 @@ Double_Quote = [\p{Word_Break = Double_Quote}]; MidNumLet = [\p{Word_Break = MidNumLet} - [.]]; MidLetter = [\p{Word_Break = MidLetter} - [\:]]; MidNum = [\p{Word_Break = MidNum} [.]]; -Numeric = [\p{Word_Break = Numeric}]; +Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079; ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; WSegSpace = [\p{Word_Break = WSegSpace}]; Extended_Pict = [:ExtPict:];