mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-15 09:45:26 +00:00
ICU-22360 revert portions of #2159 which included @ in ALetter for wordbreak, update tests
(cherry picked from commit 5618203821)
This commit is contained in:
parent 6342f9140a
commit 6e72d90866
15 changed files with 19 additions and 19 deletions
|
@ -38,7 +38,7 @@ $Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
|
|||
$Format = [\p{Word_Break = Format}];
|
||||
$Katakana = [\p{Word_Break = Katakana}];
|
||||
$Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
|
||||
$ALetter = [\p{Word_Break = ALetter} @];
|
||||
$ALetter = [\p{Word_Break = ALetter}];
|
||||
$Single_Quote = [\p{Word_Break = Single_Quote}];
|
||||
$Double_Quote = [\p{Word_Break = Double_Quote}];
|
||||
$MidNumLet = [\p{Word_Break = MidNumLet}];
|
||||
|
|
|
@ -38,7 +38,7 @@ $Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
|
|||
$Format = [\p{Word_Break = Format}];
|
||||
$Katakana = [\p{Word_Break = Katakana}];
|
||||
$Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
|
||||
$ALetter = [\p{Word_Break = ALetter} @];
|
||||
$ALetter = [\p{Word_Break = ALetter}];
|
||||
$Single_Quote = [\p{Word_Break = Single_Quote}];
|
||||
$Double_Quote = [\p{Word_Break = Double_Quote}];
|
||||
$MidNumLet = [\p{Word_Break = MidNumLet} - [.]];
|
||||
|
|
|
@ -38,7 +38,7 @@ $Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
|
|||
$Format = [\p{Word_Break = Format}];
|
||||
$Katakana = [\p{Word_Break = Katakana}];
|
||||
$Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
|
||||
$ALetter = [\p{Word_Break = ALetter} @];
|
||||
$ALetter = [\p{Word_Break = ALetter}];
|
||||
$Single_Quote = [\p{Word_Break = Single_Quote}];
|
||||
$Double_Quote = [\p{Word_Break = Double_Quote}];
|
||||
$MidNumLet = [\p{Word_Break = MidNumLet}];
|
||||
|
|
|
@ -1937,7 +1937,7 @@ RBBIWordMonkey::RBBIWordMonkey()
|
|||
fKatakanaSet = new UnicodeSet(u"[\\p{Word_Break = Katakana}]", status);
|
||||
fRegionalIndicatorSet = new UnicodeSet(u"[\\p{Word_Break = Regional_Indicator}]", status);
|
||||
fHebrew_LetterSet = new UnicodeSet(u"[\\p{Word_Break = Hebrew_Letter}]", status);
|
||||
fALetterSet = new UnicodeSet(u"[\\p{Word_Break = ALetter} @]", status);
|
||||
fALetterSet = new UnicodeSet(u"[\\p{Word_Break = ALetter}]", status);
|
||||
fSingle_QuoteSet = new UnicodeSet(u"[\\p{Word_Break = Single_Quote}]", status);
|
||||
fDouble_QuoteSet = new UnicodeSet(u"[\\p{Word_Break = Double_Quote}]", status);
|
||||
fMidNumLetSet = new UnicodeSet(u"[\\p{Word_Break = MidNumLet}]", status);
|
||||
|
|
|
@ -25,7 +25,7 @@ Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
|
|||
Format = [\p{Word_Break = Format}];
|
||||
Katakana = [\p{Word_Break = Katakana}];
|
||||
Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
|
||||
ALetter = [\p{Word_Break = ALetter} @];
|
||||
ALetter = [\p{Word_Break = ALetter}];
|
||||
Single_Quote = [\p{Word_Break = Single_Quote}];
|
||||
Double_Quote = [\p{Word_Break = Double_Quote}];
|
||||
MidNumLet = [\p{Word_Break = MidNumLet}];
|
||||
|
|
8
icu4c/source/test/testdata/rbbitst.txt
vendored
8
icu4c/source/test/testdata/rbbitst.txt
vendored
|
@ -1586,7 +1586,7 @@ Bangkok)•</data>
|
|||
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx<200>:•yy<200> •or<200> •struct.field<200> \
|
||||
•for<200> •CS<200>-•types<200>.•</data>
|
||||
<data>•\uFF92\uFF76\uFF9E<400> •</data>
|
||||
<data>•xx@yy<200>.•</data>
|
||||
<data>•xx<200>@•yy<200>.•</data>
|
||||
|
||||
<locale en_US_POSIX>
|
||||
<word>
|
||||
|
@ -1594,21 +1594,21 @@ Bangkok)•</data>
|
|||
•for<200> •CS<200>-•types<200>.•</data>
|
||||
<data>•\u06c9<200>\uc799\ufffa•</data>
|
||||
<data>•\uFF92\uFF76\uFF9E<400> •</data>
|
||||
<data>•xx@yy<200>.•</data>
|
||||
<data>•xx<200>@•yy<200>.•</data>
|
||||
|
||||
<locale fi>
|
||||
<word>
|
||||
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx:yy<200> •or<200> •struct.field<200> \
|
||||
•for<200> •CS<200>-•types<200>.•</data>
|
||||
<data>•\uFF92\uFF76\uFF9E<400> •</data>
|
||||
<data>•xx@yy<200>.•</data>
|
||||
<data>•xx<200>@•yy<200>.•</data>
|
||||
|
||||
<locale sv>
|
||||
<word>
|
||||
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx:yy<200> •or<200> •struct.field<200> \
|
||||
•for<200> •CS<200>-•types<200>.•</data>
|
||||
<data>•\uFF92\uFF76\uFF9E<400> •</data>
|
||||
<data>•xx@yy<200>.•</data>
|
||||
<data>•xx<200>@•yy<200>.•</data>
|
||||
|
||||
|
||||
# UBreakIteratorType UBRK_CHARACTER, Locale "th"
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:88808b997ca13e786f5f6bdd95d05d7d9ba3fe6b12f6356b8b15bb4eb49d644e
|
||||
size 14330312
|
||||
oid sha256:9b764b3c6af6c9e8ed18770a1c758f9740aede42bdb435fe6cb3fa3f8a7846af
|
||||
size 14330291
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:affd9c4e150caed2894d4912763ef2cb95249f94e859d2b3298c5636ab124f50
|
||||
oid sha256:57224bd406c99dd7242f9aeac1db8beaf6e0e1520646b4bedab404aa02c896a3
|
||||
size 94829
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d5c5dd3ac8fca302041ec888963b800c61dd4003a8647515e193cec72967f871
|
||||
oid sha256:0e466f0476161bdf5b82d33e164e44d2f0912156436057d53d949ee386bdc79d
|
||||
size 831605
|
||||
|
|
|
@ -400,7 +400,7 @@ public class RBBITestMonkey extends TestFmwk {
|
|||
fRegionalIndicatorSet = new UnicodeSet("[\\p{Word_Break = Regional_Indicator}]");
|
||||
fKatakanaSet = new UnicodeSet("[\\p{Word_Break = Katakana}]");
|
||||
fHebrew_LetterSet = new UnicodeSet("[\\p{Word_Break = Hebrew_Letter}]");
|
||||
fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter} @]");
|
||||
fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}]");
|
||||
fSingle_QuoteSet = new UnicodeSet("[\\p{Word_Break = Single_Quote}]");
|
||||
fDouble_QuoteSet = new UnicodeSet("[\\p{Word_Break = Double_Quote}]");
|
||||
fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]");
|
||||
|
|
|
@ -25,7 +25,7 @@ Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
|
|||
Format = [\p{Word_Break = Format}];
|
||||
Katakana = [\p{Word_Break = Katakana}];
|
||||
Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
|
||||
ALetter = [\p{Word_Break = ALetter} @];
|
||||
ALetter = [\p{Word_Break = ALetter}];
|
||||
Single_Quote = [\p{Word_Break = Single_Quote}];
|
||||
Double_Quote = [\p{Word_Break = Double_Quote}];
|
||||
MidNumLet = [\p{Word_Break = MidNumLet}];
|
||||
|
|
|
@ -1586,7 +1586,7 @@ Bangkok)•</data>
|
|||
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx<200>:•yy<200> •or<200> •struct.field<200> \
|
||||
•for<200> •CS<200>-•types<200>.•</data>
|
||||
<data>•\uFF92\uFF76\uFF9E<400> •</data>
|
||||
<data>•xx@yy<200>.•</data>
|
||||
<data>•xx<200>@•yy<200>.•</data>
|
||||
|
||||
<locale en_US_POSIX>
|
||||
<word>
|
||||
|
@ -1594,21 +1594,21 @@ Bangkok)•</data>
|
|||
•for<200> •CS<200>-•types<200>.•</data>
|
||||
<data>•\u06c9<200>\uc799\ufffa•</data>
|
||||
<data>•\uFF92\uFF76\uFF9E<400> •</data>
|
||||
<data>•xx@yy<200>.•</data>
|
||||
<data>•xx<200>@•yy<200>.•</data>
|
||||
|
||||
<locale fi>
|
||||
<word>
|
||||
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx:yy<200> •or<200> •struct.field<200> \
|
||||
•for<200> •CS<200>-•types<200>.•</data>
|
||||
<data>•\uFF92\uFF76\uFF9E<400> •</data>
|
||||
<data>•xx@yy<200>.•</data>
|
||||
<data>•xx<200>@•yy<200>.•</data>
|
||||
|
||||
<locale sv>
|
||||
<word>
|
||||
<data>•Can't<200> •have<200> •breaks<200> •in<200> •xx:yy<200> •or<200> •struct.field<200> \
|
||||
•for<200> •CS<200>-•types<200>.•</data>
|
||||
<data>•\uFF92\uFF76\uFF9E<400> •</data>
|
||||
<data>•xx@yy<200>.•</data>
|
||||
<data>•xx<200>@•yy<200>.•</data>
|
||||
|
||||
|
||||
# UBreakIteratorType UBRK_CHARACTER, Locale "th"
|
||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Add table
Reference in a new issue