mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-20 20:19:32 +00:00
ICU-9057 Add joiner chars 200c and 200d to \w word char set.
X-SVN-Rev: 31278
This commit is contained in:
parent
da5380d926
commit
085aca34c6
2 changed files with 13 additions and 3 deletions
|
@ -1,7 +1,7 @@
|
|||
//
|
||||
// regexst.h
|
||||
//
|
||||
// Copyright (C) 2004-2010, International Business Machines Corporation and others.
|
||||
// Copyright (C) 2004-2012, International Business Machines Corporation and others.
|
||||
// All Rights Reserved.
|
||||
//
|
||||
// This file contains class RegexStaticSets
|
||||
|
@ -79,8 +79,10 @@ static const UChar gIsWordPattern[] = {
|
|||
0x5c, 0x70, 0x7b, 0x4d, 0x7d,
|
||||
// \ p { N d } Decimal_Number
|
||||
0x5c, 0x70, 0x7b, 0x4e, 0x64, 0x7d,
|
||||
// \ p { P c } ] Connector_Punctuation
|
||||
0x5c, 0x70, 0x7b, 0x50, 0x63, 0x7d, 0x5d, 0};
|
||||
// \ p { P c } Connector_Punctuation
|
||||
0x5c, 0x70, 0x7b, 0x50, 0x63, 0x7d,
|
||||
// \ u 2 0 0 c \ u 2 0 0 d ] Join_Control (ZWNJ, ZWJ)
|
||||
0x5c, 0x75, 0x32, 0x30, 0x30, 0x63, 0x5c, 0x75, 0x32, 0x30, 0x30, 0x64, 0x5d, 0};
|
||||
|
||||
|
||||
//
|
||||
|
|
8
icu4c/source/test/testdata/regextst.txt
vendored
8
icu4c/source/test/testdata/regextst.txt
vendored
|
@ -1119,6 +1119,14 @@
|
|||
|
||||
" (ss) ((\1.*)|(.*))" i "<0> <1>ss</1> <2><4>sß</4></2></0>" # The back reference 'ss' must not match in 'sß'
|
||||
|
||||
# Bug 9057
|
||||
# \u200c and \u200d should be word characters.
|
||||
#
|
||||
"\w+" " <0>abc\u200cdef\u200dghi</0> "
|
||||
"\w+" i " <0>abc\u200cdef\u200dghi</0> "
|
||||
"[\w]+" " <0>abc\u200cdef\u200dghi</0> "
|
||||
"[\w]+" i " <0>abc\u200cdef\u200dghi</0> "
|
||||
|
||||
# Random debugging, Temporary
|
||||
#
|
||||
#"^(?:a?b?)*$" "a--"
|
||||
|
|
Loading…
Add table
Reference in a new issue