ICU-9057 Add joiner chars 200c and 200d to \w word char set.

X-SVN-Rev: 31278
This commit is contained in:
Andy Heninger 2012-01-31 02:28:10 +00:00
parent da5380d926
commit 085aca34c6
2 changed files with 13 additions and 3 deletions

View file

@ -1,7 +1,7 @@
//
// regexst.h
//
// Copyright (C) 2004-2010, International Business Machines Corporation and others.
// Copyright (C) 2004-2012, International Business Machines Corporation and others.
// All Rights Reserved.
//
// This file contains class RegexStaticSets
@ -79,8 +79,10 @@ static const UChar gIsWordPattern[] = {
0x5c, 0x70, 0x7b, 0x4d, 0x7d,
// \ p { N d } Decimal_Number
0x5c, 0x70, 0x7b, 0x4e, 0x64, 0x7d,
// \ p { P c } ] Connector_Punctuation
0x5c, 0x70, 0x7b, 0x50, 0x63, 0x7d, 0x5d, 0};
// \ p { P c } Connector_Punctuation
0x5c, 0x70, 0x7b, 0x50, 0x63, 0x7d,
// \ u 2 0 0 c \ u 2 0 0 d ]
0x5c, 0x75, 0x32, 0x30, 0x30, 0x63, 0x5c, 0x75, 0x32, 0x30, 0x30, 0x64, 0x5d, 0};
//

View file

@ -1119,6 +1119,14 @@
" (ss) ((\1.*)|(.*))" i "<0> <1>ss</1> <2><4>sß</4></2></0>" # The back reference 'ss' must not match in 'sß'
# Bug 9057
# \u200c (ZERO WIDTH NON-JOINER) and \u200d (ZERO WIDTH JOINER), the Join_Control characters, should be word characters.
#
"\w+" " <0>abc\u200cdef\u200dghi</0> "
"\w+" i " <0>abc\u200cdef\u200dghi</0> "
"[\w]+" " <0>abc\u200cdef\u200dghi</0> "
"[\w]+" i " <0>abc\u200cdef\u200dghi</0> "
# Random debugging, Temporary
#
#"^(?:a?b?)*$" "a--"