mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-2874 fix parsing of [^-b]
X-SVN-Rev: 11873
This commit is contained in:
parent
0f04c4bf54
commit
9a9e49c403
3 changed files with 37 additions and 7 deletions
|
@ -1879,7 +1879,8 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
|
|||
|
||||
// mode 0: No chars parsed yet; next must be '['
|
||||
// mode 1: '[' seen; if next is '^' or ':' then special
|
||||
// mode 2: '[' '^'? seen; parse pattern and close with ']'
|
||||
// mode 15: "[^" seen; if next is '-' then literal
|
||||
// mode 2: '[' '^'? '-'? seen; parse pattern and close with ']'
|
||||
// mode 3: '[:' seen; parse category and close with ':]'
|
||||
// mode 4: ']' seen; parse complete
|
||||
// mode 5: Top-level property pattern seen
|
||||
|
@ -1958,14 +1959,16 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
|
|||
case COMPLEMENT:
|
||||
invert = TRUE;
|
||||
newPat.append(c);
|
||||
mode = 15;
|
||||
continue; // Back to top to fetch next character
|
||||
case HYPHEN:
|
||||
isLiteral = TRUE; // Treat leading '-' as a literal
|
||||
break; // Fall through
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (c == HYPHEN && invert) {
|
||||
case 15:
|
||||
mode = 2;
|
||||
if (c == HYPHEN) {
|
||||
isLiteral = TRUE; // [^-...] starts with literal '-'
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java,v $
|
||||
* $Date: 2003/02/27 18:35:54 $
|
||||
* $Revision: 1.47 $
|
||||
* $Date: 2003/05/09 21:25:25 $
|
||||
* $Revision: 1.48 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -836,6 +836,24 @@ public class UnicodeSetTest extends TestFmwk {
|
|||
"[:Case Sensitive:]",
|
||||
"A\u1FFC\\U00010410",
|
||||
";\u00B4\\U00010500",
|
||||
|
||||
|
||||
// Regex compatibility test
|
||||
"[-b]", // leading '-' is literal
|
||||
"-b",
|
||||
"ac",
|
||||
|
||||
"[^-b]", // leading '-' is literal
|
||||
"ac",
|
||||
"-b",
|
||||
|
||||
"[b-]", // trailing '-' is literal
|
||||
"-b",
|
||||
"ac",
|
||||
|
||||
"[^b-]", // trailing '-' is literal
|
||||
"ac",
|
||||
"-b",
|
||||
};
|
||||
|
||||
for (int i=0; i<DATA.length; i+=3) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
|
||||
* $Date: 2003/04/09 23:01:03 $
|
||||
* $Revision: 1.94 $
|
||||
* $Date: 2003/05/09 21:25:25 $
|
||||
* $Revision: 1.95 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -2007,6 +2007,7 @@ public class UnicodeSet extends UnicodeFilter {
|
|||
|
||||
// mode 0: No chars parsed yet; next must be '['
|
||||
// mode 1: '[' seen; if next is '^' or ':' then special
|
||||
// mode 15: "[^" seen; if next is '-' then literal
|
||||
// mode 2: '[' '^'? '-'? seen; parse pattern and close with ']'
|
||||
// mode 3: '[:' seen; parse category and close with ':]'
|
||||
// mode 4: ']' seen; parse complete
|
||||
|
@ -2082,11 +2083,19 @@ public class UnicodeSet extends UnicodeFilter {
|
|||
case '^':
|
||||
invert = true;
|
||||
newPat.append((char) c);
|
||||
mode = 15;
|
||||
continue; // Back to top to fetch next character
|
||||
case '-':
|
||||
isLiteral = true; // Treat leading '-' as a literal
|
||||
break; // Fall through
|
||||
}
|
||||
break;
|
||||
case 15:
|
||||
mode = 2;
|
||||
if (c == '-') {
|
||||
isLiteral = true; // [^-...] starts with literal '-'
|
||||
}
|
||||
break;
|
||||
// else fall through and parse this character normally
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue