ICU-2874 fix parsing of [^-b]

X-SVN-Rev: 11873
This commit is contained in:
Alan Liu 2003-05-09 21:26:52 +00:00
parent 0f04c4bf54
commit 9a9e49c403
3 changed files with 37 additions and 7 deletions

View file

@ -1879,7 +1879,8 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
// mode 0: No chars parsed yet; next must be '['
// mode 1: '[' seen; if next is '^' or ':' then special
// mode 2: '[' '^'? seen; parse pattern and close with ']'
// mode 15: "[^" seen; if next is '-' then literal
// mode 2: '[' '^'? '-'? seen; parse pattern and close with ']'
// mode 3: '[:' seen; parse category and close with ':]'
// mode 4: ']' seen; parse complete
// mode 5: Top-level property pattern seen
@ -1958,14 +1959,16 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
case COMPLEMENT:
invert = TRUE;
newPat.append(c);
mode = 15;
continue; // Back to top to fetch next character
case HYPHEN:
isLiteral = TRUE; // Treat leading '-' as a literal
break; // Fall through
}
break;
case 2:
if (c == HYPHEN && invert) {
case 15:
mode = 2;
if (c == HYPHEN) {
isLiteral = TRUE; // [^-...] starts with literal '-'
}
break;

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java,v $
* $Date: 2003/02/27 18:35:54 $
* $Revision: 1.47 $
* $Date: 2003/05/09 21:25:25 $
* $Revision: 1.48 $
*
*****************************************************************************************
*/
@ -836,6 +836,24 @@ public class UnicodeSetTest extends TestFmwk {
"[:Case Sensitive:]",
"A\u1FFC\\U00010410",
";\u00B4\\U00010500",
// Regex compatibility test
"[-b]", // leading '-' is literal
"-b",
"ac",
"[^-b]", // leading '-' is literal
"ac",
"-b",
"[b-]", // trailing '-' is literal
"-b",
"ac",
"[^b-]", // trailing '-' is literal
"ac",
"-b",
};
for (int i=0; i<DATA.length; i+=3) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
* $Date: 2003/04/09 23:01:03 $
* $Revision: 1.94 $
* $Date: 2003/05/09 21:25:25 $
* $Revision: 1.95 $
*
*****************************************************************************************
*/
@ -2007,6 +2007,7 @@ public class UnicodeSet extends UnicodeFilter {
// mode 0: No chars parsed yet; next must be '['
// mode 1: '[' seen; if next is '^' or ':' then special
// mode 15: "[^" seen; if next is '-' then literal
// mode 2: '[' '^'? '-'? seen; parse pattern and close with ']'
// mode 3: '[:' seen; parse category and close with ':]'
// mode 4: ']' seen; parse complete
@ -2082,11 +2083,19 @@ public class UnicodeSet extends UnicodeFilter {
case '^':
invert = true;
newPat.append((char) c);
mode = 15;
continue; // Back to top to fetch next character
case '-':
isLiteral = true; // Treat leading '-' as a literal
break; // Fall through
}
break;
case 15:
mode = 2;
if (c == '-') {
isLiteral = true; // [^-...] starts with literal '-'
}
break;
// else fall through and parse this character normally
}