mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
ICU-13702 C++ UnicodeSet cleanup: use UTF-16 literals
This commit is contained in:
parent
67cc873789
commit
dea5448cb1
3 changed files with 106 additions and 156 deletions
|
@ -30,24 +30,6 @@
|
|||
#include "bmpset.h"
|
||||
#include "unisetspan.h"
|
||||
|
||||
// Define UChar constants using hex for EBCDIC compatibility
|
||||
// Used #define to reduce private static exports and memory access time.
|
||||
#define SET_OPEN ((UChar)0x005B) /*[*/
|
||||
#define SET_CLOSE ((UChar)0x005D) /*]*/
|
||||
#define HYPHEN ((UChar)0x002D) /*-*/
|
||||
#define COMPLEMENT ((UChar)0x005E) /*^*/
|
||||
#define COLON ((UChar)0x003A) /*:*/
|
||||
#define BACKSLASH ((UChar)0x005C) /*\*/
|
||||
#define INTERSECTION ((UChar)0x0026) /*&*/
|
||||
#define UPPER_U ((UChar)0x0055) /*U*/
|
||||
#define LOWER_U ((UChar)0x0075) /*u*/
|
||||
#define OPEN_BRACE ((UChar)123) /*{*/
|
||||
#define CLOSE_BRACE ((UChar)125) /*}*/
|
||||
#define UPPER_P ((UChar)0x0050) /*P*/
|
||||
#define LOWER_P ((UChar)0x0070) /*p*/
|
||||
#define UPPER_N ((UChar)78) /*N*/
|
||||
#define EQUALS ((UChar)0x003D) /*=*/
|
||||
|
||||
// HIGH_VALUE > all valid values. 110000 for codepoints
|
||||
#define UNICODESET_HIGH 0x0110000
|
||||
|
||||
|
@ -1989,22 +1971,22 @@ escapeUnprintable) {
|
|||
}
|
||||
// Okay to let ':' pass through
|
||||
switch (c) {
|
||||
case SET_OPEN:
|
||||
case SET_CLOSE:
|
||||
case HYPHEN:
|
||||
case COMPLEMENT:
|
||||
case INTERSECTION:
|
||||
case BACKSLASH:
|
||||
case OPEN_BRACE:
|
||||
case CLOSE_BRACE:
|
||||
case COLON:
|
||||
case u'[':
|
||||
case u']':
|
||||
case u'-':
|
||||
case u'^':
|
||||
case u'&':
|
||||
case u'\\':
|
||||
case u'{':
|
||||
case u'}':
|
||||
case u':':
|
||||
case SymbolTable::SYMBOL_REF:
|
||||
buf.append(BACKSLASH);
|
||||
buf.append(u'\\');
|
||||
break;
|
||||
default:
|
||||
// Escape whitespace
|
||||
if (PatternProps::isWhiteSpace(c)) {
|
||||
buf.append(BACKSLASH);
|
||||
buf.append(u'\\');
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -2037,7 +2019,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
|
|||
backslashCount = 0;
|
||||
} else {
|
||||
result.append(c);
|
||||
if (c == BACKSLASH) {
|
||||
if (c == u'\\') {
|
||||
++backslashCount;
|
||||
} else {
|
||||
backslashCount = 0;
|
||||
|
@ -2070,13 +2052,13 @@ UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
|
|||
UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
||||
UBool escapeUnprintable) const
|
||||
{
|
||||
result.append(SET_OPEN);
|
||||
result.append(u'[');
|
||||
|
||||
// // Check against the predefined categories. We implicitly build
|
||||
// // up ALL category sets the first time toPattern() is called.
|
||||
// for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
|
||||
// if (*this == getCategorySet(cat)) {
|
||||
// result.append(COLON);
|
||||
// result.append(u':');
|
||||
// result.append(CATEGORY_NAMES, cat*2, 2);
|
||||
// return result.append(CATEGORY_CLOSE);
|
||||
// }
|
||||
|
@ -2092,7 +2074,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
|||
getRangeEnd(count-1) == MAX_VALUE) {
|
||||
|
||||
// Emit the inverse
|
||||
result.append(COMPLEMENT);
|
||||
result.append(u'^');
|
||||
|
||||
for (int32_t i = 1; i < count; ++i) {
|
||||
UChar32 start = getRangeEnd(i-1)+1;
|
||||
|
@ -2100,7 +2082,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
|||
_appendToPat(result, start, escapeUnprintable);
|
||||
if (start != end) {
|
||||
if ((start+1) != end) {
|
||||
result.append(HYPHEN);
|
||||
result.append(u'-');
|
||||
}
|
||||
_appendToPat(result, end, escapeUnprintable);
|
||||
}
|
||||
|
@ -2115,7 +2097,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
|||
_appendToPat(result, start, escapeUnprintable);
|
||||
if (start != end) {
|
||||
if ((start+1) != end) {
|
||||
result.append(HYPHEN);
|
||||
result.append(u'-');
|
||||
}
|
||||
_appendToPat(result, end, escapeUnprintable);
|
||||
}
|
||||
|
@ -2124,14 +2106,14 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
|||
|
||||
if (strings != nullptr) {
|
||||
for (int32_t i = 0; i<strings->size(); ++i) {
|
||||
result.append(OPEN_BRACE);
|
||||
result.append(u'{');
|
||||
_appendToPat(result,
|
||||
*(const UnicodeString*) strings->elementAt(i),
|
||||
escapeUnprintable);
|
||||
result.append(CLOSE_BRACE);
|
||||
result.append(u'}');
|
||||
}
|
||||
}
|
||||
return result.append(SET_CLOSE);
|
||||
return result.append(u']');
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -47,31 +47,6 @@
|
|||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
// Define UChar constants using hex for EBCDIC compatibility
|
||||
// Used #define to reduce private static exports and memory access time.
|
||||
#define SET_OPEN ((UChar)0x005B) /*[*/
|
||||
#define SET_CLOSE ((UChar)0x005D) /*]*/
|
||||
#define HYPHEN ((UChar)0x002D) /*-*/
|
||||
#define COMPLEMENT ((UChar)0x005E) /*^*/
|
||||
#define COLON ((UChar)0x003A) /*:*/
|
||||
#define BACKSLASH ((UChar)0x005C) /*\*/
|
||||
#define INTERSECTION ((UChar)0x0026) /*&*/
|
||||
#define UPPER_U ((UChar)0x0055) /*U*/
|
||||
#define LOWER_U ((UChar)0x0075) /*u*/
|
||||
#define OPEN_BRACE ((UChar)123) /*{*/
|
||||
#define CLOSE_BRACE ((UChar)125) /*}*/
|
||||
#define UPPER_P ((UChar)0x0050) /*P*/
|
||||
#define LOWER_P ((UChar)0x0070) /*p*/
|
||||
#define UPPER_N ((UChar)78) /*N*/
|
||||
#define EQUALS ((UChar)0x003D) /*=*/
|
||||
|
||||
//static const UChar POSIX_OPEN[] = { SET_OPEN,COLON,0 }; // "[:"
|
||||
static const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 }; // ":]"
|
||||
//static const UChar PERL_OPEN[] = { BACKSLASH,LOWER_P,0 }; // "\\p"
|
||||
//static const UChar PERL_CLOSE[] = { CLOSE_BRACE,0 }; // "}"
|
||||
//static const UChar NAME_OPEN[] = { BACKSLASH,UPPER_N,0 }; // "\\N"
|
||||
static const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/
|
||||
|
||||
// Special property set IDs
|
||||
static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
|
||||
static const char ASCII[] = "ASCII"; // [\u0000-\u007F]
|
||||
|
@ -81,12 +56,6 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
|
|||
#define NAME_PROP "na"
|
||||
#define NAME_PROP_LENGTH 2
|
||||
|
||||
/**
|
||||
* Delimiter string used in patterns to close a category reference:
|
||||
* ":]". Example: "[:Lu:]".
|
||||
*/
|
||||
//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
|
||||
|
||||
// Cached sets ------------------------------------------------------------- ***
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
@ -140,27 +109,27 @@ uniset_getUnicode32Instance(UErrorCode &errorCode) {
|
|||
static inline UBool
|
||||
isPerlOpen(const UnicodeString &pattern, int32_t pos) {
|
||||
UChar c;
|
||||
return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P);
|
||||
return pattern.charAt(pos)==u'\\' && ((c=pattern.charAt(pos+1))==u'p' || c==u'P');
|
||||
}
|
||||
|
||||
/*static inline UBool
|
||||
isPerlClose(const UnicodeString &pattern, int32_t pos) {
|
||||
return pattern.charAt(pos)==CLOSE_BRACE;
|
||||
return pattern.charAt(pos)==u'}';
|
||||
}*/
|
||||
|
||||
static inline UBool
|
||||
isNameOpen(const UnicodeString &pattern, int32_t pos) {
|
||||
return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N;
|
||||
return pattern.charAt(pos)==u'\\' && pattern.charAt(pos+1)==u'N';
|
||||
}
|
||||
|
||||
static inline UBool
|
||||
isPOSIXOpen(const UnicodeString &pattern, int32_t pos) {
|
||||
return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON;
|
||||
return pattern.charAt(pos)==u'[' && pattern.charAt(pos+1)==u':';
|
||||
}
|
||||
|
||||
/*static inline UBool
|
||||
isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
|
||||
return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE;
|
||||
return pattern.charAt(pos)==u':' && pattern.charAt(pos+1)==u']';
|
||||
}*/
|
||||
|
||||
// TODO memory debugging provided inside uniset.cpp
|
||||
|
@ -326,9 +295,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
|
||||
while (mode != 2 && !chars.atEnd()) {
|
||||
U_ASSERT((lastItem == 0 && op == 0) ||
|
||||
(lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
|
||||
(lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
|
||||
op == INTERSECTION /*'&'*/)));
|
||||
(lastItem == 1 && (op == 0 || op == u'-')) ||
|
||||
(lastItem == 2 && (op == 0 || op == u'-' || op == u'&')));
|
||||
|
||||
UChar32 c = 0;
|
||||
UBool literal = FALSE;
|
||||
|
@ -356,27 +324,27 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
|
||||
if (c == 0x5B /*'['*/ && !literal) {
|
||||
if (c == u'[' && !literal) {
|
||||
if (mode == 1) {
|
||||
chars.setPos(backup); // backup
|
||||
setMode = 1;
|
||||
} else {
|
||||
// Handle opening '[' delimiter
|
||||
mode = 1;
|
||||
patLocal.append((UChar) 0x5B /*'['*/);
|
||||
patLocal.append(u'[');
|
||||
chars.getPos(backup); // prepare to backup
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
if (c == 0x5E /*'^'*/ && !literal) {
|
||||
if (c == u'^' && !literal) {
|
||||
invert = TRUE;
|
||||
patLocal.append((UChar) 0x5E /*'^'*/);
|
||||
patLocal.append(u'^');
|
||||
chars.getPos(backup); // prepare to backup
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
}
|
||||
// Fall through to handle special leading '-';
|
||||
// otherwise restart loop for nested [], \p{}, etc.
|
||||
if (c == HYPHEN /*'-'*/) {
|
||||
if (c == u'-') {
|
||||
literal = TRUE;
|
||||
// Fall through to handle literal '-' below
|
||||
} else {
|
||||
|
@ -418,7 +386,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
op = 0;
|
||||
}
|
||||
|
||||
if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) {
|
||||
if (op == u'-' || op == u'&') {
|
||||
patLocal.append(op);
|
||||
}
|
||||
|
||||
|
@ -454,10 +422,10 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
}
|
||||
|
||||
switch (op) {
|
||||
case HYPHEN: /*'-'*/
|
||||
case u'-':
|
||||
removeAll(*nested);
|
||||
break;
|
||||
case INTERSECTION: /*'&'*/
|
||||
case u'&':
|
||||
retainAll(*nested);
|
||||
break;
|
||||
case 0:
|
||||
|
@ -483,24 +451,24 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
|
||||
if (!literal) {
|
||||
switch (c) {
|
||||
case 0x5D /*']'*/:
|
||||
case u']':
|
||||
if (lastItem == 1) {
|
||||
add(lastChar, lastChar);
|
||||
_appendToPat(patLocal, lastChar, FALSE);
|
||||
}
|
||||
// Treat final trailing '-' as a literal
|
||||
if (op == HYPHEN /*'-'*/) {
|
||||
if (op == u'-') {
|
||||
add(op, op);
|
||||
patLocal.append(op);
|
||||
} else if (op == INTERSECTION /*'&'*/) {
|
||||
} else if (op == u'&') {
|
||||
// syntaxError(chars, "Trailing '&'");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
}
|
||||
patLocal.append((UChar) 0x5D /*']'*/);
|
||||
patLocal.append(u']');
|
||||
mode = 2;
|
||||
continue;
|
||||
case HYPHEN /*'-'*/:
|
||||
case u'-':
|
||||
if (op == 0) {
|
||||
if (lastItem != 0) {
|
||||
op = (UChar) c;
|
||||
|
@ -510,8 +478,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
add(c, c);
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
if (c == 0x5D /*']'*/ && !literal) {
|
||||
patLocal.append(HYPHEN_RIGHT_BRACE, 2);
|
||||
if (c == u']' && !literal) {
|
||||
patLocal.append(u"-]", 2);
|
||||
mode = 2;
|
||||
continue;
|
||||
}
|
||||
|
@ -520,7 +488,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
// syntaxError(chars, "'-' not after char or set");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
case INTERSECTION /*'&'*/:
|
||||
case u'&':
|
||||
if (lastItem == 2 && op == 0) {
|
||||
op = (UChar) c;
|
||||
continue;
|
||||
|
@ -528,11 +496,11 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
// syntaxError(chars, "'&' not after set");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
case 0x5E /*'^'*/:
|
||||
case u'^':
|
||||
// syntaxError(chars, "'^' not after '['");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
case 0x7B /*'{'*/:
|
||||
case u'{':
|
||||
if (op != 0) {
|
||||
// syntaxError(chars, "Missing operand after operator");
|
||||
ec = U_MALFORMED_SET;
|
||||
|
@ -549,7 +517,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
while (!chars.atEnd()) {
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
if (c == 0x7D /*'}'*/ && !literal) {
|
||||
if (c == u'}' && !literal) {
|
||||
ok = TRUE;
|
||||
break;
|
||||
}
|
||||
|
@ -565,9 +533,9 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
// we don't need to drop through to the further
|
||||
// processing
|
||||
add(buf);
|
||||
patLocal.append((UChar) 0x7B /*'{'*/);
|
||||
patLocal.append(u'{');
|
||||
_appendToPat(patLocal, buf, FALSE);
|
||||
patLocal.append((UChar) 0x7D /*'}'*/);
|
||||
patLocal.append(u'}');
|
||||
continue;
|
||||
case SymbolTable::SYMBOL_REF:
|
||||
// symbols nosymbols
|
||||
|
@ -580,7 +548,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
chars.getPos(backup);
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
UBool anchor = (c == 0x5D /*']'*/ && !literal);
|
||||
UBool anchor = (c == u']' && !literal);
|
||||
if (symbols == 0 && !anchor) {
|
||||
c = SymbolTable::SYMBOL_REF;
|
||||
chars.setPos(backup);
|
||||
|
@ -594,7 +562,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
add(U_ETHER);
|
||||
usePat = TRUE;
|
||||
patLocal.append((UChar) SymbolTable::SYMBOL_REF);
|
||||
patLocal.append((UChar) 0x5D /*']'*/);
|
||||
patLocal.append(u']');
|
||||
mode = 2;
|
||||
continue;
|
||||
}
|
||||
|
@ -617,7 +585,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
lastChar = c;
|
||||
break;
|
||||
case 1:
|
||||
if (op == HYPHEN /*'-'*/) {
|
||||
if (op == u'-') {
|
||||
if (lastChar >= c) {
|
||||
// Don't allow redundant (a-a) or empty (b-a) ranges;
|
||||
// these are most likely typos.
|
||||
|
@ -1036,11 +1004,11 @@ UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars,
|
|||
RuleCharacterIterator::Pos pos;
|
||||
chars.getPos(pos);
|
||||
UChar32 c = chars.next(iterOpts, literal, ec);
|
||||
if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) {
|
||||
if (c == u'[' || c == u'\\') {
|
||||
UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE,
|
||||
literal, ec);
|
||||
result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) :
|
||||
(d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/);
|
||||
result = (c == u'[') ? (d == u':') :
|
||||
(d == u'N' || d == u'p' || d == u'P');
|
||||
}
|
||||
chars.setPos(pos);
|
||||
return result && U_SUCCESS(ec);
|
||||
|
@ -1071,17 +1039,17 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
|
|||
posix = TRUE;
|
||||
pos += 2;
|
||||
pos = ICU_Utility::skipWhitespace(pattern, pos);
|
||||
if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) {
|
||||
if (pos < pattern.length() && pattern.charAt(pos) == u'^') {
|
||||
++pos;
|
||||
invert = TRUE;
|
||||
}
|
||||
} else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) {
|
||||
UChar c = pattern.charAt(pos+1);
|
||||
invert = (c == UPPER_P);
|
||||
isName = (c == UPPER_N);
|
||||
invert = (c == u'P');
|
||||
isName = (c == u'N');
|
||||
pos += 2;
|
||||
pos = ICU_Utility::skipWhitespace(pattern, pos);
|
||||
if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) {
|
||||
if (pos == pattern.length() || pattern.charAt(pos++) != u'{') {
|
||||
// Syntax error; "\p" or "\P" not followed by "{"
|
||||
FAIL(ec);
|
||||
}
|
||||
|
@ -1093,9 +1061,9 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
|
|||
// Look for the matching close delimiter, either :] or }
|
||||
int32_t close;
|
||||
if (posix) {
|
||||
close = pattern.indexOf(POSIX_CLOSE, 2, pos);
|
||||
close = pattern.indexOf(u":]", 2, pos);
|
||||
} else {
|
||||
close = pattern.indexOf(CLOSE_BRACE, pos);
|
||||
close = pattern.indexOf(u'}', pos);
|
||||
}
|
||||
if (close < 0) {
|
||||
// Syntax error; close delimiter missing
|
||||
|
@ -1105,7 +1073,7 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
|
|||
// Look for an '=' sign. If this is present, we will parse a
|
||||
// medium \p{gc=Cf} or long \p{GeneralCategory=Format}
|
||||
// pattern.
|
||||
int32_t equals = pattern.indexOf(EQUALS, pos);
|
||||
int32_t equals = pattern.indexOf(u'=', pos);
|
||||
UnicodeString propName, valueName;
|
||||
if (equals >= 0 && equals < close && !isName) {
|
||||
// Equals seen; parse medium/long pattern
|
||||
|
|
|
@ -176,19 +176,19 @@ void UnicodeSetTest::TestToPattern() {
|
|||
const char* exp2[] = {"aa", "ab", "ac", NOT, "xy", NULL};
|
||||
expectToPattern(*s, "[a-z{aa}{ab}{ac}]", exp2);
|
||||
|
||||
s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\{l} {r\\}}]"), ec);
|
||||
s->applyPattern(u"[a-z {\\{l} {r\\}}]", ec);
|
||||
if (U_FAILURE(ec)) break;
|
||||
const char* exp3[] = {"{l", "r}", NOT, "xy", NULL};
|
||||
expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{r\\}}{\\{l}]"), exp3);
|
||||
expectToPattern(*s, u"[a-z{r\\}}{\\{l}]", exp3);
|
||||
|
||||
s->add("[]");
|
||||
const char* exp4[] = {"{l", "r}", "[]", NOT, "xy", NULL};
|
||||
expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\[\\]}{r\\}}{\\{l}]"), exp4);
|
||||
expectToPattern(*s, u"[a-z{\\[\\]}{r\\}}{\\{l}]", exp4);
|
||||
|
||||
s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\u4E01\\u4E02}{\\n\\r}]"), ec);
|
||||
s->applyPattern(u"[a-z {\\u4E01\\u4E02}{\\n\\r}]", ec);
|
||||
if (U_FAILURE(ec)) break;
|
||||
const char* exp5[] = {"\\u4E01\\u4E02", "\n\r", NULL};
|
||||
expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]"), exp5);
|
||||
expectToPattern(*s, u"[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]", exp5);
|
||||
|
||||
// j2189
|
||||
s->clear();
|
||||
|
@ -206,7 +206,7 @@ void UnicodeSetTest::TestToPattern() {
|
|||
|
||||
// JB#3400: For 2 character ranges prefer [ab] to [a-b]
|
||||
UnicodeSet s;
|
||||
s.add((UChar)97, (UChar)98); // 'a', 'b'
|
||||
s.add(u'a', u'b');
|
||||
expectToPattern(s, "[ab]", NULL);
|
||||
}
|
||||
|
||||
|
@ -277,7 +277,7 @@ UnicodeSetTest::TestPatterns(void) {
|
|||
// Throw in a test of complement
|
||||
set.complement();
|
||||
UnicodeString exp;
|
||||
exp.append((UChar)0x0000).append("aeeoouu").append((UChar)(0x007a+1)).append((UChar)0xFFFF);
|
||||
exp.append((UChar)0x0000).append("aeeoouu").append((UChar)(u'z'+1)).append(u'\uFFFF');
|
||||
expectPairs(set, exp);
|
||||
}
|
||||
|
||||
|
@ -325,14 +325,14 @@ UnicodeSetTest::TestCloneEqualHash(void) {
|
|||
// set1 and set2 used to be built with the obsolete constructor taking
|
||||
// UCharCategory values; replaced with pattern constructors
|
||||
// markus 20030502
|
||||
UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); // :Ll: Letter, lowercase
|
||||
UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); // Letter, lowercase
|
||||
UnicodeSet *set1=new UnicodeSet(u"\\p{Lowercase Letter}", status); // :Ll: Letter, lowercase
|
||||
UnicodeSet *set1a=new UnicodeSet(u"[:Ll:]", status); // Letter, lowercase
|
||||
if (U_FAILURE(status)){
|
||||
dataerrln((UnicodeString)"FAIL: Can't construst set with category->Ll" + " - " + UnicodeString(u_errorName(status)));
|
||||
return;
|
||||
}
|
||||
UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status); //Number, Decimal digit
|
||||
UnicodeSet *set2a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Nd:]"), status); //Number, Decimal digit
|
||||
UnicodeSet *set2=new UnicodeSet(u"\\p{Decimal Number}", status); //Number, Decimal digit
|
||||
UnicodeSet *set2a=new UnicodeSet(u"[:Nd:]", status); //Number, Decimal digit
|
||||
if (U_FAILURE(status)){
|
||||
errln((UnicodeString)"FAIL: Can't construct set with category->Nd");
|
||||
return;
|
||||
|
@ -653,7 +653,7 @@ void UnicodeSetTest::TestAPI() {
|
|||
errln("FAIL: UnicodeSetIterator::getString");
|
||||
}
|
||||
|
||||
set.add((UChar32)0x61, (UChar32)0x7A);
|
||||
set.add(u'a', u'z');
|
||||
set.complementAll("alan");
|
||||
exp.applyPattern("[{ab}b-kmo-z]", status);
|
||||
if (U_FAILURE(status)) { errln("FAIL"); return; }
|
||||
|
@ -668,16 +668,16 @@ void UnicodeSetTest::TestAPI() {
|
|||
if (!set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
|
||||
if (set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
|
||||
|
||||
if (set.containsNone((UChar32)0x61, (UChar32)0x7A)) {
|
||||
if (set.containsNone(u'a', u'z')) {
|
||||
errln("FAIL: containsNone(UChar32, UChar32)");
|
||||
}
|
||||
if (!set.containsSome((UChar32)0x61, (UChar32)0x7A)) {
|
||||
if (!set.containsSome(u'a', u'z')) {
|
||||
errln("FAIL: containsSome(UChar32, UChar32)");
|
||||
}
|
||||
if (!set.containsNone((UChar32)0x41, (UChar32)0x5A)) {
|
||||
if (!set.containsNone(u'A', u'Z')) {
|
||||
errln("FAIL: containsNone(UChar32, UChar32)");
|
||||
}
|
||||
if (set.containsSome((UChar32)0x41, (UChar32)0x5A)) {
|
||||
if (set.containsSome(u'A', u'Z')) {
|
||||
errln("FAIL: containsSome(UChar32, UChar32)");
|
||||
}
|
||||
|
||||
|
@ -691,7 +691,7 @@ void UnicodeSetTest::TestAPI() {
|
|||
if (U_FAILURE(status)) { errln("FAIL"); return; }
|
||||
if (set != exp) { errln("FAIL: retainAll(\"star\")"); return; }
|
||||
|
||||
set.retain((UChar32)0x73);
|
||||
set.retain(u's');
|
||||
exp.applyPattern("[s]", status);
|
||||
if (U_FAILURE(status)) { errln("FAIL"); return; }
|
||||
if (set != exp) { errln("FAIL: retain('s')"); return; }
|
||||
|
@ -699,7 +699,7 @@ void UnicodeSetTest::TestAPI() {
|
|||
uint16_t buf[32];
|
||||
int32_t slen = set.serialize(buf, UPRV_LENGTHOF(buf), status);
|
||||
if (U_FAILURE(status)) { errln("FAIL: serialize"); return; }
|
||||
if (slen != 3 || buf[0] != 2 || buf[1] != 0x73 || buf[2] != 0x74) {
|
||||
if (slen != 3 || buf[0] != 2 || buf[1] != u's' || buf[2] != u't') {
|
||||
errln("FAIL: serialize");
|
||||
return;
|
||||
}
|
||||
|
@ -717,7 +717,7 @@ void UnicodeSetTest::TestAPI() {
|
|||
TEST_ASSERT((void *)constSetx == (void *)constUSet);
|
||||
|
||||
// span(UnicodeString) and spanBack(UnicodeString) convenience methods
|
||||
UnicodeString longString=UNICODE_STRING_SIMPLE("aaaaaaaaaabbbbbbbbbbcccccccccc");
|
||||
UnicodeString longString=u"aaaaaaaaaabbbbbbbbbbcccccccccc";
|
||||
UnicodeSet ac(0x61, 0x63);
|
||||
ac.remove(0x62).freeze();
|
||||
if( ac.span(longString, -5, USET_SPAN_CONTAINED)!=10 ||
|
||||
|
@ -755,7 +755,7 @@ void UnicodeSetTest::TestIteration() {
|
|||
|
||||
// 6 code points, 3 ranges, 2 strings, 8 total elements
|
||||
// Iteration will access them in sorted order - a, b, c, y, z, U0001abcd, "str1", "str2"
|
||||
UnicodeSet set(UNICODE_STRING_SIMPLE("[zabyc\\U0001abcd{str1}{str2}]"), ec);
|
||||
UnicodeSet set(u"[zabyc\\U0001abcd{str1}{str2}]", ec);
|
||||
TEST_ASSERT_SUCCESS(ec);
|
||||
UnicodeSetIterator it(set);
|
||||
|
||||
|
@ -872,12 +872,12 @@ void UnicodeSetTest::TestStrings() {
|
|||
* Test the [:Latin:] syntax.
|
||||
*/
|
||||
void UnicodeSetTest::TestScriptSet() {
|
||||
expectContainment(UNICODE_STRING_SIMPLE("[:Latin:]"), "aA", CharsToUnicodeString("\\u0391\\u03B1"));
|
||||
expectContainment(u"[:Latin:]", "aA", CharsToUnicodeString("\\u0391\\u03B1"));
|
||||
|
||||
expectContainment(UNICODE_STRING_SIMPLE("[:Greek:]"), CharsToUnicodeString("\\u0391\\u03B1"), "aA");
|
||||
expectContainment(u"[:Greek:]", CharsToUnicodeString("\\u0391\\u03B1"), "aA");
|
||||
|
||||
/* Jitterbug 1423 */
|
||||
expectContainment(UNICODE_STRING_SIMPLE("[[:Common:][:Inherited:]]"), CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
|
||||
expectContainment(u"[[:Common:][:Inherited:]]", CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
|
||||
|
||||
}
|
||||
|
||||
|
@ -1080,56 +1080,56 @@ void UnicodeSetTest::TestPosixClasses() {
|
|||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:alpha:]", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Alphabetic}"), status);
|
||||
UnicodeSet s2(u"\\p{Alphabetic}", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:lower:]", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{lowercase}"), status);
|
||||
UnicodeSet s2(u"\\p{lowercase}", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:upper:]", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Uppercase}"), status);
|
||||
UnicodeSet s2(u"\\p{Uppercase}", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:punct:]", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=Punctuation}"), status);
|
||||
UnicodeSet s2(u"\\p{gc=Punctuation}", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:digit:]", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=DecimalNumber}"), status);
|
||||
UnicodeSet s2(u"\\p{gc=DecimalNumber}", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:xdigit:]", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{DecimalNumber}\\p{HexDigit}]"), status);
|
||||
UnicodeSet s2(u"[\\p{DecimalNumber}\\p{HexDigit}]", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:alnum:]", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Alphabetic}\\p{DecimalNumber}]"), status);
|
||||
UnicodeSet s2(u"[\\p{Alphabetic}\\p{DecimalNumber}]", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:space:]", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Whitespace}"), status);
|
||||
UnicodeSet s2(u"\\p{Whitespace}", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
|
@ -1137,7 +1137,7 @@ void UnicodeSetTest::TestPosixClasses() {
|
|||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:blank:]", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]"),
|
||||
UnicodeSet s2(u"[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]",
|
||||
status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
|
@ -1146,7 +1146,7 @@ void UnicodeSetTest::TestPosixClasses() {
|
|||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:cntrl:]", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Control}"), status);
|
||||
UnicodeSet s2(u"\\p{Control}", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
|
@ -1154,7 +1154,7 @@ void UnicodeSetTest::TestPosixClasses() {
|
|||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:graph:]", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]"), status);
|
||||
UnicodeSet s2(u"[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
|
@ -1162,7 +1162,7 @@ void UnicodeSetTest::TestPosixClasses() {
|
|||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:print:]", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("[[:graph:][:blank:]-[\\p{Control}]]") ,status);
|
||||
UnicodeSet s2(u"[[:graph:][:blank:]-[\\p{Control}]]", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
|
@ -1198,7 +1198,7 @@ void UnicodeSetTest::TestIndexOf() {
|
|||
if (c != -1) {
|
||||
errln("FAIL: charAt(<out of range>) = %X", c);
|
||||
}
|
||||
int32_t j = set.indexOf((UChar32)0x71/*'q'*/);
|
||||
int32_t j = set.indexOf(u'q');
|
||||
if (j != -1) {
|
||||
errln((UnicodeString)"FAIL: indexOf('q') = " + j);
|
||||
}
|
||||
|
@ -1415,7 +1415,7 @@ void UnicodeSetTest::TestEscapePattern() {
|
|||
if (U_FAILURE(ec)) {
|
||||
continue;
|
||||
}
|
||||
if (set.contains((UChar)0x0644)){
|
||||
if (set.contains(u'\u0644')){
|
||||
errln((UnicodeString)"FAIL: " + escape(pat) + " contains(U+0664)");
|
||||
}
|
||||
|
||||
|
@ -1429,7 +1429,7 @@ void UnicodeSetTest::TestEscapePattern() {
|
|||
|
||||
for (int32_t i=0; i<set.getRangeCount(); ++i) {
|
||||
UnicodeString str("Range ");
|
||||
str.append((UChar)(0x30 + i))
|
||||
str.append((UChar)(u'0' + i))
|
||||
.append(": ")
|
||||
.append((UChar32)set.getRangeStart(i))
|
||||
.append(" - ")
|
||||
|
@ -2170,7 +2170,7 @@ void UnicodeSetTest::expectToPattern(const UnicodeSet& set,
|
|||
}
|
||||
}
|
||||
|
||||
static UChar toHexString(int32_t i) { return (UChar)(i + (i < 10 ? 0x30 : (0x41 - 10))); }
|
||||
static UChar toHexString(int32_t i) { return (UChar)(i + (i < 10 ? u'0' : (u'A' - 10))); }
|
||||
|
||||
void
|
||||
UnicodeSetTest::doAssert(UBool condition, const char *message)
|
||||
|
@ -2190,9 +2190,9 @@ UnicodeSetTest::escape(const UnicodeString& s) {
|
|||
buf += c;
|
||||
} else {
|
||||
if (c <= 0xFFFF) {
|
||||
buf += (UChar)0x5c; buf += (UChar)0x75;
|
||||
buf += u"\\u";
|
||||
} else {
|
||||
buf += (UChar)0x5c; buf += (UChar)0x55;
|
||||
buf += u"\\U";
|
||||
buf += toHexString((c & 0xF0000000) >> 28);
|
||||
buf += toHexString((c & 0x0F000000) >> 24);
|
||||
buf += toHexString((c & 0x00F00000) >> 20);
|
||||
|
@ -2302,7 +2302,7 @@ void UnicodeSetTest::TestFreezable() {
|
|||
applyPattern(wsPattern, USET_IGNORE_SPACE, NULL, errorCode).
|
||||
applyPattern(wsPattern, pos, USET_IGNORE_SPACE, NULL, errorCode).
|
||||
applyIntPropertyValue(UCHAR_CANONICAL_COMBINING_CLASS, 230, errorCode).
|
||||
applyPropertyAlias(UNICODE_STRING_SIMPLE("Assigned"), UnicodeString(), errorCode);
|
||||
applyPropertyAlias(u"Assigned", UnicodeString(), errorCode);
|
||||
if(frozen!=idSet || !(frozen==idSet)) {
|
||||
errln("FAIL: UnicodeSet::applyXYZ() modified a frozen set");
|
||||
}
|
||||
|
@ -3825,7 +3825,7 @@ void UnicodeSetTest::TestStringSpan() {
|
|||
errln("FAIL: UnicodeSet(%s).containsAll(%s[:-4]) should be TRUE", pattern, string);
|
||||
}
|
||||
|
||||
string16=UNICODE_STRING_SIMPLE("byayaxya");
|
||||
string16=u"byayaxya";
|
||||
const UChar *s16=string16.getBuffer();
|
||||
int32_t length16=string16.length();
|
||||
(void)length16; // Suppress set but not used warning.
|
||||
|
@ -3846,7 +3846,7 @@ void UnicodeSetTest::TestStringSpan() {
|
|||
errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
string16=UNICODE_STRING_SIMPLE("acdabcdabccd");
|
||||
string16=u"acdabcdabccd";
|
||||
s16=string16.getBuffer();
|
||||
length16=string16.length();
|
||||
if( set.span(s16, 12, USET_SPAN_CONTAINED)!=12 ||
|
||||
|
@ -3863,7 +3863,7 @@ void UnicodeSetTest::TestStringSpan() {
|
|||
errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
string16=UNICODE_STRING_SIMPLE("abbcdabcdabd");
|
||||
string16=u"abbcdabcdabd";
|
||||
s16=string16.getBuffer();
|
||||
length16=string16.length();
|
||||
if( set.spanBack(s16, 12, USET_SPAN_CONTAINED)!=0 ||
|
||||
|
|
Loading…
Add table
Reference in a new issue