mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-2906 make unescapeAt() handle escaped surrogate pairs
X-SVN-Rev: 13342
This commit is contained in:
parent
9ef53293f9
commit
7fffbdf68f
3 changed files with 43 additions and 0 deletions
|
@ -1481,6 +1481,20 @@ u_unescapeAt(UNESCAPE_CHAR_AT charAt,
|
|||
if (result < 0 || result >= 0x110000) {
|
||||
goto err;
|
||||
}
|
||||
// If an escape sequence specifies a lead surrogate, see if
|
||||
// there is a trail surrogate after it, either as an escape or
|
||||
// as a literal. If so, join them up into a supplementary.
|
||||
if (*offset < length && U16_IS_LEAD(result)) {
|
||||
int32_t ahead = *offset + 1;
|
||||
c = charAt(*offset, context);
|
||||
if (c == 0x5C /*'\\'*/ && ahead < length) {
|
||||
c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
|
||||
}
|
||||
if (U16_IS_TRAIL(c)) {
|
||||
*offset = ahead;
|
||||
result = U16_GET_SUPPLEMENTARY(result, c);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -58,6 +58,7 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
|
|||
CASE(16,TestEscapePattern);
|
||||
CASE(17,TestInvalidCodePoint);
|
||||
CASE(18,TestSymbolTable);
|
||||
CASE(19,TestSurrogate);
|
||||
default: name = ""; break;
|
||||
}
|
||||
}
|
||||
|
@ -1259,6 +1260,32 @@ void UnicodeSetTest::TestSymbolTable() {
|
|||
}
|
||||
}
|
||||
|
||||
void UnicodeSetTest::TestSurrogate() {
|
||||
const char* DATA[] = {
|
||||
// These should all behave identically
|
||||
"[abc\\uD800\\uDC00]",
|
||||
// "[abc\uD800\uDC00]", // Can't do this on C -- only Java
|
||||
"[abc\\U00010000]",
|
||||
0
|
||||
};
|
||||
for (int i=0; DATA[i] != 0; ++i) {
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
logln((UnicodeString)"Test pattern " + i + " :" + DATA[i]);
|
||||
UnicodeSet set(DATA[i], ec);
|
||||
if (U_FAILURE(ec)) {
|
||||
errln("FAIL: UnicodeSet constructor");
|
||||
continue;
|
||||
}
|
||||
expectContainment(set,
|
||||
CharsToUnicodeString("abc\\U00010000"),
|
||||
CharsToUnicodeString("\\uD800;\\uDC00")); // split apart surrogate-pair
|
||||
if (set.size() != 4) {
|
||||
errln((UnicodeString)"FAIL: " + DATA[i] + ".size() == " +
|
||||
set.size() + ", expected 4");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void UnicodeSetTest::TestExhaustive() {
|
||||
// exhaustive tests. Simulate UnicodeSets with integers.
|
||||
// That gives us very solid tests (except for large memory tests).
|
||||
|
|
|
@ -72,6 +72,8 @@ private:
|
|||
|
||||
void TestSymbolTable(void);
|
||||
|
||||
void TestSurrogate();
|
||||
|
||||
private:
|
||||
|
||||
UBool toPatternAux(UChar32 start, UChar32 end);
|
||||
|
|
Loading…
Add table
Reference in a new issue