ICU-2906 make unescapeAt() handle escaped surrogate pairs

X-SVN-Rev: 13342
This commit is contained in:
Alan Liu 2003-10-07 18:10:37 +00:00
parent 9ef53293f9
commit 7fffbdf68f
3 changed files with 43 additions and 0 deletions

View file

@ -1481,6 +1481,20 @@ u_unescapeAt(UNESCAPE_CHAR_AT charAt,
if (result < 0 || result >= 0x110000) {
goto err;
}
// If an escape sequence specifies a lead surrogate, see if
// there is a trail surrogate after it, either as an escape or
// as a literal. If so, join them up into a supplementary.
if (*offset < length && U16_IS_LEAD(result)) {
int32_t ahead = *offset + 1;
c = charAt(*offset, context);
if (c == 0x5C /*'\\'*/ && ahead < length) {
c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
}
if (U16_IS_TRAIL(c)) {
*offset = ahead;
result = U16_GET_SUPPLEMENTARY(result, c);
}
}
return result;
}

View file

@ -58,6 +58,7 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
CASE(16,TestEscapePattern);
CASE(17,TestInvalidCodePoint);
CASE(18,TestSymbolTable);
CASE(19,TestSurrogate);
default: name = ""; break;
}
}
@ -1259,6 +1260,32 @@ void UnicodeSetTest::TestSymbolTable() {
}
}
void UnicodeSetTest::TestSurrogate() {
const char* DATA[] = {
// These should all behave identically
"[abc\\uD800\\uDC00]",
// "[abc\uD800\uDC00]", // Can't do this on C -- only Java
"[abc\\U00010000]",
0
};
for (int i=0; DATA[i] != 0; ++i) {
UErrorCode ec = U_ZERO_ERROR;
logln((UnicodeString)"Test pattern " + i + " :" + DATA[i]);
UnicodeSet set(DATA[i], ec);
if (U_FAILURE(ec)) {
errln("FAIL: UnicodeSet constructor");
continue;
}
expectContainment(set,
CharsToUnicodeString("abc\\U00010000"),
CharsToUnicodeString("\\uD800;\\uDC00")); // split apart surrogate-pair
if (set.size() != 4) {
errln((UnicodeString)"FAIL: " + DATA[i] + ".size() == " +
set.size() + ", expected 4");
}
}
}
void UnicodeSetTest::TestExhaustive() {
// exhaustive tests. Simulate UnicodeSets with integers.
// That gives us very solid tests (except for large memory tests).

View file

@ -72,6 +72,8 @@ private:
void TestSymbolTable(void);
void TestSurrogate();
private:
UBool toPatternAux(UChar32 start, UChar32 end);