mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
parent
ed268e77e3
commit
0eed48038b
2 changed files with 62 additions and 1 deletions
|
@ -1294,7 +1294,15 @@ u_unescapeAt(UNESCAPE_CHAR_AT charAt,
|
|||
int32_t ahead = *offset + 1;
|
||||
c = charAt(*offset, context);
|
||||
if (c == 0x5C /*'\\'*/ && ahead < length) {
|
||||
c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
|
||||
// Calling u_unescapeAt recursively may cause a stack overflow if
|
||||
// we have repeated surrogate lead after that. Limit the
|
||||
// length to 5 ('u' and 4 hex) after ahead.
|
||||
int32_t tailLimit = ahead + 5;
|
||||
if (tailLimit > length) {
|
||||
tailLimit = length;
|
||||
}
|
||||
c = (UChar) u_unescapeAt(charAt, &ahead, tailLimit,
|
||||
context);
|
||||
}
|
||||
if (U16_IS_TRAIL(c)) {
|
||||
*offset = ahead;
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "unicode/ucnv.h"
|
||||
#include "unicode/uiter.h"
|
||||
#include "cintltst.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
#include <string.h>
|
||||
|
||||
|
@ -36,6 +37,7 @@ static void TestStringFunctions(void);
|
|||
static void TestStringSearching(void);
|
||||
static void TestSurrogateSearching(void);
|
||||
static void TestUnescape(void);
|
||||
static void TestUnescapeRepeatedSurrogateLead20725(void);
|
||||
static void TestCountChar32(void);
|
||||
static void TestUCharIterator(void);
|
||||
|
||||
|
@ -48,6 +50,8 @@ void addUStringTest(TestNode** root)
|
|||
addTest(root, &TestStringSearching, "tsutil/custrtst/TestStringSearching");
|
||||
addTest(root, &TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching");
|
||||
addTest(root, &TestUnescape, "tsutil/custrtst/TestUnescape");
|
||||
addTest(root, &TestUnescapeRepeatedSurrogateLead20725,
|
||||
"tsutil/custrtst/TestUnescapeRepeatedSurrogateLead20725");
|
||||
addTest(root, &TestCountChar32, "tsutil/custrtst/TestCountChar32");
|
||||
addTest(root, &TestUCharIterator, "tsutil/custrtst/TestUCharIterator");
|
||||
}
|
||||
|
@ -1124,6 +1128,55 @@ TestUnescape() {
|
|||
/* ### TODO: test u_unescapeAt() */
|
||||
}
|
||||
|
||||
static void
|
||||
TestUnescapeRepeatedSurrogateLead20725() {
|
||||
const int32_t repeat = 20000;
|
||||
const int32_t srclen = repeat * 6 + 1;
|
||||
char *src = (char*)malloc(srclen);
|
||||
UChar *dest = (UChar*) malloc(sizeof(UChar) * (repeat + 1));
|
||||
if (src == NULL || dest == NULL) {
|
||||
log_err("memory allocation error");
|
||||
}
|
||||
for (int32_t i = 0; i < repeat; i++) {
|
||||
uprv_strcpy(src + (i * 6), "\\ud841");
|
||||
}
|
||||
int32_t len = u_unescape(src, dest, repeat);
|
||||
if (len != repeat) {
|
||||
log_err("failure in u_unescape()");
|
||||
}
|
||||
for (int32_t i = 0; i < repeat; i++) {
|
||||
if (dest[i] != 0xd841) {
|
||||
log_err("failure in u_unescape() return value");
|
||||
}
|
||||
}
|
||||
free(src);
|
||||
|
||||
// A few simple test cases to make sure that the code recovers properly
|
||||
u_unescape("\\ud841\\x5A", dest, repeat);
|
||||
const UChar expected1[] = {0xd841, 'Z', 0};
|
||||
if (u_strcmp(dest, expected1)!=0) {
|
||||
log_err("u_unescape() should return u\"\\ud841Z\" but got %s", dest);
|
||||
}
|
||||
|
||||
u_unescape("\\ud841\\U00050005", dest, repeat);
|
||||
const UChar expected2[] = {0xd841, 0xd900, 0xdc05, 0};
|
||||
if (u_strcmp(dest, expected2)!=0) {
|
||||
log_err("u_unescape() should return u\"\\ud841\\ud900\\udc05\" "
|
||||
"but got %s", dest);
|
||||
}
|
||||
|
||||
// \\xXX is ill-formed. The documentation states:
|
||||
// If an escape sequence is ill-formed, this method returns an empty string.
|
||||
u_unescape("\\ud841\\xXX", dest, repeat);
|
||||
const UChar expected3[] = { 0 };
|
||||
if (u_strcmp(dest, expected3)!=0) {
|
||||
log_err("u_unescape() should return empty string");
|
||||
}
|
||||
|
||||
free(dest);
|
||||
|
||||
}
|
||||
|
||||
/* test code point counting functions --------------------------------------- */
|
||||
|
||||
/* reference implementation of u_strHasMoreChar32Than() */
|
||||
|
|
Loading…
Add table
Reference in a new issue