ICU-20725 Fix stack overflow of u_unescapeAt

See #1207
This commit is contained in:
Frank Tang 2020-08-08 00:53:25 +00:00 committed by Frank Yung-Fong Tang
parent ed268e77e3
commit 0eed48038b
2 changed files with 62 additions and 1 deletions

View file

@ -1294,7 +1294,15 @@ u_unescapeAt(UNESCAPE_CHAR_AT charAt,
int32_t ahead = *offset + 1;
c = charAt(*offset, context);
if (c == 0x5C /*'\\'*/ && ahead < length) {
c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
// Calling u_unescapeAt recursively without a bound may overflow the
// stack when the input contains a long run of escaped lead
// surrogates. Limit the recursive call to the 5 code units after
// `ahead` ('u' plus 4 hex digits).
int32_t tailLimit = ahead + 5;
if (tailLimit > length) {
tailLimit = length;
}
c = (UChar) u_unescapeAt(charAt, &ahead, tailLimit,
context);
}
if (U16_IS_TRAIL(c)) {
*offset = ahead;

View file

@ -22,6 +22,7 @@
#include "unicode/ucnv.h"
#include "unicode/uiter.h"
#include "cintltst.h"
#include "cstring.h"
#include "cmemory.h"
#include <string.h>
@ -36,6 +37,7 @@ static void TestStringFunctions(void);
static void TestStringSearching(void);
static void TestSurrogateSearching(void);
static void TestUnescape(void);
static void TestUnescapeRepeatedSurrogateLead20725(void);
static void TestCountChar32(void);
static void TestUCharIterator(void);
@ -48,6 +50,8 @@ void addUStringTest(TestNode** root)
addTest(root, &TestStringSearching, "tsutil/custrtst/TestStringSearching");
addTest(root, &TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching");
addTest(root, &TestUnescape, "tsutil/custrtst/TestUnescape");
addTest(root, &TestUnescapeRepeatedSurrogateLead20725,
"tsutil/custrtst/TestUnescapeRepeatedSurrogateLead20725");
addTest(root, &TestCountChar32, "tsutil/custrtst/TestCountChar32");
addTest(root, &TestUCharIterator, "tsutil/custrtst/TestUCharIterator");
}
@ -1124,6 +1128,55 @@ TestUnescape() {
/* ### TODO: test u_unescapeAt() */
}
/*
 * Formats the NUL-terminated UTF-16 string s into buf as ASCII "\uXXXX"
 * escapes so that it can safely be handed to a printf-style "%s".
 * Only whole escapes that fit into capacity bytes are written and the
 * result is always NUL-terminated (capacity must be at least 1).
 * Returns buf.
 */
static const char *
unescape20725ToAscii(const UChar *s, char *buf, int32_t capacity) {
    static const char hexDigits[] = "0123456789abcdef";
    int32_t out = 0;
    /* Each code unit needs 6 bytes; keep one byte for the terminator. */
    for (; *s != 0 && out + 6 < capacity; ++s) {
        buf[out++] = '\\';
        buf[out++] = 'u';
        buf[out++] = hexDigits[(*s >> 12) & 0xf];
        buf[out++] = hexDigits[(*s >> 8) & 0xf];
        buf[out++] = hexDigits[(*s >> 4) & 0xf];
        buf[out++] = hexDigits[*s & 0xf];
    }
    buf[out] = 0;
    return buf;
}

/*
 * Regression test for ICU-20725: unescaping a very long run of escaped
 * lead surrogates ("\ud841" repeated 20000 times) must complete and yield
 * one 0xd841 code unit per escape. Afterwards a few short inputs check
 * how a lead surrogate escape followed by another escape is handled.
 */
static void
TestUnescapeRepeatedSurrogateLead20725() {
    const int32_t repeat = 20000;
    const int32_t srclen = repeat * 6 + 1;  /* 6 chars per escape + NUL */
    char actual[64];
    char *src = (char*)malloc(srclen);
    UChar *dest = (UChar*) malloc(sizeof(UChar) * (repeat + 1));
    if (src == NULL || dest == NULL) {
        log_err("memory allocation error");
        /* Nothing below can run without both buffers; free(NULL) is a no-op. */
        free(src);
        free(dest);
        return;
    }
    /* Each copy writes a terminating NUL that the next copy overwrites,
     * so src ends up as one NUL-terminated string of `repeat` escapes. */
    for (int32_t i = 0; i < repeat; i++) {
        uprv_strcpy(src + (i * 6), "\\ud841");
    }
    int32_t len = u_unescape(src, dest, repeat);
    if (len != repeat) {
        log_err("failure in u_unescape()");
    } else {
        /* Only inspect dest when all `repeat` units are known to have been
         * written; otherwise part of the buffer may be uninitialized. */
        for (int32_t i = 0; i < repeat; i++) {
            if (dest[i] != 0xd841) {
                log_err("failure in u_unescape() return value");
                break;  /* one report is enough; avoid 20000 duplicates */
            }
        }
    }
    free(src);

    // A few simple test cases to make sure that the code recovers properly
    u_unescape("\\ud841\\x5A", dest, repeat);
    const UChar expected1[] = {0xd841, 'Z', 0};
    if (u_strcmp(dest, expected1)!=0) {
        log_err("u_unescape() should return u\"\\ud841Z\" but got %s",
                unescape20725ToAscii(dest, actual, (int32_t)sizeof(actual)));
    }

    u_unescape("\\ud841\\U00050005", dest, repeat);
    const UChar expected2[] = {0xd841, 0xd900, 0xdc05, 0};
    if (u_strcmp(dest, expected2)!=0) {
        log_err("u_unescape() should return u\"\\ud841\\ud900\\udc05\" "
                "but got %s",
                unescape20725ToAscii(dest, actual, (int32_t)sizeof(actual)));
    }

    // \\xXX is ill-formed. The documentation states:
    // If an escape sequence is ill-formed, this method returns an empty string.
    u_unescape("\\ud841\\xXX", dest, repeat);
    const UChar expected3[] = { 0 };
    if (u_strcmp(dest, expected3)!=0) {
        log_err("u_unescape() should return empty string");
    }
    free(dest);
}
/* test code point counting functions --------------------------------------- */
/* reference implementation of u_strHasMoreChar32Than() */