Fix for issue #78: unchecked::utf16to8 reads out of bounds if provided only a leading surrogate

This commit is contained in:
nemtrif 2023-09-24 17:17:18 -04:00
parent 0c8da664ee
commit 0ee84daac8
2 changed files with 8 additions and 1 deletions

View file

@ -155,7 +155,9 @@ namespace utf8
{
while (start != end) {
uint32_t cp = utf8::internal::mask16(*start++);
// Take care of surrogate pairs first
if (start == end)
return result;
// Take care of surrogate pairs first
if (utf8::internal::is_lead_surrogate(cp)) {
uint32_t trail_surrogate = utf8::internal::mask16(*start++);
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;

View file

@ -137,6 +137,11 @@ TEST(UnCheckedAPITests, test_utf16to8)
string utf8result;
utf8::unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
EXPECT_EQ (utf8result.size(), 10);
utf8result.clear();
unsigned short highsurrogateonly[] = {0xd800};
utf8::unchecked::utf16to8(highsurrogateonly, highsurrogateonly + 1, back_inserter(utf8result));
EXPECT_TRUE(true); // we didn't crash
}
TEST(UnCheckedAPITests, test_utf8to16)