From 0ee84daac8bd6c35cbc3b8a144e6a40e0b2fe588 Mon Sep 17 00:00:00 2001 From: nemtrif Date: Sun, 24 Sep 2023 17:17:18 -0400 Subject: [PATCH] Fix for issue #78: unchecked::utf16to8 reads out of bounds if provided only leading surrogate --- source/utf8/unchecked.h | 4 +++- tests/test_unchecked_api.h | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/source/utf8/unchecked.h b/source/utf8/unchecked.h index 8fe83c9..7981839 100644 --- a/source/utf8/unchecked.h +++ b/source/utf8/unchecked.h @@ -155,7 +155,9 @@ namespace utf8 { while (start != end) { uint32_t cp = utf8::internal::mask16(*start++); - // Take care of surrogate pairs first + if (start == end) + return result; + // Take care of surrogate pairs first if (utf8::internal::is_lead_surrogate(cp)) { uint32_t trail_surrogate = utf8::internal::mask16(*start++); cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; diff --git a/tests/test_unchecked_api.h b/tests/test_unchecked_api.h index 10c5991..66d0400 100644 --- a/tests/test_unchecked_api.h +++ b/tests/test_unchecked_api.h @@ -137,6 +137,11 @@ TEST(UnCheckedAPITests, test_utf16to8) string utf8result; utf8::unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); EXPECT_EQ (utf8result.size(), 10); + + utf8result.clear(); + unsigned short highsurrogateonly[] = {0xd800}; + utf8::unchecked::utf16to8(highsurrogateonly, highsurrogateonly + 1, back_inserter(utf8result)); + EXPECT_TRUE(true); // we didn't crash } TEST(UnCheckedAPITests, test_utf8to16)