Fix for issue #111

Fixes a regression introduced by the fix for #78 that caused utf8::unchecked::utf16to8() to chop off the last character in many cases.
This commit is contained in:
nemtrif 2023-10-28 11:14:26 -04:00
parent 925e7147ec
commit 972c5501d7
2 changed files with 7 additions and 2 deletions

View file

@@ -183,10 +183,10 @@ namespace utf8
{
while (start != end) {
utfchar32_t cp = utf8::internal::mask16(*start++);
if (start == end)
return result;
// Take care of surrogate pairs first
if (utf8::internal::is_lead_surrogate(cp)) {
if (start == end)
return result;
utfchar32_t trail_surrogate = utf8::internal::mask16(*start++);
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
}

View file

@@ -50,6 +50,11 @@ TEST(CPP11APITests, test_utf16to8)
u16string utf16string = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
string u = utf16to8(utf16string);
EXPECT_EQ (u.size(), 10);
u16string h16 = u"h!";
string h8;
utf8::unchecked::utf16to8(h16.begin(), h16.end(), std::back_inserter(h8));
EXPECT_EQ (h8, "h!");
}
TEST(CPP11APITests, test_utf8to16)