merge surrogate pair in utf8to32

This commit is contained in:
yihuang 2016-10-19 16:15:00 +08:00
parent b4c761dbe9
commit 2665564dbc
3 changed files with 23 additions and 5 deletions

View file

@@ -257,8 +257,17 @@ namespace utf8
// Decodes the UTF-8 octets in [start, end) and writes one 32-bit code point
// per decoded character to result.
//
// A lead surrogate that is immediately followed by a trail surrogate (i.e. a
// UTF-16 pair whose halves were each encoded as their own UTF-8 sequence) is
// merged into the single supplementary-plane code point it represents. A lead
// surrogate that is the last code point of the input, or that is followed by
// anything other than a trail surrogate, is written through unchanged.
//
// start, end : the octet range to decode; compared with operator< and copied
//              for a one-code-point lookahead.
// result     : output iterator receiving the decoded code points.
// Returns the output iterator advanced past the last value written.
//
// Decoding (and whatever error reporting it performs) is delegated to
// utf8::next(start, end).
template <typename octet_iterator, typename u32bit_iterator>
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
{
    while (start < end) {
        uint32_t cp = utf8::next(start, end);
        if (internal::is_lead_surrogate(cp) && start < end) {
            // Decode the following code point on a copy of the iterator so
            // that it is only consumed when it really completes the pair.
            octet_iterator lookahead = start;
            const uint32_t following = utf8::next(lookahead, end);
            // Unsigned subtraction wraps for values below TRAIL_SURROGATE_MIN,
            // so a single comparison against the 1024-value trail range
            // (0x400) checks both bounds.
            const uint32_t trail_bits = following - static_cast<uint32_t>(internal::TRAIL_SURROGATE_MIN);
            if (trail_bits < 0x400u) {
                // UTF-16 pair decoding: the lead carries the high ten bits,
                // the trail the low ten, and supplementary code points start
                // at 0x10000.
                const uint32_t lead_bits = cp - static_cast<uint32_t>(internal::LEAD_SURROGATE_MIN);
                cp = 0x10000u + (lead_bits << 10) + trail_bits;
                start = lookahead;
            }
        }
        *result++ = cp;
    }
    return result;
}

View file

@@ -92,7 +92,7 @@ namespace internal
// Reports whether cp is accepted as a code point value.
//
// NOTE(review): this hunk lists two alternative return statements back to
// back. Presumably the first is the line the commit removes and the second is
// its replacement, with the diff's +/- markers lost in rendering -- confirm
// against the repository before relying on either form.
template <typename u32>
inline bool is_code_point_valid(u32 cp)
{
// Accepts cp only if it does not exceed CODE_POINT_MAX and
// utf8::internal::is_surrogate does not flag it.
return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
// Accepts any cp that does not exceed CODE_POINT_MAX (no surrogate check).
// As the lines are written here this statement is unreachable, because the
// return above always executes first.
return cp <= CODE_POINT_MAX;
}
template <typename octet_iterator>

View file

@@ -168,8 +168,17 @@ namespace utf8
// Decodes the UTF-8 octets in [start, end) and writes one 32-bit code point
// per decoded character to result, using utf8::unchecked::next for decoding.
//
// A lead surrogate that is immediately followed by a trail surrogate (i.e. a
// UTF-16 pair whose halves were each encoded as their own UTF-8 sequence) is
// merged into the single supplementary-plane code point it represents. A lead
// surrogate that is the last code point of the input, or that is followed by
// anything other than a trail surrogate, is written through unchanged.
//
// start, end : the octet range to decode; compared with operator< and copied
//              for a one-code-point lookahead.
// result     : output iterator receiving the decoded code points.
// Returns the output iterator advanced past the last value written.
template <typename octet_iterator, typename u32bit_iterator>
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
{
    while (start < end) {
        uint32_t cp = utf8::unchecked::next(start);
        if (internal::is_lead_surrogate(cp) && start < end) {
            // Decode the following code point on a copy of the iterator so
            // that it is only consumed when it really completes the pair.
            octet_iterator lookahead = start;
            const uint32_t following = utf8::unchecked::next(lookahead);
            // Unsigned subtraction wraps for values below TRAIL_SURROGATE_MIN,
            // so a single comparison against the 1024-value trail range
            // (0x400) checks both bounds.
            const uint32_t trail_bits = following - static_cast<uint32_t>(internal::TRAIL_SURROGATE_MIN);
            if (trail_bits < 0x400u) {
                // UTF-16 pair decoding: the lead carries the high ten bits,
                // the trail the low ten, and supplementary code points start
                // at 0x10000.
                const uint32_t lead_bits = cp - static_cast<uint32_t>(internal::LEAD_SURROGATE_MIN);
                cp = 0x10000u + (lead_bits << 10) + trail_bits;
                start = lookahead;
            }
        }
        *result++ = cp;
    }
    return result;
}