diff --git a/source/utf8/checked.h b/source/utf8/checked.h
index 2aef583..7151370 100644
--- a/source/utf8/checked.h
+++ b/source/utf8/checked.h
@@ -257,8 +257,26 @@ namespace utf8
     template
     u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
     {
-        while (start < end)
-            (*result++) = utf8::next(start, end);
+        // A lead surrogate directly followed by a trail surrogate, each stored
+        // as its own UTF-8 sequence, is folded into one supplementary code point.
+        // Anything else, unpaired surrogates included, is copied through as decoded.
+        while (start < end) {
+            uint32_t cp = utf8::next(start, end);
+            if (internal::is_lead_surrogate(cp) && start < end) {
+                // Decode ahead on a copy so start only moves past the second
+                // sequence when that sequence really is a trail surrogate.
+                octet_iterator lookahead = start;
+                const uint32_t trail_offset = utf8::next(lookahead, end) - internal::TRAIL_SURROGATE_MIN;
+                // Trail surrogates are the 0x400 values from TRAIL_SURROGATE_MIN up;
+                // unsigned wrap-around pushes smaller values out of that range.
+                if (trail_offset < 0x400u) {
+                    // Paired surrogates address the range starting at U+10000.
+                    cp = 0x10000u + ((cp - internal::LEAD_SURROGATE_MIN) << 10) + trail_offset;
+                    start = lookahead;
+                }
+            }
+            *(result++) = cp;
+        }
         return result;
     }
 
diff --git a/source/utf8/core.h b/source/utf8/core.h
index ae0f367..dab6fed 100644
--- a/source/utf8/core.h
+++ b/source/utf8/core.h
@@ -92,7 +92,7 @@ namespace internal
     template
     inline bool is_code_point_valid(u32 cp)
     {
-        return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
+        return cp <= CODE_POINT_MAX;
     }
 
     template
diff --git a/source/utf8/unchecked.h b/source/utf8/unchecked.h
index cb24271..731f636 100644
--- a/source/utf8/unchecked.h
+++ b/source/utf8/unchecked.h
@@ -168,8 +168,26 @@ namespace utf8
     template
     u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
     {
-        while (start < end)
-            (*result++) = utf8::unchecked::next(start);
+        // A lead surrogate directly followed by a trail surrogate, each stored
+        // as its own UTF-8 sequence, is folded into one supplementary code point.
+        // Anything else, unpaired surrogates included, is copied through as decoded.
+        while (start < end) {
+            uint32_t cp = utf8::unchecked::next(start);
+            if (internal::is_lead_surrogate(cp) && start < end) {
+                // Decode ahead on a copy so start only moves past the second
+                // sequence when that sequence really is a trail surrogate.
+                octet_iterator lookahead = start;
+                const uint32_t trail_offset = utf8::unchecked::next(lookahead) - internal::TRAIL_SURROGATE_MIN;
+                // Trail surrogates are the 0x400 values from TRAIL_SURROGATE_MIN up;
+                // unsigned wrap-around pushes smaller values out of that range.
+                if (trail_offset < 0x400u) {
+                    // Paired surrogates address the range starting at U+10000.
+                    cp = 0x10000u + ((cp - internal::LEAD_SURROGATE_MIN) << 10) + trail_offset;
+                    start = lookahead;
+                }
+            }
+            *(result++) = cp;
+        }
         return result;
     }
 