mirror of
https://github.com/nemtrif/utfcpp.git
synced 2025-04-05 05:25:07 +00:00
next16
This commit is contained in:
parent
b455811a4e
commit
eab76c5312
3 changed files with 59 additions and 0 deletions
|
@@ -167,6 +167,20 @@ namespace utf8
|
|||
return cp;
|
||||
}
|
||||
|
||||
template <typename word_iterator>
|
||||
utfchar32_t next16(word_iterator& it, word_iterator end)
|
||||
{
|
||||
utfchar32_t cp = 0;
|
||||
internal::utf_error err_code = utf8::internal::validate_next16(it, end, cp);
|
||||
switch (err_code) {
|
||||
case internal::UTF8_OK :
|
||||
break;
|
||||
case internal::NOT_ENOUGH_ROOM :
|
||||
throw not_enough_room();
|
||||
}
|
||||
return cp;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
utfchar32_t peek_next(octet_iterator it, octet_iterator end)
|
||||
{
|
||||
|
|
|
@@ -305,6 +305,42 @@ namespace internal
|
|||
return utf8::internal::validate_next(it, end, ignored);
|
||||
}
|
||||
|
||||
template <typename word_iterator>
|
||||
utf_error validate_next16(word_iterator& it, word_iterator end, utfchar32_t& code_point)
|
||||
{
|
||||
if (it == end)
|
||||
return NOT_ENOUGH_ROOM;
|
||||
// Save the original value of it so we can go back in case of failure
|
||||
// Of course, it does not make much sense with i.e. stream iterators
|
||||
word_iterator original_it = it;
|
||||
|
||||
utf_error err = UTF8_OK;
|
||||
|
||||
const utfchar16_t first_word = *it++;
|
||||
if (!is_surrogate(first_word)) {
|
||||
code_point = first_word;
|
||||
return UTF8_OK;
|
||||
}
|
||||
else {
|
||||
if (it == end)
|
||||
err = NOT_ENOUGH_ROOM;
|
||||
else if (is_lead_surrogate(first_word)) {
|
||||
const utfchar16_t second_word = *it++;
|
||||
if (is_trail_surrogate(second_word)) {
|
||||
code_point = (first_word << 10) + second_word + SURROGATE_OFFSET;
|
||||
return UTF8_OK;
|
||||
} else
|
||||
err = INCOMPLETE_SEQUENCE;
|
||||
|
||||
} else {
|
||||
err = INVALID_LEAD;
|
||||
}
|
||||
}
|
||||
// error branch
|
||||
it = original_it;
|
||||
return err;
|
||||
}
|
||||
|
||||
// Internal implementation of both checked and unchecked append() function
|
||||
// This function will be invoked by the overloads below, as they will know
|
||||
// the octet_type.
|
||||
|
|
|
@@ -136,6 +136,15 @@ namespace utf8
|
|||
return utf8::unchecked::next(it);
|
||||
}
|
||||
|
||||
template <typename word_iterator>
|
||||
utfchar32_t next16(word_iterator& it)
|
||||
{
|
||||
utfchar32_t cp = utf8::internal::mask16(*it++);
|
||||
if (utf8::internal::is_lead_surrogate(cp))
|
||||
return (cp << 10) + *it++ + utf8::internal::SURROGATE_OFFSET;
|
||||
return cp;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
utfchar32_t prior(octet_iterator& it)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue