mirror of
https://github.com/nemtrif/utfcpp.git
synced 2025-04-06 05:55:08 +00:00
add support for input_iterator
This commit is contained in:
parent
36206b924e
commit
b7d04b446a
4 changed files with 297 additions and 36 deletions
|
@ -29,7 +29,9 @@ DEALINGS IN THE SOFTWARE.
|
|||
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
|
||||
#include "core.h"
|
||||
#include "cpp11_facilities.h"
|
||||
#include <stdexcept>
|
||||
#include <utility>
|
||||
|
||||
namespace utf8
|
||||
{
|
||||
|
@ -136,10 +138,7 @@ namespace utf8
|
|||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t next(octet_iterator& it, octet_iterator end)
|
||||
{
|
||||
uint32_t cp = 0;
|
||||
internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
|
||||
void check_err_code(internal::utf_error err_code, octet_iterator it, uint32_t cp) {
|
||||
switch (err_code) {
|
||||
case internal::UTF8_OK :
|
||||
break;
|
||||
|
@ -152,9 +151,25 @@ namespace utf8
|
|||
case internal::INVALID_CODE_POINT :
|
||||
throw invalid_code_point(cp);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t next_impl(octet_iterator& it, octet_iterator end)
|
||||
{
|
||||
uint32_t cp = 0;
|
||||
internal::utf_error err_code = utf8::internal::validate_next_impl(it, end, cp);
|
||||
check_err_code(err_code, it, cp);
|
||||
return cp;
|
||||
}
|
||||
|
||||
// Decodes the code point at `it`, then moves `it` one step further.
// next_impl reports invalid input by throwing, so the increment below is
// only reached for a successfully decoded code point.
template <typename octet_iterator>
uint32_t next(octet_iterator& it, octet_iterator end)
{
    const uint32_t decoded = next_impl(it, end); // throws on error
    ++it;
    return decoded;
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t peek_next(octet_iterator it, octet_iterator end)
|
||||
{
|
||||
|
@ -261,21 +276,21 @@ namespace utf8
|
|||
return result;
|
||||
}
|
||||
|
||||
// The iterator class
|
||||
namespace internal {
|
||||
|
||||
// The bidirectional_iterator class
|
||||
template <typename octet_iterator>
|
||||
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
|
||||
class bidirectional_iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
|
||||
octet_iterator it;
|
||||
octet_iterator range_start;
|
||||
octet_iterator range_end;
|
||||
public:
|
||||
iterator () {}
|
||||
explicit iterator (const octet_iterator& octet_it,
|
||||
bidirectional_iterator () {}
|
||||
explicit bidirectional_iterator (const octet_iterator& octet_it,
|
||||
const octet_iterator& rangestart,
|
||||
const octet_iterator& rangeend) :
|
||||
it(octet_it), range_start(rangestart), range_end(rangeend)
|
||||
{
|
||||
if (it < range_start || it > range_end)
|
||||
throw std::out_of_range("Invalid utf-8 iterator position");
|
||||
}
|
||||
// the default "big three" are OK
|
||||
octet_iterator base () const { return it; }
|
||||
|
@ -284,39 +299,142 @@ namespace utf8
|
|||
octet_iterator temp = it;
|
||||
return utf8::next(temp, range_end);
|
||||
}
|
||||
bool operator == (const iterator& rhs) const
|
||||
bool operator == (const bidirectional_iterator& rhs) const
|
||||
{
|
||||
if (range_start != rhs.range_start || range_end != rhs.range_end)
|
||||
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
|
||||
return (it == rhs.it);
|
||||
}
|
||||
bool operator != (const iterator& rhs) const
|
||||
bool operator != (const bidirectional_iterator& rhs) const
|
||||
{
|
||||
return !(operator == (rhs));
|
||||
}
|
||||
iterator& operator ++ ()
|
||||
bidirectional_iterator& operator ++ ()
|
||||
{
|
||||
utf8::next(it, range_end);
|
||||
return *this;
|
||||
}
|
||||
iterator operator ++ (int)
|
||||
bidirectional_iterator operator ++ (int)
|
||||
{
|
||||
iterator temp = *this;
|
||||
bidirectional_iterator temp = *this;
|
||||
utf8::next(it, range_end);
|
||||
return temp;
|
||||
}
|
||||
iterator& operator -- ()
|
||||
bidirectional_iterator& operator -- ()
|
||||
{
|
||||
utf8::prior(it, range_start);
|
||||
return *this;
|
||||
}
|
||||
iterator operator -- (int)
|
||||
bidirectional_iterator operator -- (int)
|
||||
{
|
||||
iterator temp = *this;
|
||||
bidirectional_iterator temp = *this;
|
||||
utf8::prior(it, range_start);
|
||||
return temp;
|
||||
}
|
||||
}; // class iterator
|
||||
}; // class bidirectional_iterator
|
||||
|
||||
template <typename octet_iterator>
|
||||
class input_iterator : public std::iterator<std::input_iterator_tag, uint32_t> {
|
||||
private:
|
||||
octet_iterator it;
|
||||
octet_iterator range_start;
|
||||
octet_iterator range_end;
|
||||
uint32_t cp{};
|
||||
bool ok{};
|
||||
void read() {
|
||||
ok = it != range_end;
|
||||
if(ok) {
|
||||
cp = utf8::next_impl(it, range_end);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
input_iterator () {}
|
||||
explicit input_iterator (const octet_iterator& octet_it,
|
||||
const octet_iterator& rangestart,
|
||||
const octet_iterator& rangeend) :
|
||||
it(octet_it), range_start(rangestart), range_end(rangeend)
|
||||
{
|
||||
read();
|
||||
}
|
||||
octet_iterator base () const { return it; }
|
||||
uint32_t operator * () const
|
||||
{
|
||||
if(!ok) {
|
||||
throw std::runtime_error("no such element");
|
||||
}
|
||||
return cp;
|
||||
}
|
||||
|
||||
bool operator == (const input_iterator& rhs) const
|
||||
{
|
||||
if (range_start != rhs.range_start || range_end != rhs.range_end)
|
||||
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
|
||||
return ok == rhs.ok && (!ok || it == rhs.it);
|
||||
}
|
||||
bool operator != (const input_iterator& rhs) const
|
||||
{
|
||||
return !(operator == (rhs));
|
||||
}
|
||||
input_iterator& operator ++ ()
|
||||
{
|
||||
++it;
|
||||
read();
|
||||
return *this;
|
||||
}
|
||||
input_iterator operator ++ (int)
|
||||
{
|
||||
input_iterator temp = *this;
|
||||
++it;
|
||||
read();
|
||||
return temp;
|
||||
}
|
||||
}; // class input_iterator
|
||||
|
||||
template <typename octet_iterator>
|
||||
struct get_iterator_class {
|
||||
private:
|
||||
static input_iterator<octet_iterator> get(std::input_iterator_tag);
|
||||
static bidirectional_iterator<octet_iterator> get(std::bidirectional_iterator_tag);
|
||||
public:
|
||||
using type = decltype(get(Iterator_category<octet_iterator>{}));
|
||||
};
|
||||
|
||||
}//internal
|
||||
|
||||
// Public UTF-8 iterator type: resolves to whichever adaptor class
// internal::get_iterator_class selects for the given octet_iterator.
template <typename octet_iterator>
|
||||
using iterator = typename utf8::internal::get_iterator_class<octet_iterator>::type;
|
||||
|
||||
template <typename Cont>
|
||||
inline std::pair<iterator<typename Cont::iterator>, iterator<typename Cont::iterator>> make_iterator_pair(Cont& c) {
|
||||
using Iter = iterator<typename Cont::iterator>;
|
||||
auto it = c.begin();
|
||||
auto end = c.end();
|
||||
return std::make_pair(Iter{it, it, end}, Iter{end, it, end});
|
||||
}
|
||||
template <typename Cont>
|
||||
inline std::pair<iterator<typename Cont::const_iterator>, iterator<typename Cont::const_iterator>> make_iterator_pair(const Cont& c) {
|
||||
using Iter = iterator<typename Cont::const_iterator>;
|
||||
auto it = c.begin();
|
||||
auto end = c.end();
|
||||
return std::make_pair(Iter{it, it, end}, Iter{end, it, end});
|
||||
}
|
||||
template <size_t N>
|
||||
inline std::pair<iterator<const char*>, iterator<const char*>> make_iterator_pair(const char(&tab)[N]) {
|
||||
static_assert(N > 0, "bad utf8 string");
|
||||
using Iter = iterator<const char*>;
|
||||
auto it = &tab[0];
|
||||
auto end = &tab[N-1];
|
||||
return std::make_pair(Iter{it, it, end}, Iter{end, it, end});
|
||||
}
|
||||
|
||||
// Builds the (begin, end) pair of UTF-8 iterators that decode the octets of
// an input stream; the end iterator wraps the end-of-stream sentinel.
//
// Side effect: clears the skipws flag on `is`. std::istream_iterator<char>
// extracts with operator>>, which discards whitespace while skipws is set, so
// without this every space, tab and newline would vanish from the decoded
// text. The flag is cleared before the first iterator is created because its
// construction already extracts an octet, and it is left cleared because the
// returned iterators keep reading from the stream after this call returns.
inline std::pair<iterator<std::istream_iterator<char>>, iterator<std::istream_iterator<char>>> make_iterator_pair(std::istream& is) {
    using Is_iter = std::istream_iterator<char>;
    using Iter = iterator<Is_iter>;
    is.unsetf(std::ios_base::skipws);
    auto it = Is_iter{is};
    auto end = Is_iter{};
    return std::make_pair(Iter{it, it, end}, Iter{end, it, end});
}
|
||||
|
||||
} // namespace utf8
|
||||
|
||||
|
|
|
@ -237,7 +237,7 @@ namespace internal
|
|||
#undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
|
||||
|
||||
template <typename octet_iterator>
|
||||
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
|
||||
utf_error validate_next_impl(octet_iterator& it, octet_iterator end, uint32_t& code_point)
|
||||
{
|
||||
if (it == end)
|
||||
return NOT_ENOUGH_ROOM;
|
||||
|
@ -276,7 +276,6 @@ namespace internal
|
|||
if (!utf8::internal::is_overlong_sequence(cp, length)){
|
||||
// Passed! Return here.
|
||||
code_point = cp;
|
||||
++it;
|
||||
return UTF8_OK;
|
||||
}
|
||||
else
|
||||
|
@ -291,6 +290,16 @@ namespace internal
|
|||
return err;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) {
|
||||
utf_error err = validate_next_impl(it, end, code_point);
|
||||
if(err == UTF8_OK) {
|
||||
++it;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
template <typename octet_iterator>
|
||||
inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
|
||||
uint32_t ignored;
|
||||
|
|
37
source/utf8/cpp11_facilities.h
Normal file
37
source/utf8/cpp11_facilities.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UTF8_FOR_CPP_CPP11_FACILITIES_H
#define UTF8_FOR_CPP_CPP11_FACILITIES_H

#include <iterator>     // std::iterator_traits - needed for this header to be self-contained
#include <type_traits>

namespace utf8
{
    // Shorthand for the iterator category tag of iterator type T,
    // as reported by std::iterator_traits<T>.
    template<typename T>
    using Iterator_category = typename std::iterator_traits<T>::iterator_category;

} // namespace utf8

#endif // header guard
|
|
@ -29,6 +29,8 @@ DEALINGS IN THE SOFTWARE.
|
|||
#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
|
||||
#include "core.h"
|
||||
#include "cpp11_facilities.h"
|
||||
#include <utility>
|
||||
|
||||
namespace utf8
|
||||
{
|
||||
|
@ -98,7 +100,7 @@ namespace utf8
|
|||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t next(octet_iterator& it)
|
||||
uint32_t next_impl(octet_iterator& it)
|
||||
{
|
||||
uint32_t cp = utf8::internal::mask8(*it);
|
||||
typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
|
||||
|
@ -124,10 +126,16 @@ namespace utf8
|
|||
cp += (*it) & 0x3f;
|
||||
break;
|
||||
}
|
||||
++it;
|
||||
return cp;
|
||||
}
|
||||
|
||||
// Decodes the code point at `it` with next_impl, then moves `it` one step
// further. No validation is performed in the unchecked API.
template <typename octet_iterator>
inline uint32_t next(octet_iterator& it) {
    const uint32_t decoded = next_impl(it);
    ++it;
    return decoded;
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t peek_next(octet_iterator it)
|
||||
{
|
||||
|
@ -215,13 +223,14 @@ namespace utf8
|
|||
return result;
|
||||
}
|
||||
|
||||
// The iterator class
|
||||
namespace internal {
|
||||
// The bidirectional_iterator class
|
||||
template <typename octet_iterator>
|
||||
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
|
||||
class bidirectional_iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
|
||||
octet_iterator it;
|
||||
public:
|
||||
iterator () {}
|
||||
explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
|
||||
bidirectional_iterator () {}
|
||||
explicit bidirectional_iterator (const octet_iterator& octet_it): it(octet_it) {}
|
||||
// the default "big three" are OK
|
||||
octet_iterator base () const { return it; }
|
||||
uint32_t operator * () const
|
||||
|
@ -229,37 +238,125 @@ namespace utf8
|
|||
octet_iterator temp = it;
|
||||
return utf8::unchecked::next(temp);
|
||||
}
|
||||
bool operator == (const iterator& rhs) const
|
||||
bool operator == (const bidirectional_iterator& rhs) const
|
||||
{
|
||||
return (it == rhs.it);
|
||||
}
|
||||
bool operator != (const iterator& rhs) const
|
||||
bool operator != (const bidirectional_iterator& rhs) const
|
||||
{
|
||||
return !(operator == (rhs));
|
||||
}
|
||||
iterator& operator ++ ()
|
||||
bidirectional_iterator& operator ++ ()
|
||||
{
|
||||
::std::advance(it, utf8::internal::sequence_length(it));
|
||||
return *this;
|
||||
}
|
||||
iterator operator ++ (int)
|
||||
bidirectional_iterator operator ++ (int)
|
||||
{
|
||||
iterator temp = *this;
|
||||
bidirectional_iterator temp = *this;
|
||||
::std::advance(it, utf8::internal::sequence_length(it));
|
||||
return temp;
|
||||
}
|
||||
iterator& operator -- ()
|
||||
bidirectional_iterator& operator -- ()
|
||||
{
|
||||
utf8::unchecked::prior(it);
|
||||
return *this;
|
||||
}
|
||||
iterator operator -- (int)
|
||||
bidirectional_iterator operator -- (int)
|
||||
{
|
||||
iterator temp = *this;
|
||||
bidirectional_iterator temp = *this;
|
||||
utf8::unchecked::prior(it);
|
||||
return temp;
|
||||
}
|
||||
}; // class iterator
|
||||
}; // class bidirectional_iterator
|
||||
|
||||
template <typename octet_iterator>
|
||||
class input_iterator : public std::iterator<std::input_iterator_tag, uint32_t> {
|
||||
private:
|
||||
octet_iterator it;
|
||||
uint32_t cp{};
|
||||
void read() {
|
||||
cp = utf8::unchecked::next_impl(it);
|
||||
}
|
||||
|
||||
public:
|
||||
input_iterator () {}
|
||||
explicit input_iterator (const octet_iterator& octet_it) : it(octet_it)
|
||||
{
|
||||
read();
|
||||
}
|
||||
octet_iterator base () const { return it; }
|
||||
uint32_t operator * () const
|
||||
{
|
||||
return cp;
|
||||
}
|
||||
|
||||
bool operator == (const input_iterator& rhs) const
|
||||
{
|
||||
return it == rhs.it;
|
||||
}
|
||||
bool operator != (const input_iterator& rhs) const
|
||||
{
|
||||
return !(operator == (rhs));
|
||||
}
|
||||
input_iterator& operator ++ ()
|
||||
{
|
||||
++it;
|
||||
read();
|
||||
return *this;
|
||||
}
|
||||
input_iterator operator ++ (int)
|
||||
{
|
||||
input_iterator temp = *this;
|
||||
++it;
|
||||
read();
|
||||
return temp;
|
||||
}
|
||||
}; // class input_iterator
|
||||
|
||||
template <typename octet_iterator>
|
||||
struct get_iterator_class {
|
||||
private:
|
||||
static input_iterator<octet_iterator> get(std::input_iterator_tag);
|
||||
static bidirectional_iterator<octet_iterator> get(std::bidirectional_iterator_tag);
|
||||
public:
|
||||
using type = decltype(get(Iterator_category<octet_iterator>{}));
|
||||
};
|
||||
}//internal
|
||||
|
||||
// Public unchecked UTF-8 iterator type: resolves to whichever adaptor class
// unchecked::internal::get_iterator_class selects for the given octet_iterator.
template <typename octet_iterator>
|
||||
using iterator = typename utf8::unchecked::internal::get_iterator_class<octet_iterator>::type;
|
||||
|
||||
template <typename Cont>
|
||||
inline std::pair<iterator<typename Cont::iterator>, iterator<typename Cont::iterator>> make_iterator_pair(Cont& c) {
|
||||
using Iter = iterator<typename Cont::iterator>;
|
||||
auto it = c.begin();
|
||||
auto end = c.end();
|
||||
return std::make_pair(Iter{it}, Iter{end});
|
||||
}
|
||||
template <typename Cont>
|
||||
inline std::pair<iterator<typename Cont::const_iterator>, iterator<typename Cont::const_iterator>> make_iterator_pair(const Cont& c) {
|
||||
using Iter = iterator<typename Cont::const_iterator>;
|
||||
auto it = c.begin();
|
||||
auto end = c.end();
|
||||
return std::make_pair(Iter{it}, Iter{end});
|
||||
}
|
||||
template <size_t N>
|
||||
inline std::pair<iterator<const char*>, iterator<const char*>> make_iterator_pair(const char(&tab)[N]) {
|
||||
static_assert(N > 0, "bad utf8 string");
|
||||
using Iter = iterator<const char*>;
|
||||
auto it = &tab[0];
|
||||
auto end = &tab[N-1];
|
||||
return std::make_pair(Iter{it}, Iter{end});
|
||||
}
|
||||
|
||||
// Builds the (begin, end) pair of unchecked UTF-8 iterators that decode the
// octets of an input stream; the end iterator wraps the end-of-stream sentinel.
//
// Side effect: clears the skipws flag on `is`. std::istream_iterator<char>
// extracts with operator>>, which discards whitespace while skipws is set, so
// without this every space, tab and newline would vanish from the decoded
// text. The flag is cleared before the first iterator is created because its
// construction already extracts an octet, and it is left cleared because the
// returned iterators keep reading from the stream after this call returns.
inline std::pair<iterator<std::istream_iterator<char>>, iterator<std::istream_iterator<char>>> make_iterator_pair(std::istream& is) {
    using Is_iter = std::istream_iterator<char>;
    using Iter = iterator<Is_iter>;
    is.unsetf(std::ios_base::skipws);
    auto it = Is_iter{is};
    auto end = Is_iter{};
    return std::make_pair(Iter{it}, Iter{end});
}
|
||||
|
||||
} // namespace utf8::unchecked
|
||||
} // namespace utf8
|
||||
|
|
Loading…
Add table
Reference in a new issue