add support for input_iterator

This commit is contained in:
Jean-Prost Frédéric 2019-11-18 22:14:10 +01:00
parent 36206b924e
commit b7d04b446a
4 changed files with 297 additions and 36 deletions

View file

@ -29,7 +29,9 @@ DEALINGS IN THE SOFTWARE.
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "core.h"
#include "cpp11_facilities.h"
#include <stdexcept>
#include <utility>
namespace utf8
{
@ -136,10 +138,7 @@ namespace utf8
}
template <typename octet_iterator>
uint32_t next(octet_iterator& it, octet_iterator end)
{
uint32_t cp = 0;
internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
void check_err_code(internal::utf_error err_code, octet_iterator it, uint32_t cp) {
switch (err_code) {
case internal::UTF8_OK :
break;
@ -152,9 +151,25 @@ namespace utf8
case internal::INVALID_CODE_POINT :
throw invalid_code_point(cp);
}
}
template <typename octet_iterator>
uint32_t next_impl(octet_iterator& it, octet_iterator end)
{
uint32_t cp = 0;
internal::utf_error err_code = utf8::internal::validate_next_impl(it, end, cp);
check_err_code(err_code, it, cp);
return cp;
}
// Decodes the code point starting at `it` and then advances `it` by one more
// position. next_impl throws on invalid input, in which case the increment
// below is never reached.
template <typename octet_iterator>
uint32_t next(octet_iterator& it, octet_iterator end)
{
    const uint32_t decoded = next_impl(it, end);
    ++it;
    return decoded;
}
template <typename octet_iterator>
uint32_t peek_next(octet_iterator it, octet_iterator end)
{
@ -261,21 +276,21 @@ namespace utf8
return result;
}
// The iterator class
namespace internal {
// The bidirectional_iterator class
template <typename octet_iterator>
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
class bidirectional_iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
octet_iterator it;
octet_iterator range_start;
octet_iterator range_end;
public:
iterator () {}
explicit iterator (const octet_iterator& octet_it,
bidirectional_iterator () {}
explicit bidirectional_iterator (const octet_iterator& octet_it,
const octet_iterator& rangestart,
const octet_iterator& rangeend) :
it(octet_it), range_start(rangestart), range_end(rangeend)
{
if (it < range_start || it > range_end)
throw std::out_of_range("Invalid utf-8 iterator position");
}
// the default "big three" are OK
octet_iterator base () const { return it; }
@ -284,39 +299,142 @@ namespace utf8
octet_iterator temp = it;
return utf8::next(temp, range_end);
}
bool operator == (const iterator& rhs) const
bool operator == (const bidirectional_iterator& rhs) const
{
if (range_start != rhs.range_start || range_end != rhs.range_end)
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
return (it == rhs.it);
}
bool operator != (const iterator& rhs) const
bool operator != (const bidirectional_iterator& rhs) const
{
return !(operator == (rhs));
}
iterator& operator ++ ()
bidirectional_iterator& operator ++ ()
{
utf8::next(it, range_end);
return *this;
}
iterator operator ++ (int)
bidirectional_iterator operator ++ (int)
{
iterator temp = *this;
bidirectional_iterator temp = *this;
utf8::next(it, range_end);
return temp;
}
iterator& operator -- ()
bidirectional_iterator& operator -- ()
{
utf8::prior(it, range_start);
return *this;
}
iterator operator -- (int)
bidirectional_iterator operator -- (int)
{
iterator temp = *this;
bidirectional_iterator temp = *this;
utf8::prior(it, range_start);
return temp;
}
}; // class iterator
}; // class bidirectional_iterator
// Single-pass (input) iterator over the code points of a UTF-8 octet range.
//
// The current code point is decoded eagerly: once on construction and once
// after every increment, through utf8::next_impl, which throws on invalid
// input. The decoded value is cached, so dereferencing never touches the
// underlying octet iterator.
//
// Dereferencing or incrementing an iterator that has reached range_end (or a
// default-constructed one) throws std::runtime_error("no such element").
template <typename octet_iterator>
class input_iterator : public std::iterator<std::input_iterator_tag, uint32_t> {
    private:
    octet_iterator it;
    octet_iterator range_start;
    octet_iterator range_end;
    uint32_t cp{};   // code point decoded by the last successful read()
    bool ok{};       // true while `cp` holds an element, false at the end
    // Decodes the code point starting at `it`, unless `it` is at range_end.
    void read() {
        ok = it != range_end;
        if(ok) {
            cp = utf8::next_impl(it, range_end);
        }
    }
    // Shared implementation of both increment operators.
    // Stepping the octet iterator beyond range_end is undefined behaviour for
    // the underlying iterator, so an exhausted iterator is rejected first.
    void advance() {
        if(!ok) {
            throw std::runtime_error("no such element");
        }
        ++it;   // step off the sequence consumed by the previous read()
        read();
    }
    public:
    input_iterator () {}
    // octet_it:   position to start decoding from
    // rangestart: start of the octet range (only used when comparing iterators)
    // rangeend:   end of the octet range
    // Throws whatever utf8::next_impl throws if the first sequence is invalid.
    explicit input_iterator (const octet_iterator& octet_it,
                             const octet_iterator& rangestart,
                             const octet_iterator& rangeend) :
        it(octet_it), range_start(rangestart), range_end(rangeend)
    {
        read();
    }
    // Current position of the underlying octet iterator.
    octet_iterator base () const { return it; }
    // Returns the cached code point; throws std::runtime_error at the end.
    uint32_t operator * () const
    {
        if(!ok) {
            throw std::runtime_error("no such element");
        }
        return cp;
    }
    // Two exhausted iterators over the same range are equal; otherwise both
    // must hold an element and sit on the same octet position.
    // Throws std::logic_error when the iterators were built on different ranges.
    bool operator == (const input_iterator& rhs) const
    {
        if (range_start != rhs.range_start || range_end != rhs.range_end)
            throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
        return ok == rhs.ok && (!ok || it == rhs.it);
    }
    bool operator != (const input_iterator& rhs) const
    {
        return !(operator == (rhs));
    }
    // Moves to the next code point; throws std::runtime_error when already
    // at the end, and whatever utf8::next_impl throws on invalid input.
    input_iterator& operator ++ ()
    {
        advance();
        return *this;
    }
    // Post-increment: the returned copy still carries the previous code point.
    input_iterator operator ++ (int)
    {
        input_iterator temp = *this;
        advance();
        return temp;
    }
}; // class input_iterator
template <typename octet_iterator>
struct get_iterator_class {
private:
static input_iterator<octet_iterator> get(std::input_iterator_tag);
static bidirectional_iterator<octet_iterator> get(std::bidirectional_iterator_tag);
public:
using type = decltype(get(Iterator_category<octet_iterator>{}));
};
}//internal
// Public UTF-8 iterator type for a given octet iterator: whichever adaptor
// internal::get_iterator_class selects for it.
template <class octet_iterator>
using iterator =
    typename utf8::internal::get_iterator_class<octet_iterator>::type;
// Builds the (begin, end) pair of UTF-8 iterators covering a mutable container.
// Both iterators are bound to the range [c.begin(), c.end()).
template <typename Cont>
inline std::pair<iterator<typename Cont::iterator>, iterator<typename Cont::iterator>> make_iterator_pair(Cont& c) {
    using Utf8Iter = iterator<typename Cont::iterator>;
    auto first = c.begin();
    auto last = c.end();
    return std::pair<Utf8Iter, Utf8Iter>(Utf8Iter{first, first, last},
                                         Utf8Iter{last, first, last});
}
// Builds the (begin, end) pair of UTF-8 iterators covering a const container.
// Both iterators are bound to the range [c.begin(), c.end()).
template <typename Cont>
inline std::pair<iterator<typename Cont::const_iterator>, iterator<typename Cont::const_iterator>> make_iterator_pair(const Cont& c) {
    using Utf8Iter = iterator<typename Cont::const_iterator>;
    auto first = c.begin();
    auto last = c.end();
    return std::pair<Utf8Iter, Utf8Iter>(Utf8Iter{first, first, last},
                                         Utf8Iter{last, first, last});
}
// Builds the (begin, end) pair of UTF-8 iterators over a char array.
// The last array element is left out of the range; presumably it is the
// terminating NUL of a string literal (confirm for non-literal arrays).
template <size_t N>
inline std::pair<iterator<const char*>, iterator<const char*>> make_iterator_pair(const char(&tab)[N]) {
    static_assert(N > 0, "bad utf8 string");
    using Utf8Iter = iterator<const char*>;
    const char* first = tab;
    const char* last = tab + (N - 1);
    return std::pair<Utf8Iter, Utf8Iter>(Utf8Iter{first, first, last},
                                         Utf8Iter{last, first, last});
}
// Builds the (begin, end) pair of UTF-8 iterators reading octets from a stream.
//
// std::istream_iterator<char> extracts with operator>>, which discards
// whitespace while the stream's skipws flag is set. Left alone, every space,
// tab and newline octet would vanish from the decoded text, so skipws is
// cleared here. NOTE: the flag stays cleared on `is` after this call, because
// the returned iterators keep reading from the stream.
inline std::pair<iterator<std::istream_iterator<char>>, iterator<std::istream_iterator<char>>> make_iterator_pair(std::istream& is) {
    using Is_iter = std::istream_iterator<char>;
    using Iter = iterator<Is_iter>;
    is.unsetf(std::ios_base::skipws);
    auto it = Is_iter{is};   // reads the first octet from the stream
    auto end = Is_iter{};    // end-of-stream sentinel
    return std::make_pair(Iter{it, it, end}, Iter{end, it, end});
}
} // namespace utf8

View file

@ -237,7 +237,7 @@ namespace internal
#undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
template <typename octet_iterator>
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
utf_error validate_next_impl(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
if (it == end)
return NOT_ENOUGH_ROOM;
@ -276,7 +276,6 @@ namespace internal
if (!utf8::internal::is_overlong_sequence(cp, length)){
// Passed! Return here.
code_point = cp;
++it;
return UTF8_OK;
}
else
@ -291,6 +290,16 @@ namespace internal
return err;
}
// Validates the UTF-8 sequence starting at `it` via validate_next_impl and,
// only when that reports UTF8_OK, advances `it` by one more position.
// On failure the error code is returned and no extra increment is applied.
template <typename octet_iterator>
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) {
    const utf_error status = validate_next_impl(it, end, code_point);
    if (status != UTF8_OK)
        return status;
    ++it;
    return status;
}
template <typename octet_iterator>
inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
uint32_t ignored;

View file

@ -0,0 +1,37 @@
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_CPP11_FACILITIES_H
#define UTF8_FOR_CPP_CPP11_FACILITIES_H
#include <istream>
#include <iterator>
#include <type_traits>
namespace utf8
{
    // Shorthand for the iterator category tag that std::iterator_traits
    // reports for the iterator type Iter.
    template <class Iter>
    using Iterator_category =
        typename std::iterator_traits<Iter>::iterator_category;
} // namespace utf8
#endif // header guard

View file

@ -29,6 +29,8 @@ DEALINGS IN THE SOFTWARE.
#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "core.h"
#include "cpp11_facilities.h"
#include <utility>
namespace utf8
{
@ -98,7 +100,7 @@ namespace utf8
}
template <typename octet_iterator>
uint32_t next(octet_iterator& it)
uint32_t next_impl(octet_iterator& it)
{
uint32_t cp = utf8::internal::mask8(*it);
typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
@ -124,10 +126,16 @@ namespace utf8
cp += (*it) & 0x3f;
break;
}
++it;
return cp;
}
// Decodes the code point starting at `it` with next_impl (no validation),
// then advances `it` by one more position and returns the code point.
template <typename octet_iterator>
inline uint32_t next(octet_iterator& it) {
    const uint32_t decoded = next_impl(it);
    ++it;
    return decoded;
}
template <typename octet_iterator>
uint32_t peek_next(octet_iterator it)
{
@ -215,13 +223,14 @@ namespace utf8
return result;
}
// The iterator class
namespace internal {
// The bidirectional_iterator class
template <typename octet_iterator>
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
class bidirectional_iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
octet_iterator it;
public:
iterator () {}
explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
bidirectional_iterator () {}
explicit bidirectional_iterator (const octet_iterator& octet_it): it(octet_it) {}
// the default "big three" are OK
octet_iterator base () const { return it; }
uint32_t operator * () const
@ -229,37 +238,125 @@ namespace utf8
octet_iterator temp = it;
return utf8::unchecked::next(temp);
}
bool operator == (const iterator& rhs) const
bool operator == (const bidirectional_iterator& rhs) const
{
return (it == rhs.it);
}
bool operator != (const iterator& rhs) const
bool operator != (const bidirectional_iterator& rhs) const
{
return !(operator == (rhs));
}
iterator& operator ++ ()
bidirectional_iterator& operator ++ ()
{
::std::advance(it, utf8::internal::sequence_length(it));
return *this;
}
iterator operator ++ (int)
bidirectional_iterator operator ++ (int)
{
iterator temp = *this;
bidirectional_iterator temp = *this;
::std::advance(it, utf8::internal::sequence_length(it));
return temp;
}
iterator& operator -- ()
bidirectional_iterator& operator -- ()
{
utf8::unchecked::prior(it);
return *this;
}
iterator operator -- (int)
bidirectional_iterator operator -- (int)
{
iterator temp = *this;
bidirectional_iterator temp = *this;
utf8::unchecked::prior(it);
return temp;
}
}; // class iterator
}; // class bidirectional_iterator
// Single-pass (input) iterator over the code points of a UTF-8 octet
// sequence, without any validation and without knowledge of the range end.
//
// Decoding is lazy: the octet iterator is only dereferenced when the code
// point is actually requested (operator*) or has to be skipped (operator++).
// Decoding in the constructor or right after an increment would dereference
// the underlying iterator when it is the end of the range, e.g. when the end
// iterator itself is constructed, or on the last increment of a loop.
//
// Note: base() and operator== look at the raw octet position, which moves
// within the current sequence once that sequence has been decoded.
template <typename octet_iterator>
class input_iterator : public std::iterator<std::input_iterator_tag, uint32_t> {
    private:
    // mutable: operator* is const but may have to perform the pending decode.
    mutable octet_iterator it;
    mutable uint32_t cp{};          // code point of the current sequence, valid when `decoded`
    mutable bool decoded{false};    // true once the current sequence has been consumed
    // Decodes the current sequence once; later calls are no-ops until the next increment.
    void read() const {
        if (!decoded) {
            cp = utf8::unchecked::next_impl(it);
            decoded = true;
        }
    }
    public:
    input_iterator () {}
    // Only stores the position; nothing is read until the iterator is used.
    explicit input_iterator (const octet_iterator& octet_it) : it(octet_it) {}
    // Current position of the underlying octet iterator.
    octet_iterator base () const { return it; }
    // Returns the current code point, decoding it on first access.
    uint32_t operator * () const
    {
        read();
        return cp;
    }
    // Compares the underlying octet positions; intended for end-of-range checks.
    bool operator == (const input_iterator& rhs) const
    {
        return it == rhs.it;
    }
    bool operator != (const input_iterator& rhs) const
    {
        return !(operator == (rhs));
    }
    // Moves to the next code point. The iterator must not be at the end.
    input_iterator& operator ++ ()
    {
        read();          // make sure the current sequence has been consumed
        ++it;            // step onto the first octet of the next sequence
        decoded = false;
        return *this;
    }
    // Post-increment: the returned copy carries the already decoded code
    // point, so it never has to read from the underlying iterator again.
    input_iterator operator ++ (int)
    {
        read();
        input_iterator temp = *this;
        ++it;
        decoded = false;
        return temp;
    }
}; // class input_iterator
template <typename octet_iterator>
struct get_iterator_class {
private:
static input_iterator<octet_iterator> get(std::input_iterator_tag);
static bidirectional_iterator<octet_iterator> get(std::bidirectional_iterator_tag);
public:
using type = decltype(get(Iterator_category<octet_iterator>{}));
};
}//internal
// Public unchecked UTF-8 iterator type for a given octet iterator: whichever
// adaptor internal::get_iterator_class selects for it.
template <class octet_iterator>
using iterator =
    typename utf8::unchecked::internal::get_iterator_class<octet_iterator>::type;
// Builds the (begin, end) pair of unchecked UTF-8 iterators for a mutable container.
template <typename Cont>
inline std::pair<iterator<typename Cont::iterator>, iterator<typename Cont::iterator>> make_iterator_pair(Cont& c) {
    using Utf8Iter = iterator<typename Cont::iterator>;
    auto first = c.begin();
    auto last = c.end();
    return std::pair<Utf8Iter, Utf8Iter>(Utf8Iter{first}, Utf8Iter{last});
}
// Builds the (begin, end) pair of unchecked UTF-8 iterators for a const container.
template <typename Cont>
inline std::pair<iterator<typename Cont::const_iterator>, iterator<typename Cont::const_iterator>> make_iterator_pair(const Cont& c) {
    using Utf8Iter = iterator<typename Cont::const_iterator>;
    auto first = c.begin();
    auto last = c.end();
    return std::pair<Utf8Iter, Utf8Iter>(Utf8Iter{first}, Utf8Iter{last});
}
// Builds the (begin, end) pair of unchecked UTF-8 iterators over a char array.
// The last array element is left out of the range; presumably it is the
// terminating NUL of a string literal (confirm for non-literal arrays).
template <size_t N>
inline std::pair<iterator<const char*>, iterator<const char*>> make_iterator_pair(const char(&tab)[N]) {
    static_assert(N > 0, "bad utf8 string");
    using Utf8Iter = iterator<const char*>;
    const char* first = tab;
    const char* last = tab + (N - 1);
    return std::pair<Utf8Iter, Utf8Iter>(Utf8Iter{first}, Utf8Iter{last});
}
// Builds the (begin, end) pair of unchecked UTF-8 iterators reading octets
// from a stream.
//
// std::istream_iterator<char> extracts with operator>>, which discards
// whitespace while the stream's skipws flag is set. Left alone, every space,
// tab and newline octet would vanish from the decoded text, so skipws is
// cleared here. NOTE: the flag stays cleared on `is` after this call, because
// the returned iterators keep reading from the stream.
inline std::pair<iterator<std::istream_iterator<char>>, iterator<std::istream_iterator<char>>> make_iterator_pair(std::istream& is) {
    using Is_iter = std::istream_iterator<char>;
    using Iter = iterator<Is_iter>;
    is.unsetf(std::ios_base::skipws);
    auto it = Is_iter{is};   // reads the first octet from the stream
    auto end = Is_iter{};    // end-of-stream sentinel
    return std::make_pair(Iter{it}, Iter{end});
}
} // namespace utf8::unchecked
} // namespace utf8