mirror of
https://github.com/nemtrif/utfcpp.git
synced 2025-04-06 05:55:08 +00:00
Implement istream_iterator support for checked iterators
This commit is contained in:
parent
e5967608f8
commit
6a4bccf9c7
2 changed files with 176 additions and 56 deletions
|
@ -261,62 +261,161 @@ namespace utf8
|
|||
return result;
|
||||
}
|
||||
|
||||
// The iterator class
|
||||
template <typename octet_iterator>
|
||||
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
|
||||
octet_iterator it;
|
||||
octet_iterator range_start;
|
||||
octet_iterator range_end;
|
||||
public:
|
||||
iterator () {}
|
||||
explicit iterator (const octet_iterator& octet_it,
|
||||
const octet_iterator& rangestart,
|
||||
const octet_iterator& rangeend) :
|
||||
it(octet_it), range_start(rangestart), range_end(rangeend)
|
||||
{
|
||||
if (it < range_start || it > range_end)
|
||||
throw std::out_of_range("Invalid utf-8 iterator position");
|
||||
}
|
||||
// the default "big three" are OK
|
||||
octet_iterator base () const { return it; }
|
||||
uint32_t operator * () const
|
||||
{
|
||||
octet_iterator temp = it;
|
||||
return utf8::next(temp, range_end);
|
||||
}
|
||||
bool operator == (const iterator& rhs) const
|
||||
{
|
||||
if (range_start != rhs.range_start || range_end != rhs.range_end)
|
||||
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
|
||||
return (it == rhs.it);
|
||||
}
|
||||
bool operator != (const iterator& rhs) const
|
||||
{
|
||||
return !(operator == (rhs));
|
||||
}
|
||||
iterator& operator ++ ()
|
||||
{
|
||||
utf8::next(it, range_end);
|
||||
return *this;
|
||||
}
|
||||
iterator operator ++ (int)
|
||||
{
|
||||
iterator temp = *this;
|
||||
utf8::next(it, range_end);
|
||||
return temp;
|
||||
}
|
||||
iterator& operator -- ()
|
||||
{
|
||||
utf8::prior(it, range_start);
|
||||
return *this;
|
||||
}
|
||||
iterator operator -- (int)
|
||||
{
|
||||
iterator temp = *this;
|
||||
utf8::prior(it, range_start);
|
||||
return temp;
|
||||
}
|
||||
}; // class iterator
|
||||
namespace internal
|
||||
{
|
||||
template<typename iterator>
|
||||
using is_random_access_iterator = std::is_same<typename std::iterator_traits<iterator>::iterator_category, std::random_access_iterator_tag>;
|
||||
|
||||
template<typename iterator>
|
||||
typename std::enable_if<is_random_access_iterator<iterator>::value, bool>::type
|
||||
assert_iterator_in_range(const iterator& it, const iterator& rangestart, const iterator& rangeend){
|
||||
return it >= rangestart && it <= rangeend;
|
||||
}
|
||||
|
||||
template<typename iterator>
|
||||
typename std::enable_if<!is_random_access_iterator<iterator>::value, bool>::type
|
||||
assert_iterator_in_range(const iterator&, const iterator&, const iterator&){
|
||||
return true; // Cannot check
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
class bidirectional_iterator {
|
||||
octet_iterator it;
|
||||
octet_iterator range_start;
|
||||
octet_iterator range_end;
|
||||
public:
|
||||
using iterator_category = std::bidirectional_iterator_tag;
|
||||
using value_type = uint32_t;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = uint32_t*;
|
||||
using reference = uint32_t&;
|
||||
|
||||
explicit bidirectional_iterator(const octet_iterator& octet_it,
|
||||
const octet_iterator& rangestart,
|
||||
const octet_iterator& rangeend):
|
||||
it(octet_it), range_start(rangestart), range_end(rangeend)
|
||||
{
|
||||
if(!internal::assert_iterator_in_range(octet_it, rangestart, rangeend))
|
||||
throw std::out_of_range("Invalid utf-8 iterator position");
|
||||
}
|
||||
// the default "big three" are OK
|
||||
octet_iterator base() const { return it; }
|
||||
uint32_t operator * () const
|
||||
{
|
||||
octet_iterator temp = it;
|
||||
return utf8::next(temp, range_end);
|
||||
}
|
||||
bool operator == (const bidirectional_iterator& rhs) const
|
||||
{
|
||||
if(range_start != rhs.range_start || range_end != rhs.range_end)
|
||||
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
|
||||
return (it == rhs.it);
|
||||
}
|
||||
bool operator != (const bidirectional_iterator& rhs) const
|
||||
{
|
||||
return !(operator == (rhs));
|
||||
}
|
||||
bidirectional_iterator& operator ++ ()
|
||||
{
|
||||
utf8::next(it, range_end);
|
||||
return *this;
|
||||
}
|
||||
bidirectional_iterator operator ++ (int)
|
||||
{
|
||||
bidirectional_iterator temp = *this;
|
||||
utf8::next(it, range_end);
|
||||
return temp;
|
||||
}
|
||||
bidirectional_iterator& operator-- ()
|
||||
{
|
||||
utf8::prior(it, range_start);
|
||||
return *this;
|
||||
}
|
||||
bidirectional_iterator operator -- (int)
|
||||
{
|
||||
bidirectional_iterator temp = *this;
|
||||
utf8::prior(it, range_start);
|
||||
return temp;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename octet_iterator>
|
||||
class input_iterator {
|
||||
octet_iterator it;
|
||||
octet_iterator range_start;
|
||||
octet_iterator range_end;
|
||||
uint32_t curValue;
|
||||
utf_error error;
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = uint32_t;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = uint32_t*;
|
||||
using reference = uint32_t&;
|
||||
|
||||
explicit input_iterator(const octet_iterator& octet_it,
|
||||
const octet_iterator& rangestart,
|
||||
const octet_iterator& rangeend):
|
||||
it(octet_it), range_start(rangestart), range_end(rangeend), curValue(0), error(it == range_end ? NOT_ENOUGH_ROOM : UTF8_OK)
|
||||
{
|
||||
get_val();
|
||||
}
|
||||
// the default "big three" are OK
|
||||
octet_iterator base() const { return it; }
|
||||
uint32_t operator * () const
|
||||
{
|
||||
switch(error) {
|
||||
case internal::UTF8_OK:
|
||||
break;
|
||||
case internal::NOT_ENOUGH_ROOM:
|
||||
throw not_enough_room();
|
||||
case internal::INVALID_LEAD:
|
||||
case internal::INCOMPLETE_SEQUENCE:
|
||||
case internal::OVERLONG_SEQUENCE:
|
||||
throw invalid_utf8(*it);
|
||||
case internal::INVALID_CODE_POINT:
|
||||
throw invalid_code_point(curValue);
|
||||
}
|
||||
return curValue;
|
||||
}
|
||||
bool operator == (const input_iterator& rhs) const
|
||||
{
|
||||
if(range_start != rhs.range_start || range_end != rhs.range_end)
|
||||
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
|
||||
return (error == rhs.error) && (it == rhs.it);
|
||||
}
|
||||
bool operator != (const input_iterator& rhs) const
|
||||
{
|
||||
return !(operator == (rhs));
|
||||
}
|
||||
input_iterator& operator ++ ()
|
||||
{
|
||||
get_val();
|
||||
return *this;
|
||||
}
|
||||
input_iterator operator ++ (int)
|
||||
{
|
||||
input_iterator temp = *this;
|
||||
get_val();
|
||||
return temp;
|
||||
}
|
||||
private:
|
||||
void get_val(){
|
||||
if(error == UTF8_OK){
|
||||
error = validate_next(it, range_end, curValue);
|
||||
}
|
||||
}
|
||||
};
|
||||
template<typename octet_iterator>
|
||||
struct GetIterator
|
||||
{
|
||||
static bidirectional_iterator<octet_iterator> get(std::bidirectional_iterator_tag);
|
||||
static input_iterator<octet_iterator> get(std::input_iterator_tag);
|
||||
using type = decltype(get(typename std::iterator_traits<octet_iterator>::iterator_category{}));
|
||||
};
|
||||
}
|
||||
template<typename octet_iterator>
|
||||
using iterator = typename internal::GetIterator<octet_iterator>::type;
|
||||
|
||||
} // namespace utf8
|
||||
|
||||
|
|
|
@ -29,3 +29,24 @@ TEST(CheckedIteratrTests, test_decrement)
|
|||
EXPECT_EQ (--it, utf8::iterator<const char*>(threechars, threechars, threechars + 9));
|
||||
EXPECT_EQ (*it, 0x10346);
|
||||
}
|
||||
|
||||
TEST(CheckedIteratrTests, test_istream_iterator)
|
||||
{
|
||||
std::istringstream is("\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88");
|
||||
using iterator = std::istream_iterator<char>;
|
||||
using utf8_iterator = utf8::iterator<iterator>;
|
||||
iterator base_it{is};
|
||||
utf8_iterator it(base_it, base_it, iterator{});
|
||||
utf8_iterator it2 = it;
|
||||
EXPECT_EQ(it2, it);
|
||||
EXPECT_EQ(*it, 0x10346);
|
||||
// Dereferencing it multiple times does not change the value
|
||||
EXPECT_EQ(*it, 0x10346);
|
||||
EXPECT_EQ(*(++it), 0x65e5);
|
||||
EXPECT_EQ((*it++), 0x65e5);
|
||||
EXPECT_EQ(*it, 0x0448);
|
||||
EXPECT_NE(it, it2);
|
||||
utf8_iterator endit(iterator{}, base_it, iterator{});
|
||||
EXPECT_EQ(++it, endit);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue