From 6a4bccf9c7e7ce9b6323b47960267bb7cfb46c40 Mon Sep 17 00:00:00 2001 From: Flamefire Date: Sun, 10 Nov 2019 12:03:30 +0100 Subject: [PATCH] Implement istream_iterator support for checked iterators --- source/utf8/checked.h | 211 +++++++++++++++++++++++--------- tests/test_checked_iterator.cpp | 21 ++++ 2 files changed, 176 insertions(+), 56 deletions(-) diff --git a/source/utf8/checked.h b/source/utf8/checked.h index 0567b50..f88c7b5 100644 --- a/source/utf8/checked.h +++ b/source/utf8/checked.h @@ -261,62 +261,161 @@ namespace utf8 return result; } - // The iterator class - template - class iterator : public std::iterator { - octet_iterator it; - octet_iterator range_start; - octet_iterator range_end; - public: - iterator () {} - explicit iterator (const octet_iterator& octet_it, - const octet_iterator& rangestart, - const octet_iterator& rangeend) : - it(octet_it), range_start(rangestart), range_end(rangeend) - { - if (it < range_start || it > range_end) - throw std::out_of_range("Invalid utf-8 iterator position"); - } - // the default "big three" are OK - octet_iterator base () const { return it; } - uint32_t operator * () const - { - octet_iterator temp = it; - return utf8::next(temp, range_end); - } - bool operator == (const iterator& rhs) const - { - if (range_start != rhs.range_start || range_end != rhs.range_end) - throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); - return (it == rhs.it); - } - bool operator != (const iterator& rhs) const - { - return !(operator == (rhs)); - } - iterator& operator ++ () - { - utf8::next(it, range_end); - return *this; - } - iterator operator ++ (int) - { - iterator temp = *this; - utf8::next(it, range_end); - return temp; - } - iterator& operator -- () - { - utf8::prior(it, range_start); - return *this; - } - iterator operator -- (int) - { - iterator temp = *this; - utf8::prior(it, range_start); - return temp; - } - }; // class iterator + namespace internal + { + template + using is_random_access_iterator = std::is_same::iterator_category, std::random_access_iterator_tag>; + + template + typename std::enable_if::value, bool>::type + assert_iterator_in_range(const iterator& it, const iterator& rangestart, const iterator& rangeend){ + return it >= rangestart && it <= rangeend; + } + + template + typename std::enable_if::value, bool>::type + assert_iterator_in_range(const iterator&, const iterator&, const iterator&){ + return true; // Cannot check + } + + template + class bidirectional_iterator { + octet_iterator it; + octet_iterator range_start; + octet_iterator range_end; + public: + using iterator_category = std::bidirectional_iterator_tag; + using value_type = uint32_t; + using difference_type = std::ptrdiff_t; + using pointer = uint32_t*; + using reference = uint32_t&; + + explicit bidirectional_iterator(const octet_iterator& octet_it, + const octet_iterator& rangestart, + const octet_iterator& rangeend): + it(octet_it), range_start(rangestart), range_end(rangeend) + { + if(!internal::assert_iterator_in_range(octet_it, rangestart, rangeend)) + throw std::out_of_range("Invalid utf-8 iterator position"); + } + // the default "big three" are OK + octet_iterator base() const { return it; } + uint32_t operator * () const + { + octet_iterator temp = it; + return utf8::next(temp, range_end); + } + bool operator == (const bidirectional_iterator& rhs) const + { + if(range_start != rhs.range_start || range_end != rhs.range_end) + throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); + return (it == rhs.it); + } + bool operator != (const bidirectional_iterator& rhs) const + { + return !(operator == (rhs)); + } + bidirectional_iterator& operator ++ () + { + utf8::next(it, range_end); + return *this; + } + bidirectional_iterator operator ++ (int) + { + bidirectional_iterator temp = *this; + utf8::next(it, range_end); + return temp; + } + bidirectional_iterator& operator-- () + { + utf8::prior(it, range_start); + return *this; + } + bidirectional_iterator operator -- (int) + { + bidirectional_iterator temp = *this; + utf8::prior(it, range_start); + return temp; + } + }; + + template + class input_iterator { + octet_iterator it; + octet_iterator range_start; + octet_iterator range_end; + uint32_t curValue; + utf_error error; + public: + using iterator_category = std::input_iterator_tag; + using value_type = uint32_t; + using difference_type = std::ptrdiff_t; + using pointer = uint32_t*; + using reference = uint32_t&; + + explicit input_iterator(const octet_iterator& octet_it, + const octet_iterator& rangestart, + const octet_iterator& rangeend): + it(octet_it), range_start(rangestart), range_end(rangeend), curValue(0), error(it == range_end ? NOT_ENOUGH_ROOM : UTF8_OK) + { + get_val(); + } + // the default "big three" are OK + octet_iterator base() const { return it; } + uint32_t operator * () const + { + switch(error) { + case internal::UTF8_OK: + break; + case internal::NOT_ENOUGH_ROOM: + throw not_enough_room(); + case internal::INVALID_LEAD: + case internal::INCOMPLETE_SEQUENCE: + case internal::OVERLONG_SEQUENCE: + throw invalid_utf8(*it); + case internal::INVALID_CODE_POINT: + throw invalid_code_point(curValue); + } + return curValue; + } + bool operator == (const input_iterator& rhs) const + { + if(range_start != rhs.range_start || range_end != rhs.range_end) + throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); + return (error == rhs.error) && (it == rhs.it); + } + bool operator != (const input_iterator& rhs) const + { + return !(operator == (rhs)); + } + input_iterator& operator ++ () + { + get_val(); + return *this; + } + input_iterator operator ++ (int) + { + input_iterator temp = *this; + get_val(); + return temp; + } + private: + void get_val(){ + if(error == UTF8_OK){ + error = validate_next(it, range_end, curValue); + } + } + }; + template + struct GetIterator + { + static bidirectional_iterator get(std::bidirectional_iterator_tag); + static input_iterator get(std::input_iterator_tag); + using type = decltype(get(typename std::iterator_traits::iterator_category{})); + }; + } + template + using iterator = typename internal::GetIterator::type; } // namespace utf8 diff --git a/tests/test_checked_iterator.cpp b/tests/test_checked_iterator.cpp index 4c44834..2b0bb8e 100644 --- a/tests/test_checked_iterator.cpp +++ b/tests/test_checked_iterator.cpp @@ -29,3 +29,24 @@ TEST(CheckedIteratrTests, test_decrement) EXPECT_EQ (--it, utf8::iterator(threechars, threechars, threechars + 9)); EXPECT_EQ (*it, 0x10346); } + +TEST(CheckedIteratrTests, test_istream_iterator) +{ + std::istringstream is("\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88"); + using iterator = std::istream_iterator; + using utf8_iterator = utf8::iterator; + iterator base_it{is}; + utf8_iterator it(base_it, base_it, iterator{}); + utf8_iterator it2 = it; + EXPECT_EQ(it2, it); + EXPECT_EQ(*it, 0x10346); + // Dereferencing it multiple times does not change the value + EXPECT_EQ(*it, 0x10346); + EXPECT_EQ(*(++it), 0x65e5); + EXPECT_EQ((*it++), 0x65e5); + EXPECT_EQ(*it, 0x0448); + EXPECT_NE(it, it2); + utf8_iterator endit(iterator{}, base_it, iterator{}); + EXPECT_EQ(++it, endit); +} +