diff --git a/source/utf8/checked.h b/source/utf8/checked.h index 0567b50..7128193 100644 --- a/source/utf8/checked.h +++ b/source/utf8/checked.h @@ -29,7 +29,9 @@ DEALINGS IN THE SOFTWARE. #define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #include "core.h" +#include "cpp11_facilities.h" #include +#include namespace utf8 { @@ -136,10 +138,7 @@ namespace utf8 } template - uint32_t next(octet_iterator& it, octet_iterator end) - { - uint32_t cp = 0; - internal::utf_error err_code = utf8::internal::validate_next(it, end, cp); + void check_err_code(internal::utf_error err_code, octet_iterator it, uint32_t cp) { switch (err_code) { case internal::UTF8_OK : break; @@ -152,9 +151,25 @@ namespace utf8 case internal::INVALID_CODE_POINT : throw invalid_code_point(cp); } + } + + template + uint32_t next_impl(octet_iterator& it, octet_iterator end) + { + uint32_t cp = 0; + internal::utf_error err_code = utf8::internal::validate_next_impl(it, end, cp); + check_err_code(err_code, it, cp); return cp; } + template + uint32_t next(octet_iterator& it, octet_iterator end) + { + uint32_t cp = next_impl(it, end);//throw if error + ++it; + return cp; + } + template uint32_t peek_next(octet_iterator it, octet_iterator end) { @@ -261,21 +276,21 @@ namespace utf8 return result; } - // The iterator class + namespace internal { + + // The bidirectional_iterator class template - class iterator : public std::iterator { + class bidirectional_iterator : public std::iterator { octet_iterator it; octet_iterator range_start; octet_iterator range_end; public: - iterator () {} - explicit iterator (const octet_iterator& octet_it, + bidirectional_iterator () {} + explicit bidirectional_iterator (const octet_iterator& octet_it, const octet_iterator& rangestart, const octet_iterator& rangeend) : it(octet_it), range_start(rangestart), range_end(rangeend) { - if (it < range_start || it > range_end) - throw std::out_of_range("Invalid utf-8 iterator position"); } // the default "big three" are OK octet_iterator base () const { return it; } @@ -284,39 +299,142 @@ namespace utf8 octet_iterator temp = it; return utf8::next(temp, range_end); } - bool operator == (const iterator& rhs) const + bool operator == (const bidirectional_iterator& rhs) const { if (range_start != rhs.range_start || range_end != rhs.range_end) throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); return (it == rhs.it); } - bool operator != (const iterator& rhs) const + bool operator != (const bidirectional_iterator& rhs) const { return !(operator == (rhs)); } - iterator& operator ++ () + bidirectional_iterator& operator ++ () { utf8::next(it, range_end); return *this; } - iterator operator ++ (int) + bidirectional_iterator operator ++ (int) { - iterator temp = *this; + bidirectional_iterator temp = *this; utf8::next(it, range_end); return temp; } - iterator& operator -- () + bidirectional_iterator& operator -- () { utf8::prior(it, range_start); return *this; } - iterator operator -- (int) + bidirectional_iterator operator -- (int) { - iterator temp = *this; + bidirectional_iterator temp = *this; utf8::prior(it, range_start); return temp; } - }; // class iterator + }; // class bidirectional_iterator + + template + class input_iterator : public std::iterator { + private: + octet_iterator it; + octet_iterator range_start; + octet_iterator range_end; + uint32_t cp{}; + bool ok{}; + void read() { + ok = it != range_end; + if(ok) { + cp = utf8::next_impl(it, range_end); + } + } + + public: + input_iterator () {} + explicit input_iterator (const octet_iterator& octet_it, + const octet_iterator& rangestart, + const octet_iterator& rangeend) : + it(octet_it), range_start(rangestart), range_end(rangeend) + { + read(); + } + octet_iterator base () const { return it; } + uint32_t operator * () const + { + if(!ok) { + throw std::runtime_error("no such element"); + } + return cp; + } + + bool operator == (const input_iterator& rhs) const + { + if (range_start != rhs.range_start || range_end != rhs.range_end) + throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); + return ok == rhs.ok && (!ok || it == rhs.it); + } + bool operator != (const input_iterator& rhs) const + { + return !(operator == (rhs)); + } + input_iterator& operator ++ () + { + ++it; + read(); + return *this; + } + input_iterator operator ++ (int) + { + input_iterator temp = *this; + ++it; + read(); + return temp; + } + }; // class input_iterator + + template + struct get_iterator_class { + private: + static input_iterator get(std::input_iterator_tag); + static bidirectional_iterator get(std::bidirectional_iterator_tag); + public: + using type = decltype(get(Iterator_category{})); + }; + + }//internal + + template + using iterator = typename utf8::internal::get_iterator_class::type; + + template + inline std::pair, iterator> make_iterator_pair(Cont& c) { + using Iter = iterator; + auto it = c.begin(); + auto end = c.end(); + return std::make_pair(Iter{it, it, end}, Iter{end, it, end}); + } + template + inline std::pair, iterator> make_iterator_pair(const Cont& c) { + using Iter = iterator; + auto it = c.begin(); + auto end = c.end(); + return std::make_pair(Iter{it, it, end}, Iter{end, it, end}); + } + template + inline std::pair, iterator> make_iterator_pair(const char(&tab)[N]) { + static_assert(N > 0, "bad utf8 string"); + using Iter = iterator; + auto it = &tab[0]; + auto end = &tab[N-1]; + return std::make_pair(Iter{it, it, end}, Iter{end, it, end}); + } + + inline std::pair>, iterator>> make_iterator_pair(std::istream& is) { + using Is_iter = std::istream_iterator; + using Iter = iterator; + auto it = Is_iter{is}; + auto end = Is_iter{}; + return std::make_pair(Iter{it, it, end}, Iter{end, it, end}); + } } // namespace utf8 diff --git a/source/utf8/core.h b/source/utf8/core.h index 244e892..584e8c9 100644 --- a/source/utf8/core.h +++ b/source/utf8/core.h @@ -237,7 +237,7 @@ namespace internal #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR template - utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) + utf_error validate_next_impl(octet_iterator& it, octet_iterator end, uint32_t& code_point) { if (it == end) return NOT_ENOUGH_ROOM; @@ -276,7 +276,6 @@ namespace internal if (!utf8::internal::is_overlong_sequence(cp, length)){ // Passed! Return here. code_point = cp; - ++it; return UTF8_OK; } else @@ -291,6 +290,16 @@ namespace internal return err; } + template + utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) { + utf_error err = validate_next_impl(it, end, code_point); + if(err == UTF8_OK) { + ++it; + } + return err; + } + + template inline utf_error validate_next(octet_iterator& it, octet_iterator end) { uint32_t ignored; diff --git a/source/utf8/cpp11_facilities.h b/source/utf8/cpp11_facilities.h new file mode 100644 index 0000000..9e98db0 --- /dev/null +++ b/source/utf8/cpp11_facilities.h @@ -0,0 +1,37 @@ +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + +#ifndef UTF8_FOR_CPP_CPP11_FACILITIES_H +#define UTF8_FOR_CPP_CPP11_FACILITIES_H + +#include + +namespace utf8 +{ + template + using Iterator_category = typename std::iterator_traits::iterator_category; + +} // namespace utf8 + +#endif // header guard diff --git a/source/utf8/unchecked.h b/source/utf8/unchecked.h index def0009..53ce1dc 100644 --- a/source/utf8/unchecked.h +++ b/source/utf8/unchecked.h @@ -29,6 +29,8 @@ DEALINGS IN THE SOFTWARE. #define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #include "core.h" +#include "cpp11_facilities.h" +#include namespace utf8 { @@ -98,7 +100,7 @@ namespace utf8 } template - uint32_t next(octet_iterator& it) + uint32_t next_impl(octet_iterator& it) { uint32_t cp = utf8::internal::mask8(*it); typename std::iterator_traits::difference_type length = utf8::internal::sequence_length(it); @@ -124,10 +126,16 @@ namespace utf8 cp += (*it) & 0x3f; break; } - ++it; return cp; } + template + inline uint32_t next(octet_iterator& it) { + uint32_t cp{next_impl(it)}; + ++it; + return cp; + } + template uint32_t peek_next(octet_iterator it) { @@ -215,13 +223,14 @@ namespace utf8 return result; } - // The iterator class + namespace internal { + // The bidirectional_iterator class template - class iterator : public std::iterator { + class bidirectional_iterator : public std::iterator { octet_iterator it; public: - iterator () {} - explicit iterator (const octet_iterator& octet_it): it(octet_it) {} + bidirectional_iterator () {} + explicit bidirectional_iterator (const octet_iterator& octet_it): it(octet_it) {} // the default "big three" are OK octet_iterator base () const { return it; } uint32_t operator * () const @@ -229,37 +238,125 @@ namespace utf8 octet_iterator temp = it; return utf8::unchecked::next(temp); } - bool operator == (const iterator& rhs) const + bool operator == (const bidirectional_iterator& rhs) const { return (it == rhs.it); } - bool operator != (const iterator& rhs) const + bool operator != (const bidirectional_iterator& rhs) const { return !(operator == (rhs)); } - iterator& operator ++ () + bidirectional_iterator& operator ++ () { ::std::advance(it, utf8::internal::sequence_length(it)); return *this; } - iterator operator ++ (int) + bidirectional_iterator operator ++ (int) { - iterator temp = *this; + bidirectional_iterator temp = *this; ::std::advance(it, utf8::internal::sequence_length(it)); return temp; } - iterator& operator -- () + bidirectional_iterator& operator -- () { utf8::unchecked::prior(it); return *this; } - iterator operator -- (int) + bidirectional_iterator operator -- (int) { - iterator temp = *this; + bidirectional_iterator temp = *this; utf8::unchecked::prior(it); return temp; } - }; // class iterator + }; // class bidirectional_iterator + + template + class input_iterator : public std::iterator { + private: + octet_iterator it; + uint32_t cp{}; + void read() { + cp = utf8::unchecked::next_impl(it); + } + + public: + input_iterator () {} + explicit input_iterator (const octet_iterator& octet_it) : it(octet_it) + { + read(); + } + octet_iterator base () const { return it; } + uint32_t operator * () const + { + return cp; + } + + bool operator == (const input_iterator& rhs) const + { + return it == rhs.it; + } + bool operator != (const input_iterator& rhs) const + { + return !(operator == (rhs)); + } + input_iterator& operator ++ () + { + ++it; + read(); + return *this; + } + input_iterator operator ++ (int) + { + input_iterator temp = *this; + ++it; + read(); + return temp; + } + }; // class input_iterator + + template + struct get_iterator_class { + private: + static input_iterator get(std::input_iterator_tag); + static bidirectional_iterator get(std::bidirectional_iterator_tag); + public: + using type = decltype(get(Iterator_category{})); + }; + }//internal + + template + using iterator = typename utf8::unchecked::internal::get_iterator_class::type; + + template + inline std::pair, iterator> make_iterator_pair(Cont& c) { + using Iter = iterator; + auto it = c.begin(); + auto end = c.end(); + return std::make_pair(Iter{it}, Iter{end}); + } + template + inline std::pair, iterator> make_iterator_pair(const Cont& c) { + using Iter = iterator; + auto it = c.begin(); + auto end = c.end(); + return std::make_pair(Iter{it}, Iter{end}); + } + template + inline std::pair, iterator> make_iterator_pair(const char(&tab)[N]) { + static_assert(N > 0, "bad utf8 string"); + using Iter = iterator; + auto it = &tab[0]; + auto end = &tab[N-1]; + return std::make_pair(Iter{it}, Iter{end}); + } + + inline std::pair>, iterator>> make_iterator_pair(std::istream& is) { + using Is_iter = std::istream_iterator; + using Iter = iterator; + auto it = Is_iter{is}; + auto end = Is_iter{}; + return std::make_pair(Iter{it}, Iter{end}); + } } // namespace utf8::unchecked } // namespace utf8