mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-23004 make single-pass work with only movable UnitIter
This commit is contained in:
parent
c60546528f
commit
19658d2647
2 changed files with 68 additions and 34 deletions
|
@ -284,7 +284,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
static inline void inc(UnitIter &p, UnitIter limit) {
|
||||
static inline void inc(UnitIter &p, const UnitIter &limit) {
|
||||
// Very similar to U8_FWD_1().
|
||||
uint8_t b = *p;
|
||||
++p;
|
||||
|
@ -378,7 +378,8 @@ public:
|
|||
return {sub(), length, false, p0};
|
||||
}
|
||||
|
||||
static inline CodeUnits<UnitIter, CP32> singlePassReadAndInc(UnitIter &p, UnitIter limit) {
|
||||
static inline CodeUnits<UnitIter, CP32> singlePassReadAndInc(
|
||||
UnitIter &p, const UnitIter &limit) {
|
||||
// Very similar to U8_NEXT_OR_FFFD().
|
||||
CP32 c = uint8_t(*p);
|
||||
++p;
|
||||
|
@ -489,7 +490,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
static inline void inc(UnitIter &p, UnitIter limit) {
|
||||
static inline void inc(UnitIter &p, const UnitIter &limit) {
|
||||
// Very similar to U16_FWD_1().
|
||||
auto c = *p;
|
||||
++p;
|
||||
|
@ -525,7 +526,8 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
static inline CodeUnits<UnitIter, CP32> singlePassReadAndInc(UnitIter &p, UnitIter limit) {
|
||||
static inline CodeUnits<UnitIter, CP32> singlePassReadAndInc(
|
||||
UnitIter &p, const UnitIter &limit) {
|
||||
// Very similar to U16_NEXT_OR_FFFD().
|
||||
CP32 c = *p;
|
||||
++p;
|
||||
|
@ -588,7 +590,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
static inline void inc(UnitIter &p, UnitIter /*limit*/) {
|
||||
static inline void inc(UnitIter &p, const UnitIter &/*limit*/) {
|
||||
++p;
|
||||
}
|
||||
|
||||
|
@ -608,7 +610,8 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
static inline CodeUnits<UnitIter, CP32> singlePassReadAndInc(UnitIter &p, UnitIter /*limit*/) {
|
||||
static inline CodeUnits<UnitIter, CP32> singlePassReadAndInc(
|
||||
UnitIter &p, const UnitIter &/*limit*/) {
|
||||
uint32_t uc = *p;
|
||||
CP32 c = uc;
|
||||
++p;
|
||||
|
@ -919,6 +922,9 @@ public:
|
|||
// Constructs an iterator start or limit sentinel.
|
||||
inline UTFIterator(UnitIter p) : p_(p), start_(p), limit_(p), units_(0, 0, false, p) {}
|
||||
|
||||
inline UTFIterator(UTFIterator &&src) noexcept = default;
|
||||
inline UTFIterator &operator=(UTFIterator &&src) noexcept = default;
|
||||
|
||||
inline UTFIterator(const UTFIterator &other) = default;
|
||||
inline UTFIterator &operator=(const UTFIterator &other) = default;
|
||||
|
||||
|
@ -1076,10 +1082,14 @@ public:
|
|||
using difference_type = typename std::iterator_traits<UnitIter>::difference_type;
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
|
||||
inline UTFIterator(UnitIter p, UnitIter limit) : p_(p), limit_(limit) {}
|
||||
inline UTFIterator(UnitIter p, UnitIter limit) : p_(std::move(p)), limit_(std::move(limit)) {}
|
||||
|
||||
// Constructs an iterator start or limit sentinel.
|
||||
inline UTFIterator(UnitIter p) : p_(p), limit_(p) {}
|
||||
// Requires p to be copyable.
|
||||
inline UTFIterator(UnitIter p) : p_(std::move(p)), limit_(p_) {}
|
||||
|
||||
inline UTFIterator(UTFIterator &&src) noexcept = default;
|
||||
inline UTFIterator &operator=(UTFIterator &&src) noexcept = default;
|
||||
|
||||
inline UTFIterator(const UTFIterator &other) = default;
|
||||
inline UTFIterator &operator=(const UTFIterator &other) = default;
|
||||
|
@ -1177,6 +1187,9 @@ public:
|
|||
p_(iter.getLogicalPosition()), start_(iter.start_), limit_(iter.limit_),
|
||||
units_(0, 0, false, p_), unitsLimit_(p_) {}
|
||||
|
||||
inline reverse_iterator(reverse_iterator &&src) noexcept = default;
|
||||
inline reverse_iterator &operator=(reverse_iterator &&src) noexcept = default;
|
||||
|
||||
inline reverse_iterator(const reverse_iterator &other) = default;
|
||||
inline reverse_iterator &operator=(const reverse_iterator &other) = default;
|
||||
|
||||
|
@ -1352,7 +1365,7 @@ private:
|
|||
*/
|
||||
template<typename CP32, UIllFormedBehavior behavior, typename UnitIter>
|
||||
auto utfIterator(UnitIter start, UnitIter p, UnitIter limit) {
|
||||
return UTFIterator<UnitIter, CP32, behavior>(start, p, limit);
|
||||
return UTFIterator<UnitIter, CP32, behavior>(std::move(start), std::move(p), std::move(limit));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1368,11 +1381,17 @@ auto utfIterator(UnitIter start, UnitIter p, UnitIter limit) {
|
|||
*/
|
||||
template<typename CP32, UIllFormedBehavior behavior, typename UnitIter>
|
||||
auto utfIterator(UnitIter p, UnitIter limit) {
|
||||
return UTFIterator<UnitIter, CP32, behavior>(p, limit);
|
||||
return UTFIterator<UnitIter, CP32, behavior>(std::move(p), std::move(limit));
|
||||
}
|
||||
|
||||
// Note: We should only enable the following factory function for a copyable UnitIter.
|
||||
// In C++17, we would have to partially specialize with enable_if_t testing for forward_iterator,
|
||||
// but a function template partial specialization is not allowed.
|
||||
// In C++20, we might be able to require the std::copyable concept.
|
||||
|
||||
/**
|
||||
* UTFIterator factory function for a start or limit sentinel.
|
||||
* Requires UnitIter to be copyable.
|
||||
*
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
|
||||
* @tparam behavior How to handle ill-formed Unicode strings
|
||||
|
@ -1383,7 +1402,7 @@ auto utfIterator(UnitIter p, UnitIter limit) {
|
|||
*/
|
||||
template<typename CP32, UIllFormedBehavior behavior, typename UnitIter>
|
||||
auto utfIterator(UnitIter p) {
|
||||
return UTFIterator<UnitIter, CP32, behavior>(p);
|
||||
return UTFIterator<UnitIter, CP32, behavior>(std::move(p));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1450,6 +1469,9 @@ public:
|
|||
|
||||
inline UnsafeUTFIterator(UnitIter p) : p_(p), units_(0, 0, p) {}
|
||||
|
||||
inline UnsafeUTFIterator(UnsafeUTFIterator &&src) noexcept = default;
|
||||
inline UnsafeUTFIterator &operator=(UnsafeUTFIterator &&src) noexcept = default;
|
||||
|
||||
inline UnsafeUTFIterator(const UnsafeUTFIterator &other) = default;
|
||||
inline UnsafeUTFIterator &operator=(const UnsafeUTFIterator &other) = default;
|
||||
|
||||
|
@ -1602,7 +1624,10 @@ public:
|
|||
using difference_type = typename std::iterator_traits<UnitIter>::difference_type;
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
|
||||
inline UnsafeUTFIterator(UnitIter p) : p_(p) {}
|
||||
inline UnsafeUTFIterator(UnitIter p) : p_(std::move(p)) {}
|
||||
|
||||
inline UnsafeUTFIterator(UnsafeUTFIterator &&src) noexcept = default;
|
||||
inline UnsafeUTFIterator &operator=(UnsafeUTFIterator &&src) noexcept = default;
|
||||
|
||||
inline UnsafeUTFIterator(const UnsafeUTFIterator &other) = default;
|
||||
inline UnsafeUTFIterator &operator=(const UnsafeUTFIterator &other) = default;
|
||||
|
@ -1696,6 +1721,9 @@ public:
|
|||
inline reverse_iterator(U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator<UnitIter, CP32> iter) :
|
||||
p_(iter.getLogicalPosition()), units_(0, 0, p_), unitsLimit_(p_) {}
|
||||
|
||||
inline reverse_iterator(reverse_iterator &&src) noexcept = default;
|
||||
inline reverse_iterator &operator=(reverse_iterator &&src) noexcept = default;
|
||||
|
||||
inline reverse_iterator(const reverse_iterator &other) = default;
|
||||
inline reverse_iterator &operator=(const reverse_iterator &other) = default;
|
||||
|
||||
|
@ -1860,7 +1888,7 @@ private:
|
|||
*/
|
||||
template<typename CP32, typename UnitIter>
|
||||
auto unsafeUTFIterator(UnitIter iter) {
|
||||
return UnsafeUTFIterator<UnitIter, CP32>(iter);
|
||||
return UnsafeUTFIterator<UnitIter, CP32>(std::move(iter));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -25,8 +25,8 @@ using U_HEADER_ONLY_NAMESPACE::utfIterator;
|
|||
using U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints;
|
||||
using U_HEADER_ONLY_NAMESPACE::utfStringCodePoints;
|
||||
|
||||
#ifdef SAMPLE_CODE
|
||||
// For API docs etc. Compile when changing samples or APIs.
|
||||
#if 0
|
||||
// Sample code for API docs etc. Compile when changing samples or APIs.
|
||||
using U_HEADER_ONLY_NAMESPACE::unsafeUTFIterator;
|
||||
using U_HEADER_ONLY_NAMESPACE::unsafeUTFStringCodePoints;
|
||||
|
||||
|
@ -101,6 +101,9 @@ std::string_view firstSequence8(std::string_view s) {
|
|||
|
||||
#endif // SAMPLE_CODE
|
||||
|
||||
template<typename Unit>
|
||||
class SinglePassIter;
|
||||
|
||||
// Shared state for one or more copies of single-pass iterators.
|
||||
// Similar to https://en.cppreference.com/w/cpp/iterator/istreambuf_iterator
|
||||
// but the iterators only implement LegacyIterator (* and ++) without post-increment.
|
||||
|
@ -109,6 +112,9 @@ class SinglePassSource {
|
|||
public:
|
||||
SinglePassSource(std::basic_string_view<Unit> s) : p(s.data()), limit(s.data() + s.length()) {}
|
||||
|
||||
SinglePassIter<Unit> begin() { return SinglePassIter<Unit>(*this); }
|
||||
SinglePassIter<Unit> end() { return SinglePassIter<Unit>(); }
|
||||
|
||||
private:
|
||||
template<typename U>
|
||||
friend class SinglePassIter;
|
||||
|
@ -132,7 +138,13 @@ public:
|
|||
// limit sentinel
|
||||
SinglePassIter() : src(nullptr) {}
|
||||
|
||||
// TODO: try to delete the copy constructor/assignment?
|
||||
// movable
|
||||
SinglePassIter(SinglePassIter &&src) noexcept = default;
|
||||
SinglePassIter &operator=(SinglePassIter &&src) noexcept = default;
|
||||
|
||||
// not copyable
|
||||
SinglePassIter(const SinglePassIter &other) = delete;
|
||||
SinglePassIter &operator=(const SinglePassIter &other) = delete;
|
||||
|
||||
bool operator==(const SinglePassIter &other) const {
|
||||
bool done = isDone();
|
||||
|
@ -325,17 +337,15 @@ void U16IteratorTest::testSurrogate() {
|
|||
|
||||
void U16IteratorTest::testSinglePassIter() {
|
||||
SinglePassSource<char16_t> good(u"abçカ🚴"sv);
|
||||
SinglePassIter<char16_t> goodBegin(good);
|
||||
SinglePassIter<char16_t> goodLimit{};
|
||||
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
|
||||
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
|
||||
auto iter = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(good.begin(), good.end());
|
||||
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(good.end(), good.end());
|
||||
assertTrue(
|
||||
"input_iterator_tag",
|
||||
std::is_same_v<
|
||||
typename std::iterator_traits<
|
||||
UTFIterator<SinglePassIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE>>::iterator_category,
|
||||
std::input_iterator_tag>);
|
||||
auto iter = rangeBegin;
|
||||
// auto iter = std::move(rangeBegin); -- TODO: why does this want to copy?
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
++iter; // pre-increment
|
||||
|
@ -363,7 +373,7 @@ void U16IteratorTest::testFwdIter() {
|
|||
FwdIter<char16_t> goodBegin(good.data());
|
||||
FwdIter<char16_t> goodLimit(good.data() + good.length());
|
||||
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
|
||||
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
|
||||
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit, goodLimit);
|
||||
// TODO: UTFStringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
assertTrue(
|
||||
"forward_iterator_tag",
|
||||
|
@ -465,17 +475,15 @@ void U8IteratorTest::testGood() {
|
|||
|
||||
void U8IteratorTest::testSinglePassIter() {
|
||||
SinglePassSource<char> good(reinterpret_cast<const char*>(u8"abçカ🚴"));
|
||||
SinglePassIter<char> goodBegin(good);
|
||||
SinglePassIter<char> goodLimit{};
|
||||
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
|
||||
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
|
||||
auto iter = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(good.begin(), good.end());
|
||||
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(good.end(), good.end());
|
||||
assertTrue(
|
||||
"input_iterator_tag",
|
||||
std::is_same_v<
|
||||
typename std::iterator_traits<
|
||||
UTFIterator<SinglePassIter<char>, UChar32, U_BEHAVIOR_NEGATIVE>>::iterator_category,
|
||||
std::input_iterator_tag>);
|
||||
auto iter = rangeBegin;
|
||||
// auto iter = std::move(rangeBegin); -- TODO: why does this want to copy?
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
++iter; // pre-increment
|
||||
|
@ -503,7 +511,7 @@ void U8IteratorTest::testFwdIter() {
|
|||
FwdIter<char> goodBegin(good.data());
|
||||
FwdIter<char> goodLimit(good.data() + good.length());
|
||||
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
|
||||
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
|
||||
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit, goodLimit);
|
||||
// TODO: UTFStringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
assertTrue(
|
||||
"forward_iterator_tag",
|
||||
|
@ -603,17 +611,15 @@ void U32IteratorTest::testGood() {
|
|||
|
||||
void U32IteratorTest::testSinglePassIter() {
|
||||
SinglePassSource<char32_t> good(U"abçカ🚴"sv);
|
||||
SinglePassIter<char32_t> goodBegin(good);
|
||||
SinglePassIter<char32_t> goodLimit{};
|
||||
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
|
||||
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
|
||||
auto iter = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(good.begin(), good.end());
|
||||
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(good.end(), good.end());
|
||||
assertTrue(
|
||||
"input_iterator_tag",
|
||||
std::is_same_v<
|
||||
typename std::iterator_traits<
|
||||
UTFIterator<SinglePassIter<char32_t>, UChar32, U_BEHAVIOR_NEGATIVE>>::iterator_category,
|
||||
std::input_iterator_tag>);
|
||||
auto iter = rangeBegin;
|
||||
// auto iter = std::move(rangeBegin); -- TODO: why does this want to copy?
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
++iter; // pre-increment
|
||||
|
@ -641,7 +647,7 @@ void U32IteratorTest::testFwdIter() {
|
|||
FwdIter<char32_t> goodBegin(good.data());
|
||||
FwdIter<char32_t> goodLimit(good.data() + good.length());
|
||||
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
|
||||
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
|
||||
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit, goodLimit);
|
||||
// TODO: UTFStringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
assertTrue(
|
||||
"forward_iterator_tag",
|
||||
|
|
Loading…
Add table
Reference in a new issue