ICU-23004 make single-pass work with only movable UnitIter

This commit is contained in:
Markus Scherer 2025-03-08 15:32:40 -08:00
parent c60546528f
commit 19658d2647
2 changed files with 68 additions and 34 deletions

View file

@ -284,7 +284,7 @@ public:
}
}
static inline void inc(UnitIter &p, UnitIter limit) {
static inline void inc(UnitIter &p, const UnitIter &limit) {
// Very similar to U8_FWD_1().
uint8_t b = *p;
++p;
@ -378,7 +378,8 @@ public:
return {sub(), length, false, p0};
}
static inline CodeUnits<UnitIter, CP32> singlePassReadAndInc(UnitIter &p, UnitIter limit) {
static inline CodeUnits<UnitIter, CP32> singlePassReadAndInc(
UnitIter &p, const UnitIter &limit) {
// Very similar to U8_NEXT_OR_FFFD().
CP32 c = uint8_t(*p);
++p;
@ -489,7 +490,7 @@ public:
}
}
static inline void inc(UnitIter &p, UnitIter limit) {
static inline void inc(UnitIter &p, const UnitIter &limit) {
// Very similar to U16_FWD_1().
auto c = *p;
++p;
@ -525,7 +526,8 @@ public:
}
}
static inline CodeUnits<UnitIter, CP32> singlePassReadAndInc(UnitIter &p, UnitIter limit) {
static inline CodeUnits<UnitIter, CP32> singlePassReadAndInc(
UnitIter &p, const UnitIter &limit) {
// Very similar to U16_NEXT_OR_FFFD().
CP32 c = *p;
++p;
@ -588,7 +590,7 @@ public:
}
}
static inline void inc(UnitIter &p, UnitIter /*limit*/) {
static inline void inc(UnitIter &p, const UnitIter &/*limit*/) {
++p;
}
@ -608,7 +610,8 @@ public:
}
}
static inline CodeUnits<UnitIter, CP32> singlePassReadAndInc(UnitIter &p, UnitIter /*limit*/) {
static inline CodeUnits<UnitIter, CP32> singlePassReadAndInc(
UnitIter &p, const UnitIter &/*limit*/) {
uint32_t uc = *p;
CP32 c = uc;
++p;
@ -919,6 +922,9 @@ public:
// Constructs an iterator start or limit sentinel.
inline UTFIterator(UnitIter p) : p_(p), start_(p), limit_(p), units_(0, 0, false, p) {}
inline UTFIterator(UTFIterator &&src) noexcept = default;
inline UTFIterator &operator=(UTFIterator &&src) noexcept = default;
inline UTFIterator(const UTFIterator &other) = default;
inline UTFIterator &operator=(const UTFIterator &other) = default;
@ -1076,10 +1082,14 @@ public:
using difference_type = typename std::iterator_traits<UnitIter>::difference_type;
using iterator_category = std::input_iterator_tag;
inline UTFIterator(UnitIter p, UnitIter limit) : p_(p), limit_(limit) {}
inline UTFIterator(UnitIter p, UnitIter limit) : p_(std::move(p)), limit_(std::move(limit)) {}
// Constructs an iterator start or limit sentinel.
inline UTFIterator(UnitIter p) : p_(p), limit_(p) {}
// Requires UnitIter to be copyable: limit_ is copy-constructed from p_.
inline UTFIterator(UnitIter p) : p_(std::move(p)), limit_(p_) {}
inline UTFIterator(UTFIterator &&src) noexcept = default;
inline UTFIterator &operator=(UTFIterator &&src) noexcept = default;
inline UTFIterator(const UTFIterator &other) = default;
inline UTFIterator &operator=(const UTFIterator &other) = default;
@ -1177,6 +1187,9 @@ public:
p_(iter.getLogicalPosition()), start_(iter.start_), limit_(iter.limit_),
units_(0, 0, false, p_), unitsLimit_(p_) {}
inline reverse_iterator(reverse_iterator &&src) noexcept = default;
inline reverse_iterator &operator=(reverse_iterator &&src) noexcept = default;
inline reverse_iterator(const reverse_iterator &other) = default;
inline reverse_iterator &operator=(const reverse_iterator &other) = default;
@ -1352,7 +1365,7 @@ private:
*/
template<typename CP32, UIllFormedBehavior behavior, typename UnitIter>
auto utfIterator(UnitIter start, UnitIter p, UnitIter limit) {
return UTFIterator<UnitIter, CP32, behavior>(start, p, limit);
return UTFIterator<UnitIter, CP32, behavior>(std::move(start), std::move(p), std::move(limit));
}
/**
@ -1368,11 +1381,17 @@ auto utfIterator(UnitIter start, UnitIter p, UnitIter limit) {
*/
template<typename CP32, UIllFormedBehavior behavior, typename UnitIter>
auto utfIterator(UnitIter p, UnitIter limit) {
return UTFIterator<UnitIter, CP32, behavior>(p, limit);
return UTFIterator<UnitIter, CP32, behavior>(std::move(p), std::move(limit));
}
// Note: We should only enable the following factory function for a copyable UnitIter.
// In C++17, we would have to partially specialize with enable_if_t testing for forward_iterator,
// but a function template partial specialization is not allowed.
// In C++20, we might be able to require the std::copyable concept.
/**
* UTFIterator factory function for a start or limit sentinel.
* Requires UnitIter to be copyable.
*
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
* @tparam behavior How to handle ill-formed Unicode strings
@ -1383,7 +1402,7 @@ auto utfIterator(UnitIter p, UnitIter limit) {
*/
template<typename CP32, UIllFormedBehavior behavior, typename UnitIter>
auto utfIterator(UnitIter p) {
return UTFIterator<UnitIter, CP32, behavior>(p);
return UTFIterator<UnitIter, CP32, behavior>(std::move(p));
}
/**
@ -1450,6 +1469,9 @@ public:
inline UnsafeUTFIterator(UnitIter p) : p_(p), units_(0, 0, p) {}
inline UnsafeUTFIterator(UnsafeUTFIterator &&src) noexcept = default;
inline UnsafeUTFIterator &operator=(UnsafeUTFIterator &&src) noexcept = default;
inline UnsafeUTFIterator(const UnsafeUTFIterator &other) = default;
inline UnsafeUTFIterator &operator=(const UnsafeUTFIterator &other) = default;
@ -1602,7 +1624,10 @@ public:
using difference_type = typename std::iterator_traits<UnitIter>::difference_type;
using iterator_category = std::input_iterator_tag;
inline UnsafeUTFIterator(UnitIter p) : p_(p) {}
inline UnsafeUTFIterator(UnitIter p) : p_(std::move(p)) {}
inline UnsafeUTFIterator(UnsafeUTFIterator &&src) noexcept = default;
inline UnsafeUTFIterator &operator=(UnsafeUTFIterator &&src) noexcept = default;
inline UnsafeUTFIterator(const UnsafeUTFIterator &other) = default;
inline UnsafeUTFIterator &operator=(const UnsafeUTFIterator &other) = default;
@ -1696,6 +1721,9 @@ public:
inline reverse_iterator(U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator<UnitIter, CP32> iter) :
p_(iter.getLogicalPosition()), units_(0, 0, p_), unitsLimit_(p_) {}
inline reverse_iterator(reverse_iterator &&src) noexcept = default;
inline reverse_iterator &operator=(reverse_iterator &&src) noexcept = default;
inline reverse_iterator(const reverse_iterator &other) = default;
inline reverse_iterator &operator=(const reverse_iterator &other) = default;
@ -1860,7 +1888,7 @@ private:
*/
template<typename CP32, typename UnitIter>
auto unsafeUTFIterator(UnitIter iter) {
return UnsafeUTFIterator<UnitIter, CP32>(iter);
return UnsafeUTFIterator<UnitIter, CP32>(std::move(iter));
}
/**

View file

@ -25,8 +25,8 @@ using U_HEADER_ONLY_NAMESPACE::utfIterator;
using U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints;
using U_HEADER_ONLY_NAMESPACE::utfStringCodePoints;
#ifdef SAMPLE_CODE
// For API docs etc. Compile when changing samples or APIs.
#if 0
// Sample code for API docs etc. Compile when changing samples or APIs.
using U_HEADER_ONLY_NAMESPACE::unsafeUTFIterator;
using U_HEADER_ONLY_NAMESPACE::unsafeUTFStringCodePoints;
@ -101,6 +101,9 @@ std::string_view firstSequence8(std::string_view s) {
#endif // SAMPLE_CODE
template<typename Unit>
class SinglePassIter;
// Shared state for one or more single-pass iterators created from the same source.
// Similar to https://en.cppreference.com/w/cpp/iterator/istreambuf_iterator
// but the iterators only implement LegacyIterator (* and ++) without post-increment.
@ -109,6 +112,9 @@ class SinglePassSource {
public:
SinglePassSource(std::basic_string_view<Unit> s) : p(s.data()), limit(s.data() + s.length()) {}
SinglePassIter<Unit> begin() { return SinglePassIter<Unit>(*this); }
SinglePassIter<Unit> end() { return SinglePassIter<Unit>(); }
private:
template<typename U>
friend class SinglePassIter;
@ -132,7 +138,13 @@ public:
// limit sentinel
SinglePassIter() : src(nullptr) {}
// TODO: try to delete the copy constructor/assignment?
// movable
SinglePassIter(SinglePassIter &&src) noexcept = default;
SinglePassIter &operator=(SinglePassIter &&src) noexcept = default;
// not copyable
SinglePassIter(const SinglePassIter &other) = delete;
SinglePassIter &operator=(const SinglePassIter &other) = delete;
bool operator==(const SinglePassIter &other) const {
bool done = isDone();
@ -325,17 +337,15 @@ void U16IteratorTest::testSurrogate() {
void U16IteratorTest::testSinglePassIter() {
SinglePassSource<char16_t> good(u"abçカ🚴"sv);
SinglePassIter<char16_t> goodBegin(good);
SinglePassIter<char16_t> goodLimit{};
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
auto iter = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(good.begin(), good.end());
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(good.end(), good.end());
assertTrue(
"input_iterator_tag",
std::is_same_v<
typename std::iterator_traits<
UTFIterator<SinglePassIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE>>::iterator_category,
std::input_iterator_tag>);
auto iter = rangeBegin;
// auto iter = std::move(rangeBegin); -- TODO: why does this want to copy?
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
++iter; // pre-increment
@ -363,7 +373,7 @@ void U16IteratorTest::testFwdIter() {
FwdIter<char16_t> goodBegin(good.data());
FwdIter<char16_t> goodLimit(good.data() + good.length());
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit, goodLimit);
// TODO: UTFStringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
assertTrue(
"forward_iterator_tag",
@ -465,17 +475,15 @@ void U8IteratorTest::testGood() {
void U8IteratorTest::testSinglePassIter() {
SinglePassSource<char> good(reinterpret_cast<const char*>(u8"abçカ🚴"));
SinglePassIter<char> goodBegin(good);
SinglePassIter<char> goodLimit{};
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
auto iter = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(good.begin(), good.end());
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(good.end(), good.end());
assertTrue(
"input_iterator_tag",
std::is_same_v<
typename std::iterator_traits<
UTFIterator<SinglePassIter<char>, UChar32, U_BEHAVIOR_NEGATIVE>>::iterator_category,
std::input_iterator_tag>);
auto iter = rangeBegin;
// auto iter = std::move(rangeBegin); -- TODO: why does this want to copy?
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
++iter; // pre-increment
@ -503,7 +511,7 @@ void U8IteratorTest::testFwdIter() {
FwdIter<char> goodBegin(good.data());
FwdIter<char> goodLimit(good.data() + good.length());
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit, goodLimit);
// TODO: UTFStringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
assertTrue(
"forward_iterator_tag",
@ -603,17 +611,15 @@ void U32IteratorTest::testGood() {
void U32IteratorTest::testSinglePassIter() {
SinglePassSource<char32_t> good(U"abçカ🚴"sv);
SinglePassIter<char32_t> goodBegin(good);
SinglePassIter<char32_t> goodLimit{};
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
auto iter = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(good.begin(), good.end());
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(good.end(), good.end());
assertTrue(
"input_iterator_tag",
std::is_same_v<
typename std::iterator_traits<
UTFIterator<SinglePassIter<char32_t>, UChar32, U_BEHAVIOR_NEGATIVE>>::iterator_category,
std::input_iterator_tag>);
auto iter = rangeBegin;
// auto iter = std::move(rangeBegin); -- TODO: why does this want to copy?
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
++iter; // pre-increment
@ -641,7 +647,7 @@ void U32IteratorTest::testFwdIter() {
FwdIter<char32_t> goodBegin(good.data());
FwdIter<char32_t> goodLimit(good.data() + good.length());
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit, goodLimit);
// TODO: UTFStringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
assertTrue(
"forward_iterator_tag",