mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 17:01:16 +00:00
ICU-23004 generic UTF class names
This commit is contained in:
parent
518967ab01
commit
8db7b581b5
2 changed files with 113 additions and 98 deletions
|
@ -1,8 +1,9 @@
|
|||
// © 2024 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: https://www.unicode.org/copyright.html
|
||||
|
||||
// utf16cppiter.h
|
||||
// utfiter.h
|
||||
// created: 2024aug12 Markus W. Scherer
|
||||
// TODO: rename this header file to utfiterator.h?
|
||||
|
||||
#ifndef __UTF16CPPITER_H__
|
||||
#define __UTF16CPPITER_H__
|
||||
|
@ -57,6 +58,8 @@ namespace header {}
|
|||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
// Some defined behaviors for handling ill-formed Unicode strings.
|
||||
// TODO: For UTF-32, we have basically orthogonal conditions for surrogate vs. out-of-range.
|
||||
// Maybe make U_BEHAVIOR_SURROGATE return FFFD for out-of-range?
|
||||
typedef enum UIllFormedBehavior {
|
||||
U_BEHAVIOR_NEGATIVE,
|
||||
U_BEHAVIOR_FFFD,
|
||||
|
@ -488,6 +491,7 @@ public:
|
|||
* Validating bidirectional iterator over the code points in a Unicode 16-bit string.
|
||||
*
|
||||
* @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
|
||||
* UTF-8: char or char8_t or uint8_t;
|
||||
* UTF-16: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if U_BEHAVIOR_NEGATIVE
|
||||
|
@ -495,7 +499,7 @@ public:
|
|||
* @draft ICU 78
|
||||
*/
|
||||
template<typename UnitIter, typename CP32, UIllFormedBehavior behavior, typename = void>
|
||||
class U16Iterator {
|
||||
class UTFIterator {
|
||||
using Impl = UTFImpl<UnitIter, CP32, behavior>;
|
||||
|
||||
// Proxy type for operator->() (required by LegacyInputIterator)
|
||||
|
@ -516,22 +520,22 @@ public:
|
|||
// TODO: Maybe std::move() the UnitIters?
|
||||
// TODO: We might try to support limit==nullptr, similar to U16_ macros supporting length<0.
|
||||
// Test pointers for == or != but not < or >.
|
||||
U16Iterator(UnitIter start, UnitIter p, UnitIter limit) :
|
||||
UTFIterator(UnitIter start, UnitIter p, UnitIter limit) :
|
||||
p_(p), start_(start), limit_(limit), units_(0, 0, false, p) {}
|
||||
// TODO: add constructor with just start-or-p and limit: start=p
|
||||
// Constructs an iterator start or limit sentinel.
|
||||
U16Iterator(UnitIter p) : p_(p), start_(p), limit_(p), units_(0, 0, false, p) {}
|
||||
UTFIterator(UnitIter p) : p_(p), start_(p), limit_(p), units_(0, 0, false, p) {}
|
||||
|
||||
U16Iterator(const U16Iterator &other) = default;
|
||||
U16Iterator &operator=(const U16Iterator &other) = default;
|
||||
UTFIterator(const UTFIterator &other) = default;
|
||||
UTFIterator &operator=(const UTFIterator &other) = default;
|
||||
|
||||
bool operator==(const U16Iterator &other) const {
|
||||
bool operator==(const UTFIterator &other) const {
|
||||
// Compare logical positions.
|
||||
UnitIter p1 = state_ <= 0 ? p_ : units_.data();
|
||||
UnitIter p2 = other.state_ <= 0 ? other.p_ : other.units_.data();
|
||||
return p1 == p2;
|
||||
}
|
||||
bool operator!=(const U16Iterator &other) const { return !operator==(other); }
|
||||
bool operator!=(const UTFIterator &other) const { return !operator==(other); }
|
||||
|
||||
CodeUnits<UnitIter, CP32> operator*() const {
|
||||
if (state_ == 0) {
|
||||
|
@ -549,7 +553,7 @@ public:
|
|||
return Proxy(units_);
|
||||
}
|
||||
|
||||
U16Iterator &operator++() { // pre-increment
|
||||
UTFIterator &operator++() { // pre-increment
|
||||
if (state_ > 0) {
|
||||
// operator*() called readAndInc() so p_ is already ahead.
|
||||
state_ = 0;
|
||||
|
@ -562,27 +566,27 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
U16Iterator operator++(int) { // post-increment
|
||||
UTFIterator operator++(int) { // post-increment
|
||||
if (state_ > 0) {
|
||||
// operator*() called readAndInc() so p_ is already ahead.
|
||||
U16Iterator result(*this);
|
||||
UTFIterator result(*this);
|
||||
state_ = 0;
|
||||
return result;
|
||||
} else if (state_ == 0) {
|
||||
units_ = Impl::readAndInc(p_, limit_);
|
||||
U16Iterator result(*this);
|
||||
UTFIterator result(*this);
|
||||
result.state_ = units_.length();
|
||||
// keep this->state_ == 0
|
||||
return result;
|
||||
} else /* state_ < 0 */ {
|
||||
U16Iterator result(*this);
|
||||
UTFIterator result(*this);
|
||||
// operator--() called decAndRead() so we know how far to skip.
|
||||
Impl::moveToDecAndReadLimit(p_, state_);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
U16Iterator &operator--() { // pre-decrement
|
||||
UTFIterator &operator--() { // pre-decrement
|
||||
if (state_ > 0) {
|
||||
// operator*() called readAndInc() so p_ is ahead of the logical position.
|
||||
Impl::moveToReadAndIncStart(p_, state_);
|
||||
|
@ -592,8 +596,8 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
U16Iterator operator--(int) { // post-decrement
|
||||
U16Iterator result(*this);
|
||||
UTFIterator operator--(int) { // post-decrement
|
||||
UTFIterator result(*this);
|
||||
operator--();
|
||||
return result;
|
||||
}
|
||||
|
@ -620,7 +624,7 @@ private:
|
|||
#ifndef U_IN_DOXYGEN
|
||||
// Partial template specialization for single-pass input iterator.
|
||||
template<typename UnitIter, typename CP32, UIllFormedBehavior behavior>
|
||||
class U16Iterator<
|
||||
class UTFIterator<
|
||||
UnitIter,
|
||||
CP32,
|
||||
behavior,
|
||||
|
@ -651,22 +655,22 @@ public:
|
|||
// Might allow interesting sentinel types.
|
||||
// Would be trouble for the sentinel constructor that inits both iters from the same p.
|
||||
|
||||
U16Iterator(UnitIter p, UnitIter limit) : p_(p), limit_(limit) {}
|
||||
UTFIterator(UnitIter p, UnitIter limit) : p_(p), limit_(limit) {}
|
||||
// TODO: We might try to support limit==nullptr, similar to U16_ macros supporting length<0.
|
||||
// Test pointers for == or != but not < or >.
|
||||
|
||||
// Constructs an iterator start or limit sentinel.
|
||||
U16Iterator(UnitIter p) : p_(p), limit_(p) {}
|
||||
UTFIterator(UnitIter p) : p_(p), limit_(p) {}
|
||||
|
||||
U16Iterator(const U16Iterator &other) = default;
|
||||
U16Iterator &operator=(const U16Iterator &other) = default;
|
||||
UTFIterator(const UTFIterator &other) = default;
|
||||
UTFIterator &operator=(const UTFIterator &other) = default;
|
||||
|
||||
bool operator==(const U16Iterator &other) const {
|
||||
bool operator==(const UTFIterator &other) const {
|
||||
return p_ == other.p_ && ahead_ == other.ahead_;
|
||||
// Strictly speaking, we should check if the logical position is the same.
|
||||
// However, we cannot move, or do arithmetic with, a single-pass UnitIter.
|
||||
}
|
||||
bool operator!=(const U16Iterator &other) const { return !operator==(other); }
|
||||
bool operator!=(const UTFIterator &other) const { return !operator==(other); }
|
||||
|
||||
CodeUnits<UnitIter, CP32> operator*() const {
|
||||
if (!ahead_) {
|
||||
|
@ -684,7 +688,7 @@ public:
|
|||
return Proxy(units_);
|
||||
}
|
||||
|
||||
U16Iterator &operator++() { // pre-increment
|
||||
UTFIterator &operator++() { // pre-increment
|
||||
if (ahead_) {
|
||||
// operator*() called readAndInc() so p_ is already ahead.
|
||||
ahead_ = false;
|
||||
|
@ -728,6 +732,7 @@ private:
|
|||
* Not bidirectional, but optimized for reverse iteration.
|
||||
*
|
||||
* @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
|
||||
* UTF-8: char or char8_t or uint8_t;
|
||||
* UTF-16: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if U_BEHAVIOR_NEGATIVE
|
||||
|
@ -799,34 +804,36 @@ private:
|
|||
/**
|
||||
* A C++ "range" for validating iteration over all of the code points of a 16-bit Unicode string.
|
||||
*
|
||||
* @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam Unit16 Code unit type:
|
||||
* UTF-8: char or char8_t or uint8_t;
|
||||
* UTF-16: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if U_BEHAVIOR_NEGATIVE
|
||||
* @tparam UIllFormedBehavior TODO
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
|
||||
class U16StringCodePoints {
|
||||
class UTFStringCodePoints {
|
||||
public:
|
||||
/**
|
||||
* Constructs a C++ "range" object over the code points in the string.
|
||||
* @draft ICU 78
|
||||
*/
|
||||
U16StringCodePoints(std::basic_string_view<Unit16> s) : s(s) {}
|
||||
UTFStringCodePoints(std::basic_string_view<Unit16> s) : s(s) {}
|
||||
|
||||
/** @draft ICU 78 */
|
||||
U16StringCodePoints(const U16StringCodePoints &other) = default;
|
||||
UTFStringCodePoints(const UTFStringCodePoints &other) = default;
|
||||
|
||||
/** @draft ICU 78 */
|
||||
U16StringCodePoints &operator=(const U16StringCodePoints &other) = default;
|
||||
UTFStringCodePoints &operator=(const UTFStringCodePoints &other) = default;
|
||||
|
||||
/** @draft ICU 78 */
|
||||
U16Iterator<const Unit16 *, CP32, behavior> begin() const {
|
||||
UTFIterator<const Unit16 *, CP32, behavior> begin() const {
|
||||
return {s.data(), s.data(), s.data() + s.length()};
|
||||
}
|
||||
|
||||
/** @draft ICU 78 */
|
||||
U16Iterator<const Unit16 *, CP32, behavior> end() const {
|
||||
UTFIterator<const Unit16 *, CP32, behavior> end() const {
|
||||
const Unit16 *limit = s.data() + s.length();
|
||||
return {s.data(), limit, limit};
|
||||
}
|
||||
|
@ -848,30 +855,32 @@ private:
|
|||
// ------------------------------------------------------------------------- ***
|
||||
|
||||
/**
|
||||
* Internal base class for public U16UnsafeIterator & U16UnsafeReverseIterator.
|
||||
* Internal base class for public UnsafeUTFIterator & UnsafeUTFReverseIterator.
|
||||
* Not intended for public subclassing.
|
||||
*
|
||||
* @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam Unit16 Code unit type:
|
||||
* UTF-8: char or char8_t or uint8_t;
|
||||
* UTF-16: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if U_BEHAVIOR_NEGATIVE
|
||||
* @internal
|
||||
*/
|
||||
template<typename Unit16, typename CP32>
|
||||
class U16UnsafeIteratorBase {
|
||||
class UnsafeUTFIteratorBase {
|
||||
protected:
|
||||
// @internal
|
||||
U16UnsafeIteratorBase(const Unit16 *p) : p_(p) {}
|
||||
UnsafeUTFIteratorBase(const Unit16 *p) : p_(p) {}
|
||||
// Test pointers for == or != but not < or >.
|
||||
|
||||
// @internal
|
||||
U16UnsafeIteratorBase(const U16UnsafeIteratorBase &other) = default;
|
||||
UnsafeUTFIteratorBase(const UnsafeUTFIteratorBase &other) = default;
|
||||
// @internal
|
||||
U16UnsafeIteratorBase &operator=(const U16UnsafeIteratorBase &other) = default;
|
||||
UnsafeUTFIteratorBase &operator=(const UnsafeUTFIteratorBase &other) = default;
|
||||
|
||||
// @internal
|
||||
bool operator==(const U16UnsafeIteratorBase &other) const { return p_ == other.p_; }
|
||||
bool operator==(const UnsafeUTFIteratorBase &other) const { return p_ == other.p_; }
|
||||
// @internal
|
||||
bool operator!=(const U16UnsafeIteratorBase &other) const { return !operator==(other); }
|
||||
bool operator!=(const UnsafeUTFIteratorBase &other) const { return !operator==(other); }
|
||||
|
||||
// @internal
|
||||
void dec() {
|
||||
|
@ -917,23 +926,25 @@ protected:
|
|||
* Non-validating bidirectional iterator over the code points in a UTF-16 string.
|
||||
* The string must be well-formed.
|
||||
*
|
||||
* @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam Unit16 Code unit type:
|
||||
* UTF-8: char or char8_t or uint8_t;
|
||||
* UTF-16: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if U_BEHAVIOR_NEGATIVE
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename Unit16, typename CP32>
|
||||
class U16UnsafeIterator : private U16UnsafeIteratorBase<Unit16, CP32> {
|
||||
class UnsafeUTFIterator : private UnsafeUTFIteratorBase<Unit16, CP32> {
|
||||
// FYI: We need to qualify all accesses to super class members because of private inheritance.
|
||||
using Super = U16UnsafeIteratorBase<Unit16, CP32>;
|
||||
using Super = UnsafeUTFIteratorBase<Unit16, CP32>;
|
||||
public:
|
||||
U16UnsafeIterator(const Unit16 *p) : Super(p) {}
|
||||
UnsafeUTFIterator(const Unit16 *p) : Super(p) {}
|
||||
|
||||
U16UnsafeIterator(const U16UnsafeIterator &other) = default;
|
||||
U16UnsafeIterator &operator=(const U16UnsafeIterator &other) = default;
|
||||
UnsafeUTFIterator(const UnsafeUTFIterator &other) = default;
|
||||
UnsafeUTFIterator &operator=(const UnsafeUTFIterator &other) = default;
|
||||
|
||||
bool operator==(const U16UnsafeIterator &other) const { return Super::operator==(other); }
|
||||
bool operator!=(const U16UnsafeIterator &other) const { return !Super::operator==(other); }
|
||||
bool operator==(const UnsafeUTFIterator &other) const { return Super::operator==(other); }
|
||||
bool operator!=(const UnsafeUTFIterator &other) const { return !Super::operator==(other); }
|
||||
|
||||
UnsafeCodeUnits<Unit16, CP32> operator*() const {
|
||||
// Call the same function in both operator*() and operator++() so that an
|
||||
|
@ -942,7 +953,7 @@ public:
|
|||
return Super::readAndInc(p);
|
||||
}
|
||||
|
||||
U16UnsafeIterator &operator++() { // pre-increment
|
||||
UnsafeUTFIterator &operator++() { // pre-increment
|
||||
// Call the same function in both operator*() and operator++() so that an
|
||||
// optimizing compiler can easily eliminate redundant work when alternating between the two.
|
||||
Super::readAndInc(Super::p_);
|
||||
|
@ -950,21 +961,21 @@ public:
|
|||
}
|
||||
|
||||
// TODO: disable for single-pass input iterator? or return proxy like std::istreambuf_iterator?
|
||||
U16UnsafeIterator operator++(int) { // post-increment
|
||||
UnsafeUTFIterator operator++(int) { // post-increment
|
||||
// Call the same function in both operator*() and operator++() so that an
|
||||
// optimizing compiler can easily eliminate redundant work when alternating between the two.
|
||||
U16UnsafeIterator result(*this);
|
||||
UnsafeUTFIterator result(*this);
|
||||
Super::readAndInc(Super::p_);
|
||||
return result;
|
||||
}
|
||||
|
||||
U16UnsafeIterator &operator--() { // pre-decrement
|
||||
UnsafeUTFIterator &operator--() { // pre-decrement
|
||||
Super::dec();
|
||||
return *this;
|
||||
}
|
||||
|
||||
U16UnsafeIterator operator--(int) { // post-decrement
|
||||
U16UnsafeIterator result(*this);
|
||||
UnsafeUTFIterator operator--(int) { // post-decrement
|
||||
UnsafeUTFIterator result(*this);
|
||||
Super::dec();
|
||||
return result;
|
||||
}
|
||||
|
@ -975,22 +986,24 @@ public:
|
|||
* Not bidirectional, but optimized for reverse iteration.
|
||||
* The string must be well-formed.
|
||||
*
|
||||
* @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam Unit16 Code unit type:
|
||||
* UTF-8: char or char8_t or uint8_t;
|
||||
* UTF-16: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if U_BEHAVIOR_NEGATIVE
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename Unit16, typename CP32>
|
||||
class U16UnsafeReverseIterator : private U16UnsafeIteratorBase<Unit16, CP32> {
|
||||
using Super = U16UnsafeIteratorBase<Unit16, CP32>;
|
||||
class UnsafeUTFReverseIterator : private UnsafeUTFIteratorBase<Unit16, CP32> {
|
||||
using Super = UnsafeUTFIteratorBase<Unit16, CP32>;
|
||||
public:
|
||||
U16UnsafeReverseIterator(const Unit16 *p) : Super(p) {}
|
||||
UnsafeUTFReverseIterator(const Unit16 *p) : Super(p) {}
|
||||
|
||||
U16UnsafeReverseIterator(const U16UnsafeReverseIterator &other) = default;
|
||||
U16UnsafeReverseIterator &operator=(const U16UnsafeReverseIterator &other) = default;
|
||||
UnsafeUTFReverseIterator(const UnsafeUTFReverseIterator &other) = default;
|
||||
UnsafeUTFReverseIterator &operator=(const UnsafeUTFReverseIterator &other) = default;
|
||||
|
||||
bool operator==(const U16UnsafeReverseIterator &other) const { return Super::operator==(other); }
|
||||
bool operator!=(const U16UnsafeReverseIterator &other) const { return !Super::operator==(other); }
|
||||
bool operator==(const UnsafeUTFReverseIterator &other) const { return Super::operator==(other); }
|
||||
bool operator!=(const UnsafeUTFReverseIterator &other) const { return !Super::operator==(other); }
|
||||
|
||||
UnsafeCodeUnits<Unit16, CP32> operator*() const {
|
||||
// Call the same function in both operator*() and operator++() so that an
|
||||
|
@ -999,17 +1012,17 @@ public:
|
|||
return Super::decAndRead(p);
|
||||
}
|
||||
|
||||
U16UnsafeReverseIterator &operator++() { // pre-increment
|
||||
UnsafeUTFReverseIterator &operator++() { // pre-increment
|
||||
// Call the same function in both operator*() and operator++() so that an
|
||||
// optimizing compiler can easily eliminate redundant work when alternating between the two.
|
||||
Super::decAndRead(Super::p_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
U16UnsafeReverseIterator operator++(int) { // post-increment
|
||||
UnsafeUTFReverseIterator operator++(int) { // post-increment
|
||||
// Call the same function in both operator*() and operator++() so that an
|
||||
// optimizing compiler can easily eliminate redundant work when alternating between the two.
|
||||
U16UnsafeReverseIterator result(*this);
|
||||
UnsafeUTFReverseIterator result(*this);
|
||||
Super::decAndRead(Super::p_);
|
||||
return result;
|
||||
}
|
||||
|
@ -1019,41 +1032,43 @@ public:
|
|||
* A C++ "range" for non-validating iteration over all of the code points of a UTF-16 string.
|
||||
* The string must be well-formed.
|
||||
*
|
||||
* @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam Unit16 Code unit type:
|
||||
* UTF-8: char or char8_t or uint8_t;
|
||||
* UTF-16: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if U_BEHAVIOR_NEGATIVE
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename Unit16, typename CP32>
|
||||
class U16UnsafeStringCodePoints {
|
||||
class UnsafeUTFStringCodePoints {
|
||||
public:
|
||||
/**
|
||||
* Constructs a C++ "range" object over the code points in the string.
|
||||
* @draft ICU 78
|
||||
*/
|
||||
U16UnsafeStringCodePoints(std::basic_string_view<Unit16> s) : s(s) {}
|
||||
UnsafeUTFStringCodePoints(std::basic_string_view<Unit16> s) : s(s) {}
|
||||
|
||||
/** @draft ICU 78 */
|
||||
U16UnsafeStringCodePoints(const U16UnsafeStringCodePoints &other) = default;
|
||||
U16UnsafeStringCodePoints &operator=(const U16UnsafeStringCodePoints &other) = default;
|
||||
UnsafeUTFStringCodePoints(const UnsafeUTFStringCodePoints &other) = default;
|
||||
UnsafeUTFStringCodePoints &operator=(const UnsafeUTFStringCodePoints &other) = default;
|
||||
|
||||
/** @draft ICU 78 */
|
||||
U16UnsafeIterator<Unit16, CP32> begin() const {
|
||||
UnsafeUTFIterator<Unit16, CP32> begin() const {
|
||||
return {s.data()};
|
||||
}
|
||||
|
||||
/** @draft ICU 78 */
|
||||
U16UnsafeIterator<Unit16, CP32> end() const {
|
||||
UnsafeUTFIterator<Unit16, CP32> end() const {
|
||||
return {s.data() + s.length()};
|
||||
}
|
||||
|
||||
/** @draft ICU 78 */
|
||||
U16UnsafeReverseIterator<Unit16, CP32> rbegin() const {
|
||||
UnsafeUTFReverseIterator<Unit16, CP32> rbegin() const {
|
||||
return {s.data() + s.length()};
|
||||
}
|
||||
|
||||
/** @draft ICU 78 */
|
||||
U16UnsafeReverseIterator<Unit16, CP32> rend() const {
|
||||
UnsafeUTFReverseIterator<Unit16, CP32> rend() const {
|
||||
return {s.data()};
|
||||
}
|
||||
|
||||
|
@ -1068,7 +1083,7 @@ private:
|
|||
// TODO: remove experimental sample code
|
||||
#ifndef UTYPES_H
|
||||
int32_t rangeLoop16(std::u16string_view s) {
|
||||
header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
header::UTFStringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
int32_t sum = 0;
|
||||
for (auto units : range) {
|
||||
sum += units.codePoint();
|
||||
|
@ -1077,7 +1092,7 @@ int32_t rangeLoop16(std::u16string_view s) {
|
|||
}
|
||||
|
||||
int32_t loopIterPlusPlus16(std::u16string_view s) {
|
||||
header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
header::UTFStringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
int32_t sum = 0;
|
||||
auto iter = range.begin();
|
||||
auto limit = range.end();
|
||||
|
@ -1088,7 +1103,7 @@ int32_t loopIterPlusPlus16(std::u16string_view s) {
|
|||
}
|
||||
|
||||
int32_t backwardLoop16(std::u16string_view s) {
|
||||
header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
header::UTFStringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
int32_t sum = 0;
|
||||
auto start = range.begin();
|
||||
auto iter = range.end();
|
||||
|
@ -1099,7 +1114,7 @@ int32_t backwardLoop16(std::u16string_view s) {
|
|||
}
|
||||
|
||||
int32_t reverseLoop16(std::u16string_view s) {
|
||||
header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
header::UTFStringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
int32_t sum = 0;
|
||||
for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {
|
||||
sum += (*iter).codePoint();
|
||||
|
@ -1108,7 +1123,7 @@ int32_t reverseLoop16(std::u16string_view s) {
|
|||
}
|
||||
|
||||
int32_t unsafeRangeLoop16(std::u16string_view s) {
|
||||
header::U16UnsafeStringCodePoints<char16_t, UChar32> range(s);
|
||||
header::UnsafeUTFStringCodePoints<char16_t, UChar32> range(s);
|
||||
int32_t sum = 0;
|
||||
for (auto units : range) {
|
||||
sum += units.codePoint();
|
||||
|
@ -1117,7 +1132,7 @@ int32_t unsafeRangeLoop16(std::u16string_view s) {
|
|||
}
|
||||
|
||||
int32_t unsafeReverseLoop16(std::u16string_view s) {
|
||||
header::U16UnsafeStringCodePoints<char16_t, UChar32> range(s);
|
||||
header::UnsafeUTFStringCodePoints<char16_t, UChar32> range(s);
|
||||
int32_t sum = 0;
|
||||
for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {
|
||||
sum += iter->codePoint();
|
||||
|
@ -1126,7 +1141,7 @@ int32_t unsafeReverseLoop16(std::u16string_view s) {
|
|||
}
|
||||
|
||||
int32_t rangeLoop8(std::string_view s) {
|
||||
header::U16StringCodePoints<char, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
header::UTFStringCodePoints<char, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
int32_t sum = 0;
|
||||
for (auto units : range) {
|
||||
sum += units.codePoint();
|
||||
|
@ -1135,7 +1150,7 @@ int32_t rangeLoop8(std::string_view s) {
|
|||
}
|
||||
|
||||
int32_t reverseLoop(std::string_view s) {
|
||||
header::U16StringCodePoints<char, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
header::UTFStringCodePoints<char, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
int32_t sum = 0;
|
||||
for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {
|
||||
sum += iter->codePoint();
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
|
||||
using namespace std::string_view_literals;
|
||||
|
||||
using U_HEADER_ONLY_NAMESPACE::U16Iterator;
|
||||
using U_HEADER_ONLY_NAMESPACE::U16StringCodePoints;
|
||||
using U_HEADER_ONLY_NAMESPACE::UTFIterator;
|
||||
using U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints;
|
||||
|
||||
// Shared state for one or more copies of single-pass iterators.
|
||||
// Similar to https://en.cppreference.com/w/cpp/iterator/istreambuf_iterator
|
||||
|
@ -141,7 +141,7 @@ void U16IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&nam
|
|||
|
||||
void U16IteratorTest::testGood() {
|
||||
std::u16string_view good(u"abçカ🚴"sv);
|
||||
U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
UTFStringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
auto iter = range.begin();
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
|
@ -168,7 +168,7 @@ void U16IteratorTest::testGood() {
|
|||
void U16IteratorTest::testNegative() {
|
||||
static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
|
||||
std::u16string_view bad(badChars, 5);
|
||||
U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(bad);
|
||||
UTFStringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(bad);
|
||||
auto iter = range.begin();
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
|
@ -193,7 +193,7 @@ void U16IteratorTest::testNegative() {
|
|||
void U16IteratorTest::testFFFD() {
|
||||
static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
|
||||
std::u16string_view bad(badChars, 5);
|
||||
U16StringCodePoints<char16_t, char32_t, U_BEHAVIOR_FFFD> range(bad);
|
||||
UTFStringCodePoints<char16_t, char32_t, U_BEHAVIOR_FFFD> range(bad);
|
||||
auto iter = range.begin();
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
|
@ -217,7 +217,7 @@ void U16IteratorTest::testFFFD() {
|
|||
void U16IteratorTest::testSurrogate() {
|
||||
static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
|
||||
std::u16string_view bad(badChars, 5);
|
||||
U16StringCodePoints<char16_t, uint32_t, U_BEHAVIOR_SURROGATE> range(bad);
|
||||
UTFStringCodePoints<char16_t, uint32_t, U_BEHAVIOR_SURROGATE> range(bad);
|
||||
auto iter = range.begin();
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
|
@ -242,9 +242,9 @@ void U16IteratorTest::testSinglePassIter() {
|
|||
SinglePassSource<char16_t> good(u"abçカ🚴"sv);
|
||||
SinglePassIter<char16_t> goodBegin(good);
|
||||
SinglePassIter<char16_t> goodLimit{};
|
||||
U16Iterator<SinglePassIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(
|
||||
UTFIterator<SinglePassIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(
|
||||
goodBegin, goodLimit);
|
||||
U16Iterator<SinglePassIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
|
||||
UTFIterator<SinglePassIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
|
||||
auto iter = rangeBegin;
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
|
@ -272,10 +272,10 @@ void U16IteratorTest::testFwdIter() {
|
|||
std::u16string_view good(u"abçカ🚴"sv);
|
||||
FwdIter<char16_t> goodBegin(good.data());
|
||||
FwdIter<char16_t> goodLimit(good.data() + good.length());
|
||||
U16Iterator<FwdIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(
|
||||
UTFIterator<FwdIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(
|
||||
goodBegin, goodBegin, goodLimit);
|
||||
U16Iterator<FwdIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
|
||||
// TODO: U16StringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
UTFIterator<FwdIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
|
||||
// TODO: UTFStringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
auto iter = rangeBegin;
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
|
@ -336,7 +336,7 @@ void U8IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&name
|
|||
|
||||
void U8IteratorTest::testGood() {
|
||||
std::string_view good(reinterpret_cast<const char*>(u8"abçカ🚴"));
|
||||
U16StringCodePoints<char, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
UTFStringCodePoints<char, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
auto iter = range.begin();
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
|
@ -366,9 +366,9 @@ void U8IteratorTest::testSinglePassIter() {
|
|||
SinglePassSource<char> good(reinterpret_cast<const char*>(u8"abçカ🚴"));
|
||||
SinglePassIter<char> goodBegin(good);
|
||||
SinglePassIter<char> goodLimit{};
|
||||
U16Iterator<SinglePassIter<char>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(
|
||||
UTFIterator<SinglePassIter<char>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(
|
||||
goodBegin, goodLimit);
|
||||
U16Iterator<SinglePassIter<char>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
|
||||
UTFIterator<SinglePassIter<char>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
|
||||
auto iter = rangeBegin;
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
|
@ -396,10 +396,10 @@ void U8IteratorTest::testFwdIter() {
|
|||
std::string_view good(reinterpret_cast<const char*>(u8"abçカ🚴"));
|
||||
FwdIter<char> goodBegin(good.data());
|
||||
FwdIter<char> goodLimit(good.data() + good.length());
|
||||
U16Iterator<FwdIter<char>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(
|
||||
UTFIterator<FwdIter<char>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(
|
||||
goodBegin, goodBegin, goodLimit);
|
||||
U16Iterator<FwdIter<char>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
|
||||
// TODO: U16StringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
UTFIterator<FwdIter<char>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
|
||||
// TODO: UTFStringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
auto iter = rangeBegin;
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
|
|
Loading…
Add table
Reference in a new issue