mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
Merge e127460b6e
into 572d03f85a
This commit is contained in:
commit
54a78eb95e
10 changed files with 3492 additions and 3 deletions
|
@ -1267,6 +1267,9 @@
|
|||
<CustomBuild Include="unicode\utf_old.h">
|
||||
<Filter>strings</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\utfiterator.h">
|
||||
<Filter>strings</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\bytestrie.h">
|
||||
<Filter>collections</Filter>
|
||||
</CustomBuild>
|
||||
|
|
|
@ -479,6 +479,8 @@
|
|||
/* Otherwise use the predefined value. */
|
||||
#elif !defined(__cplusplus)
|
||||
# define U_CPLUSPLUS_VERSION 0
|
||||
#elif __cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
|
||||
# define U_CPLUSPLUS_VERSION 20
|
||||
#elif __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
|
||||
# define U_CPLUSPLUS_VERSION 17
|
||||
#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
|
||||
|
|
|
@ -119,6 +119,28 @@
|
|||
/** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API */
|
||||
#define U_INTERNAL U_CAPI
|
||||
|
||||
/**
|
||||
* \def U_FORCE_INLINE
|
||||
* Forces function inlining on compilers that are known to support it.
|
||||
* Place this before specifiers like "static" and "explicit".
|
||||
*
|
||||
* This does not replace the "inline" keyword which suspends the One Definition Rule (ODR)
|
||||
* in addition to optionally serving as an inlining hint to the compiler.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_FORCE_INLINE
|
||||
// already defined
|
||||
#elif defined(U_IN_DOXYGEN)
|
||||
# define U_FORCE_INLINE inline
|
||||
#elif (defined(__clang__) && __clang__) || U_GCC_MAJOR_MINOR != 0
|
||||
# define U_FORCE_INLINE [[gnu::always_inline]]
|
||||
#elif defined(U_REAL_MSVC)
|
||||
# define U_FORCE_INLINE __forceinline
|
||||
#else
|
||||
# define U_FORCE_INLINE inline
|
||||
#endif
|
||||
|
||||
// Before ICU 65, function-like, multi-statement ICU macros were just defined as
|
||||
// series of statements wrapped in { } blocks and the caller could choose to
|
||||
// either treat them as if they were actual functions and end the invocation
|
||||
|
|
|
@ -517,7 +517,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
if(U8_IS_TRAIL(__t1)) { \
|
||||
++(i); \
|
||||
} \
|
||||
} else /* c>=0xf0 */ { \
|
||||
} else /* b>=0xf0 */ { \
|
||||
if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
|
||||
++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
|
||||
++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
|
||||
|
@ -683,7 +683,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
*/
|
||||
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(uint8_t)(s)[--(i)]; \
|
||||
if(U8_IS_TRAIL(c)) { \
|
||||
if(!U8_IS_SINGLE(c)) { \
|
||||
uint8_t __b, __count=1, __shift=6; \
|
||||
\
|
||||
/* c is a trail byte */ \
|
||||
|
|
2502
icu4c/source/common/unicode/utfiterator.h
Normal file
2502
icu4c/source/common/unicode/utfiterator.h
Normal file
File diff suppressed because it is too large
Load diff
|
@ -75,7 +75,9 @@ numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o \
|
|||
static_unisets_test.o numfmtdatadriventest.o numbertest_range.o erarulestest.o \
|
||||
formattedvaluetest.o formatted_string_builder_test.o numbertest_permutation.o \
|
||||
units_data_test.o units_router_test.o units_test.o displayoptions_test.o \
|
||||
numbertest_simple.o cplusplus_header_api_build_test.o uchar_type_build_test.o ucolheaderonlytest.o usetheaderonlytest.o
|
||||
numbertest_simple.o \
|
||||
cplusplus_header_api_build_test.o uchar_type_build_test.o \
|
||||
ucolheaderonlytest.o usetheaderonlytest.o utfiteratortest.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
|
|
|
@ -223,6 +223,7 @@
|
|||
<ClCompile Include="sfwdchit.cpp" />
|
||||
<ClCompile Include="strcase.cpp" />
|
||||
<ClCompile Include="ustrtest.cpp" />
|
||||
<ClCompile Include="utfiteratortest.cpp" />
|
||||
<ClCompile Include="utxttest.cpp" />
|
||||
<ClCompile Include="cpdtrtst.cpp" />
|
||||
<ClCompile Include="ittrans.cpp" />
|
||||
|
|
|
@ -490,6 +490,9 @@
|
|||
<ClCompile Include="ustrtest.cpp">
|
||||
<Filter>strings</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="utfiteratortest.cpp">
|
||||
<Filter>strings</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="utxttest.cpp">
|
||||
<Filter>strings</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
@ -51,6 +51,7 @@ extern IntlTest *createPluralMapTest();
|
|||
extern IntlTest *createStaticUnicodeSetsTest();
|
||||
#endif
|
||||
static IntlTest *createUHashTest();
|
||||
extern IntlTest *createUTFIteratorTest();
|
||||
|
||||
void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
|
||||
{
|
||||
|
@ -90,6 +91,7 @@ void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &
|
|||
TESTCASE_AUTO_CREATE_CLASS(UColHeaderOnlyTest);
|
||||
#endif
|
||||
TESTCASE_AUTO_CREATE_CLASS(USetHeaderOnlyTest);
|
||||
TESTCASE_AUTO_CREATE_CLASS(UTFIteratorTest);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
|
952
icu4c/source/test/intltest/utfiteratortest.cpp
Normal file
952
icu4c/source/test/intltest/utfiteratortest.cpp
Normal file
|
@ -0,0 +1,952 @@
|
|||
// © 2024 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: https://www.unicode.org/copyright.html
|
||||
|
||||
// utfiteratortest.cpp
|
||||
// created: 2024aug12 Markus W. Scherer
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
// Test header-only ICU C++ APIs. Do not use other ICU C++ APIs.
|
||||
// Non-default configuration:
|
||||
#define U_SHOW_CPLUSPLUS_API 0
|
||||
// Default configuration:
|
||||
// #define U_SHOW_CPLUSPLUS_HEADER_API 1
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "unicode/utfiterator.h"
|
||||
#include "intltest.h"
|
||||
|
||||
// Makes u"literal"sv std::u16string_view literals possible.
|
||||
// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
|
||||
using namespace std::string_view_literals;
|
||||
|
||||
using U_HEADER_ONLY_NAMESPACE::UTFIterator;
|
||||
using U_HEADER_ONLY_NAMESPACE::utfIterator;
|
||||
using U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints;
|
||||
using U_HEADER_ONLY_NAMESPACE::utfStringCodePoints;
|
||||
using U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator;
|
||||
using U_HEADER_ONLY_NAMESPACE::unsafeUTFIterator;
|
||||
using U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints;
|
||||
using U_HEADER_ONLY_NAMESPACE::unsafeUTFStringCodePoints;
|
||||
|
||||
#if 0
|
||||
// Sample code for API docs etc. Compile when changing samples or APIs.
|
||||
#include <iostream>
|
||||
|
||||
int32_t rangeLoop16(std::u16string_view s) {
|
||||
// We are just adding up the code points for minimal-code demonstration purposes.
|
||||
int32_t sum = 0;
|
||||
for (auto units : utfStringCodePoints<UChar32, UTF_BEHAVIOR_NEGATIVE>(s)) {
|
||||
sum += units.codePoint(); // < 0 if ill-formed
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
int32_t loopIterPlusPlus16(std::u16string_view s) {
|
||||
auto range = utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(s);
|
||||
int32_t sum = 0;
|
||||
for (auto iter = range.begin(), limit = range.end(); iter != limit;) {
|
||||
sum += (*iter++).codePoint(); // U+FFFD if ill-formed
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
int32_t backwardLoop16(std::u16string_view s) {
|
||||
auto range = utfStringCodePoints<UChar32, UTF_BEHAVIOR_SURROGATE>(s);
|
||||
int32_t sum = 0;
|
||||
for (auto start = range.begin(), iter = range.end(); start != iter;) {
|
||||
sum += (*--iter).codePoint(); // surrogate code point if unpaired / ill-formed
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
int32_t reverseLoop8(std::string_view s) {
|
||||
auto range = utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(s);
|
||||
int32_t sum = 0;
|
||||
for (auto iter = range.rbegin(), limit = range.rend(); iter != limit; ++iter) {
|
||||
sum += iter->codePoint(); // U+FFFD if ill-formed
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
int32_t countCodePoints16(std::u16string_view s) {
|
||||
auto range = utfStringCodePoints<UChar32, UTF_BEHAVIOR_SURROGATE>(s);
|
||||
return std::distance(range.begin(), range.end());
|
||||
}
|
||||
|
||||
int32_t unsafeRangeLoop16(std::u16string_view s) {
|
||||
int32_t sum = 0;
|
||||
for (auto units : unsafeUTFStringCodePoints<UChar32>(s)) {
|
||||
sum += units.codePoint();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
int32_t unsafeReverseLoop8(std::string_view s) {
|
||||
auto range = unsafeUTFStringCodePoints<UChar32>(s);
|
||||
int32_t sum = 0;
|
||||
for (auto iter = range.rbegin(), limit = range.rend(); iter != limit; ++iter) {
|
||||
sum += iter->codePoint();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
char32_t firstCodePointOrFFFD16(std::u16string_view s) {
|
||||
if (s.empty()) { return 0xfffd; }
|
||||
auto range = utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(s);
|
||||
return range.begin()->codePoint();
|
||||
}
|
||||
|
||||
std::string_view firstSequence8(std::string_view s) {
|
||||
if (s.empty()) { return {}; }
|
||||
auto range = utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(s);
|
||||
auto units = *(range.begin());
|
||||
if (units.wellFormed()) {
|
||||
return units.stringView();
|
||||
} else {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
template<typename InputStream> // some istream or streambuf
|
||||
std::u32string cpFromInput(InputStream &in) {
|
||||
// This is a single-pass input_iterator.
|
||||
std::istreambuf_iterator bufIter(in);
|
||||
std::istreambuf_iterator<typename InputStream::char_type> bufLimit;
|
||||
auto iter = utfIterator<char32_t, UTF_BEHAVIOR_FFFD>(bufIter);
|
||||
auto limit = utfIterator<char32_t, UTF_BEHAVIOR_FFFD>(bufLimit);
|
||||
std::u32string s32;
|
||||
for (; iter != limit; ++iter) {
|
||||
s32.push_back(iter->codePoint());
|
||||
}
|
||||
return s32;
|
||||
}
|
||||
|
||||
// Sample: decode the code points read from the narrow standard input stream.
std::u32string cpFromStdin() {
    return cpFromInput(std::cin);
}

// Sample: decode the code points read from the wide standard input stream.
std::u32string cpFromWideStdin() {
    return cpFromInput(std::wcin);
}
|
||||
|
||||
#endif  // 0 (sample code)
|
||||
|
||||
namespace {
|
||||
|
||||
template<typename Unit>
class SinglePassIter;

// Cursor state shared by all single-pass iterators created from one source.
// Similar to https://en.cppreference.com/w/cpp/iterator/istreambuf_iterator
// but the iterators only implement LegacyIterator (* and ++) without post-increment.
template<typename Unit>
class SinglePassSource {
public:
    explicit SinglePassSource(std::basic_string_view<Unit> s) :
            p(s.data()), limit(s.data() + s.length()) {}

    // Returns an iterator that reads from, and advances, this source's shared cursor.
    SinglePassIter<Unit> begin() { return SinglePassIter<Unit>(*this); }
    // Returns the limit sentinel, which is not attached to any source.
    SinglePassIter<Unit> end() { return SinglePassIter<Unit>(); }

private:
    template<typename U>
    friend class SinglePassIter;

    const Unit *p;  // current position; incremented by the iterators
    const Unit *limit;  // one past the last code unit
};

// Move-only, single-pass code unit iterator over a SinglePassSource.
template<typename Unit>
class SinglePassIter {
public:
    using value_type = Unit;
    using reference = Unit &;
    using pointer = Unit *;
    using difference_type = std::ptrdiff_t;
    // This is a LegacyIterator but there is no specific category for that,
    // so we claim it to be a LegacyInputIterator. It *is* single-pass.
    using iterator_category = std::input_iterator_tag;

    explicit SinglePassIter(SinglePassSource<Unit> &source) : src(&source) {}
    // limit sentinel
    SinglePassIter() : src(nullptr) {}

    // movable
    SinglePassIter(SinglePassIter &&other) noexcept = default;
    SinglePassIter &operator=(SinglePassIter &&other) noexcept = default;

    // not copyable
    SinglePassIter(const SinglePassIter &other) = delete;
    SinglePassIter &operator=(const SinglePassIter &other) = delete;

    // Two exhausted iterators (or sentinels) are equal;
    // otherwise both must be live and at the same position.
    bool operator==(const SinglePassIter &other) const {
        if (isDone()) {
            return other.isDone();
        }
        if (other.isDone()) {
            return false;
        }
        return src->p == other.src->p;
    }
    bool operator!=(const SinglePassIter &other) const { return !(*this == other); }

    // Returns the current code unit by value.
    Unit operator*() const { return *(src->p); }
    // pre-increment: advances the cursor shared with the source
    SinglePassIter &operator++() {
        ++(src->p);
        return *this;
    }
    // *no* post-increment

private:
    // True for the sentinel and for an iterator whose source has reached its limit.
    bool isDone() const { return src == nullptr || src->p == src->limit; }

    SinglePassSource<Unit> *src;
};
|
||||
|
||||
// Minimal multi-pass forward iterator over code units.
// It wraps a pointer but exposes only the forward-iterator operations.
template<typename Unit>
class FwdIter {
public:
    typedef Unit value_type;
    typedef Unit &reference;
    typedef Unit *pointer;
    typedef std::ptrdiff_t difference_type;
    // https://en.cppreference.com/w/cpp/named_req/ForwardIterator#Multi-pass_guarantee
    typedef std::forward_iterator_tag iterator_category;

    explicit FwdIter(const Unit *data) : p(data) {}
    // Default-constructed iterators hold nullptr (see the member initializer)
    // and therefore compare equal to each other.
    FwdIter() = default;

    bool operator==(const FwdIter &other) const { return p == other.p; }
    bool operator!=(const FwdIter &other) const { return !operator==(other); }

    // Returns the current code unit by value.
    Unit operator*() const { return *p; }
    FwdIter &operator++() {  // pre-increment
        ++p;
        return *this;
    }
    FwdIter operator++(int) {  // post-increment
        FwdIter result(*this);
        ++p;
        return result;
    }

private:
    // Initialized so that a default-initialized iterator never carries an
    // indeterminate pointer into operator==.
    const Unit *p = nullptr;
};
|
||||
|
||||
// Splits s at every '|' code unit and returns the pieces in order.
// The result always has at least one element: the remainder after the last
// separator, which may be empty.
template<typename StringView>
std::vector<StringView> split(StringView s) {
    using Unit = typename StringView::value_type;
    constexpr Unit separator = static_cast<Unit>(u'|');
    std::vector<StringView> pieces;
    for (auto bar = s.find(separator); bar != StringView::npos; bar = s.find(separator)) {
        pieces.push_back(s.substr(0, bar));
        s.remove_prefix(bar + 1);  // also drop the separator itself
    }
    pieces.push_back(s);
    return pieces;
}
|
||||
|
||||
// Concatenates all parts, in order, into one string.
// The vector is taken by const reference so that the caller's vector is not
// copied, and the result buffer is sized once up front.
template<typename Unit>
std::basic_string<Unit> join(const std::vector<std::basic_string_view<Unit>> &parts) {
    size_t totalLength = 0;
    for (const auto &part : parts) {
        totalLength += part.length();
    }
    std::basic_string<Unit> result;
    result.reserve(totalLength);
    for (const auto &part : parts) {
        result.append(part);
    }
    return result;
}
|
||||
|
||||
// Builds a std::string from byte values given as ints.
// Avoids having to cast each byte value to char or uint8_t or similar.
std::string string8FromBytes(const int bytes[], size_t length) {
    std::string result;
    result.reserve(length);  // the final size is known: one char per input value
    for (size_t i = 0; i < length; ++i) {
        result.push_back(static_cast<char>(bytes[i]));
    }
    return result;
}
|
||||
|
||||
// Returns a copy of x with its elements in reverse order.
// x is already a by-value copy, so it is reversed in place and returned
// instead of being copied a second time.
template<typename T>
T reverseCopy(T x) {
    std::reverse(x.begin(), x.end());
    return x;
}
|
||||
|
||||
// Use SAFE when we don't care about ILL_FORMED vs. WELL_FORMED.
|
||||
enum TestMode { SAFE, ILL_FORMED, WELL_FORMED, UNSAFE };
|
||||
enum IterType { INPUT, FWD, CONTIG };
|
||||
|
||||
// Use this don't-care behavior value for unsafe iterators that do not use the behavior tparam.
|
||||
constexpr auto ANY_B = UTF_BEHAVIOR_FFFD;
|
||||
|
||||
// Test data for the long linear iteration tests.
// testLongLinear() indexes parts and codePoints in parallel:
// the i-th iteration result is checked against parts[i] and codePoints[i].
template<typename Unit>
struct ImplTest {
    // The whole string that is iterated over.
    std::basic_string<Unit> str;
    // The expected code unit sequence for each iteration step.
    std::vector<std::basic_string<Unit>> parts;
    // The expected code point for each iteration step.
    std::u32string codePoints;
};
|
||||
|
||||
} // namespace
|
||||
|
||||
class UTFIteratorTest : public IntlTest {
|
||||
public:
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) override {
|
||||
if (exec) { logln("TestSuite UTFIteratorTest: "); }
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
|
||||
TESTCASE_AUTO(testSafe16Good);
|
||||
TESTCASE_AUTO(testSafe16Negative);
|
||||
TESTCASE_AUTO(testSafe16FFFD);
|
||||
TESTCASE_AUTO(testSafe16Surrogate);
|
||||
TESTCASE_AUTO(testUnsafe16);
|
||||
|
||||
TESTCASE_AUTO(testSafe16SinglePassIterGood);
|
||||
TESTCASE_AUTO(testSafe16SinglePassIterNegative);
|
||||
TESTCASE_AUTO(testUnsafe16SinglePassIter);
|
||||
|
||||
TESTCASE_AUTO(testSafe16FwdIter);
|
||||
TESTCASE_AUTO(testUnsafe16FwdIter);
|
||||
|
||||
TESTCASE_AUTO(testSafe8Good);
|
||||
TESTCASE_AUTO(testSafe8Negative);
|
||||
TESTCASE_AUTO(testSafe8FFFD);
|
||||
TESTCASE_AUTO(testUnsafe8);
|
||||
|
||||
TESTCASE_AUTO(testSafe8SinglePassIterGood);
|
||||
TESTCASE_AUTO(testSafe8SinglePassIterFFFD);
|
||||
TESTCASE_AUTO(testUnsafe8SinglePassIter);
|
||||
|
||||
TESTCASE_AUTO(testSafe8FwdIter);
|
||||
TESTCASE_AUTO(testUnsafe8FwdIter);
|
||||
|
||||
TESTCASE_AUTO(testSafe32Good);
|
||||
TESTCASE_AUTO(testSafe32Negative);
|
||||
TESTCASE_AUTO(testSafe32FFFD);
|
||||
TESTCASE_AUTO(testSafe32Surrogate);
|
||||
TESTCASE_AUTO(testUnsafe32);
|
||||
|
||||
TESTCASE_AUTO(testSafe32SinglePassIterGood);
|
||||
TESTCASE_AUTO(testSafe32SinglePassIterSurrogate);
|
||||
TESTCASE_AUTO(testUnsafe32SinglePassIter);
|
||||
|
||||
TESTCASE_AUTO(testSafe32FwdIter);
|
||||
TESTCASE_AUTO(testUnsafe32FwdIter);
|
||||
|
||||
TESTCASE_AUTO(testSafe16LongLinearContig);
|
||||
TESTCASE_AUTO(testSafe8LongLinearContig);
|
||||
TESTCASE_AUTO(testSafe32LongLinearContig);
|
||||
|
||||
TESTCASE_AUTO(testUnsafe16LongLinearContig);
|
||||
TESTCASE_AUTO(testUnsafe8LongLinearContig);
|
||||
TESTCASE_AUTO(testUnsafe32LongLinearContig);
|
||||
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
template<typename CP32, UTFIllFormedBehavior behavior, typename StringView>
|
||||
CP32 sub(StringView part) {
|
||||
switch (behavior) {
|
||||
case UTF_BEHAVIOR_NEGATIVE: return U_SENTINEL;
|
||||
case UTF_BEHAVIOR_FFFD: return 0xfffd;
|
||||
case UTF_BEHAVIOR_SURROGATE: {
|
||||
auto c = part[0];
|
||||
return U_IS_SURROGATE(c) ? c : 0xfffd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
|
||||
void testBidiIter(StringView piped);
|
||||
|
||||
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior,
|
||||
typename StringView, typename CodePoints>
|
||||
void testBidiIter(StringView sv, const std::vector<StringView> &parts, CodePoints range);
|
||||
|
||||
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
|
||||
void testSinglePassIter(StringView piped);
|
||||
|
||||
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior,
|
||||
typename StringView, typename Iter>
|
||||
void testSinglePassIter(const std::vector<StringView> &parts, Iter &iter, const Iter &rangeLimit);
|
||||
|
||||
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
|
||||
void testFwdIter(StringView piped);
|
||||
|
||||
template<TestMode mode, typename StringView, typename UnitIter, typename Iter>
|
||||
void testFwdIter(const std::vector<StringView> &parts, UnitIter goodLimit,
|
||||
Iter iter, Iter rangeLimit);
|
||||
|
||||
static constexpr std::u16string_view good16{u"a|b|ç|カ|🚴"};
|
||||
static const char *good8Chars;
|
||||
static constexpr std::u32string_view good32{U"a|b|ç|カ|🚴"};
|
||||
|
||||
static constexpr char16_t badChars16[] = {
|
||||
u'a', u'|', 0xd900, u'|', u'ç', u'|', 0xdc05, u'|', u"🚴"[0], u"🚴"[1]
|
||||
};
|
||||
static constexpr std::u16string_view bad16{badChars16, std::size(badChars16)};
|
||||
|
||||
static constexpr int badChars8[] = {
|
||||
u8'a', u8'|', 0xe0, 0xa0, u8'|', u8"ç"[0], u8"ç"[1], u8'|',
|
||||
0xf4, 0x8f, 0xbf, u8'|', u8"🚴"[0], u8"🚴"[1], u8"🚴"[2], u8"🚴"[3]
|
||||
};
|
||||
|
||||
static constexpr char32_t badChars32[] = {
|
||||
u'a', u'|', 0xd900, u'|', u'ç', u'|', 0x110000, u'|', U'🚴'
|
||||
};
|
||||
static constexpr std::u32string_view bad32{badChars32, std::size(badChars32)};
|
||||
|
||||
void testSafe16Good() {
|
||||
testBidiIter<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(good16);
|
||||
}
|
||||
void testSafe16Negative() {
|
||||
testBidiIter<ILL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(bad16);
|
||||
}
|
||||
void testSafe16FFFD() {
|
||||
testBidiIter<ILL_FORMED, char32_t, UTF_BEHAVIOR_FFFD>(bad16);
|
||||
}
|
||||
void testSafe16Surrogate() {
|
||||
testBidiIter<ILL_FORMED, uint32_t, UTF_BEHAVIOR_SURROGATE>(bad16);
|
||||
}
|
||||
void testUnsafe16() {
|
||||
testBidiIter<UNSAFE, UChar32, ANY_B>(good16);
|
||||
}
|
||||
|
||||
void testSafe16SinglePassIterGood() {
|
||||
testSinglePassIter<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(good16);
|
||||
}
|
||||
void testSafe16SinglePassIterNegative() {
|
||||
testSinglePassIter<ILL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(bad16);
|
||||
}
|
||||
void testUnsafe16SinglePassIter() {
|
||||
testSinglePassIter<UNSAFE, UChar32, ANY_B>(good16);
|
||||
}
|
||||
|
||||
void testSafe16FwdIter() {
|
||||
testFwdIter<SAFE, UChar32, UTF_BEHAVIOR_NEGATIVE>(good16);
|
||||
}
|
||||
void testUnsafe16FwdIter() {
|
||||
testFwdIter<UNSAFE, UChar32, ANY_B>(good16);
|
||||
}
|
||||
|
||||
void testSafe8Good() {
|
||||
testBidiIter<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(std::string_view{good8Chars});
|
||||
}
|
||||
void testSafe8Negative() {
|
||||
testBidiIter<ILL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(
|
||||
std::string_view(string8FromBytes(badChars8, std::size(badChars8))));
|
||||
}
|
||||
void testSafe8FFFD() {
|
||||
testBidiIter<ILL_FORMED, char32_t, UTF_BEHAVIOR_FFFD>(
|
||||
std::string_view(string8FromBytes(badChars8, std::size(badChars8))));
|
||||
}
|
||||
void testUnsafe8() {
|
||||
testBidiIter<UNSAFE, UChar32, ANY_B>(std::string_view{good8Chars});
|
||||
}
|
||||
|
||||
void testSafe8SinglePassIterGood() {
|
||||
testSinglePassIter<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(
|
||||
std::string_view{good8Chars});
|
||||
}
|
||||
void testSafe8SinglePassIterFFFD() {
|
||||
testSinglePassIter<ILL_FORMED, char32_t, UTF_BEHAVIOR_FFFD>(
|
||||
std::string_view(string8FromBytes(badChars8, std::size(badChars8))));
|
||||
}
|
||||
void testUnsafe8SinglePassIter() {
|
||||
testSinglePassIter<UNSAFE, UChar32, ANY_B>(std::string_view{good8Chars});
|
||||
}
|
||||
|
||||
void testSafe8FwdIter() {
|
||||
testFwdIter<SAFE, UChar32, UTF_BEHAVIOR_NEGATIVE>(std::string_view{good8Chars});
|
||||
}
|
||||
void testUnsafe8FwdIter() {
|
||||
testFwdIter<UNSAFE, UChar32, ANY_B>(std::string_view{good8Chars});
|
||||
}
|
||||
|
||||
void testSafe32Good() {
|
||||
testBidiIter<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(good32);
|
||||
}
|
||||
void testSafe32Negative() {
|
||||
testBidiIter<ILL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(bad32);
|
||||
}
|
||||
void testSafe32FFFD() {
|
||||
testBidiIter<ILL_FORMED, char32_t, UTF_BEHAVIOR_FFFD>(bad32);
|
||||
}
|
||||
void testSafe32Surrogate() {
|
||||
testBidiIter<ILL_FORMED, uint32_t, UTF_BEHAVIOR_SURROGATE>(bad32);
|
||||
}
|
||||
void testUnsafe32() {
|
||||
testBidiIter<UNSAFE, UChar32, ANY_B>(good32);
|
||||
}
|
||||
|
||||
void testSafe32SinglePassIterGood() {
|
||||
testSinglePassIter<WELL_FORMED, UChar32, UTF_BEHAVIOR_NEGATIVE>(good32);
|
||||
}
|
||||
void testSafe32SinglePassIterSurrogate() {
|
||||
testSinglePassIter<ILL_FORMED, uint32_t, UTF_BEHAVIOR_SURROGATE>(bad32);
|
||||
}
|
||||
void testUnsafe32SinglePassIter() {
|
||||
testSinglePassIter<UNSAFE, UChar32, ANY_B>(good32);
|
||||
}
|
||||
|
||||
void testSafe32FwdIter() {
|
||||
testFwdIter<SAFE, UChar32, UTF_BEHAVIOR_NEGATIVE>(good32);
|
||||
}
|
||||
void testUnsafe32FwdIter() {
|
||||
testFwdIter<UNSAFE, UChar32, ANY_B>(good32);
|
||||
}
|
||||
|
||||
// implementation code coverage ---------------------------------------- ***
|
||||
|
||||
void initLong();
|
||||
|
||||
template<TestMode mode, UTFIllFormedBehavior behavior, IterType type, typename Unit, typename Units>
|
||||
void checkUnits(const Units &units, std::basic_string_view<Unit> part, UChar32 expectedCP);
|
||||
|
||||
template<TestMode mode, UTFIllFormedBehavior behavior, IterType type, typename Unit, typename Iter>
|
||||
void testLongLinear(const ImplTest<Unit> &test, Iter begin, Iter end) {
|
||||
for (size_t i = 0; begin != end; ++i, ++begin) {
|
||||
checkUnits<mode, behavior, type, Unit>(*begin, test.parts[i], test.codePoints[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template<TestMode mode, UTFIllFormedBehavior behavior, typename Unit>
|
||||
void testLongLinearContig(const ImplTest<Unit> &test) {
|
||||
initLong();
|
||||
if constexpr (mode == UNSAFE) {
|
||||
auto range = unsafeUTFStringCodePoints<UChar32>(test.str);
|
||||
testLongLinear<mode, behavior, CONTIG, Unit>(test, range.begin(), range.end());
|
||||
} else {
|
||||
auto range = utfStringCodePoints<UChar32, behavior>(test.str);
|
||||
testLongLinear<mode, behavior, CONTIG, Unit>(test, range.begin(), range.end());
|
||||
}
|
||||
}
|
||||
|
||||
void testSafe16LongLinearContig() {
|
||||
testLongLinearContig<SAFE, UTF_BEHAVIOR_SURROGATE, char16_t>(longBad16);
|
||||
}
|
||||
void testSafe8LongLinearContig() {
|
||||
testLongLinearContig<SAFE, UTF_BEHAVIOR_NEGATIVE, char>(longBad8);
|
||||
}
|
||||
void testSafe32LongLinearContig() {
|
||||
testLongLinearContig<SAFE, UTF_BEHAVIOR_SURROGATE, char32_t>(longBad32);
|
||||
}
|
||||
|
||||
void testUnsafe16LongLinearContig() {
|
||||
testLongLinearContig<UNSAFE, ANY_B, char16_t>(longGood16);
|
||||
}
|
||||
void testUnsafe8LongLinearContig() {
|
||||
testLongLinearContig<UNSAFE, ANY_B, char>(longGood8);
|
||||
}
|
||||
void testUnsafe32LongLinearContig() {
|
||||
testLongLinearContig<UNSAFE, ANY_B, char32_t>(longGood32);
|
||||
}
|
||||
|
||||
ImplTest<char> longGood8;
|
||||
ImplTest<char16_t> longGood16;
|
||||
ImplTest<char32_t> longGood32;
|
||||
ImplTest<char> longBad8;
|
||||
ImplTest<char16_t> longBad16;
|
||||
ImplTest<char32_t> longBad32;
|
||||
};
|
||||
|
||||
const char *UTFIteratorTest::good8Chars = reinterpret_cast<const char *>(u8"a|b|ç|カ|🚴");
|
||||
|
||||
// Factory for the test suite; returns a new UTFIteratorTest allocated with `new`.
extern IntlTest *createUTFIteratorTest() {
    return new UTFIteratorTest();
}
|
||||
|
||||
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
|
||||
void UTFIteratorTest::testBidiIter(StringView piped) {
|
||||
using Unit = typename StringView::value_type;
|
||||
auto parts = split(piped);
|
||||
auto joined = join<Unit>(parts);
|
||||
StringView sv(joined);
|
||||
// "abçカ🚴"
|
||||
// or
|
||||
// "a?ç?🚴" where the ? sequences are ill-formed
|
||||
if constexpr (mode == UNSAFE) {
|
||||
auto range = unsafeUTFStringCodePoints<CP32>(sv);
|
||||
testBidiIter<mode, CP32, behavior>(sv, parts, range);
|
||||
} else {
|
||||
auto range = utfStringCodePoints<CP32, behavior>(sv);
|
||||
testBidiIter<mode, CP32, behavior>(sv, parts, range);
|
||||
}
|
||||
}
|
||||
|
||||
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior,
|
||||
typename StringView, typename CodePoints>
|
||||
void UTFIteratorTest::testBidiIter(
|
||||
StringView sv, const std::vector<StringView> &parts, CodePoints range) {
|
||||
constexpr bool isWellFormed = mode != ILL_FORMED;
|
||||
auto last = parts[4];
|
||||
auto iter = range.begin();
|
||||
assertTrue(
|
||||
"bidirectional_iterator_tag",
|
||||
std::is_same_v<
|
||||
typename std::iterator_traits<decltype(iter)>::iterator_category,
|
||||
std::bidirectional_iterator_tag>);
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
++iter; // pre-increment
|
||||
auto units = *iter;
|
||||
CP32 expectedCP = isWellFormed ? u'b' : sub<CP32, behavior>(parts[1]);
|
||||
assertEquals("iter[1] * codePoint", expectedCP, units.codePoint());
|
||||
assertEquals("iter[1] * length", parts[1].length(), units.length());
|
||||
if constexpr (mode != UNSAFE) {
|
||||
assertEquals("iter[1] * wellFormed", isWellFormed, units.wellFormed());
|
||||
}
|
||||
assertTrue("iter[1] * stringView()", units.stringView() == parts[1]);
|
||||
auto unitsIter = units.begin();
|
||||
for (auto c : parts[1]) {
|
||||
assertEquals("iter[1] * begin()[i]",
|
||||
static_cast<UChar32>(c), static_cast<UChar32>(*unitsIter++));
|
||||
}
|
||||
assertTrue("iter[1] * end()[0]", *units.end() == parts[2][0]);
|
||||
++iter;
|
||||
assertEquals("iter[2] * codePoint", u'ç', (*iter++).codePoint()); // post-increment
|
||||
units = *iter++; // post-increment
|
||||
expectedCP = isWellFormed ? u'カ' : sub<CP32, behavior>(parts[3]);
|
||||
assertEquals("iter[3] * codePoint", expectedCP, units.codePoint());
|
||||
if constexpr (mode != UNSAFE) {
|
||||
assertEquals("iter[3] * wellFormed", isWellFormed, units.wellFormed());
|
||||
}
|
||||
// Fetch the current code point twice.
|
||||
assertEquals("iter[4.0] * codePoint", U'🚴', (*iter).codePoint());
|
||||
units = *iter++; // post-increment
|
||||
assertEquals("iter[4] * codePoint", U'🚴', units.codePoint());
|
||||
assertEquals("iter[4] * length", last.length(), units.length());
|
||||
if constexpr (mode != UNSAFE) {
|
||||
assertTrue("iter[4] * wellFormed", units.wellFormed());
|
||||
}
|
||||
assertTrue("iter[4] * stringView()", units.stringView() == last);
|
||||
unitsIter = units.begin();
|
||||
for (auto c : last) {
|
||||
assertEquals("iter[back 4] * begin()[i]",
|
||||
static_cast<UChar32>(c), static_cast<UChar32>(*unitsIter++));
|
||||
}
|
||||
assertTrue("iter[4] * end() == endIter", units.end() == sv.end());
|
||||
assertTrue("iter == endIter", iter == range.end());
|
||||
// backwards
|
||||
units = *--iter; // pre-decrement
|
||||
assertEquals("iter[back 4] * codePoint", U'🚴', units.codePoint());
|
||||
assertEquals("iter[back 4] * length", last.length(), units.length());
|
||||
if constexpr (mode != UNSAFE) {
|
||||
assertTrue("iter[back 4] * wellFormed", units.wellFormed());
|
||||
}
|
||||
assertTrue("iter[back 4] * stringView()", units.stringView() == last);
|
||||
unitsIter = units.begin();
|
||||
for (auto c : last) {
|
||||
assertEquals("iter[back 4] * begin()[i]",
|
||||
static_cast<UChar32>(c), static_cast<UChar32>(*unitsIter++));
|
||||
}
|
||||
assertTrue("iter[back 4] * end() == endIter", units.end() == sv.end());
|
||||
--iter;
|
||||
if constexpr (mode != UNSAFE) {
|
||||
assertEquals("iter[back 3] -> wellFormed", isWellFormed, iter->wellFormed());
|
||||
}
|
||||
assertEquals("iter[back 3] * codePoint", expectedCP, (*iter--).codePoint()); // post-decrement
|
||||
assertEquals("iter[back 2] * codePoint", u'ç', (*iter).codePoint());
|
||||
assertEquals("iter[back 2] -> length", parts[2].length(), iter->length());
|
||||
if constexpr (mode != UNSAFE) {
|
||||
assertTrue("iter[back 2] -> wellFormed", iter->wellFormed());
|
||||
}
|
||||
units = *--iter;
|
||||
expectedCP = isWellFormed ? u'b' : sub<CP32, behavior>(parts[1]);
|
||||
assertEquals("iter[back 1] * codePoint", expectedCP, units.codePoint());
|
||||
if constexpr (mode != UNSAFE) {
|
||||
assertEquals("iter[back 1] * wellFormed", isWellFormed, units.wellFormed());
|
||||
}
|
||||
assertTrue("iter[back 1] * stringView()", units.stringView() == parts[1]);
|
||||
--iter;
|
||||
assertEquals("iter[back 0] -> codePoint", u'a', iter->codePoint());
|
||||
assertTrue("iter[back 0] -> begin() == beginIter", iter->begin() == sv.begin());
|
||||
assertTrue("iter == beginIter", iter == range.begin());
|
||||
}
|
||||
|
||||
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
|
||||
void UTFIteratorTest::testSinglePassIter(StringView piped) {
|
||||
using Unit = typename StringView::value_type;
|
||||
auto parts = split(piped);
|
||||
auto joined = join<Unit>(parts);
|
||||
SinglePassSource<Unit> good(joined);
|
||||
// "abçカ🚴"
|
||||
// or
|
||||
// "a?ç?🚴" where the ? sequences are ill-formed
|
||||
if constexpr (mode == UNSAFE) {
|
||||
auto iter = unsafeUTFIterator<CP32>(good.begin());
|
||||
auto rangeLimit = unsafeUTFIterator<CP32>(good.end());
|
||||
testSinglePassIter<mode, CP32, behavior>(parts, iter, rangeLimit);
|
||||
} else {
|
||||
auto iter = utfIterator<CP32, behavior>(good.begin(), good.end());
|
||||
auto rangeLimit = utfIterator<CP32, behavior>(good.end(), good.end());
|
||||
testSinglePassIter<mode, CP32, behavior>(parts, iter, rangeLimit);
|
||||
}
|
||||
}
|
||||
|
||||
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior,
|
||||
typename StringView, typename Iter>
|
||||
void UTFIteratorTest::testSinglePassIter(
|
||||
const std::vector<StringView> &parts, Iter &iter, const Iter &rangeLimit) {
|
||||
constexpr bool isWellFormed = mode != ILL_FORMED;
|
||||
assertTrue(
|
||||
"input_iterator_tag",
|
||||
std::is_same_v<
|
||||
typename std::iterator_traits<std::remove_reference_t<decltype(iter)>>::
|
||||
iterator_category,
|
||||
std::input_iterator_tag>);
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
++iter; // pre-increment
|
||||
auto units = *iter;
|
||||
CP32 expectedCP = isWellFormed ? u'b' : sub<CP32, behavior>(parts[1]);
|
||||
assertEquals("iter[1] * codePoint", expectedCP, units.codePoint());
|
||||
assertEquals("iter[1] * length", parts[1].length(), units.length());
|
||||
if constexpr (mode != UNSAFE) {
|
||||
assertEquals("iter[1] * wellFormed", isWellFormed, units.wellFormed());
|
||||
}
|
||||
// No units.stringView() when the unit iterator is not a pointer.
|
||||
// No begin() for a single-pass unit iterator.
|
||||
++iter;
|
||||
assertEquals("iter[2] * codePoint", u'ç', (*iter++).codePoint()); // post-increment
|
||||
expectedCP = isWellFormed ? u'カ' : sub<CP32, behavior>(parts[3]);
|
||||
assertEquals("iter[3] -> codePoint", expectedCP, iter->codePoint());
|
||||
++iter;
|
||||
// Fetch the current code point twice.
|
||||
assertEquals("iter[4.0] * codePoint", U'🚴', (*iter).codePoint());
|
||||
units = *iter++;
|
||||
assertEquals("iter[4] * codePoint", U'🚴', units.codePoint());
|
||||
assertEquals("iter[4] * length", parts[4].length(), units.length());
|
||||
if constexpr (mode != UNSAFE) {
|
||||
assertTrue("iter[4] * wellFormed", units.wellFormed());
|
||||
}
|
||||
assertTrue("iter == endIter", iter == rangeLimit);
|
||||
}
|
||||
|
||||
template<TestMode mode, typename CP32, UTFIllFormedBehavior behavior, typename StringView>
|
||||
void UTFIteratorTest::testFwdIter(StringView piped) {
|
||||
using Unit = typename StringView::value_type;
|
||||
auto parts = split(piped);
|
||||
auto joined = join<Unit>(parts);
|
||||
// "abçカ🚴"
|
||||
FwdIter<Unit> goodBegin(joined.data());
|
||||
FwdIter<Unit> goodLimit(joined.data() + joined.length());
|
||||
if constexpr (mode == UNSAFE) {
|
||||
auto iter = unsafeUTFIterator<CP32>(goodBegin);
|
||||
auto rangeLimit = unsafeUTFIterator<CP32>(goodLimit);
|
||||
testFwdIter<mode, StringView>(parts, goodLimit, iter, rangeLimit);
|
||||
} else {
|
||||
auto iter = utfIterator<CP32, behavior>(goodBegin, goodLimit);
|
||||
auto rangeLimit = utfIterator<CP32, behavior>(goodLimit);
|
||||
testFwdIter<mode, StringView>(parts, goodLimit, iter, rangeLimit);
|
||||
}
|
||||
}
|
||||
|
||||
template<TestMode mode, typename StringView, typename UnitIter, typename Iter>
|
||||
void UTFIteratorTest::testFwdIter(const std::vector<StringView> &parts, UnitIter goodLimit,
|
||||
Iter iter, Iter rangeLimit) {
|
||||
assertTrue(
|
||||
"forward_iterator_tag",
|
||||
std::is_same_v<
|
||||
typename std::iterator_traits<decltype(iter)>::iterator_category,
|
||||
std::forward_iterator_tag>);
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
++iter; // pre-increment
|
||||
auto units = *iter;
|
||||
assertEquals("iter[1] * codePoint", u'b', units.codePoint());
|
||||
assertEquals("iter[1] * length", parts[1].length(), units.length());
|
||||
if constexpr (mode != UNSAFE) {
|
||||
assertTrue("iter[1] * wellFormed", units.wellFormed());
|
||||
}
|
||||
// No units.stringView() when the unit iterator is not a pointer.
|
||||
auto unitsIter = units.begin();
|
||||
for (auto c : parts[1]) {
|
||||
assertEquals("iter[1] * begin()[i]",
|
||||
static_cast<UChar32>(c), static_cast<UChar32>(*unitsIter++));
|
||||
}
|
||||
assertTrue("iter[1] * end()[0]", *units.end() == parts[2][0]);
|
||||
++iter;
|
||||
assertEquals("iter[2] * codePoint", u'ç', (*iter++).codePoint()); // post-increment
|
||||
assertEquals("iter[3] -> codePoint", u'カ', iter->codePoint());
|
||||
++iter;
|
||||
// Fetch the current code point twice.
|
||||
assertEquals("iter[4.0] * codePoint", U'🚴', (*iter).codePoint());
|
||||
units = *iter++;
|
||||
assertEquals("iter[4] * codePoint", U'🚴', units.codePoint());
|
||||
assertEquals("iter[4] * length", parts[4].length(), units.length());
|
||||
if constexpr (mode != UNSAFE) {
|
||||
assertTrue("iter[4] * wellFormed", units.wellFormed());
|
||||
}
|
||||
unitsIter = units.begin();
|
||||
for (auto c : parts[4]) {
|
||||
assertEquals("iter[back 4] * begin()[i]",
|
||||
static_cast<UChar32>(c), static_cast<UChar32>(*unitsIter++));
|
||||
}
|
||||
assertTrue("iter[4] * end() == endIter", units.end() == goodLimit);
|
||||
assertTrue("iter == endIter", iter == rangeLimit);
|
||||
}
|
||||
|
||||
namespace {

/** Kind of a test part: a code point literal (GOOD), or a BAD8/BAD16/BAD32 unit sequence. */
enum PartType { GOOD, BAD8, BAD16, BAD32 };

/**
 * One entry of the long test data table.
 *
 * A GOOD part stores its code point in c_ and has len_ == 0.
 * Any other part stores 1..3 integer values in u0_..u2_ with len_ counting them;
 * its c_ keeps the default U'?'.
 */
struct Part {
    PartType type_;
    uint8_t len_;
    char32_t c_ = U'?';
    int32_t u0_ = 0;
    int32_t u1_ = 0;
    int32_t u2_ = 0;

    // Deliberately implicit, so that plain character and integer literals
    // can be listed directly in a Part array.
    constexpr Part(char32_t c)
            : type_(GOOD), len_(0), c_(c) {}

    constexpr Part(PartType t, int32_t u0)
            : type_(t), len_(1), u0_(u0) {}

    constexpr Part(PartType t, int32_t u0, int32_t u1)
            : type_(t), len_(2), u0_(u0), u1_(u1) {}

    constexpr Part(PartType t, int32_t u0, int32_t u1, int32_t u2)
            : type_(t), len_(3), u0_(u0), u1_(u1), u2_(u2) {}
};

// Careful: We test with the reverse order of parts as well.
// For that to yield self-contained results, parts must not
// continue sequences across part boundaries in either order.
//
// The table mixes code point literals (among them the characters of "abçカ🚴")
// with BAD8, BAD16 and BAD32 parts.
constexpr Part testParts[] = {
    u'a',
    0x7f,
    0x80,
    Part(BAD8, 0xc0),
    Part(BAD8, 0x80),
    Part(BAD8, 0xc1),
    0,

    Part(BAD8, 0xe0),
    Part(BAD8, 0xe0, 0xa0),
    Part(BAD8, 0xe0, 0xbf),
    Part(BAD8, 0xed, 0x9f),
    // ED A0 xx .. ED BF xx would be surrogate code points
    Part(BAD8, 0xed),
    Part(BAD8, 0xa0),
    Part(BAD8, 0xed),
    Part(BAD8, 0xbf),
    u'ç',
    Part(BAD8, 0xee, 0x80),
    Part(BAD8, 0xef, 0xbf),

    Part(BAD8, 0xf0),
    Part(BAD8, 0x8f),
    u'b',
    Part(BAD8, 0xf0),
    Part(BAD8, 0xf0, 0x90),
    Part(BAD8, 0xf0, 0x90, 0x80),
    Part(BAD8, 0xf4),
    Part(BAD8, 0xf4, 0x8f),
    Part(BAD8, 0xf4, 0x8f, 0xbf),
    Part(BAD8, 0xf5),
    Part(BAD8, 0xbf),
    U'🚴',

    0x7ff,
    0x800,
    0xfff,
    0x1000,
    0xd7ff,
    Part(BAD16, 0xd800),
    Part(BAD16, 0xdbff),
    u'カ',
    Part(BAD16, 0xdc00),
    Part(BAD16, 0xdfff),
    0xe000,
    0xfffd,
    0xffff,

    0x10000,
    0x10ffff,
    Part(BAD32, 0x110000),
    Part(BAD32, -1)
};

}  // namespace
|
||||
|
||||
void UTFIteratorTest::initLong() {
|
||||
if (!longGood32.str.empty()) { return; }
|
||||
for (auto part : testParts) {
|
||||
switch (part.type_) {
|
||||
case GOOD: {
|
||||
char u8[4];
|
||||
size_t len8 = 0;
|
||||
U8_APPEND_UNSAFE(u8, len8, part.c_);
|
||||
longGood8.str.append(u8, len8);
|
||||
longGood8.parts.push_back({u8, len8});
|
||||
longBad8.str.append(u8, len8);
|
||||
longBad8.parts.push_back({u8, len8});
|
||||
longBad8.codePoints.push_back(part.c_);
|
||||
|
||||
char16_t u16[2];
|
||||
size_t len16 = 0;
|
||||
U16_APPEND_UNSAFE(u16, len16, part.c_);
|
||||
longGood16.str.append(u16, len16);
|
||||
longGood16.parts.push_back({u16, len16});
|
||||
longBad16.str.append(u16, len16);
|
||||
longBad16.parts.push_back({u16, len16});
|
||||
longBad16.codePoints.push_back(part.c_);
|
||||
|
||||
longGood32.str.push_back(part.c_);
|
||||
longGood32.parts.push_back({&part.c_, 1});
|
||||
longBad32.str.push_back(part.c_);
|
||||
longBad32.parts.push_back({&part.c_, 1});
|
||||
longBad32.codePoints.push_back(part.c_);
|
||||
break;
|
||||
}
|
||||
case BAD8: {
|
||||
char u8[3] = {
|
||||
static_cast<char>(part.u0_),
|
||||
static_cast<char>(part.u1_),
|
||||
static_cast<char>(part.u2_)
|
||||
};
|
||||
longBad8.str.append(u8, part.len_);
|
||||
longBad8.parts.push_back({u8, part.len_});
|
||||
longBad8.codePoints.push_back(U'?');
|
||||
break;
|
||||
}
|
||||
case BAD16: { // surrogate code unit / code point
|
||||
char16_t u16 = part.u0_;
|
||||
longBad16.str.push_back(u16);
|
||||
longBad16.parts.push_back({&u16, 1});
|
||||
longBad16.codePoints.push_back(U'?');
|
||||
char32_t u32 = part.u0_;
|
||||
longBad32.str.push_back(u32);
|
||||
longBad32.parts.push_back({&u32, 1});
|
||||
longBad32.codePoints.push_back(U'?');
|
||||
break;
|
||||
}
|
||||
case BAD32: {
|
||||
char32_t u32 = part.u0_;
|
||||
longBad32.str.push_back(u32);
|
||||
longBad32.parts.push_back({&u32, 1});
|
||||
longBad32.codePoints.push_back(U'?');
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
longGood8.codePoints = longGood16.codePoints = longGood32.codePoints = longGood32.str;
|
||||
}
|
||||
|
||||
template<TestMode mode, UTFIllFormedBehavior behavior, IterType type, typename Unit, typename Units>
|
||||
void UTFIteratorTest::checkUnits(
|
||||
const Units &units, std::basic_string_view<Unit> part, UChar32 expectedCP) {
|
||||
printf("U+%04lx\n", (long)units.codePoint());
|
||||
bool expectedWellFormed = true;
|
||||
if (expectedCP == u'?') {
|
||||
expectedCP = sub<UChar32, behavior>(part);
|
||||
expectedWellFormed = false;
|
||||
}
|
||||
assertEquals("cp[i]", expectedCP, units.codePoint());
|
||||
assertEquals("length[i]", part.length(), units.length());
|
||||
if constexpr (mode != UNSAFE) {
|
||||
assertEquals("wellFormed[i]", expectedWellFormed, units.wellFormed());
|
||||
}
|
||||
if constexpr (type >= FWD) {
|
||||
int32_t j = 0;
|
||||
for (Unit unit : units) { // begin()..end()
|
||||
assertEquals("units.iter[i][j]",
|
||||
static_cast<UChar32>(part[j]), static_cast<UChar32>(unit));
|
||||
++j;
|
||||
}
|
||||
assertEquals("units.iter.length[i]", static_cast<int32_t>(part.length()), j);
|
||||
}
|
||||
if constexpr (type >= CONTIG) {
|
||||
assertTrue("stringView[i]", part == units.stringView());
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue