C enum UIllFormedBehavior will be shared with 8-bit

This commit is contained in:
Markus Scherer 2025-01-06 14:48:30 -08:00
parent 633fafafda
commit 70ef2fa0d3
2 changed files with 29 additions and 35 deletions

View file

@ -47,18 +47,15 @@ namespace header {}
#ifndef U_HIDE_DRAFT_API
namespace U_HEADER_ONLY_NAMESPACE {
// Some defined behaviors for handling ill-formed Unicode strings.
// The chosen value is passed as the `behavior` template argument of the
// U16 iterator/range templates and decides what U16IteratorBase::sub() returns.
// TODO: For 8-bit strings, the SURROGATE option does not have an equivalent -- static_assert.
typedef enum UIllFormedBehavior {
// sub() returns U_SENTINEL; the code point type should be signed for this option.
U_BEHAVIOR_NEGATIVE,
// sub() returns 0xfffd (U+FFFD).
U_BEHAVIOR_FFFD,
// sub() returns the surrogate value it was given, unchanged.
U_BEHAVIOR_SURROGATE
} UIllFormedBehavior;
// Some defined behaviors for handling ill-formed 16-bit strings.
// The chosen value is passed as the `behavior` template argument of the
// U16 iterator/range templates and decides what U16IteratorBase::sub() returns.
// TODO: Maybe share with 8-bit strings, but the SURROGATE option does not have an equivalent there.
//
// TODO: A possible alternative to an enum might be some kind of function template
// which would be fully customizable.
enum U16IllFormedBehavior {
// sub() returns U_SENTINEL; the code point type should be signed for this option.
U16_BEHAVIOR_NEGATIVE,
// sub() returns 0xfffd (U+FFFD).
U16_BEHAVIOR_FFFD,
// sub() returns the surrogate value it was given, unchanged.
U16_BEHAVIOR_SURROGATE
};
namespace U_HEADER_ONLY_NAMESPACE {
/**
* A code unit sequence for one code point returned by U16Iterator.
@ -66,7 +63,7 @@ enum U16IllFormedBehavior {
*
* @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
* should be signed if U16_BEHAVIOR_NEGATIVE
* should be signed if U_BEHAVIOR_NEGATIVE
* @draft ICU 77
*/
template<typename Unit16, typename CP32>
@ -89,7 +86,7 @@ struct U16OneSeq {
* Not intended for public subclassing.
* @internal
*/
template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
class U16IteratorBase {
protected:
// @internal
@ -146,9 +143,9 @@ protected:
// @internal
// Selects the code point value to report in place of `surrogate`, based on the
// class template parameter `behavior`.
// Presumably called when `surrogate` is an unpaired surrogate code unit -- the
// call sites are not visible here; confirm against the callers.
// NOTE(review): the switch has no default label and nothing follows it, so some
// compilers may warn that control can reach the end of a non-void function.
CP32 sub(CP32 surrogate) const {
switch (behavior) {
// Labels using the U16_-prefixed enumerators of U16IllFormedBehavior.
case U16_BEHAVIOR_NEGATIVE: return U_SENTINEL;
case U16_BEHAVIOR_FFFD: return 0xfffd;
case U16_BEHAVIOR_SURROGATE: return surrogate;
// Labels using the U_-prefixed enumerators of UIllFormedBehavior.
case U_BEHAVIOR_NEGATIVE: return U_SENTINEL;  // negative sentinel; CP32 should be signed
case U_BEHAVIOR_FFFD: return 0xfffd;  // U+FFFD
case U_BEHAVIOR_SURROGATE: return surrogate;  // pass the input value through unchanged
}
}
@ -167,11 +164,11 @@ protected:
*
* @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
* should be signed if U16_BEHAVIOR_NEGATIVE
* @tparam U16IllFormedBehavior TODO
* should be signed if U_BEHAVIOR_NEGATIVE
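* @tparam behavior How ill-formed input is reported; one of the UIllFormedBehavior values
*         (U_BEHAVIOR_NEGATIVE, U_BEHAVIOR_FFFD, U_BEHAVIOR_SURROGATE)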
* @draft ICU 77
*/
template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
class U16Iterator : private U16IteratorBase<Unit16, CP32, behavior> {
// FYI: We need to qualify all accesses to super class members because of private inheritance.
using Super = U16IteratorBase<Unit16, CP32, behavior>;
@ -224,11 +221,11 @@ public:
*
* @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
* should be signed if U16_BEHAVIOR_NEGATIVE
* @tparam U16IllFormedBehavior TODO
* should be signed if U_BEHAVIOR_NEGATIVE
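* @tparam behavior How ill-formed input is reported; one of the UIllFormedBehavior values
*         (U_BEHAVIOR_NEGATIVE, U_BEHAVIOR_FFFD, U_BEHAVIOR_SURROGATE)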
* @draft ICU 77
*/
template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
class U16ReverseIterator : private U16IteratorBase<Unit16, CP32, behavior> {
using Super = U16IteratorBase<Unit16, CP32, behavior>;
public:
@ -269,11 +266,11 @@ public:
*
* @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
* should be signed if U16_BEHAVIOR_NEGATIVE
* @tparam U16IllFormedBehavior TODO
* should be signed if U_BEHAVIOR_NEGATIVE
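* @tparam behavior How ill-formed input is reported; one of the UIllFormedBehavior values
*         (U_BEHAVIOR_NEGATIVE, U_BEHAVIOR_FFFD, U_BEHAVIOR_SURROGATE)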
* @draft ICU 77
*/
template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
class U16StringCodePoints {
public:
/**
@ -322,7 +319,7 @@ private:
// TODO: remove experimental sample code
#ifndef UTYPES_H
int32_t rangeLoop(std::u16string_view s) {
header::U16StringCodePoints<char16_t, UChar32, header::U16_BEHAVIOR_NEGATIVE> range(s);
header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
int32_t sum = 0;
for (auto seq : range) {
sum += seq.codePoint;
@ -331,7 +328,7 @@ int32_t rangeLoop(std::u16string_view s) {
}
int32_t loopIterPlusPlus(std::u16string_view s) {
header::U16StringCodePoints<char16_t, UChar32, header::U16_BEHAVIOR_NEGATIVE> range(s);
header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
int32_t sum = 0;
auto iter = range.begin();
auto limit = range.end();
@ -342,7 +339,7 @@ int32_t loopIterPlusPlus(std::u16string_view s) {
}
int32_t reverseLoop(std::u16string_view s) {
header::U16StringCodePoints<char16_t, UChar32, header::U16_BEHAVIOR_NEGATIVE> range(s);
header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
int32_t sum = 0;
for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {
sum += (*iter).codePoint;

View file

@ -20,9 +20,6 @@
// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
using namespace std::string_view_literals;
using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_NEGATIVE;
using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_FFFD;
using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_SURROGATE;
using U_HEADER_ONLY_NAMESPACE::U16Iterator;
using U_HEADER_ONLY_NAMESPACE::U16OneSeq;
using U_HEADER_ONLY_NAMESPACE::U16StringCodePoints;
@ -58,7 +55,7 @@ void U16IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&nam
void U16IteratorTest::testGood() {
IcuTestErrorCode errorCode(*this, "testGood");
std::u16string_view good(u"abçカ🚴"sv);
U16StringCodePoints<char16_t, UChar32, U16_BEHAVIOR_NEGATIVE> range(good);
U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
auto iter = range.begin();
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
++iter; // pre-increment
@ -83,7 +80,7 @@ void U16IteratorTest::testNegative() {
IcuTestErrorCode errorCode(*this, "testNegative");
static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
std::u16string_view bad(badChars, 5);
U16StringCodePoints<char16_t, UChar32, U16_BEHAVIOR_NEGATIVE> range(bad);
U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(bad);
auto iter = range.begin();
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
++iter; // pre-increment
@ -107,7 +104,7 @@ void U16IteratorTest::testFFFD() {
IcuTestErrorCode errorCode(*this, "testFFFD");
static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
std::u16string_view bad(badChars, 5);
U16StringCodePoints<char16_t, char32_t, U16_BEHAVIOR_FFFD> range(bad);
U16StringCodePoints<char16_t, char32_t, U_BEHAVIOR_FFFD> range(bad);
auto iter = range.begin();
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
++iter; // pre-increment
@ -131,7 +128,7 @@ void U16IteratorTest::testSurrogate() {
IcuTestErrorCode errorCode(*this, "testSurrogate");
static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
std::u16string_view bad(badChars, 5);
U16StringCodePoints<char16_t, uint32_t, U16_BEHAVIOR_SURROGATE> range(bad);
U16StringCodePoints<char16_t, uint32_t, U_BEHAVIOR_SURROGATE> range(bad);
auto iter = range.begin();
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
++iter; // pre-increment