CodeUnits: getters / private fields

This commit is contained in:
Markus Scherer 2025-01-06 15:16:35 -08:00
parent da93999f6d
commit 5c6e1a6a76
2 changed files with 72 additions and 55 deletions

View file

@ -68,18 +68,35 @@ namespace U_HEADER_ONLY_NAMESPACE {
* @draft ICU 77
*/
template<typename Unit, typename CP32>
struct CodeUnits {
// Order of fields with padding and access frequency in mind.
CP32 codePoint = 0;
uint8_t length = 0;
bool isWellFormed = false;
const Unit *data;
class CodeUnits {
public:
// @internal
// Stores the four arguments unchanged in the private fields:
// the code point (c), the number of code units (len), the well-formedness
// flag (ok), and the pointer to the code units (p).
// No validation is performed here; the caller supplies consistent values.
CodeUnits(CP32 codePoint, uint8_t length, bool wellFormed, const Unit *data) :
c(codePoint), len(length), ok(wellFormed), p(data) {}
// Copy construction and copy assignment are the compiler-generated
// memberwise versions; the pointer p is copied, not the units it points to.
CodeUnits(const CodeUnits &other) = default;
CodeUnits &operator=(const CodeUnits &other) = default;
UChar32 codePoint() const { return c; }
/** @return the well-formedness flag that was given to the constructor. */
bool wellFormed() const {
    return ok;
}
/** @return the code unit pointer given to the constructor; stringView() starts at this address. */
const Unit *data() const {
    return p;
}
/** @return the number of code units, widened from the stored uint8_t to int32_t. */
int32_t length() const {
    return static_cast<int32_t>(len);
}
// Returns a non-owning std::basic_string_view over the code units of this
// sequence: it starts at the stored pointer and spans the stored length.
// Two return lines are shown because this listing places the line from before
// the change (which read the former public fields data and length) directly
// ahead of its replacement (which reads the private fields p and len).
std::basic_string_view<Unit> stringView() const {
return std::basic_string_view<Unit>(data, length);
return std::basic_string_view<Unit>(p, len);
}
// TODO: std::optional<CP32> maybeCodePoint() const ? (nullopt if !isWellFormed)
// TODO: std::optional<CP32> maybeCodePoint() const ? (nullopt if ill-formed)
private:
// Order of fields with padding and access frequency in mind.
CP32 c;
uint8_t len;
bool ok;
const Unit *p;
};
/**
@ -327,8 +344,8 @@ private:
// Adds up the code points of s by walking a U16StringCodePoints range with a
// range-based for loop, and returns the total as an int32_t.
// The loop header and the accumulation each appear twice because this listing
// places the lines from before the change (loop variable seq, field access
// .codePoint) directly ahead of their replacements (loop variable units,
// getter call .codePoint()).
int32_t rangeLoop(std::u16string_view s) {
header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
int32_t sum = 0;
for (auto seq : range) {
sum += seq.codePoint;
for (auto units : range) {
sum += units.codePoint();
}
return sum;
}
@ -339,7 +356,7 @@ int32_t loopIterPlusPlus(std::u16string_view s) {
auto iter = range.begin();
auto limit = range.end();
while (iter != limit) {
sum += (*iter++).codePoint;
sum += (*iter++).codePoint();
}
return sum;
}
@ -348,7 +365,7 @@ int32_t reverseLoop(std::u16string_view s) {
header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
int32_t sum = 0;
for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {
sum += (*iter).codePoint;
sum += (*iter).codePoint();
}
return sum;
}

View file

@ -56,22 +56,22 @@ void U16IteratorTest::testGood() {
std::u16string_view good(u"abçカ🚴"sv);
U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
auto iter = range.begin();
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
++iter; // pre-increment
auto seq = *iter;
assertEquals("iter[1] * codePoint", u'b', seq.codePoint);
assertEquals("iter[1] * length", 1, seq.length);
assertTrue("iter[1] * isWellFormed", seq.isWellFormed);
assertTrue("iter[1] * stringView()", seq.stringView() == u"b"sv);
auto units = *iter;
assertEquals("iter[1] * codePoint", u'b', units.codePoint());
assertEquals("iter[1] * length", 1, units.length());
assertTrue("iter[1] * wellFormed", units.wellFormed());
assertTrue("iter[1] * stringView()", units.stringView() == u"b"sv);
++iter;
assertEquals("iter[2] * codePoint", u'ç', (*iter++).codePoint); // post-increment
assertEquals("iter[3] * codePoint", u'カ', (*iter).codePoint);
assertEquals("iter[2] * codePoint", u'ç', (*iter++).codePoint()); // post-increment
assertEquals("iter[3] * codePoint", u'カ', (*iter).codePoint());
++iter;
seq = *iter++;
assertEquals("iter[4] * codePoint", U'🚴', seq.codePoint);
assertEquals("iter[4] * length", 2, seq.length);
assertTrue("iter[4] * isWellFormed", seq.isWellFormed);
assertTrue("iter[4] * stringView()", seq.stringView() == u"🚴"sv);
units = *iter++;
assertEquals("iter[4] * codePoint", U'🚴', units.codePoint());
assertEquals("iter[4] * length", 2, units.length());
assertTrue("iter[4] * wellFormed", units.wellFormed());
assertTrue("iter[4] * stringView()", units.stringView() == u"🚴"sv);
assertTrue("iter == endIter", iter == range.end());
}
@ -81,20 +81,20 @@ void U16IteratorTest::testNegative() {
std::u16string_view bad(badChars, 5);
U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(bad);
auto iter = range.begin();
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
++iter; // pre-increment
auto seq = *iter;
assertEquals("iter[1] * codePoint", -1, seq.codePoint);
assertEquals("iter[1] * length", 1, seq.length);
assertFalse("iter[1] * isWellFormed", seq.isWellFormed);
auto sv = seq.stringView();
auto units = *iter;
assertEquals("iter[1] * codePoint", -1, units.codePoint());
assertEquals("iter[1] * length", 1, units.length());
assertFalse("iter[1] * wellFormed", units.wellFormed());
auto sv = units.stringView();
assertEquals("iter[1] * stringView().length()", 1, sv.length());
assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
++iter;
assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint); // post-increment
seq = *iter++; // post-increment
assertEquals("iter[3] * codePoint", -1, seq.codePoint);
assertFalse("iter[3] * isWellFormed", seq.isWellFormed);
assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint()); // post-increment
units = *iter++; // post-increment
assertEquals("iter[3] * codePoint", -1, units.codePoint());
assertFalse("iter[3] * wellFormed", units.wellFormed());
assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView()); // post-increment
assertTrue("iter == endIter", iter == range.end());
}
@ -105,20 +105,20 @@ void U16IteratorTest::testFFFD() {
std::u16string_view bad(badChars, 5);
U16StringCodePoints<char16_t, char32_t, U_BEHAVIOR_FFFD> range(bad);
auto iter = range.begin();
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
++iter; // pre-increment
auto seq = *iter;
assertEquals("iter[1] * codePoint", 0xfffd, seq.codePoint);
assertEquals("iter[1] * length", 1, seq.length);
assertFalse("iter[1] * isWellFormed", seq.isWellFormed);
auto sv = seq.stringView();
auto units = *iter;
assertEquals("iter[1] * codePoint", 0xfffd, units.codePoint());
assertEquals("iter[1] * length", 1, units.length());
assertFalse("iter[1] * wellFormed", units.wellFormed());
auto sv = units.stringView();
assertEquals("iter[1] * stringView().length()", 1, sv.length());
assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
++iter;
assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint); // post-increment
seq = *iter++; // post-increment
assertEquals("iter[3] * codePoint", 0xfffd, seq.codePoint);
assertFalse("iter[3] * isWellFormed", seq.isWellFormed);
assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint()); // post-increment
units = *iter++; // post-increment
assertEquals("iter[3] * codePoint", 0xfffd, units.codePoint());
assertFalse("iter[3] * wellFormed", units.wellFormed());
assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView()); // post-increment
assertTrue("iter == endIter", iter == range.end());
}
@ -129,20 +129,20 @@ void U16IteratorTest::testSurrogate() {
std::u16string_view bad(badChars, 5);
U16StringCodePoints<char16_t, uint32_t, U_BEHAVIOR_SURROGATE> range(bad);
auto iter = range.begin();
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
++iter; // pre-increment
auto seq = *iter;
assertEquals("iter[1] * codePoint", 0xd900, seq.codePoint);
assertEquals("iter[1] * length", 1, seq.length);
assertFalse("iter[1] * isWellFormed", seq.isWellFormed);
auto sv = seq.stringView();
auto units = *iter;
assertEquals("iter[1] * codePoint", 0xd900, units.codePoint());
assertEquals("iter[1] * length", 1, units.length());
assertFalse("iter[1] * wellFormed", units.wellFormed());
auto sv = units.stringView();
assertEquals("iter[1] * stringView().length()", 1, sv.length());
assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
++iter;
assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint); // post-increment
seq = *iter++; // post-increment
assertEquals("iter[3] * codePoint", 0xdc05, seq.codePoint);
assertFalse("iter[3] * isWellFormed", seq.isWellFormed);
assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint()); // post-increment
units = *iter++; // post-increment
assertEquals("iter[3] * codePoint", 0xdc05, units.codePoint());
assertFalse("iter[3] * wellFormed", units.wellFormed());
assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView()); // post-increment
assertTrue("iter == endIter", iter == range.end());
}