From 807f7b356a6151a46a1a2d096b1f6ef96352f983 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 28 Mar 2025 20:07:20 -0700 Subject: [PATCH] ICU-23004 merge testSafeGood() & testSafeBad() --- .../source/test/intltest/utfiteratortest.cpp | 124 ++++++------------ 1 file changed, 43 insertions(+), 81 deletions(-) diff --git a/icu4c/source/test/intltest/utfiteratortest.cpp b/icu4c/source/test/intltest/utfiteratortest.cpp index f85c6c8400b..c437343889c 100644 --- a/icu4c/source/test/intltest/utfiteratortest.cpp +++ b/icu4c/source/test/intltest/utfiteratortest.cpp @@ -296,10 +296,7 @@ public: } template - void testSafeGood(StringView piped); - - template - void testSafeBad(StringView piped); + void testSafe(StringView piped, bool isWellFormed); template void testSafeSinglePassIter(StringView piped); @@ -327,16 +324,16 @@ public: static constexpr std::u32string_view bad32{badChars32, std::size(badChars32)}; void testSafe16Good() { - testSafeGood(good16); + testSafe(good16, true); } void testSafe16Negative() { - testSafeBad(bad16); + testSafe(bad16, false); } void testSafe16FFFD() { - testSafeBad(bad16); + testSafe(bad16, false); } void testSafe16Surrogate() { - testSafeBad(bad16); + testSafe(bad16, false); } void testSafe16SinglePassIter() { testSafeSinglePassIter(good16); @@ -346,15 +343,15 @@ public: } void testSafe8Good() { - testSafeGood(std::string_view{good8Chars}); + testSafe(std::string_view{good8Chars}, true); } void testSafe8Negative() { - testSafeBad( - std::string_view(string8FromBytes(badChars8, std::size(badChars8)))); + testSafe( + std::string_view(string8FromBytes(badChars8, std::size(badChars8))), false); } void testSafe8FFFD() { - testSafeBad( - std::string_view(string8FromBytes(badChars8, std::size(badChars8)))); + testSafe( + std::string_view(string8FromBytes(badChars8, std::size(badChars8))), false); } void testSafe8SinglePassIter() { testSafeSinglePassIter(std::string_view{good8Chars}); @@ -364,16 +361,16 @@ public: } void testSafe32Good() { - testSafeGood(good32); + testSafe(good32, true); } void testSafe32Negative() { - testSafeBad(bad32); + testSafe(bad32, false); } void testSafe32FFFD() { - testSafeBad(bad32); + testSafe(bad32, false); } void testSafe32Surrogate() { - testSafeBad(bad32); + testSafe(bad32, false); } void testSafe32SinglePassIter() { testSafeSinglePassIter(good32); @@ -390,14 +387,16 @@ extern IntlTest *createUTFIteratorTest() { } template -void UTFIteratorTest::testSafeGood(StringView piped) { +void UTFIteratorTest::testSafe(StringView piped, bool isWellFormed) { using Unit = typename StringView::value_type; auto parts = split(piped); auto joined = join(parts); auto last = parts[4]; - StringView good(joined); + StringView sv(joined); // "abçカ🚴" - auto range = utfStringCodePoints(good); + // or + // "a?ç?🚴" where the ? sequences are ill-formed + auto range = utfStringCodePoints(sv); auto iter = range.begin(); assertTrue( "bidirectional_iterator_tag", @@ -408,27 +407,36 @@ void UTFIteratorTest::testSafeGood(StringView piped) { assertEquals("iter[0] -> codePoint", u'a', iter->codePoint()); ++iter; // pre-increment auto units = *iter; - assertEquals("iter[1] * codePoint", u'b', units.codePoint()); + CP32 expectedCP = isWellFormed ? u'b' : sub(parts[1]); + assertEquals("iter[1] * codePoint", expectedCP, units.codePoint()); assertEquals("iter[1] * length", parts[1].length(), units.length()); - assertTrue("iter[1] * wellFormed", units.wellFormed()); + assertEquals("iter[1] * wellFormed", isWellFormed, units.wellFormed()); assertTrue("iter[1] * stringView()", units.stringView() == parts[1]); + auto unitsIter = units.begin(); + for (auto c : parts[1]) { + assertEquals("iter[1] * begin()[i]", + static_cast(c), static_cast(*unitsIter++)); + } + assertTrue("iter[1] * end()[0]", *units.end() == parts[2][0]); ++iter; assertEquals("iter[2] * codePoint", u'ç', (*iter++).codePoint()); // post-increment - assertEquals("iter[3] -> codePoint", u'カ', iter->codePoint()); - ++iter; + units = *iter++; // post-increment + expectedCP = isWellFormed ? u'カ' : sub(parts[3]); + assertEquals("iter[3] * codePoint", expectedCP, units.codePoint()); + assertEquals("iter[3] * wellFormed", isWellFormed, units.wellFormed()); // Fetch the current code point twice. assertEquals("iter[4.0] * codePoint", U'🚴', (*iter).codePoint()); - units = *iter++; + units = *iter++; // post-increment assertEquals("iter[4] * codePoint", U'🚴', units.codePoint()); assertEquals("iter[4] * length", last.length(), units.length()); assertTrue("iter[4] * wellFormed", units.wellFormed()); assertTrue("iter[4] * stringView()", units.stringView() == last); - auto unitsIter = units.begin(); + unitsIter = units.begin(); for (auto c : last) { assertEquals("iter[back 4] * begin()[i]", static_cast(c), static_cast(*unitsIter++)); } - assertTrue("iter[4] * end() == endIter", units.end() == good.end()); + assertTrue("iter[4] * end() == endIter", units.end() == sv.end()); assertTrue("iter == endIter", iter == range.end()); // backwards units = *--iter; // pre-decrement @@ -441,67 +449,21 @@ void UTFIteratorTest::testSafeGood(StringView piped) { assertEquals("iter[back 4] * begin()[i]", static_cast(c), static_cast(*unitsIter++)); } - assertTrue("iter[back 4] * end() == endIter", units.end() == good.end()); + assertTrue("iter[back 4] * end() == endIter", units.end() == sv.end()); --iter; - assertEquals("iter[back 3] * codePoint", u'カ', (*iter--).codePoint()); // post-decrement + assertEquals("iter[back 3] -> wellFormed", isWellFormed, iter->wellFormed()); + assertEquals("iter[back 3] * codePoint", expectedCP, (*iter--).codePoint()); // post-decrement assertEquals("iter[back 2] * codePoint", u'ç', (*iter).codePoint()); assertEquals("iter[back 2] -> length", parts[2].length(), iter->length()); + assertTrue("iter[back 2] -> wellFormed", iter->wellFormed()); units = *--iter; - assertEquals("iter[back 1] * codePoint", u'b', units.codePoint()); - assertTrue("iter[back 1] * wellFormed", units.wellFormed()); + expectedCP = isWellFormed ? u'b' : sub(parts[1]); + assertEquals("iter[back 1] * codePoint", expectedCP, units.codePoint()); + assertEquals("iter[back 1] * wellFormed", isWellFormed, units.wellFormed()); assertTrue("iter[back 1] * stringView()", units.stringView() == parts[1]); --iter; assertEquals("iter[back 0] -> codePoint", u'a', iter->codePoint()); - assertTrue("iter[back 0] -> begin() == beginIter", iter->begin() == good.begin()); - assertTrue("iter == beginIter", iter == range.begin()); -} - -template -void UTFIteratorTest::testSafeBad(StringView piped) { - using Unit = typename StringView::value_type; - auto parts = split(piped); - auto joined = join(parts); - StringView bad(joined); - // "a?ç?🚴" where the ? sequences are ill-formed - auto range = utfStringCodePoints(bad); - auto iter = range.begin(); - assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint()); - assertEquals("iter[0] -> codePoint", u'a', iter->codePoint()); - ++iter; // pre-increment - auto units = *iter; - assertEquals("iter[1] * codePoint", sub(parts[1]), units.codePoint()); - assertEquals("iter[1] * length", parts[1].length(), units.length()); - assertFalse("iter[1] * wellFormed", units.wellFormed()); - auto sv = units.stringView(); - assertEquals("iter[1] * stringView().length()", - static_cast(parts[1].length()), static_cast(sv.length())); - int32_t i = 0; - for (auto c : parts[1]) { - assertEquals("iter[1] * stringView()[i]", - static_cast(c), static_cast(sv[i++])); - } - auto unitsIter = units.begin(); - for (auto c : parts[1]) { - assertEquals("iter[1] * begin()[i]", - static_cast(c), static_cast(*unitsIter++)); - } - assertTrue("iter[1] * end()[0]", *units.end() == parts[2][0]); - ++iter; - assertEquals("iter[2] * codePoint", u'ç', (*iter++).codePoint()); // post-increment - units = *iter++; // post-increment - assertEquals("iter[3] * codePoint", sub(parts[3]), units.codePoint()); - assertFalse("iter[3] * wellFormed", units.wellFormed()); - assertTrue("iter[4] * stringView()", (*iter++).stringView() == parts[4]); // post-increment - assertTrue("iter == endIter", iter == range.end()); - // backwards - assertEquals("iter[back 4] * codePoint", U'🚴', (*--iter).codePoint()); - assertTrue("iter[back 4] -> wellFormed", iter->wellFormed()); - assertEquals("iter[back 3] * codePoint", sub(parts[3]), (*--iter).codePoint()); - assertFalse("iter[back 3] -> wellFormed", iter->wellFormed()); - assertEquals("iter[back 2] * codePoint", U'ç', (*--iter).codePoint()); - assertEquals("iter[back 1] * codePoint", sub(parts[1]), (*--iter).codePoint()); - assertEquals("iter[back 0] * codePoint", U'a', (*--iter).codePoint()); - assertTrue("iter[back 0] -> begin() == beginIter", iter->begin() == bad.begin()); + assertTrue("iter[back 0] -> begin() == beginIter", iter->begin() == sv.begin()); assertTrue("iter == beginIter", iter == range.begin()); }