From bb66c8c3382f9cdd1cc8c43770f8eb464eb6e9f8 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 7 Mar 2025 12:21:49 -0800 Subject: [PATCH] ICU-23004 utfIterator(UnitIter) deduces UnitIter --- icu4c/source/common/unicode/utfiter.h | 68 +++++++++++++++++++++- icu4c/source/test/intltest/utfitertest.cpp | 28 +++++---- 2 files changed, 78 insertions(+), 18 deletions(-) diff --git a/icu4c/source/common/unicode/utfiter.h b/icu4c/source/common/unicode/utfiter.h index a49d795a9c7..16f94acb02d 100644 --- a/icu4c/source/common/unicode/utfiter.h +++ b/icu4c/source/common/unicode/utfiter.h @@ -878,7 +878,7 @@ public: * UTF-16: char16_t or uint16_t or (on Windows) wchar_t * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t; * should be signed if U_BEHAVIOR_NEGATIVE - * @tparam UIllFormedBehavior How to handle ill-formed Unicode strings + * @tparam behavior How to handle ill-formed Unicode strings * @draft ICU 78 */ template @@ -1289,7 +1289,7 @@ namespace U_HEADER_ONLY_NAMESPACE { * UTF-16: char16_t or uint16_t or (on Windows) wchar_t * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t; * should be signed if U_BEHAVIOR_NEGATIVE - * @tparam UIllFormedBehavior How to handle ill-formed Unicode strings + * @tparam behavior How to handle ill-formed Unicode strings * @draft ICU 78 */ template @@ -1338,10 +1338,59 @@ private: std::basic_string_view s; }; +/** + * UTFIterator factory function for start <= p < limit. 
+ * + * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t + * @tparam behavior How to handle ill-formed Unicode strings + * @param start start code unit iterator + * @param p current-position code unit iterator + * @param limit limit (exclusive-end) code unit iterator + * @return a UTFIterator<UnitIter, CP32, behavior> + * for the given code unit iterators or character pointers + * @draft ICU 78 + */ +template<typename CP32, UIllFormedBehavior behavior, typename UnitIter> +auto utfIterator(UnitIter start, UnitIter p, UnitIter limit) { + return UTFIterator<UnitIter, CP32, behavior>(start, p, limit); +} + +/** + * UTFIterator factory function for start = p < limit. + * + * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t + * @tparam behavior How to handle ill-formed Unicode strings + * @param p start and current-position code unit iterator + * @param limit limit (exclusive-end) code unit iterator + * @return a UTFIterator<UnitIter, CP32, behavior> + * for the given code unit iterators or character pointers + * @draft ICU 78 + */ +template<typename CP32, UIllFormedBehavior behavior, typename UnitIter> +auto utfIterator(UnitIter p, UnitIter limit) { + return UTFIterator<UnitIter, CP32, behavior>(p, limit); +} + +/** + * UTFIterator factory function for a start or limit sentinel. 
+ * + * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t + * @tparam behavior How to handle ill-formed Unicode strings + * @param p code unit iterator + * @return a UTFIterator<UnitIter, CP32, behavior> + * for the given code unit iterator or character pointer + * @draft ICU 78 + */ +template<typename CP32, UIllFormedBehavior behavior, typename UnitIter> +auto utfIterator(UnitIter p) { + return UTFIterator<UnitIter, CP32, behavior>(p); +} + /** * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t; * should be signed if U_BEHAVIOR_NEGATIVE - * @tparam UIllFormedBehavior How to handle ill-formed Unicode strings + * @tparam behavior How to handle ill-formed Unicode strings + * @param s input string_view * @return a UTFStringCodePoints<Unit, CP32, behavior> * for the given std::basic_string_view<Unit>, * deducing the Unit character type @@ -1804,6 +1853,19 @@ private: /** * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t + * @param iter code unit iterator + * @return an UnsafeUTFIterator<UnitIter, CP32> + * for the given code unit iterator or character pointer + * @draft ICU 78 + */ +template<typename CP32, typename UnitIter> +auto unsafeUTFIterator(UnitIter iter) { + return UnsafeUTFIterator<UnitIter, CP32>(iter); +} + +/** + * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t + * @param s input string_view * @return an UnsafeUTFStringCodePoints<Unit, CP32> * for the given std::basic_string_view<Unit>, * deducing the Unit character type diff --git a/icu4c/source/test/intltest/utfitertest.cpp b/icu4c/source/test/intltest/utfitertest.cpp index 721aa147515..d08ccb7d754 100644 --- a/icu4c/source/test/intltest/utfitertest.cpp +++ b/icu4c/source/test/intltest/utfitertest.cpp @@ -21,6 +21,7 @@ using namespace std::string_view_literals; using U_HEADER_ONLY_NAMESPACE::UTFIterator; +using U_HEADER_ONLY_NAMESPACE::utfIterator; using U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints; using U_HEADER_ONLY_NAMESPACE::utfStringCodePoints; @@ -250,9 +251,8 @@ void U16IteratorTest::testSinglePassIter() { SinglePassSource 
good(u"abçカ🚴"sv); SinglePassIter goodBegin(good); SinglePassIter goodLimit{}; - UTFIterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin( - goodBegin, goodLimit); - UTFIterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit); + auto rangeBegin = utfIterator(goodBegin, goodLimit); + auto rangeLimit = utfIterator(goodLimit); assertTrue( "input_iterator_tag", std::is_same_v< @@ -286,8 +286,8 @@ void U16IteratorTest::testFwdIter() { std::u16string_view good(u"abçカ🚴"sv); FwdIter goodBegin(good.data()); FwdIter goodLimit(good.data() + good.length()); - UTFIterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(goodBegin, goodLimit); - UTFIterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit); + auto rangeBegin = utfIterator(goodBegin, goodLimit); + auto rangeLimit = utfIterator(goodLimit); // TODO: UTFStringCodePoints range(good); assertTrue( "forward_iterator_tag", @@ -391,9 +391,8 @@ void U8IteratorTest::testSinglePassIter() { SinglePassSource good(reinterpret_cast(u8"abçカ🚴")); SinglePassIter goodBegin(good); SinglePassIter goodLimit{}; - UTFIterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin( - goodBegin, goodLimit); - UTFIterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit); + auto rangeBegin = utfIterator(goodBegin, goodLimit); + auto rangeLimit = utfIterator(goodLimit); assertTrue( "input_iterator_tag", std::is_same_v< @@ -427,8 +426,8 @@ void U8IteratorTest::testFwdIter() { std::string_view good(reinterpret_cast(u8"abçカ🚴")); FwdIter goodBegin(good.data()); FwdIter goodLimit(good.data() + good.length()); - UTFIterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(goodBegin, goodLimit); - UTFIterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit); + auto rangeBegin = utfIterator(goodBegin, goodLimit); + auto rangeLimit = utfIterator(goodLimit); // TODO: UTFStringCodePoints range(good); assertTrue( "forward_iterator_tag", @@ -530,9 +529,8 @@ void U32IteratorTest::testSinglePassIter() { SinglePassSource good(U"abçカ🚴"sv); SinglePassIter goodBegin(good); 
SinglePassIter goodLimit{}; - UTFIterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin( - goodBegin, goodLimit); - UTFIterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit); + auto rangeBegin = utfIterator(goodBegin, goodLimit); + auto rangeLimit = utfIterator(goodLimit); assertTrue( "input_iterator_tag", std::is_same_v< @@ -566,8 +564,8 @@ void U32IteratorTest::testFwdIter() { std::u32string_view good(U"abçカ🚴"sv); FwdIter goodBegin(good.data()); FwdIter goodLimit(good.data() + good.length()); - UTFIterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(goodBegin, goodLimit); - UTFIterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit); + auto rangeBegin = utfIterator(goodBegin, goodLimit); + auto rangeLimit = utfIterator(goodLimit); // TODO: UTFStringCodePoints range(good); assertTrue( "forward_iterator_tag",