ICU-23004 utfIterator(UnitIter) deduces UnitIter

This commit is contained in:
Markus Scherer 2025-03-07 12:21:49 -08:00
parent d1e9fb7003
commit bb66c8c338
2 changed files with 78 additions and 18 deletions

View file

@ -878,7 +878,7 @@ public:
* UTF-16: char16_t or uint16_t or (on Windows) wchar_t
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
* should be signed if U_BEHAVIOR_NEGATIVE
* @tparam UIllFormedBehavior How to handle ill-formed Unicode strings
* @tparam behavior How to handle ill-formed Unicode strings
* @draft ICU 78
*/
template<typename UnitIter, typename CP32, UIllFormedBehavior behavior, typename = void>
@ -1289,7 +1289,7 @@ namespace U_HEADER_ONLY_NAMESPACE {
* UTF-16: char16_t or uint16_t or (on Windows) wchar_t
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
* should be signed if U_BEHAVIOR_NEGATIVE
* @tparam UIllFormedBehavior How to handle ill-formed Unicode strings
* @tparam behavior How to handle ill-formed Unicode strings
* @draft ICU 78
*/
template<typename Unit, typename CP32, UIllFormedBehavior behavior>
@ -1338,10 +1338,59 @@ private:
std::basic_string_view<Unit> s;
};
/**
* UTFIterator factory function for start <= p < limit.
*
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
* @tparam behavior How to handle ill-formed Unicode strings
* @param start start code unit iterator
* @param p current-position code unit iterator
* @param limit limit (exclusive-end) code unit iterator
* @return a UTFIterator&lt;UnitIter, CP32, behavior&gt;
* for the given code unit iterators or character pointers
* @draft ICU 78
*/
template<typename CP32, UIllFormedBehavior behavior, typename UnitIter>
auto utfIterator(UnitIter start, UnitIter p, UnitIter limit) {
return UTFIterator<UnitIter, CP32, behavior>(start, p, limit);
}
/**
* UTFIterator factory function for start = p < limit.
*
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
* @tparam behavior How to handle ill-formed Unicode strings
* @param p start and current-position code unit iterator
* @param limit limit (exclusive-end) code unit iterator
* @return a UTFIterator&lt;UnitIter, CP32, behavior&gt;
* for the given code unit iterators or character pointers
* @draft ICU 78
*/
template<typename CP32, UIllFormedBehavior behavior, typename UnitIter>
auto utfIterator(UnitIter p, UnitIter limit) {
return UTFIterator<UnitIter, CP32, behavior>(p, limit);
}
/**
* UTFIterator factory function for a start or limit sentinel.
*
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
* @tparam behavior How to handle ill-formed Unicode strings
* @param p code unit iterator
* @return a UTFIterator&lt;UnitIter, CP32, behavior&gt;
* for the given code unit iterator or character pointer
* @draft ICU 78
*/
template<typename CP32, UIllFormedBehavior behavior, typename UnitIter>
auto utfIterator(UnitIter p) {
return UTFIterator<UnitIter, CP32, behavior>(p);
}
/**
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
* should be signed if U_BEHAVIOR_NEGATIVE
* @tparam UIllFormedBehavior How to handle ill-formed Unicode strings
* @tparam behavior How to handle ill-formed Unicode strings
* @param s input string_view
* @return a UTFStringCodePoints&lt;Unit, CP32, behavior&gt;
* for the given std::basic_string_view&lt;Unit&gt;,
* deducing the Unit character type
@ -1804,6 +1853,19 @@ private:
/**
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
* @param iter code unit iterator
* @return an UnsafeUTFIterator&lt;UnitIter, CP32&gt;
* for the given code unit iterator or character pointer
* @draft ICU 78
*/
template<typename CP32, typename UnitIter>
auto unsafeUTFIterator(UnitIter iter) {
return UnsafeUTFIterator<UnitIter, CP32>(iter);
}
/**
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
* @param s input string_view
* @return an UnsafeUTFStringCodePoints&lt;Unit, CP32&gt;
* for the given std::basic_string_view&lt;Unit&gt;,
* deducing the Unit character type

View file

@ -21,6 +21,7 @@
using namespace std::string_view_literals;
using U_HEADER_ONLY_NAMESPACE::UTFIterator;
using U_HEADER_ONLY_NAMESPACE::utfIterator;
using U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints;
using U_HEADER_ONLY_NAMESPACE::utfStringCodePoints;
@ -250,9 +251,8 @@ void U16IteratorTest::testSinglePassIter() {
SinglePassSource<char16_t> good(u"abçカ🚴"sv);
SinglePassIter<char16_t> goodBegin(good);
SinglePassIter<char16_t> goodLimit{};
UTFIterator<SinglePassIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(
goodBegin, goodLimit);
UTFIterator<SinglePassIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
assertTrue(
"input_iterator_tag",
std::is_same_v<
@ -286,8 +286,8 @@ void U16IteratorTest::testFwdIter() {
std::u16string_view good(u"abçカ🚴"sv);
FwdIter<char16_t> goodBegin(good.data());
FwdIter<char16_t> goodLimit(good.data() + good.length());
UTFIterator<FwdIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(goodBegin, goodLimit);
UTFIterator<FwdIter<char16_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
// TODO: UTFStringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
assertTrue(
"forward_iterator_tag",
@ -391,9 +391,8 @@ void U8IteratorTest::testSinglePassIter() {
SinglePassSource<char> good(reinterpret_cast<const char*>(u8"abçカ🚴"));
SinglePassIter<char> goodBegin(good);
SinglePassIter<char> goodLimit{};
UTFIterator<SinglePassIter<char>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(
goodBegin, goodLimit);
UTFIterator<SinglePassIter<char>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
assertTrue(
"input_iterator_tag",
std::is_same_v<
@ -427,8 +426,8 @@ void U8IteratorTest::testFwdIter() {
std::string_view good(reinterpret_cast<const char*>(u8"abçカ🚴"));
FwdIter<char> goodBegin(good.data());
FwdIter<char> goodLimit(good.data() + good.length());
UTFIterator<FwdIter<char>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(goodBegin, goodLimit);
UTFIterator<FwdIter<char>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
// TODO: UTFStringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
assertTrue(
"forward_iterator_tag",
@ -530,9 +529,8 @@ void U32IteratorTest::testSinglePassIter() {
SinglePassSource<char32_t> good(U"abçカ🚴"sv);
SinglePassIter<char32_t> goodBegin(good);
SinglePassIter<char32_t> goodLimit{};
UTFIterator<SinglePassIter<char32_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(
goodBegin, goodLimit);
UTFIterator<SinglePassIter<char32_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
assertTrue(
"input_iterator_tag",
std::is_same_v<
@ -566,8 +564,8 @@ void U32IteratorTest::testFwdIter() {
std::u32string_view good(U"abçカ🚴"sv);
FwdIter<char32_t> goodBegin(good.data());
FwdIter<char32_t> goodLimit(good.data() + good.length());
UTFIterator<FwdIter<char32_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(goodBegin, goodLimit);
UTFIterator<FwdIter<char32_t>, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(goodLimit);
auto rangeBegin = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodBegin, goodLimit);
auto rangeLimit = utfIterator<UChar32, U_BEHAVIOR_NEGATIVE>(goodLimit);
// TODO: UTFStringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
assertTrue(
"forward_iterator_tag",