diff --git a/icu4c/source/common/unicode/utfiter.h b/icu4c/source/common/unicode/utfiter.h index 16f94acb02d..73e1017c0a3 100644 --- a/icu4c/source/common/unicode/utfiter.h +++ b/icu4c/source/common/unicode/utfiter.h @@ -1891,9 +1891,7 @@ int32_t rangeLoop16(std::u16string_view s) { int32_t loopIterPlusPlus16(std::u16string_view s) { auto range = header::utfStringCodePoints(s); int32_t sum = 0; - auto iter = range.begin(); - auto limit = range.end(); - while (iter != limit) { + for (auto iter = range.begin(), limit = range.end(); iter != limit;) { sum += (*iter++).codePoint(); } return sum; @@ -1902,9 +1900,7 @@ int32_t loopIterPlusPlus16(std::u16string_view s) { int32_t backwardLoop16(std::u16string_view s) { auto range = header::utfStringCodePoints(s); int32_t sum = 0; - auto start = range.begin(); - auto iter = range.end(); - while (start != iter) { + for (auto start = range.begin(), iter = range.end(); start != iter;) { sum += (*--iter).codePoint(); } return sum; @@ -1913,7 +1909,7 @@ int32_t backwardLoop16(std::u16string_view s) { int32_t reverseLoop16(std::u16string_view s) { auto range = header::utfStringCodePoints(s); int32_t sum = 0; - for (auto iter = range.rbegin(); iter != range.rend(); ++iter) { + for (auto iter = range.rbegin(), limit = range.rend(); iter != limit; ++iter) { sum += iter->codePoint(); } return sum; @@ -1930,7 +1926,7 @@ int32_t unsafeRangeLoop16(std::u16string_view s) { int32_t unsafeReverseLoop16(std::u16string_view s) { auto range = header::unsafeUTFStringCodePoints(s); int32_t sum = 0; - for (auto iter = range.rbegin(); iter != range.rend(); ++iter) { + for (auto iter = range.rbegin(), limit = range.rend(); iter != limit; ++iter) { sum += iter->codePoint(); } return sum; @@ -1947,7 +1943,7 @@ int32_t rangeLoop8(std::string_view s) { int32_t reverseLoop8(std::string_view s) { auto range = header::utfStringCodePoints(s); int32_t sum = 0; - for (auto iter = range.rbegin(); iter != range.rend(); ++iter) { + for (auto iter = range.rbegin(), limit = range.rend(); iter != limit; ++iter) { sum += iter->codePoint(); } return sum; @@ -1975,7 +1971,7 @@ int32_t unsafeRangeLoop8(std::string_view s) { int32_t unsafeReverseLoop8(std::string_view s) { auto range = header::unsafeUTFStringCodePoints(s); int32_t sum = 0; - for (auto iter = range.rbegin(); iter != range.rend(); ++iter) { + for (auto iter = range.rbegin(), limit = range.rend(); iter != limit; ++iter) { sum += iter->codePoint(); } return sum; diff --git a/icu4c/source/test/intltest/utfitertest.cpp b/icu4c/source/test/intltest/utfitertest.cpp index d08ccb7d754..23c0cde5517 100644 --- a/icu4c/source/test/intltest/utfitertest.cpp +++ b/icu4c/source/test/intltest/utfitertest.cpp @@ -25,6 +25,82 @@ using U_HEADER_ONLY_NAMESPACE::utfIterator; using U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints; using U_HEADER_ONLY_NAMESPACE::utfStringCodePoints; +#ifdef SAMPLE_CODE +// For API docs etc. Compile when changing samples or APIs. +using U_HEADER_ONLY_NAMESPACE::unsafeUTFIterator; +using U_HEADER_ONLY_NAMESPACE::unsafeUTFStringCodePoints; + +int32_t rangeLoop16(std::u16string_view s) { + int32_t sum = 0; + for (auto units : utfStringCodePoints(s)) { + sum += units.codePoint(); // < 0 if ill-formed + } + return sum; +} + +int32_t loopIterPlusPlus16(std::u16string_view s) { + auto range = utfStringCodePoints(s); + int32_t sum = 0; + for (auto iter = range.begin(), limit = range.end(); iter != limit;) { + sum += (*iter++).codePoint(); // U+FFFD if ill-formed + } + return sum; +} + +int32_t backwardLoop16(std::u16string_view s) { + auto range = utfStringCodePoints(s); + int32_t sum = 0; + for (auto start = range.begin(), iter = range.end(); start != iter;) { + sum += (*--iter).codePoint(); // surrogate code point if unpaired / ill-formed + } + return sum; +} + +int32_t reverseLoop8(std::string_view s) { + auto range = utfStringCodePoints(s); + int32_t sum = 0; + for (auto iter = range.rbegin(), limit = range.rend(); iter != limit; ++iter) { + sum += iter->codePoint(); // U+FFFD if ill-formed + } + return sum; +} + +int32_t unsafeRangeLoop16(std::u16string_view s) { + int32_t sum = 0; + for (auto units : unsafeUTFStringCodePoints(s)) { + sum += units.codePoint(); + } + return sum; +} + +int32_t unsafeReverseLoop8(std::string_view s) { + auto range = unsafeUTFStringCodePoints(s); + int32_t sum = 0; + for (auto iter = range.rbegin(), limit = range.rend(); iter != limit; ++iter) { + sum += iter->codePoint(); + } + return sum; +} + +char32_t firstCodePointOrFFFD16(std::u16string_view s) { + if (s.empty()) { return 0xfffd; } + auto range = utfStringCodePoints(s); + return range.begin()->codePoint(); +} + +std::string_view firstSequence8(std::string_view s) { + if (s.empty()) { return {}; } + auto range = utfStringCodePoints(s); + auto units = *(range.begin()); + if (units.wellFormed()) { + return units.stringView(); + } else { + return {}; + } +} + +#endif // SAMPLE_CODE + // Shared state for one or more copies of single-pass iterators. // Similar to https://en.cppreference.com/w/cpp/iterator/istreambuf_iterator // but the iterators only implement LegacyIterator (* and ++) without post-increment.