mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-23004 utfStringCodePoints(StringView) deduces Unit
This commit is contained in:
parent
65c155de9b
commit
d1e9fb7003
2 changed files with 45 additions and 24 deletions
|
@ -1338,6 +1338,20 @@ private:
|
|||
std::basic_string_view<Unit> s;
|
||||
};
|
||||
|
||||
/**
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if U_BEHAVIOR_NEGATIVE
|
||||
* @tparam UIllFormedBehavior How to handle ill-formed Unicode strings
|
||||
* @return a UTFStringCodePoints<Unit, CP32, behavior>
|
||||
* for the given std::basic_string_view<Unit>,
|
||||
* deducing the Unit character type
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename CP32, UIllFormedBehavior behavior, typename StringView>
|
||||
auto utfStringCodePoints(StringView s) {
|
||||
return UTFStringCodePoints<typename StringView::value_type, CP32, behavior>(s);
|
||||
}
|
||||
|
||||
// Non-validating iterators ------------------------------------------------ ***
|
||||
|
||||
/**
|
||||
|
@ -1354,8 +1368,7 @@ private:
|
|||
* @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
|
||||
* UTF-8: char or char8_t or uint8_t;
|
||||
* UTF-16: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if U_BEHAVIOR_NEGATIVE
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename UnitIter, typename CP32, typename = void>
|
||||
|
@ -1741,8 +1754,7 @@ namespace U_HEADER_ONLY_NAMESPACE {
|
|||
* @tparam Unit Code unit type:
|
||||
* UTF-8: char or char8_t or uint8_t;
|
||||
* UTF-16: char16_t or uint16_t or (on Windows) wchar_t
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if U_BEHAVIOR_NEGATIVE
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename Unit, typename CP32>
|
||||
|
@ -1790,21 +1802,32 @@ private:
|
|||
std::basic_string_view<Unit> s;
|
||||
};
|
||||
|
||||
/**
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
|
||||
* @return an UnsafeUTFStringCodePoints<Unit, CP32>
|
||||
* for the given std::basic_string_view<Unit>,
|
||||
* deducing the Unit character type
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename CP32, typename StringView>
|
||||
auto unsafeUTFStringCodePoints(StringView s) {
|
||||
return UnsafeUTFStringCodePoints<typename StringView::value_type, CP32>(s);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------- ***
|
||||
|
||||
// TODO: remove experimental sample code
|
||||
#ifndef UTYPES_H
|
||||
int32_t rangeLoop16(std::u16string_view s) {
|
||||
header::UTFStringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
int32_t sum = 0;
|
||||
for (auto units : range) {
|
||||
for (auto units : header::utfStringCodePoints<UChar32, U_BEHAVIOR_NEGATIVE>(s)) {
|
||||
sum += units.codePoint();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
int32_t loopIterPlusPlus16(std::u16string_view s) {
|
||||
header::UTFStringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
auto range = header::utfStringCodePoints<UChar32, U_BEHAVIOR_NEGATIVE>(s);
|
||||
int32_t sum = 0;
|
||||
auto iter = range.begin();
|
||||
auto limit = range.end();
|
||||
|
@ -1815,7 +1838,7 @@ int32_t loopIterPlusPlus16(std::u16string_view s) {
|
|||
}
|
||||
|
||||
int32_t backwardLoop16(std::u16string_view s) {
|
||||
header::UTFStringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
auto range = header::utfStringCodePoints<UChar32, U_BEHAVIOR_NEGATIVE>(s);
|
||||
int32_t sum = 0;
|
||||
auto start = range.begin();
|
||||
auto iter = range.end();
|
||||
|
@ -1826,7 +1849,7 @@ int32_t backwardLoop16(std::u16string_view s) {
|
|||
}
|
||||
|
||||
int32_t reverseLoop16(std::u16string_view s) {
|
||||
header::UTFStringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
auto range = header::utfStringCodePoints<UChar32, U_BEHAVIOR_NEGATIVE>(s);
|
||||
int32_t sum = 0;
|
||||
for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {
|
||||
sum += iter->codePoint();
|
||||
|
@ -1835,16 +1858,15 @@ int32_t reverseLoop16(std::u16string_view s) {
|
|||
}
|
||||
|
||||
int32_t unsafeRangeLoop16(std::u16string_view s) {
|
||||
header::UnsafeUTFStringCodePoints<char16_t, UChar32> range(s);
|
||||
int32_t sum = 0;
|
||||
for (auto units : range) {
|
||||
for (auto units : header::unsafeUTFStringCodePoints<UChar32>(s)) {
|
||||
sum += units.codePoint();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
int32_t unsafeReverseLoop16(std::u16string_view s) {
|
||||
header::UnsafeUTFStringCodePoints<char16_t, UChar32> range(s);
|
||||
auto range = header::unsafeUTFStringCodePoints<UChar32>(s);
|
||||
int32_t sum = 0;
|
||||
for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {
|
||||
sum += iter->codePoint();
|
||||
|
@ -1853,16 +1875,15 @@ int32_t unsafeReverseLoop16(std::u16string_view s) {
|
|||
}
|
||||
|
||||
int32_t rangeLoop8(std::string_view s) {
|
||||
header::UTFStringCodePoints<char, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
int32_t sum = 0;
|
||||
for (auto units : range) {
|
||||
for (auto units : header::utfStringCodePoints<UChar32, U_BEHAVIOR_NEGATIVE>(s)) {
|
||||
sum += units.codePoint();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
int32_t reverseLoop8(std::string_view s) {
|
||||
header::UTFStringCodePoints<char, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
|
||||
auto range = header::utfStringCodePoints<UChar32, U_BEHAVIOR_NEGATIVE>(s);
|
||||
int32_t sum = 0;
|
||||
for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {
|
||||
sum += iter->codePoint();
|
||||
|
@ -1882,16 +1903,15 @@ int32_t macroLoop8(std::string_view s) {
|
|||
}
|
||||
|
||||
int32_t unsafeRangeLoop8(std::string_view s) {
|
||||
header::UnsafeUTFStringCodePoints<char, UChar32> range(s);
|
||||
int32_t sum = 0;
|
||||
for (auto units : range) {
|
||||
for (auto units : header::unsafeUTFStringCodePoints<UChar32>(s)) {
|
||||
sum += units.codePoint();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
int32_t unsafeReverseLoop8(std::string_view s) {
|
||||
header::UnsafeUTFStringCodePoints<char, UChar32> range(s);
|
||||
auto range = header::unsafeUTFStringCodePoints<UChar32>(s);
|
||||
int32_t sum = 0;
|
||||
for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {
|
||||
sum += iter->codePoint();
|
||||
|
|
|
@ -22,6 +22,7 @@ using namespace std::string_view_literals;
|
|||
|
||||
using U_HEADER_ONLY_NAMESPACE::UTFIterator;
|
||||
using U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints;
|
||||
using U_HEADER_ONLY_NAMESPACE::utfStringCodePoints;
|
||||
|
||||
// Shared state for one or more copies of single-pass iterators.
|
||||
// Similar to https://en.cppreference.com/w/cpp/iterator/istreambuf_iterator
|
||||
|
@ -141,7 +142,7 @@ void U16IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&nam
|
|||
|
||||
void U16IteratorTest::testGood() {
|
||||
std::u16string_view good(u"abçカ🚴"sv);
|
||||
UTFStringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
auto range = utfStringCodePoints<UChar32, U_BEHAVIOR_NEGATIVE>(good);
|
||||
// TODO: Try to un-hardcode the iterator types in these checks via decltype.
|
||||
assertTrue(
|
||||
"bidirectional_iterator_tag",
|
||||
|
@ -175,7 +176,7 @@ void U16IteratorTest::testGood() {
|
|||
void U16IteratorTest::testNegative() {
|
||||
static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
|
||||
std::u16string_view bad(badChars, 5);
|
||||
UTFStringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(bad);
|
||||
auto range = utfStringCodePoints<UChar32, U_BEHAVIOR_NEGATIVE>(bad);
|
||||
auto iter = range.begin();
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
|
@ -200,7 +201,7 @@ void U16IteratorTest::testNegative() {
|
|||
void U16IteratorTest::testFFFD() {
|
||||
static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
|
||||
std::u16string_view bad(badChars, 5);
|
||||
UTFStringCodePoints<char16_t, char32_t, U_BEHAVIOR_FFFD> range(bad);
|
||||
auto range = utfStringCodePoints<char32_t, U_BEHAVIOR_FFFD>(bad);
|
||||
auto iter = range.begin();
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
|
@ -224,7 +225,7 @@ void U16IteratorTest::testFFFD() {
|
|||
void U16IteratorTest::testSurrogate() {
|
||||
static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
|
||||
std::u16string_view bad(badChars, 5);
|
||||
UTFStringCodePoints<char16_t, uint32_t, U_BEHAVIOR_SURROGATE> range(bad);
|
||||
auto range = utfStringCodePoints<uint32_t, U_BEHAVIOR_SURROGATE>(bad);
|
||||
auto iter = range.begin();
|
||||
assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
|
||||
assertEquals("iter[0] -> codePoint", u'a', iter->codePoint());
|
||||
|
@ -354,7 +355,7 @@ void U8IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&name
|
|||
|
||||
void U8IteratorTest::testGood() {
|
||||
std::string_view good(reinterpret_cast<const char*>(u8"abçカ🚴"));
|
||||
UTFStringCodePoints<char, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
auto range = utfStringCodePoints<UChar32, U_BEHAVIOR_NEGATIVE>(good);
|
||||
assertTrue(
|
||||
"bidirectional_iterator_tag",
|
||||
std::is_same_v<
|
||||
|
@ -495,7 +496,7 @@ void U32IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&nam
|
|||
|
||||
void U32IteratorTest::testGood() {
|
||||
std::u32string_view good(U"abçカ🚴"sv);
|
||||
UTFStringCodePoints<char32_t, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
|
||||
auto range = utfStringCodePoints<UChar32, U_BEHAVIOR_NEGATIVE>(good);
|
||||
assertTrue(
|
||||
"bidirectional_iterator_tag",
|
||||
std::is_same_v<
|
||||
|
|
Loading…
Add table
Reference in a new issue