mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 05:25:34 +00:00
ICU-23004 fix utfStringCodePoints(): StringView -> 5 string_view overloads
This commit is contained in:
parent
b03914fa73
commit
6fb4eca493
3 changed files with 163 additions and 29 deletions
|
@ -479,6 +479,8 @@
|
|||
/* Otherwise use the predefined value. */
|
||||
#elif !defined(__cplusplus)
|
||||
# define U_CPLUSPLUS_VERSION 0
|
||||
#elif __cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
|
||||
# define U_CPLUSPLUS_VERSION 20
|
||||
#elif __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
|
||||
# define U_CPLUSPLUS_VERSION 17
|
||||
#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
|
||||
|
|
|
@ -1632,21 +1632,94 @@ auto utfIterator(UnitIter p) {
|
|||
/**
|
||||
* UTFStringCodePoints factory function for a "range" of code points in a string,
|
||||
* which validates while decoding.
|
||||
* Deduces the Unit template parameter from the input.
|
||||
* Avoids having to explicitly specify the Unit template parameter for the UTFStringCodePoints.
|
||||
*
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if UTF_BEHAVIOR_NEGATIVE
|
||||
* @tparam behavior How to handle ill-formed Unicode strings
|
||||
* @tparam StringView Can usually be omitted/deduced: A std::basic_string_view<Unit>
|
||||
* @param s input string_view
|
||||
* @param s input string
|
||||
* @return a UTFStringCodePoints<CP32, behavior, Unit>
|
||||
* for the given std::basic_string_view<Unit>,
|
||||
* deducing the Unit character type
|
||||
* for the given std::basic_string_view<Unit>
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename CP32, UTFIllFormedBehavior behavior, typename StringView>
|
||||
auto utfStringCodePoints(StringView s) {
|
||||
return UTFStringCodePoints<CP32, behavior, typename StringView::value_type>(s);
|
||||
// std::string_view overload: the Unit passed to UTFStringCodePoints is decltype(s)::value_type, i.e. char.
template<typename CP32, UTFIllFormedBehavior behavior>
|
||||
auto utfStringCodePoints(std::string_view s) {
|
||||
return UTFStringCodePoints<CP32, behavior, decltype(s)::value_type>(s);
|
||||
}
|
||||
|
||||
/**
|
||||
* UTFStringCodePoints factory function for a "range" of code points in a string,
|
||||
* which validates while decoding.
|
||||
* Overload for std::u16string_view: fixes Unit to char16_t, so the caller need not specify it.
|
||||
*
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if UTF_BEHAVIOR_NEGATIVE
|
||||
* @tparam behavior How to handle ill-formed Unicode strings
|
||||
* @param s input string; held as a view (not copied), so its characters must outlive the returned range
|
||||
* @return a UTFStringCodePoints<CP32, behavior, char16_t>
|
||||
* for the given std::u16string_view
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename CP32, UTFIllFormedBehavior behavior>
|
||||
auto utfStringCodePoints(std::u16string_view s) {
|
||||
return UTFStringCodePoints<CP32, behavior, decltype(s)::value_type>(s);
|
||||
}
|
||||
|
||||
/**
|
||||
* UTFStringCodePoints factory function for a "range" of code points in a string,
|
||||
* which validates while decoding.
|
||||
* Overload for std::u32string_view: fixes Unit to char32_t, so the caller need not specify it.
|
||||
*
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if UTF_BEHAVIOR_NEGATIVE
|
||||
* @tparam behavior How to handle ill-formed Unicode strings
|
||||
* @param s input string; held as a view (not copied), so its characters must outlive the returned range
|
||||
* @return a UTFStringCodePoints<CP32, behavior, char32_t>
|
||||
* for the given std::u32string_view
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename CP32, UTFIllFormedBehavior behavior>
|
||||
auto utfStringCodePoints(std::u32string_view s) {
|
||||
return UTFStringCodePoints<CP32, behavior, decltype(s)::value_type>(s);
|
||||
}
|
||||
|
||||
#if U_CPLUSPLUS_VERSION >= 20
|
||||
// The new type char8_t is distinct from char. u8"literals" are now char8_t literals.
|
||||
/**
|
||||
* UTFStringCodePoints factory function for a "range" of code points in a string,
|
||||
* which validates while decoding.
|
||||
* Overload for std::u8string_view (compiled only when U_CPLUSPLUS_VERSION >= 20): fixes Unit to char8_t, so the caller need not specify it.
|
||||
*
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if UTF_BEHAVIOR_NEGATIVE
|
||||
* @tparam behavior How to handle ill-formed Unicode strings
|
||||
* @param s input string; held as a view (not copied), so its characters must outlive the returned range
|
||||
* @return a UTFStringCodePoints<CP32, behavior, char8_t>
|
||||
* for the given std::u8string_view
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename CP32, UTFIllFormedBehavior behavior>
|
||||
auto utfStringCodePoints(std::u8string_view s) {
|
||||
return UTFStringCodePoints<CP32, behavior, decltype(s)::value_type>(s);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* UTFStringCodePoints factory function for a "range" of code points in a string,
|
||||
* which validates while decoding.
|
||||
* Overload for std::wstring_view: fixes Unit to wchar_t, so the caller need not specify it.
|
||||
*
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
|
||||
* should be signed if UTF_BEHAVIOR_NEGATIVE
|
||||
* @tparam behavior How to handle ill-formed Unicode strings
|
||||
* @param s input string; held as a view (not copied), so its characters must outlive the returned range
|
||||
* @return a UTFStringCodePoints<CP32, behavior, wchar_t>
|
||||
* for the given std::wstring_view
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename CP32, UTFIllFormedBehavior behavior>
|
||||
auto utfStringCodePoints(std::wstring_view s) {
|
||||
return UTFStringCodePoints<CP32, behavior, decltype(s)::value_type>(s);
|
||||
}
|
||||
|
||||
// Non-validating iterators ------------------------------------------------ ***
|
||||
|
@ -2206,19 +2279,89 @@ auto unsafeUTFIterator(UnitIter iter) {
|
|||
/**
|
||||
* UnsafeUTFStringCodePoints factory function for a "range" of code points in a string.
|
||||
* The string must be well-formed.
|
||||
* Deduces the Unit template parameter from the input.
|
||||
* Avoids having to explicitly specify the Unit template parameter
|
||||
* for the UnsafeUTFStringCodePoints.
|
||||
*
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
|
||||
* @tparam StringView Can usually be omitted/deduced: A std::basic_string_view<Unit>
|
||||
* @param s input string_view
|
||||
* @param s input string
|
||||
* @return an UnsafeUTFStringCodePoints<CP32, Unit>
|
||||
* for the given std::basic_string_view<Unit>,
|
||||
* deducing the Unit character type
|
||||
* for the given std::basic_string_view<Unit>
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename CP32, typename StringView>
|
||||
auto unsafeUTFStringCodePoints(StringView s) {
|
||||
return UnsafeUTFStringCodePoints<CP32, typename StringView::value_type>(s);
|
||||
// std::string_view overload: the Unit passed to UnsafeUTFStringCodePoints is decltype(s)::value_type, i.e. char.
template<typename CP32>
|
||||
auto unsafeUTFStringCodePoints(std::string_view s) {
|
||||
return UnsafeUTFStringCodePoints<CP32, decltype(s)::value_type>(s);
|
||||
}
|
||||
|
||||
/**
|
||||
* UnsafeUTFStringCodePoints factory function for a "range" of code points in a string.
|
||||
* The string must be well-formed.
|
||||
* Overload for std::u16string_view: fixes Unit to char16_t, so the caller need not specify
|
||||
* the Unit template parameter of the UnsafeUTFStringCodePoints.
|
||||
*
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
|
||||
* @param s input string, which must be well-formed; held as a view (not copied), so its characters must outlive the returned range
|
||||
* @return an UnsafeUTFStringCodePoints<CP32, char16_t>
|
||||
* for the given std::u16string_view
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename CP32>
|
||||
auto unsafeUTFStringCodePoints(std::u16string_view s) {
|
||||
return UnsafeUTFStringCodePoints<CP32, decltype(s)::value_type>(s);
|
||||
}
|
||||
|
||||
/**
|
||||
* UnsafeUTFStringCodePoints factory function for a "range" of code points in a string.
|
||||
* The string must be well-formed.
|
||||
* Overload for std::u32string_view: fixes Unit to char32_t, so the caller need not specify
|
||||
* the Unit template parameter of the UnsafeUTFStringCodePoints.
|
||||
*
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
|
||||
* @param s input string, which must be well-formed; held as a view (not copied), so its characters must outlive the returned range
|
||||
* @return an UnsafeUTFStringCodePoints<CP32, char32_t>
|
||||
* for the given std::u32string_view
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename CP32>
|
||||
auto unsafeUTFStringCodePoints(std::u32string_view s) {
|
||||
return UnsafeUTFStringCodePoints<CP32, decltype(s)::value_type>(s);
|
||||
}
|
||||
|
||||
#if U_CPLUSPLUS_VERSION >= 20
|
||||
// The new type char8_t is distinct from char. u8"literals" are now char8_t literals.
|
||||
/**
|
||||
* UnsafeUTFStringCodePoints factory function for a "range" of code points in a string.
|
||||
* The string must be well-formed.
|
||||
* Overload for std::u8string_view (compiled only when U_CPLUSPLUS_VERSION >= 20): fixes Unit to char8_t,
|
||||
* so the caller need not specify the Unit template parameter of the UnsafeUTFStringCodePoints.
|
||||
*
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
|
||||
* @param s input string, which must be well-formed; held as a view (not copied), so its characters must outlive the returned range
|
||||
* @return an UnsafeUTFStringCodePoints<CP32, char8_t>
|
||||
* for the given std::u8string_view
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename CP32>
|
||||
auto unsafeUTFStringCodePoints(std::u8string_view s) {
|
||||
return UnsafeUTFStringCodePoints<CP32, decltype(s)::value_type>(s);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* UnsafeUTFStringCodePoints factory function for a "range" of code points in a string.
|
||||
* The string must be well-formed.
|
||||
* Overload for std::wstring_view: fixes Unit to wchar_t, so the caller need not specify
|
||||
* the Unit template parameter of the UnsafeUTFStringCodePoints.
|
||||
*
|
||||
* @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
|
||||
* @param s input string, which must be well-formed; held as a view (not copied), so its characters must outlive the returned range
|
||||
* @return an UnsafeUTFStringCodePoints<CP32, wchar_t>
|
||||
* for the given std::wstring_view
|
||||
* @draft ICU 78
|
||||
*/
|
||||
template<typename CP32>
|
||||
auto unsafeUTFStringCodePoints(std::wstring_view s) {
|
||||
return UnsafeUTFStringCodePoints<CP32, decltype(s)::value_type>(s);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------- ***
|
||||
|
|
|
@ -507,22 +507,11 @@ public:
|
|||
// Calls initLong(), builds a code point range over test.str, and hands the range's
// begin()/end() to testLongLinear<mode, behavior, CONTIG, Unit>.
// When mode == UNSAFE the range comes from unsafeUTFStringCodePoints<UChar32>;
// otherwise it comes from utfStringCodePoints<UChar32, behavior>.
// NOTE(review): each branch below shows two consecutive `auto range` declarations
// (one built from sv, one from test.str). This looks like both sides of a diff hunk
// rendered together; only one declaration per branch can be live. Confirm against the
// repository, and check whether the TODO below and the sv variable are still current.
template<TestMode mode, UTFIllFormedBehavior behavior, typename Unit>
|
||||
void testLongLinearContig(const ImplTest<Unit> &test) {
|
||||
initLong();
|
||||
// TODO: fix utfStringCodePoints() & unsafeUTFStringCodePoints()
|
||||
// to *actually take* string_view arguments.
|
||||
// Currently, if we pass in a string, then the function makes a temporary copy
|
||||
// of the string and creates an [Unsafe]UTFStringCodePoints which
|
||||
// takes a copy of a string_view *which refers to the temporary copy*
|
||||
// which then goes out of scope, taking its heap buffer with it.
|
||||
// Look at unicode/char16ptr.h ConvertibleToU16StringView.
|
||||
// If this means that we can no longer deduce the Unit type, then maybe
|
||||
// remove these functions.
|
||||
// If we can keep them, then pass test.str directly into the ...CodePoints() function.
|
||||
std::basic_string_view<Unit> sv{test.str};
|
||||
if constexpr (mode == UNSAFE) {
|
||||
auto range = unsafeUTFStringCodePoints<UChar32>(sv);
|
||||
auto range = unsafeUTFStringCodePoints<UChar32>(test.str);
|
||||
testLongLinear<mode, behavior, CONTIG, Unit>(test, range.begin(), range.end());
|
||||
} else {
|
||||
auto range = utfStringCodePoints<UChar32, behavior>(sv);
|
||||
auto range = utfStringCodePoints<UChar32, behavior>(test.str);
|
||||
testLongLinear<mode, behavior, CONTIG, Unit>(test, range.begin(), range.end());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue