ICU-22843 Enable constructing UnicodeString from literal in fixed time.

When a string literal is passed to any of the legacy constructors that
take just a plain pointer to a UTF-16 string, the constructor has to
iterate through the string to find its length, even though this length
was known to the compiler (which simply has no way of passing it on to
the constructor).

But when calling the new templated string view constructor instead it
becomes possible for the compiler to use the known length of a string
literal to directly create a string view of the correct size and pass
this on to the constructor.

Replacing the legacy constructors with the new constructor makes this
the default behaviour.
This commit is contained in:
Fredrik Roubert 2024-08-15 02:40:11 +02:00 committed by Fredrik Roubert
parent f062f52c12
commit 6c9d39a08c
3 changed files with 83 additions and 67 deletions

View file

@ -309,13 +309,19 @@ inline OldUChar *toOldUCharPtr(char16_t *p) {
#ifndef U_FORCE_HIDE_INTERNAL_API
/**
* Is T convertible to a std::u16string_view or to a 16-bit std::wstring_view?
* Is T convertible to a std::u16string_view or some other 16-bit string view?
* @internal
*/
template<typename T>
constexpr bool ConvertibleToU16StringView =
std::is_convertible_v<T, std::u16string_view> ||
(U_SIZEOF_WCHAR_T==2 && std::is_convertible_v<T, std::wstring_view>);
std::is_convertible_v<T, std::u16string_view>
#if !U_CHAR16_IS_TYPEDEF
|| std::is_convertible_v<T, std::basic_string_view<uint16_t>>
#endif
#if U_SIZEOF_WCHAR_T==2
|| std::is_convertible_v<T, std::wstring_view>
#endif
;
namespace internal {
/**
@ -324,6 +330,17 @@ namespace internal {
*/
inline std::u16string_view toU16StringView(std::u16string_view sv) {
    // Already a UTF-16 view: hand it back unchanged.
    return sv;
}
#if !U_CHAR16_IS_TYPEDEF
/**
 * Converts a std::basic_string_view<uint16_t> into a std::u16string_view
 * with the same data pointer (wrapped in ConstChar16Ptr) and the same length.
 * Basically undefined behavior, but sometimes a necessary conversion.
 * @internal
 */
inline std::u16string_view toU16StringView(std::basic_string_view<uint16_t> sv) {
    return std::u16string_view(ConstChar16Ptr(sv.data()), sv.length());
}
#endif
#if U_SIZEOF_WCHAR_T==2
/**
* Basically undefined behavior but sometimes necessary conversion
@ -334,6 +351,29 @@ inline std::u16string_view toU16StringView(std::wstring_view sv) {
return { ConstChar16Ptr(sv.data()), sv.length() };
}
#endif
/**
 * Overload selected for every argument type that is not a pointer:
 * the argument is forwarded unchanged to toU16StringView().
 * @internal
 */
template <typename T,
          typename = std::enable_if_t<!std::is_pointer_v<std::remove_reference_t<T>>>>
inline std::u16string_view toU16StringViewNullable(const T& text) {
    // Non-pointer arguments cannot be null, so no check is needed here.
    return toU16StringView(text);
}
/**
 * Overload selected for pointer arguments: a null pointer produces an
 * empty view, any other pointer is forwarded to toU16StringView().
 * @internal
 */
template <typename T,
          typename = std::enable_if_t<std::is_pointer_v<std::remove_reference_t<T>>>,
          typename = void>  // extra parameter keeps this template distinct from the non-pointer overload
inline std::u16string_view toU16StringViewNullable(const T& text) {
    if (text != nullptr) {
        return toU16StringView(text);
    }
    // Null pointers are accepted for backward compatibility.
    return {};
}
} // internal
#endif // U_FORCE_HIDE_INTERNAL_API

View file

@ -105,13 +105,11 @@ class UnicodeStringAppendable; // unicode/appendable.h
* this macro was provided for portability and efficiency when
* initializing UnicodeStrings from literals.
*
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* The string parameter must be a C string literal.
@ -335,13 +333,11 @@ public:
* which is, or which is implicitly convertible to,
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
*
* For performance, you can use std::u16string_view literals with compile-time
* For performance, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str = ...;
* if (str == u"literal"sv) { ... }
* if (str == u"literal") { ... }
* \endcode
* @param text The string view to compare to this string.
* @return true if `text` contains the same characters as this one, false otherwise.
@ -3104,6 +3100,7 @@ public:
*/
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
#ifdef U_HIDE_DRAFT_API
/**
* char16_t* constructor.
*
@ -3112,20 +3109,19 @@ public:
* on the compiler command line or similar.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param text The characters to place in the UnicodeString. `text`
* must be NUL (U+0000) terminated.
* @stable ICU 2.0
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) :
UnicodeString(text, -1) {}
#if !U_CHAR16_IS_TYPEDEF
/**
@ -3137,20 +3133,18 @@ public:
* on the compiler command line or similar.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param text NUL-terminated UTF-16 string
* @stable ICU 59
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
UnicodeString(ConstChar16Ptr(text)) {}
UnicodeString(ConstChar16Ptr(text), -1) {}
#endif
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
@ -3164,21 +3158,20 @@ public:
* on the compiler command line or similar.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param text NUL-terminated UTF-16 string
* @stable ICU 59
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
UnicodeString(ConstChar16Ptr(text)) {}
UnicodeString(ConstChar16Ptr(text), -1) {}
#endif
#endif // U_HIDE_DRAFT_API
/**
* nullptr_t constructor.
@ -3196,13 +3189,11 @@ public:
* char16_t* constructor.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param text The characters to place in the UnicodeString.
@ -3219,13 +3210,11 @@ public:
* Delegates to UnicodeString(const char16_t *, int32_t).
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param text UTF-16 string
@ -3243,13 +3232,11 @@ public:
* Delegates to UnicodeString(const char16_t *, int32_t).
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param text UTF-16 string
@ -3283,9 +3270,9 @@ public:
* @draft ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
explicit inline UnicodeString(const S &text) {
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) {
fUnion.fFields.fLengthAndFlags = kShortString;
doAppend(internal::toU16StringView(text));
doAppend(internal::toU16StringViewNullable(text));
}
#endif // U_HIDE_DRAFT_API
@ -3304,13 +3291,11 @@ public:
* so that both strings then alias the same readonly-text.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString alias = UnicodeString::readOnlyAlias(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString alias = UnicodeString::readOnlyAlias(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param isTerminated specifies if `text` is `NUL`-terminated.
@ -3393,13 +3378,11 @@ public:
* the constructor that takes a US_INV (for its enum EInvariant).
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* It is recommended to mark this constructor "explicit" by
@ -3509,13 +3492,11 @@ public:
* \endcode
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param src String using only invariant characters.

View file

@ -230,11 +230,6 @@ UnicodeString::UnicodeString(UChar32 ch) {
}
}
// Out-of-line constructor from a plain char16_t pointer.
// Sets the length-and-flags field to kShortString, then appends `text`.
UnicodeString::UnicodeString(const char16_t *text) {
fUnion.fFields.fLengthAndFlags = kShortString;
// NOTE(review): the -1 length presumably tells doAppend to find the NUL terminator itself — confirm.
doAppend(text, 0, -1);
}
UnicodeString::UnicodeString(const char16_t *text,
int32_t textLength) {
fUnion.fFields.fLengthAndFlags = kShortString;