ICU-22843 UnicodeString <-> std::u16string_view / wstring_view via templates

This commit is contained in:
Markus Scherer 2024-08-01 11:19:12 -07:00
parent 7ffbe77e12
commit 72206495de
8 changed files with 614 additions and 39 deletions

View file

@ -12,6 +12,7 @@
#if U_SHOW_CPLUSPLUS_API
#include <cstddef>
#include <string_view>
/**
* \file
@ -306,6 +307,36 @@ inline OldUChar *toOldUCharPtr(char16_t *p) {
return reinterpret_cast<OldUChar *>(p);
}
#ifndef U_FORCE_HIDE_INTERNAL_API
/**
 * Compile-time predicate: true when a T converts implicitly to a
 * std::u16string_view, or, on builds where U_SIZEOF_WCHAR_T==2,
 * to a std::wstring_view.
 * @internal
 */
template<typename T>
constexpr bool ConvertibleToU16StringView =
    (U_SIZEOF_WCHAR_T == 2 && std::is_convertible_v<T, std::wstring_view>) ||
    std::is_convertible_v<T, std::u16string_view>;
namespace internal {
/**
 * Identity overload: the argument already is a std::u16string_view,
 * so it is handed back unchanged. This lets generic code call
 * toU16StringView() without first checking the argument type.
 * @internal
 */
inline std::u16string_view toU16StringView(std::u16string_view sv) {
    return sv;
}
#if U_SIZEOF_WCHAR_T==2
/**
 * Reinterprets a std::wstring_view as a std::u16string_view over the same
 * characters and with the same length.
 * Strictly speaking this is undefined behavior, but it is sometimes needed;
 * the pointer conversion is routed through ConstChar16Ptr.
 * @internal
 */
inline std::u16string_view toU16StringView(std::wstring_view sv) {
    const char16_t *units = ConstChar16Ptr(sv.data());
    return std::u16string_view(units, sv.length());
}
#endif
} // internal
#endif // U_FORCE_HIDE_INTERNAL_API
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */

View file

@ -735,7 +735,9 @@
* @{
* \def U_DECLARE_UTF16
* Do not use this macro because it is not defined on all platforms.
* Use the UNICODE_STRING or U_STRING_DECL macros instead.
* In C++, use std::u16string_view literals, see the UNICODE_STRING docs.
* In C, use u"UTF-16 literals".
* See also the public U_STRING_DECL macro.
* @internal
*/
#ifdef U_DECLARE_UTF16

View file

@ -33,6 +33,7 @@
#if U_SHOW_CPLUSPLUS_API
#include <cstddef>
#include <string_view>
#include "unicode/char16ptr.h"
#include "unicode/rep.h"
#include "unicode/std_string.h"
@ -97,16 +98,21 @@ class UnicodeStringAppendable; // unicode/appendable.h
#define US_INV icu::UnicodeString::kInvariant
/**
* Unicode String literals in C++.
* \def UNICODE_STRING
* Obsolete macro approximating UnicodeString literals.
*
* Note: these macros are not recommended for new code.
* Prior to the availability of C++11 and u"unicode string literals",
* these macros were provided for portability and efficiency when
* Prior to the availability of C++11 and u"UTF-16 string literals",
* this macro was provided for portability and efficiency when
* initializing UnicodeStrings from literals.
*
* They work only for strings that contain "invariant characters", i.e.,
* only latin letters, digits, and some punctuation.
* See utypes.h for details.
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* \endcode
*
* The string parameter must be a C string literal.
* The length of the string, not including the terminating
@ -121,16 +127,12 @@ class UnicodeStringAppendable; // unicode/appendable.h
/**
* Unicode String literals in C++.
* Dependent on the platform properties, different UnicodeString
* constructors should be used to create a UnicodeString object from
* a string literal.
* The macros are defined for improved performance.
* They work only for strings that contain "invariant characters", i.e.,
* only latin letters, digits, and some punctuation.
* See utypes.h for details.
* Obsolete macro approximating UnicodeString literals.
* See UNICODE_STRING.
*
* The string parameter must be a C string literal.
* @stable ICU 2.0
* @see UNICODE_STRING
*/
#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
@ -327,6 +329,32 @@ public:
*/
inline bool operator== (const UnicodeString& text) const;
#ifndef U_HIDE_DRAFT_API
/**
* Equality operator. Performs only bitwise comparison with `text`
* which is, or which is implicitly convertible to,
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
*
* For performance, you can use std::u16string_view literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str = ...;
* if (str == u"literal"sv) { ... }
* \endcode
* @param text The string view to compare to this string.
* @return true if `text` contains the same characters as this one, false otherwise.
* @draft ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline bool operator==(const S &text) const {
    const std::u16string_view view(internal::toU16StringView(text));
    // A bogus string never compares equal to a string view.
    if (isBogus()) {
        return false;
    }
    // Held as unsigned so that comparing against view.length() does not
    // trigger a signed/unsigned compiler warning.
    const uint32_t ownLength = length();
    if (ownLength != view.length()) {
        return false;
    }
    return doEquals(view.data(), ownLength);
}
#endif // U_HIDE_DRAFT_API
/**
* Inequality operator. Performs only bitwise comparison.
* @param text The UnicodeString to compare to this one.
@ -1897,6 +1925,24 @@ public:
*/
UnicodeString &fastCopyFrom(const UnicodeString &src);
#ifndef U_HIDE_DRAFT_API
/**
* Assignment operator. Replaces the characters in this UnicodeString
* with a copy of the characters from the `src`
* which is, or which is implicitly convertible to,
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
*
* @param src The string view containing the characters to copy.
* @return a reference to this
* @draft ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline UnicodeString &operator=(const S &src) {
    // unBogus() runs first; only then is the current length read.
    unBogus();
    const std::u16string_view view = internal::toU16StringView(src);
    const int32_t oldLength = length();
    // Replace the whole current contents [0, oldLength) with the view.
    return doReplace(0, oldLength, view);
}
#endif // U_HIDE_DRAFT_API
/**
* Move assignment operator; might leave src in bogus state.
* This string will have the same contents and state that the source string had.
@ -2146,6 +2192,23 @@ public:
*/
inline UnicodeString& operator+= (const UnicodeString& srcText);
#ifndef U_HIDE_DRAFT_API
/**
* Append operator. Appends the characters in `src`
* which is, or which is implicitly convertible to,
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view,
* to the UnicodeString object.
*
* @param src the source for the new characters
* @return a reference to this
* @draft ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline UnicodeString& operator+=(const S &src) {
    // Normalize the argument to a UTF-16 view, then append it.
    const std::u16string_view view = internal::toU16StringView(src);
    return doAppend(view);
}
#endif // U_HIDE_DRAFT_API
/**
* Append the characters
* in `srcText` in the range
@ -2191,8 +2254,8 @@ public:
int32_t srcLength);
/**
* Append the characters in `srcChars` to the UnicodeString object
* at offset `start`. `srcChars` is not modified.
* Append the characters in `srcChars` to the UnicodeString object.
* `srcChars` is not modified.
* @param srcChars the source for the new characters
* @param srcLength the number of Unicode characters in `srcChars`;
* can be -1 if `srcChars` is NUL-terminated
@ -2202,6 +2265,23 @@ public:
inline UnicodeString& append(ConstChar16Ptr srcChars,
int32_t srcLength);
#ifndef U_HIDE_DRAFT_API
/**
* Appends the characters in `src`
* which is, or which is implicitly convertible to,
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view,
* to the UnicodeString object.
*
* @param src the source for the new characters
* @return a reference to this
* @draft ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline UnicodeString& append(const S &src) {
    // Normalize the argument to a UTF-16 view, then append it.
    const std::u16string_view view = internal::toU16StringView(src);
    return doAppend(view);
}
#endif // U_HIDE_DRAFT_API
/**
* Append the code unit `srcChar` to the UnicodeString object.
* @param srcChar the code unit to append
@ -2925,6 +3005,37 @@ public:
*/
const char16_t *getTerminatedBuffer();
#ifndef U_HIDE_DRAFT_API
/**
* Converts to a std::u16string_view.
*
* @return a string view of the contents of this string
* @draft ICU 76
*/
inline operator std::u16string_view() const {
    // The view spans getBuffer() for length() code units.
    const char16_t *units = getBuffer();
    const auto count = static_cast<std::u16string_view::size_type>(length());
    return std::u16string_view(units, count);
}
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* Converts to a std::wstring_view.
*
* Note: This should remain draft until C++ standard plans
* about char16_t vs. wchar_t become clearer.
*
* @return a string view of the contents of this string
* @draft ICU 76
*/
inline operator std::wstring_view() const {
    const char16_t *units = getBuffer();
#ifdef U_ALIASING_BARRIER
    // Applied to the pointer before it is reinterpreted as wchar_t.
    U_ALIASING_BARRIER(units);
#endif
    const wchar_t *wide = reinterpret_cast<const wchar_t *>(units);
    const auto count = static_cast<std::wstring_view::size_type>(length());
    return std::wstring_view(wide, count);
}
#endif // U_SIZEOF_WCHAR_T
#endif // U_HIDE_DRAFT_API
//========================================
// Constructors
//========================================
@ -2975,6 +3086,17 @@ public:
* It is recommended to mark this constructor "explicit" by
* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
* on the compiler command line or similar.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* \endcode
*
* @param text The characters to place in the UnicodeString. `text`
* must be NUL (U+0000) terminated.
* @stable ICU 2.0
@ -2989,6 +3111,17 @@ public:
* It is recommended to mark this constructor "explicit" by
* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
* on the compiler command line or similar.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* \endcode
*
* @param text NUL-terminated UTF-16 string
* @stable ICU 59
*/
@ -3005,6 +3138,17 @@ public:
* It is recommended to mark this constructor "explicit" by
* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
* on the compiler command line or similar.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* \endcode
*
* @param text NUL-terminated UTF-16 string
* @stable ICU 59
*/
@ -3026,6 +3170,17 @@ public:
/**
* char16_t* constructor.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* \endcode
*
* @param text The characters to place in the UnicodeString.
* @param textLength The number of Unicode characters in `text`
* to copy.
@ -3038,6 +3193,17 @@ public:
/**
* uint16_t * constructor.
* Delegates to UnicodeString(const char16_t *, int32_t).
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* \endcode
*
* @param text UTF-16 string
* @param textLength string length
* @stable ICU 59
@ -3051,7 +3217,18 @@ public:
* wchar_t * constructor.
* (Only defined if U_SIZEOF_WCHAR_T==2.)
* Delegates to UnicodeString(const char16_t *, int32_t).
* @param text NUL-terminated UTF-16 string
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* \endcode
*
* @param text UTF-16 string
* @param textLength string length
* @stable ICU 59
*/
@ -3068,6 +3245,26 @@ public:
*/
inline UnicodeString(const std::nullptr_t text, int32_t textLength);
#ifndef U_HIDE_DRAFT_API
/**
* Constructor from `text`
* which is, or which is implicitly convertible to,
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
* The string is bogus if the string view is too long.
*
* If you need a UnicodeString but need not copy the string view contents,
* then you can call the UnicodeString::readOnlyAlias() function instead of this constructor.
*
* @param text UTF-16 string
* @draft ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
explicit inline UnicodeString(const S &text) {
    // Start from the kShortString flags, then append the view's contents.
    fUnion.fFields.fLengthAndFlags = kShortString;
    const std::u16string_view view = internal::toU16StringView(text);
    doAppend(view);
}
#endif // U_HIDE_DRAFT_API
/**
* Readonly-aliasing char16_t* constructor.
* The text will be used for the UnicodeString object, but
@ -3082,6 +3279,16 @@ public:
* When using fastCopyFrom(), the text will be aliased again,
* so that both strings then alias the same readonly-text.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString alias = UnicodeString::readOnlyAlias(u"literal"sv);
* if (alias == u"other literal"sv) { ... }
* \endcode
*
* @param isTerminated specifies if `text` is `NUL`-terminated.
* This must be true if `textLength==-1`.
* @param text The characters to alias for the UnicodeString.
@ -3160,8 +3367,16 @@ public:
*
* For ASCII (really "invariant character") strings it is more efficient to use
* the constructor that takes a US_INV (for its enum EInvariant).
* For ASCII (invariant-character) string literals, see UNICODE_STRING and
* UNICODE_STRING_SIMPLE.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* \endcode
*
* It is recommended to mark this constructor "explicit" by
* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
@ -3169,8 +3384,6 @@ public:
* @param codepageData an array of bytes, null-terminated,
* in the platform's default codepage.
* @stable ICU 2.0
* @see UNICODE_STRING
* @see UNICODE_STRING_SIMPLE
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
@ -3270,6 +3483,17 @@ public:
* // use ustr ...
* }
* \endcode
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* \endcode
*
* @param src String using only invariant characters.
* @param textLength Length of src, or -1 if NUL-terminated.
* @param inv Signature-distinguishing parameter, use US_INV.
@ -3343,6 +3567,35 @@ public:
*/
virtual ~UnicodeString();
#ifndef U_HIDE_DRAFT_API
/**
* Readonly-aliasing factory method.
* Aliases the same buffer as the input `text`
* which is, or which is implicitly convertible to,
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
* The string is bogus if the string view is too long.
*
* The text will be used for the UnicodeString object, but
* it will not be released when the UnicodeString is destroyed.
* This has copy-on-write semantics:
* When the string is modified, then the buffer is first copied into
* newly allocated memory.
* The aliased buffer is never modified.
*
* In an assignment to another UnicodeString, when using the copy constructor
* or the assignment operator, the text will be copied.
* When using fastCopyFrom(), the text will be aliased again,
* so that both strings then alias the same readonly-text.
*
* @param text The string view to alias for the UnicodeString.
* @draft ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
static inline UnicodeString readOnlyAlias(const S &text) {
    // Normalize to a UTF-16 view and delegate to the non-template helper.
    const std::u16string_view view = internal::toU16StringView(text);
    return readOnlyAliasFromU16StringView(view);
}
#endif // U_HIDE_DRAFT_API
/**
* Create a UnicodeString from a UTF-8 string.
* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
@ -3470,6 +3723,8 @@ protected:
virtual UChar32 getChar32At(int32_t offset) const override;
private:
static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);
// For char* constructors. Could be made public.
UnicodeString &setToUTF8(StringPiece utf8);
// For extract(char*).
@ -3485,7 +3740,10 @@ private:
* Internal string contents comparison, called by operator==.
* Requires: this & text not bogus and have same lengths.
*/
UBool doEquals(const UnicodeString &text, int32_t len) const;
inline UBool doEquals(const UnicodeString &text, int32_t len) const {
return doEquals(text.getArrayStart(), len);
}
UBool doEquals(const char16_t *text, int32_t len) const;
inline UBool
doEqualsSubstring(int32_t start,
@ -3580,9 +3838,11 @@ private:
const char16_t *srcChars,
int32_t srcStart,
int32_t srcLength);
UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
UnicodeString& doAppend(std::u16string_view src);
UnicodeString& doReverse(int32_t start,
int32_t length);
@ -3802,7 +4062,7 @@ private:
};
/**
* Create a new UnicodeString with the concatenation of two others.
* Creates a new UnicodeString from the concatenation of two others.
*
* @param s1 The first string to be copied to the new one.
* @param s2 The second string to be copied to the new one, after s1.
@ -3812,6 +4072,29 @@ private:
U_COMMON_API UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2);
#ifndef U_HIDE_DRAFT_API
/**
* Creates a new UnicodeString from the concatenation of a UnicodeString and `s2`
* which is, or which is implicitly convertible to,
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
*
* @param s1 The string to be copied to the new one.
* @param s2 The string view to be copied to the new string, after s1.
* @return UnicodeString(s1).append(s2)
* @draft ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline UnicodeString operator+(const UnicodeString &s1, const S &s2) {
// Converts s2 to a UTF-16 view and delegates the concatenation to
// unistr_internalConcat().
// NOTE(review): unistr_internalConcat() is declared only after this template.
// The call resolves because its second argument depends on S, so the name is
// looked up at instantiation time (presumably via argument-dependent lookup on
// UnicodeString). Do not hoist the converted view into a non-dependent local.
return unistr_internalConcat(s1, internal::toU16StringView(s2));
}
#endif // U_HIDE_DRAFT_API
#ifndef U_FORCE_HIDE_INTERNAL_API
/** @internal */
U_COMMON_API UnicodeString U_EXPORT2
unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2);
#endif
//========================================
// Inline members
//========================================

View file

@ -18,6 +18,8 @@
* Character property dependent functions moved here from uniset.cpp
*/
#include <string_view>
#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "unicode/parsepos.h"
@ -45,16 +47,23 @@
#include "uassert.h"
#include "hash.h"
// Makes u"literal"sv std::u16string_view literals possible.
// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
using namespace std::string_view_literals;
U_NAMESPACE_USE
namespace {
// Special property set IDs
static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
static const char ASCII[] = "ASCII"; // [\u0000-\u007F]
static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
constexpr char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
constexpr char ASCII[] = "ASCII"; // [\u0000-\u007F]
constexpr char ASSIGNED[] = "Assigned"; // [:^Cn:]
// Unicode name property alias
#define NAME_PROP "na"
#define NAME_PROP_LENGTH 2
constexpr std::u16string_view NAME_PROP(u"na"sv);
} // namespace
// Cached sets ------------------------------------------------------------- ***
@ -83,7 +92,7 @@ namespace {
// Cache some sets for other services -------------------------------------- ***
void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
U_ASSERT(uni32Singleton == nullptr);
uni32Singleton = new UnicodeSet(UNICODE_STRING_SIMPLE("[:age=3.2:]"), errorCode);
uni32Singleton = new UnicodeSet(UnicodeString(u"[:age=3.2:]"sv), errorCode);
if(uni32Singleton==nullptr) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
} else {
@ -1105,7 +1114,7 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
// support args of (UProperty, char*) then we can remove
// NAME_PROP and make this a little more efficient.
valueName = propName;
propName = UnicodeString(NAME_PROP, NAME_PROP_LENGTH, US_INV);
propName = NAME_PROP;
}
}

View file

@ -20,6 +20,8 @@
******************************************************************************
*/
#include <string_view>
#include "unicode/utypes.h"
#include "unicode/appendable.h"
#include "unicode/putil.h"
@ -107,12 +109,34 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2) {
return
UnicodeString(s1.length() + s2.length() + 1, static_cast<UChar32>(0), 0).
append(s1).
append(s2);
int32_t sumLengths;
if (uprv_add32_overflow(s1.length(), s2.length(), &sumLengths)) {
UnicodeString bogus;
bogus.setToBogus();
return bogus;
}
if (sumLengths != INT32_MAX) {
++sumLengths; // space for a terminating NUL if we need one
}
return UnicodeString(sumLengths, static_cast<UChar32>(0), 0).append(s1).append(s2);
}
// Concatenates s1 and the string view s2 into a new UnicodeString.
// Returns a bogus string when s2 alone, or the combined length,
// does not fit into an int32_t.
U_COMMON_API UnicodeString U_EXPORT2
unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2) {
    int32_t capacity = 0;
    const bool tooLong =
        s2.length() > INT32_MAX ||
        uprv_add32_overflow(s1.length(), static_cast<int32_t>(s2.length()), &capacity);
    if (tooLong) {
        UnicodeString failed;
        failed.setToBogus();
        return failed;
    }
    // Leave space for a terminating NUL if we need one, unless that would overflow.
    if (capacity != INT32_MAX) {
        capacity += 1;
    }
    return UnicodeString(capacity, static_cast<UChar32>(0), 0)
        .append(s1)
        .append(s2);
}
//========================================
// Reference Counting functions, put at top of file so that optimizing compilers
// have a chance to automatically inline.
@ -279,6 +303,16 @@ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
}
}
// Builds a UnicodeString via setTo(false, data, length) from the view.
// The result is bogus when the view's length exceeds INT32_MAX.
UnicodeString UnicodeString::readOnlyAliasFromU16StringView(std::u16string_view text) {
    UnicodeString alias;
    if (text.length() > INT32_MAX) {
        alias.setToBogus();
        return alias;
    }
    // isTerminated=false: the length is passed explicitly.
    alias.setTo(false, text.data(), static_cast<int32_t>(text.length()));
    return alias;
}
#if U_CHARSET_IS_UTF8
UnicodeString::UnicodeString(const char *codepageData) {
@ -656,10 +690,10 @@ UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
// Read-only implementation
//========================================
UBool
UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
// Requires: this & text not bogus and have same lengths.
UnicodeString::doEquals(const char16_t *text, int32_t len) const {
// Requires: this not bogus and have same lengths.
// Byte-wise comparison works for equality regardless of endianness.
return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
return uprv_memcmp(getArrayStart(), text, len * U_SIZEOF_UCHAR) == 0;
}
UBool
@ -1574,6 +1608,18 @@ UnicodeString::doReplace(int32_t start,
return *this;
}
// String-view flavor of doReplace(): forwards to the pointer-based overload.
// Does nothing when this string is not writable; sets this string to bogus
// when the view's length exceeds INT32_MAX.
UnicodeString&
UnicodeString::doReplace(int32_t start, int32_t length, std::u16string_view src) {
    if (isWritable()) {
        if (src.length() <= INT32_MAX) {
            return doReplace(start, length, src.data(), 0, static_cast<int32_t>(src.length()));
        }
        setToBogus();
    }
    return *this;
}
// Versions of doReplace() only for append() variants.
// doReplace() and doAppend() optimize for different cases.
@ -1662,6 +1708,18 @@ UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcL
return *this;
}
// String-view flavor of doAppend(): forwards to the pointer-based overload.
// Does nothing when this string is not writable or the view is empty;
// sets this string to bogus when the view's length exceeds INT32_MAX.
UnicodeString&
UnicodeString::doAppend(std::u16string_view src) {
    const bool nothingToDo = !isWritable() || src.empty();
    if (!nothingToDo) {
        if (src.length() <= INT32_MAX) {
            return doAppend(src.data(), 0, static_cast<int32_t>(src.length()));
        }
        setToBogus();
    }
    return *this;
}
/**
* Replaceable API
*/

View file

@ -8,6 +8,7 @@
#include <cstdlib>
#include <cmath>
#include <limits>
#include <string_view>
#include <stdlib.h>
#include "unicode/plurrule.h"
@ -22,6 +23,10 @@
#include "uassert.h"
#include "util.h"
// Makes u"literal"sv std::u16string_view literals possible.
// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
using namespace std::string_view_literals;
using namespace icu;
using namespace icu::number;
using namespace icu::number::impl;
@ -1082,7 +1087,7 @@ UnicodeString DecimalQuantity::toScientificString() const {
result.append(u'E');
int32_t _scale = upperPos + scale + exponent;
if (_scale == INT32_MIN) {
result.append({u"-2147483648", -1});
result.append(u"-2147483648"sv);
return result;
} else if (_scale < 0) {
_scale *= -1;

View file

@ -6,6 +6,8 @@
* others. All Rights Reserved.
********************************************************************/
#include <string>
#include <string_view>
#include <utility>
#include "ustrtest.h"
@ -22,6 +24,13 @@
#include "cmemory.h"
#include "charstr.h"
// Makes u"literal"sv std::u16string_view literals possible.
// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
using namespace std::string_view_literals;
// Same for u"literal"s std::u16string literals.
using namespace std::string_literals;
#if 0
#include "unicode/ustream.h"
@ -68,6 +77,8 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &
TESTCASE_AUTO(TestNullPointers);
TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
TESTCASE_AUTO(TestLargeAppend);
TESTCASE_AUTO(TestU16StringView);
TESTCASE_AUTO(TestWStringView);
TESTCASE_AUTO_END;
}
@ -2400,3 +2411,177 @@ void UnicodeStringTest::TestLargeAppend() {
}
}
}
// Exercises the UnicodeString operations that accept or produce a
// std::u16string_view: constructors, readOnlyAlias(), operator==, operator=,
// operator+=, append(), the conversion operator, and operator+.
// Each is tried with a raw pointer, a string view, and a std::u16string.
void UnicodeStringTest::TestU16StringView() {
IcuTestErrorCode status(*this, "TestU16StringView");
// ICU-22843 Test ICU 76 new UnicodeString APIs that take or return a std::u16string_view
// or something convertible to it.
// NOTE: Keep this function very parallel with TestWStringView()!
const char16_t *p16 = u"p16";
std::u16string_view sv16 = u"sv16";
std::u16string str16 = u"str16";
// These copy the string contents.
UnicodeString fromPtr(p16); // pointer is convertible to std::u16string_view
UnicodeString fromSV(sv16); // std::u16string_view itself
UnicodeString fromSV2(u"sv16_2"sv); // std::u16string_view literal
UnicodeString fromStr(str16); // std::u16string is convertible to std::u16string_view
assertEquals("UnicodeString(const char16_t *pointer)", UnicodeString(u"p16", 3), fromPtr);
assertEquals("UnicodeString(std::u16string_view)", UnicodeString(u"sv16", 4), fromSV);
assertEquals("UnicodeString(std::u16string_view literal)", UnicodeString(u"sv16_2", 6), fromSV2);
assertEquals("UnicodeString(std::u16string)", UnicodeString(u"str16", 5), fromStr);
// Read-only aliases
// Each alias must point at the source buffer itself and report the source length.
UnicodeString aliasFromPtr = UnicodeString::readOnlyAlias(p16);
assertTrue("aliasFromPtr pointer alias", aliasFromPtr.getBuffer() == p16);
assertEquals("aliasFromPtr length", 3, aliasFromPtr.length());
UnicodeString aliasFromSV = UnicodeString::readOnlyAlias(sv16);
assertTrue("aliasFromSV pointer alias", aliasFromSV.getBuffer() == sv16.data());
assertEquals("aliasFromSV length", (int32_t)sv16.length(), aliasFromSV.length());
UnicodeString aliasFromStr = UnicodeString::readOnlyAlias(str16);
assertTrue("aliasFromStr pointer alias", aliasFromStr.getBuffer() == str16.data());
assertEquals("aliasFromStr length", (int32_t)str16.length(), aliasFromStr.length());
// operator==
// One unequal and one equal comparison for each argument kind.
UnicodeString any(true, u"any", 3);
assertFalse("any == pointer-p16", any == p16);
assertTrue("any == pointer-any", any == u"any");
assertFalse("any == string_view-sv16", any == sv16);
assertTrue("any == string_view-any", any == u"any"sv);
assertFalse("any == string-str16", any == str16);
assertTrue("any == string-any", any == u"any"s);
// Assignment copies the string contents.
UnicodeString x;
x = p16;
assertEquals("x = p16", UnicodeString(true, u"p16", 3), x);
x = sv16;
assertEquals("x = sv16", UnicodeString(true, u"sv16", 4), x);
x = str16;
assertEquals("x = str16", UnicodeString(true, u"str16", 5), x);
// Append
// x still holds u"str16" from the last assignment, so each += extends that value.
x += p16;
assertEquals("+= p16", UnicodeString(true, u"str16p16", 8), x);
x += sv16;
assertEquals("+= sv16", UnicodeString(true, u"str16p16sv16", 12), x);
x += str16;
assertEquals("+= str16", UnicodeString(true, u"str16p16sv16str16", 17), x);
// Reset x to a short known value before exercising append().
x = u"x"sv;
x.append(p16);
assertEquals("append(p16)", UnicodeString(true, u"xp16", 4), x);
x.append(sv16);
assertEquals("append(sv16)", UnicodeString(true, u"xp16sv16", 8), x);
x.append(str16);
assertEquals("append(str16)", UnicodeString(true, u"xp16sv16str16", 13), x);
// Convert UnicodeString to string view.
std::u16string_view sv16FromUniStr(any);
assertTrue("sv16FromUniStr buffer alias", sv16FromUniStr.data() == any.getBuffer());
assertEquals("sv16FromUniStr length", any.length(), (int32_t)sv16FromUniStr.length());
// Just to show convenience: Convert UnicodeString to string view, then to std string.
std::u16string str16FromUniStr(any);
assertTrue("str16FromUniStr contents", str16FromUniStr == u"any"s);
// operator+
x = any + p16;
assertEquals("any + p16", UnicodeString(true, u"anyp16", 6), x);
x = any + sv16;
assertEquals("any + sv16", UnicodeString(true, u"anysv16", 7), x);
x = any + str16;
assertEquals("any + str16", UnicodeString(true, u"anystr16", 8), x);
}
// Exercises the UnicodeString operations that accept or produce a
// std::wstring_view: constructors, readOnlyAlias(), operator==, operator=,
// operator+=, append(), the conversion operator, and operator+.
// Each is tried with a raw pointer, a string view, and a std::wstring.
// The body is compiled only when U_SIZEOF_WCHAR_T==2; otherwise the test is empty.
void UnicodeStringTest::TestWStringView() {
#if U_SIZEOF_WCHAR_T==2
    // Label the error code with this test's own name so that failures
    // are not attributed to TestU16StringView.
    IcuTestErrorCode status(*this, "TestWStringView");
    // ICU-22843 Test ICU 76 new UnicodeString APIs that take or return a std::wstring_view
    // or something convertible to it.
    // NOTE: Keep this function very parallel with TestU16StringView()!
    const wchar_t *p16 = L"p16";
    std::wstring_view sv16 = L"sv16";
    std::wstring str16 = L"str16";

    // These copy the string contents.
    UnicodeString fromPtr(p16);  // pointer is convertible to std::wstring_view
    UnicodeString fromSV(sv16);  // std::wstring_view itself
    UnicodeString fromSV2(L"sv16_2"sv);  // std::wstring_view literal
    UnicodeString fromStr(str16);  // std::wstring is convertible to std::wstring_view
    assertEquals("UnicodeString(const wchar_t *pointer)", UnicodeString(L"p16", 3), fromPtr);
    assertEquals("UnicodeString(std::wstring_view)", UnicodeString(L"sv16", 4), fromSV);
    assertEquals("UnicodeString(std::wstring_view literal)", UnicodeString(L"sv16_2", 6), fromSV2);
    assertEquals("UnicodeString(std::wstring)", UnicodeString(L"str16", 5), fromStr);

    // Read-only aliases
    // Each alias must point at the source buffer itself and report the source length.
    UnicodeString aliasFromPtr = UnicodeString::readOnlyAlias(p16);
    assertTrue("aliasFromPtr pointer alias",
               aliasFromPtr.getBuffer() == reinterpret_cast<const char16_t *>(p16));
    assertEquals("aliasFromPtr length", 3, aliasFromPtr.length());

    UnicodeString aliasFromSV = UnicodeString::readOnlyAlias(sv16);
    assertTrue("aliasFromSV pointer alias",
               aliasFromSV.getBuffer() == reinterpret_cast<const char16_t *>(sv16.data()));
    assertEquals("aliasFromSV length", (int32_t)sv16.length(), aliasFromSV.length());

    UnicodeString aliasFromStr = UnicodeString::readOnlyAlias(str16);
    assertTrue("aliasFromStr pointer alias",
               aliasFromStr.getBuffer() == reinterpret_cast<const char16_t *>(str16.data()));
    assertEquals("aliasFromStr length", (int32_t)str16.length(), aliasFromStr.length());

    // operator==
    // One unequal and one equal comparison for each argument kind.
    UnicodeString any(true, L"any", 3);
    assertFalse("any == pointer-p16", any == p16);
    assertTrue("any == pointer-any", any == L"any");
    assertFalse("any == string_view-sv16", any == sv16);
    assertTrue("any == string_view-any", any == L"any"sv);
    assertFalse("any == string-str16", any == str16);
    assertTrue("any == string-any", any == L"any"s);

    // Assignment copies the string contents.
    UnicodeString x;
    x = p16;
    assertEquals("x = p16", UnicodeString(true, L"p16", 3), x);
    x = sv16;
    assertEquals("x = sv16", UnicodeString(true, L"sv16", 4), x);
    x = str16;
    assertEquals("x = str16", UnicodeString(true, L"str16", 5), x);

    // Append
    // x still holds L"str16" from the last assignment, so each += extends that value.
    x += p16;
    assertEquals("+= p16", UnicodeString(true, L"str16p16", 8), x);
    x += sv16;
    assertEquals("+= sv16", UnicodeString(true, L"str16p16sv16", 12), x);
    x += str16;
    assertEquals("+= str16", UnicodeString(true, L"str16p16sv16str16", 17), x);

    // Reset x to a short known value before exercising append().
    x = L"x"sv;
    x.append(p16);
    assertEquals("append(p16)", UnicodeString(true, L"xp16", 4), x);
    x.append(sv16);
    assertEquals("append(sv16)", UnicodeString(true, L"xp16sv16", 8), x);
    x.append(str16);
    assertEquals("append(str16)", UnicodeString(true, L"xp16sv16str16", 13), x);

    // Convert UnicodeString to string view.
    std::wstring_view sv16FromUniStr(any);
    assertTrue("sv16FromUniStr buffer alias",
               reinterpret_cast<const char16_t *>(sv16FromUniStr.data()) == any.getBuffer());
    assertEquals("sv16FromUniStr length", any.length(), (int32_t)sv16FromUniStr.length());

    // Just to show convenience: Convert UnicodeString to string view, then to std string.
    std::wstring str16FromUniStr(any);
    assertTrue("str16FromUniStr contents", str16FromUniStr == L"any"s);

    // operator+
    x = any + p16;
    assertEquals("any + p16", UnicodeString(true, L"anyp16", 6), x);
    x = any + sv16;
    assertEquals("any + sv16", UnicodeString(true, L"anysv16", 7), x);
    x = any + str16;
    assertEquals("any + str16", UnicodeString(true, L"anystr16", 8), x);
#endif
}

View file

@ -98,6 +98,8 @@ public:
void TestNullPointers();
void TestUnicodeStringInsertAppendToSelf();
void TestLargeAppend();
void TestU16StringView();
void TestWStringView();
};
#endif