mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-22843 UnicodeString <-> std::u16string_view / wstring_view via templates
This commit is contained in:
parent
7ffbe77e12
commit
72206495de
8 changed files with 614 additions and 39 deletions
icu4c/source
common
i18n
test/intltest
|
@ -12,6 +12,7 @@
|
|||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include <cstddef>
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
|
||||
/**
|
||||
* \file
|
||||
|
@ -306,6 +307,36 @@ inline OldUChar *toOldUCharPtr(char16_t *p) {
|
|||
return reinterpret_cast<OldUChar *>(p);
|
||||
}
|
||||
|
||||
#ifndef U_FORCE_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Is T convertible to a std::u16string_view or to a 16-bit std::wstring_view?
|
||||
* @internal
|
||||
*/
|
||||
template<typename T>
|
||||
constexpr bool ConvertibleToU16StringView =
|
||||
std::is_convertible_v<T, std::u16string_view> ||
|
||||
(U_SIZEOF_WCHAR_T==2 && std::is_convertible_v<T, std::wstring_view>);
|
||||
|
||||
namespace internal {
|
||||
/**
|
||||
* Pass-through overload.
|
||||
* @internal
|
||||
*/
|
||||
inline std::u16string_view toU16StringView(std::u16string_view sv) {
    // Already the target type: hand the view back unchanged so that callers can
    // use one spelling for every accepted source type.
    return sv;
}
|
||||
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
/**
|
||||
* Basically undefined behavior but sometimes necessary conversion
|
||||
* from std::wstring_view to std::u16string_view.
|
||||
* @internal
|
||||
*/
|
||||
inline std::u16string_view toU16StringView(std::wstring_view sv) {
|
||||
return { ConstChar16Ptr(sv.data()), sv.length() };
|
||||
}
|
||||
#endif
|
||||
} // internal
|
||||
#endif // U_FORCE_HIDE_INTERNAL_API
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
|
|
@ -735,7 +735,9 @@
|
|||
* @{
|
||||
* \def U_DECLARE_UTF16
|
||||
* Do not use this macro because it is not defined on all platforms.
|
||||
* Use the UNICODE_STRING or U_STRING_DECL macros instead.
|
||||
* In C++, use std::u16string_view literals, see the UNICODE_STRING docs.
|
||||
* In C, use u"UTF-16 literals".
|
||||
* See also the public U_STRING_DECL macro.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_DECLARE_UTF16
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include <cstddef>
|
||||
#include <string_view>
|
||||
#include "unicode/char16ptr.h"
|
||||
#include "unicode/rep.h"
|
||||
#include "unicode/std_string.h"
|
||||
|
@ -97,16 +98,21 @@ class UnicodeStringAppendable; // unicode/appendable.h
|
|||
#define US_INV icu::UnicodeString::kInvariant
|
||||
|
||||
/**
|
||||
* Unicode String literals in C++.
|
||||
* \def UNICODE_STRING
|
||||
* Obsolete macro approximating UnicodeString literals.
|
||||
*
|
||||
* Note: these macros are not recommended for new code.
|
||||
* Prior to the availability of C++11 and u"unicode string literals",
|
||||
* these macros were provided for portability and efficiency when
|
||||
* Prior to the availability of C++11 and u"UTF-16 string literals",
|
||||
* this macro was provided for portability and efficiency when
|
||||
* initializing UnicodeStrings from literals.
|
||||
*
|
||||
* They work only for strings that contain "invariant characters", i.e.,
|
||||
* only latin letters, digits, and some punctuation.
|
||||
* See utypes.h for details.
|
||||
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
|
||||
* length determination:
|
||||
* \code
|
||||
* #include <string_view>
|
||||
* using namespace std::string_view_literals;
|
||||
* UnicodeString str(u"literal"sv);
|
||||
* if (str == u"other literal"sv) { ... }
|
||||
* \endcode
|
||||
*
|
||||
* The string parameter must be a C string literal.
|
||||
* The length of the string, not including the terminating
|
||||
|
@ -121,16 +127,12 @@ class UnicodeStringAppendable; // unicode/appendable.h
|
|||
|
||||
/**
|
||||
* Unicode String literals in C++.
|
||||
* Dependent on the platform properties, different UnicodeString
|
||||
* constructors should be used to create a UnicodeString object from
|
||||
* a string literal.
|
||||
* The macros are defined for improved performance.
|
||||
* They work only for strings that contain "invariant characters", i.e.,
|
||||
* only latin letters, digits, and some punctuation.
|
||||
* See utypes.h for details.
|
||||
* Obsolete macro approximating UnicodeString literals.
|
||||
* See UNICODE_STRING.
|
||||
*
|
||||
* The string parameter must be a C string literal.
|
||||
* @stable ICU 2.0
|
||||
* @see UNICODE_STRING
|
||||
*/
|
||||
#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
|
||||
|
||||
|
@ -327,6 +329,32 @@ public:
|
|||
*/
|
||||
inline bool operator== (const UnicodeString& text) const;
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Equality operator. Performs only bitwise comparison with `text`
|
||||
* which is, or which is implicitly convertible to,
|
||||
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
|
||||
*
|
||||
* For performance, you can use std::u16string_view literals with compile-time
|
||||
* length determination:
|
||||
* \code
|
||||
* #include <string_view>
|
||||
* using namespace std::string_view_literals;
|
||||
* UnicodeString str = ...;
|
||||
* if (str == u"literal"sv) { ... }
|
||||
* \endcode
|
||||
* @param text The string view to compare to this string.
|
||||
* @return true if `text` contains the same characters as this one, false otherwise.
|
||||
* @draft ICU 76
|
||||
*/
|
||||
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline bool operator==(const S &text) const {
    const std::u16string_view other(internal::toU16StringView(text));
    // A bogus string never compares equal to a string view.
    if (isBogus()) {
        return false;
    }
    // Held as unsigned so that the comparison against the view's size_type
    // does not trigger a signed/unsigned compiler warning.
    const uint32_t ownLength = length();
    if (ownLength != other.length()) {
        return false;
    }
    // Same length: compare the code units.
    return doEquals(other.data(), ownLength);
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Inequality operator. Performs only bitwise comparison.
|
||||
* @param text The UnicodeString to compare to this one.
|
||||
|
@ -1897,6 +1925,24 @@ public:
|
|||
*/
|
||||
UnicodeString &fastCopyFrom(const UnicodeString &src);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Assignment operator. Replaces the characters in this UnicodeString
|
||||
* with a copy of the characters from the `src`
|
||||
* which is, or which is implicitly convertible to,
|
||||
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
|
||||
*
|
||||
* @param src The string view containing the characters to copy.
|
||||
* @return a reference to this
|
||||
* @draft ICU 76
|
||||
*/
|
||||
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline UnicodeString &operator=(const S &src) {
    // Clear a bogus state first, then replace the whole current contents
    // [0, length()) with the characters of the view.
    unBogus();
    const int32_t currentLength = length();
    return doReplace(0, currentLength, internal::toU16StringView(src));
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Move assignment operator; might leave src in bogus state.
|
||||
* This string will have the same contents and state that the source string had.
|
||||
|
@ -2146,6 +2192,23 @@ public:
|
|||
*/
|
||||
inline UnicodeString& operator+= (const UnicodeString& srcText);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Append operator. Appends the characters in `src`
|
||||
* which is, or which is implicitly convertible to,
|
||||
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view,
|
||||
* to the UnicodeString object.
|
||||
*
|
||||
* @param src the source for the new characters
|
||||
* @return a reference to this
|
||||
* @draft ICU 76
|
||||
*/
|
||||
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline UnicodeString& operator+=(const S &src) {
    // Normalize the source to a UTF-16 view, then append it.
    const std::u16string_view view = internal::toU16StringView(src);
    return doAppend(view);
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Append the characters
|
||||
* in `srcText` in the range
|
||||
|
@ -2191,8 +2254,8 @@ public:
|
|||
int32_t srcLength);
|
||||
|
||||
/**
|
||||
* Append the characters in `srcChars` to the UnicodeString object
|
||||
* at offset `start`. `srcChars` is not modified.
|
||||
* Append the characters in `srcChars` to the UnicodeString object.
|
||||
* `srcChars` is not modified.
|
||||
* @param srcChars the source for the new characters
|
||||
* @param srcLength the number of Unicode characters in `srcChars`;
|
||||
* can be -1 if `srcChars` is NUL-terminated
|
||||
|
@ -2202,6 +2265,23 @@ public:
|
|||
inline UnicodeString& append(ConstChar16Ptr srcChars,
|
||||
int32_t srcLength);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Appends the characters in `src`
|
||||
* which is, or which is implicitly convertible to,
|
||||
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view,
|
||||
* to the UnicodeString object.
|
||||
*
|
||||
* @param src the source for the new characters
|
||||
* @return a reference to this
|
||||
* @draft ICU 76
|
||||
*/
|
||||
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
inline UnicodeString& append(const S &src) {
    // Normalize the source to a UTF-16 view, then append it.
    const std::u16string_view view = internal::toU16StringView(src);
    return doAppend(view);
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Append the code unit `srcChar` to the UnicodeString object.
|
||||
* @param srcChar the code unit to append
|
||||
|
@ -2925,6 +3005,37 @@ public:
|
|||
*/
|
||||
const char16_t *getTerminatedBuffer();
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Converts to a std::u16string_view.
|
||||
*
|
||||
* @return a string view of the contents of this string
|
||||
* @draft ICU 76
|
||||
*/
|
||||
inline operator std::u16string_view() const {
    // View over this string's buffer; no characters are copied.
    const char16_t *units = getBuffer();
    const auto count = static_cast<std::u16string_view::size_type>(length());
    return std::u16string_view(units, count);
}
|
||||
|
||||
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Converts to a std::wstring_view.
|
||||
*
|
||||
* Note: This should remain draft until C++ standard plans
|
||||
* about char16_t vs. wchar_t become clearer.
|
||||
*
|
||||
* @return a string view of the contents of this string
|
||||
* @draft ICU 76
|
||||
*/
|
||||
inline operator std::wstring_view() const {
    const char16_t *p = getBuffer();
#ifdef U_ALIASING_BARRIER
    // Applied to the pointer before it is reinterpreted as wchar_t below.
    U_ALIASING_BARRIER(p);
#endif
    const wchar_t *wide = reinterpret_cast<const wchar_t *>(p);
    const auto count = static_cast<std::wstring_view::size_type>(length());
    return std::wstring_view(wide, count);
}
|
||||
#endif // U_SIZEOF_WCHAR_T
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
//========================================
|
||||
// Constructors
|
||||
//========================================
|
||||
|
@ -2975,6 +3086,17 @@ public:
|
|||
* It is recommended to mark this constructor "explicit" by
|
||||
* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
|
||||
* on the compiler command line or similar.
|
||||
*
|
||||
* Note, for string literals:
|
||||
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
|
||||
* length determination:
|
||||
* \code
|
||||
* #include <string_view>
|
||||
* using namespace std::string_view_literals;
|
||||
* UnicodeString str(u"literal"sv);
|
||||
* if (str == u"other literal"sv) { ... }
|
||||
* \endcode
|
||||
*
|
||||
* @param text The characters to place in the UnicodeString. `text`
|
||||
* must be NUL (U+0000) terminated.
|
||||
* @stable ICU 2.0
|
||||
|
@ -2989,6 +3111,17 @@ public:
|
|||
* It is recommended to mark this constructor "explicit" by
|
||||
* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
|
||||
* on the compiler command line or similar.
|
||||
*
|
||||
* Note, for string literals:
|
||||
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
|
||||
* length determination:
|
||||
* \code
|
||||
* #include <string_view>
|
||||
* using namespace std::string_view_literals;
|
||||
* UnicodeString str(u"literal"sv);
|
||||
* if (str == u"other literal"sv) { ... }
|
||||
* \endcode
|
||||
*
|
||||
* @param text NUL-terminated UTF-16 string
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -3005,6 +3138,17 @@ public:
|
|||
* It is recommended to mark this constructor "explicit" by
|
||||
* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
|
||||
* on the compiler command line or similar.
|
||||
*
|
||||
* Note, for string literals:
|
||||
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
|
||||
* length determination:
|
||||
* \code
|
||||
* #include <string_view>
|
||||
* using namespace std::string_view_literals;
|
||||
* UnicodeString str(u"literal"sv);
|
||||
* if (str == u"other literal"sv) { ... }
|
||||
* \endcode
|
||||
*
|
||||
* @param text NUL-terminated UTF-16 string
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -3026,6 +3170,17 @@ public:
|
|||
|
||||
/**
|
||||
* char16_t* constructor.
|
||||
*
|
||||
* Note, for string literals:
|
||||
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
|
||||
* length determination:
|
||||
* \code
|
||||
* #include <string_view>
|
||||
* using namespace std::string_view_literals;
|
||||
* UnicodeString str(u"literal"sv);
|
||||
* if (str == u"other literal"sv) { ... }
|
||||
* \endcode
|
||||
*
|
||||
* @param text The characters to place in the UnicodeString.
|
||||
* @param textLength The number of Unicode characters in `text`
|
||||
* to copy.
|
||||
|
@ -3038,6 +3193,17 @@ public:
|
|||
/**
|
||||
* uint16_t * constructor.
|
||||
* Delegates to UnicodeString(const char16_t *, int32_t).
|
||||
*
|
||||
* Note, for string literals:
|
||||
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
|
||||
* length determination:
|
||||
* \code
|
||||
* #include <string_view>
|
||||
* using namespace std::string_view_literals;
|
||||
* UnicodeString str(u"literal"sv);
|
||||
* if (str == u"other literal"sv) { ... }
|
||||
* \endcode
|
||||
*
|
||||
* @param text UTF-16 string
|
||||
* @param textLength string length
|
||||
* @stable ICU 59
|
||||
|
@ -3051,7 +3217,18 @@ public:
|
|||
* wchar_t * constructor.
|
||||
* (Only defined if U_SIZEOF_WCHAR_T==2.)
|
||||
* Delegates to UnicodeString(const char16_t *, int32_t).
|
||||
* @param text NUL-terminated UTF-16 string
|
||||
*
|
||||
* Note, for string literals:
|
||||
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
|
||||
* length determination:
|
||||
* \code
|
||||
* #include <string_view>
|
||||
* using namespace std::string_view_literals;
|
||||
* UnicodeString str(u"literal"sv);
|
||||
* if (str == u"other literal"sv) { ... }
|
||||
* \endcode
|
||||
*
|
||||
* @param text UTF-16 string
|
||||
* @param textLength string length
|
||||
* @stable ICU 59
|
||||
*/
|
||||
|
@ -3068,6 +3245,26 @@ public:
|
|||
*/
|
||||
inline UnicodeString(const std::nullptr_t text, int32_t textLength);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Constructor from `text`
|
||||
* which is, or which is implicitly convertible to,
|
||||
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
|
||||
* The string is bogus if the string view is too long.
|
||||
*
|
||||
* If you need a UnicodeString but need not copy the string view contents,
|
||||
* then you can call the UnicodeString::readOnlyAlias() function instead of this constructor.
|
||||
*
|
||||
* @param text UTF-16 string
|
||||
* @draft ICU 76
|
||||
*/
|
||||
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
explicit inline UnicodeString(const S &text) {
    // Initialize the length-and-flags field to kShortString, then append
    // (copy) the characters of the view.
    fUnion.fFields.fLengthAndFlags = kShortString;
    const std::u16string_view view = internal::toU16StringView(text);
    doAppend(view);
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Readonly-aliasing char16_t* constructor.
|
||||
* The text will be used for the UnicodeString object, but
|
||||
|
@ -3082,6 +3279,16 @@ public:
|
|||
* When using fastCopyFrom(), the text will be aliased again,
|
||||
* so that both strings then alias the same readonly-text.
|
||||
*
|
||||
* Note, for string literals:
|
||||
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
|
||||
* length determination:
|
||||
* \code
|
||||
* #include <string_view>
|
||||
* using namespace std::string_view_literals;
|
||||
* UnicodeString alias = UnicodeString::readOnlyAlias(u"literal"sv);
|
||||
* if (alias == u"other literal"sv) { ... }
|
||||
* \endcode
|
||||
*
|
||||
* @param isTerminated specifies if `text` is `NUL`-terminated.
|
||||
* This must be true if `textLength==-1`.
|
||||
* @param text The characters to alias for the UnicodeString.
|
||||
|
@ -3160,8 +3367,16 @@ public:
|
|||
*
|
||||
* For ASCII (really "invariant character") strings it is more efficient to use
|
||||
* the constructor that takes a US_INV (for its enum EInvariant).
|
||||
* For ASCII (invariant-character) string literals, see UNICODE_STRING and
|
||||
* UNICODE_STRING_SIMPLE.
|
||||
*
|
||||
* Note, for string literals:
|
||||
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
|
||||
* length determination:
|
||||
* \code
|
||||
* #include <string_view>
|
||||
* using namespace std::string_view_literals;
|
||||
* UnicodeString str(u"literal"sv);
|
||||
* if (str == u"other literal"sv) { ... }
|
||||
* \endcode
|
||||
*
|
||||
* It is recommended to mark this constructor "explicit" by
|
||||
* `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
|
||||
|
@ -3169,8 +3384,6 @@ public:
|
|||
* @param codepageData an array of bytes, null-terminated,
|
||||
* in the platform's default codepage.
|
||||
* @stable ICU 2.0
|
||||
* @see UNICODE_STRING
|
||||
* @see UNICODE_STRING_SIMPLE
|
||||
*/
|
||||
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
|
||||
|
||||
|
@ -3270,6 +3483,17 @@ public:
|
|||
* // use ustr ...
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* Note, for string literals:
|
||||
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
|
||||
* length determination:
|
||||
* \code
|
||||
* #include <string_view>
|
||||
* using namespace std::string_view_literals;
|
||||
* UnicodeString str(u"literal"sv);
|
||||
* if (str == u"other literal"sv) { ... }
|
||||
* \endcode
|
||||
*
|
||||
* @param src String using only invariant characters.
|
||||
* @param textLength Length of src, or -1 if NUL-terminated.
|
||||
* @param inv Signature-distinguishing parameter, use US_INV.
|
||||
|
@ -3343,6 +3567,35 @@ public:
|
|||
*/
|
||||
virtual ~UnicodeString();
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Readonly-aliasing factory method.
|
||||
* Aliases the same buffer as the input `text`
|
||||
* which is, or which is implicitly convertible to,
|
||||
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
|
||||
* The string is bogus if the string view is too long.
|
||||
*
|
||||
* The text will be used for the UnicodeString object, but
|
||||
* it will not be released when the UnicodeString is destroyed.
|
||||
* This has copy-on-write semantics:
|
||||
* When the string is modified, then the buffer is first copied into
|
||||
* newly allocated memory.
|
||||
* The aliased buffer is never modified.
|
||||
*
|
||||
* In an assignment to another UnicodeString, when using the copy constructor
|
||||
* or the assignment operator, the text will be copied.
|
||||
* When using fastCopyFrom(), the text will be aliased again,
|
||||
* so that both strings then alias the same readonly-text.
|
||||
*
|
||||
* @param text The string view to alias for the UnicodeString.
|
||||
* @draft ICU 76
|
||||
*/
|
||||
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
static inline UnicodeString readOnlyAlias(const S &text) {
    // Normalize to a UTF-16 view and let the non-template helper build the alias.
    const std::u16string_view view = internal::toU16StringView(text);
    return readOnlyAliasFromU16StringView(view);
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Create a UnicodeString from a UTF-8 string.
|
||||
* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
|
||||
|
@ -3470,6 +3723,8 @@ protected:
|
|||
virtual UChar32 getChar32At(int32_t offset) const override;
|
||||
|
||||
private:
|
||||
static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);
|
||||
|
||||
// For char* constructors. Could be made public.
|
||||
UnicodeString &setToUTF8(StringPiece utf8);
|
||||
// For extract(char*).
|
||||
|
@ -3485,7 +3740,10 @@ private:
|
|||
* Internal string contents comparison, called by operator==.
|
||||
* Requires: this & text not bogus and have same lengths.
|
||||
*/
|
||||
UBool doEquals(const UnicodeString &text, int32_t len) const;
|
||||
inline UBool doEquals(const UnicodeString &text, int32_t len) const {
    // Forward to the pointer-based overload using the other string's array start.
    const char16_t *otherChars = text.getArrayStart();
    return doEquals(otherChars, len);
}
|
||||
UBool doEquals(const char16_t *text, int32_t len) const;
|
||||
|
||||
inline UBool
|
||||
doEqualsSubstring(int32_t start,
|
||||
|
@ -3580,9 +3838,11 @@ private:
|
|||
const char16_t *srcChars,
|
||||
int32_t srcStart,
|
||||
int32_t srcLength);
|
||||
UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
|
||||
|
||||
UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
|
||||
UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
|
||||
UnicodeString& doAppend(std::u16string_view src);
|
||||
|
||||
UnicodeString& doReverse(int32_t start,
|
||||
int32_t length);
|
||||
|
@ -3802,7 +4062,7 @@ private:
|
|||
};
|
||||
|
||||
/**
|
||||
* Create a new UnicodeString with the concatenation of two others.
|
||||
* Creates a new UnicodeString from the concatenation of two others.
|
||||
*
|
||||
* @param s1 The first string to be copied to the new one.
|
||||
* @param s2 The second string to be copied to the new one, after s1.
|
||||
|
@ -3812,6 +4072,29 @@ private:
|
|||
U_COMMON_API UnicodeString U_EXPORT2
|
||||
operator+ (const UnicodeString &s1, const UnicodeString &s2);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Creates a new UnicodeString from the concatenation of a UnicodeString and `s2`
|
||||
* which is, or which is implicitly convertible to,
|
||||
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
|
||||
*
|
||||
* @param s1 The string to be copied to the new one.
|
||||
* @param s2 The string view to be copied to the new string, after s1.
|
||||
* @return UnicodeString(s1).append(s2)
|
||||
* @draft ICU 76
|
||||
*/
|
||||
// Concatenation of a UnicodeString with anything accepted by
// ConvertibleToU16StringView; forwards to the exported non-template helper.
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
|
||||
inline UnicodeString operator+(const UnicodeString &s1, const S &s2) {
|
||||
// Keep the conversion nested inside the call: the argument then depends on S,
// so unistr_internalConcat is looked up at instantiation time. Its declaration
// appears only after this template.
return unistr_internalConcat(s1, internal::toU16StringView(s2));
|
||||
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_FORCE_HIDE_INTERNAL_API
|
||||
/** @internal */
|
||||
U_COMMON_API UnicodeString U_EXPORT2
|
||||
unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2);
|
||||
#endif
|
||||
|
||||
//========================================
|
||||
// Inline members
|
||||
//========================================
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
* Character property dependent functions moved here from uniset.cpp
|
||||
*/
|
||||
|
||||
#include <string_view>
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/parsepos.h"
|
||||
|
@ -45,16 +47,23 @@
|
|||
#include "uassert.h"
|
||||
#include "hash.h"
|
||||
|
||||
// Makes u"literal"sv std::u16string_view literals possible.
|
||||
// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
|
||||
using namespace std::string_view_literals;
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
// File-local constants for property-set handling.
// NOTE(review): this view appears to interleave both sides of a diff hunk; the
// `static const` / `#define` lines look like the replaced versions of the
// `constexpr` lines that follow them. Confirm against the actual file.
namespace {
|
||||
|
||||
// Special property set IDs
|
||||
static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
|
||||
static const char ASCII[] = "ASCII"; // [\u0000-\u007F]
|
||||
static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
|
||||
constexpr char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
|
||||
constexpr char ASCII[] = "ASCII"; // [\u0000-\u007F]
|
||||
constexpr char ASSIGNED[] = "Assigned"; // [:^Cn:]
|
||||
|
||||
// Unicode name property alias
|
||||
#define NAME_PROP "na"
|
||||
#define NAME_PROP_LENGTH 2
|
||||
// UTF-16 view of the alias, so it can be assigned to a UnicodeString directly
// (see `propName = NAME_PROP;`).
constexpr std::u16string_view NAME_PROP(u"na"sv);
|
||||
|
||||
} // namespace
|
||||
|
||||
// Cached sets ------------------------------------------------------------- ***
|
||||
|
||||
|
@ -83,7 +92,7 @@ namespace {
|
|||
// Cache some sets for other services -------------------------------------- ***
|
||||
void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
|
||||
U_ASSERT(uni32Singleton == nullptr);
|
||||
uni32Singleton = new UnicodeSet(UNICODE_STRING_SIMPLE("[:age=3.2:]"), errorCode);
|
||||
uni32Singleton = new UnicodeSet(UnicodeString(u"[:age=3.2:]"sv), errorCode);
|
||||
if(uni32Singleton==nullptr) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
} else {
|
||||
|
@ -1105,7 +1114,7 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
|
|||
// support args of (UProperty, char*) then we can remove
|
||||
// NAME_PROP and make this a little more efficient.
|
||||
valueName = propName;
|
||||
propName = UnicodeString(NAME_PROP, NAME_PROP_LENGTH, US_INV);
|
||||
propName = NAME_PROP;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <string_view>
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/appendable.h"
|
||||
#include "unicode/putil.h"
|
||||
|
@ -107,12 +109,34 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
|
|||
|
||||
// Concatenates two UnicodeStrings into a new string.
// NOTE(review): this view appears to interleave both sides of a diff hunk; the
// first `return` statement (unchecked length sum) looks like the replaced
// version of the overflow-checked code after it. Confirm against the actual file.
UnicodeString U_EXPORT2
|
||||
operator+ (const UnicodeString &s1, const UnicodeString &s2) {
|
||||
return
|
||||
UnicodeString(s1.length() + s2.length() + 1, static_cast<UChar32>(0), 0).
|
||||
append(s1).
|
||||
append(s2);
|
||||
// Add the two lengths with overflow detection; on overflow the result is a
// bogus string.
int32_t sumLengths;
|
||||
if (uprv_add32_overflow(s1.length(), s2.length(), &sumLengths)) {
|
||||
UnicodeString bogus;
|
||||
bogus.setToBogus();
|
||||
return bogus;
|
||||
}
|
||||
// Only add one when that cannot overflow int32_t.
if (sumLengths != INT32_MAX) {
|
||||
++sumLengths; // space for a terminating NUL if we need one
|
||||
}
|
||||
// Construct the result from sumLengths (presumably a capacity hint; verify
// against the constructor's documentation), then append both operands in order.
return UnicodeString(sumLengths, static_cast<UChar32>(0), 0).append(s1).append(s2);
|
||||
}
|
||||
|
||||
U_COMMON_API UnicodeString U_EXPORT2
unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2) {
    // The view length must fit into int32_t, and so must the sum of both
    // lengths; otherwise the result is a bogus string.
    int32_t capacity = 0;
    const bool tooLong =
        s2.length() > INT32_MAX ||
        uprv_add32_overflow(s1.length(), static_cast<int32_t>(s2.length()), &capacity);
    if (tooLong) {
        UnicodeString bogus;
        bogus.setToBogus();
        return bogus;
    }
    if (capacity < INT32_MAX) {
        ++capacity;  // space for a terminating NUL if we need one
    }
    return UnicodeString(capacity, static_cast<UChar32>(0), 0).append(s1).append(s2);
}
|
||||
|
||||
|
||||
//========================================
|
||||
// Reference Counting functions, put at top of file so that optimizing compilers
|
||||
// have a chance to automatically inline.
|
||||
|
@ -279,6 +303,16 @@ UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
|
|||
}
|
||||
}
|
||||
|
||||
UnicodeString UnicodeString::readOnlyAliasFromU16StringView(std::u16string_view text) {
    UnicodeString result;
    // A view longer than INT32_MAX yields a bogus string.
    if (text.length() > INT32_MAX) {
        result.setToBogus();
        return result;
    }
    // isTerminated=false: a string view carries an explicit length.
    result.setTo(false, text.data(), static_cast<int32_t>(text.length()));
    return result;
}
|
||||
|
||||
#if U_CHARSET_IS_UTF8
|
||||
|
||||
UnicodeString::UnicodeString(const char *codepageData) {
|
||||
|
@ -656,10 +690,10 @@ UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
|
|||
// Read-only implementation
|
||||
//========================================
|
||||
// Contents comparison helper used by the equality operators.
// NOTE(review): this view appears to interleave both sides of a diff hunk; the
// `const UnicodeString &text` signature and the `text.getArrayStart()` return
// look like the replaced versions of the `const char16_t *text` lines. Confirm
// against the actual file.
UBool
|
||||
UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
|
||||
// Requires: this & text not bogus and have same lengths.
|
||||
UnicodeString::doEquals(const char16_t *text, int32_t len) const {
|
||||
// Requires: this string is not bogus, and text has the same length (len) as this string.
|
||||
// Byte-wise comparison works for equality regardless of endianness.
|
||||
return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
|
||||
// NOTE(review): callers that pass std::u16string_view::data() may pass nullptr
// together with len==0 (empty view); a null pointer argument to memcmp is
// formally undefined even for zero length -- consider guarding len==0.
return uprv_memcmp(getArrayStart(), text, len * U_SIZEOF_UCHAR) == 0;
|
||||
}
|
||||
|
||||
UBool
|
||||
|
@ -1574,6 +1608,18 @@ UnicodeString::doReplace(int32_t start,
|
|||
return *this;
|
||||
}
|
||||
|
||||
UnicodeString&
UnicodeString::doReplace(int32_t start, int32_t length, std::u16string_view src) {
    // A non-writable string is returned unchanged.
    if (isWritable()) {
        if (src.length() <= INT32_MAX) {
            // The length fits into int32_t: delegate to the pointer-based overload.
            return doReplace(start, length, src.data(), 0, static_cast<int32_t>(src.length()));
        }
        // The view is too long to be represented: make this string bogus.
        setToBogus();
    }
    return *this;
}
|
||||
|
||||
// Versions of doReplace() only for append() variants.
|
||||
// doReplace() and doAppend() optimize for different cases.
|
||||
|
||||
|
@ -1662,6 +1708,18 @@ UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcL
|
|||
return *this;
|
||||
}
|
||||
|
||||
UnicodeString&
UnicodeString::doAppend(std::u16string_view src) {
    // Nothing changes for a non-writable string or an empty source.
    const bool nothingToDo = !isWritable() || src.empty();
    if (nothingToDo) {
        return *this;
    }
    if (src.length() <= INT32_MAX) {
        // The length fits into int32_t: delegate to the pointer-based overload.
        return doAppend(src.data(), 0, static_cast<int32_t>(src.length()));
    }
    // The view is too long to be represented: make this string bogus.
    setToBogus();
    return *this;
}
|
||||
|
||||
/**
|
||||
* Replaceable API
|
||||
*/
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <string_view>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "unicode/plurrule.h"
|
||||
|
@ -22,6 +23,10 @@
|
|||
#include "uassert.h"
|
||||
#include "util.h"
|
||||
|
||||
// Makes u"literal"sv std::u16string_view literals possible.
|
||||
// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
|
||||
using namespace std::string_view_literals;
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::number;
|
||||
using namespace icu::number::impl;
|
||||
|
@ -1082,7 +1087,7 @@ UnicodeString DecimalQuantity::toScientificString() const {
|
|||
result.append(u'E');
|
||||
int32_t _scale = upperPos + scale + exponent;
|
||||
if (_scale == INT32_MIN) {
|
||||
result.append({u"-2147483648", -1});
|
||||
result.append(u"-2147483648"sv);
|
||||
return result;
|
||||
} else if (_scale < 0) {
|
||||
_scale *= -1;
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
#include "ustrtest.h"
|
||||
|
@ -22,6 +24,13 @@
|
|||
#include "cmemory.h"
|
||||
#include "charstr.h"
|
||||
|
||||
// Makes u"literal"sv std::u16string_view literals possible.
|
||||
// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
|
||||
using namespace std::string_view_literals;
|
||||
|
||||
// Same for u"literal"s std::u16string literals.
|
||||
using namespace std::string_literals;
|
||||
|
||||
#if 0
|
||||
#include "unicode/ustream.h"
|
||||
|
||||
|
@ -68,6 +77,8 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &
|
|||
TESTCASE_AUTO(TestNullPointers);
|
||||
TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
|
||||
TESTCASE_AUTO(TestLargeAppend);
|
||||
TESTCASE_AUTO(TestU16StringView);
|
||||
TESTCASE_AUTO(TestWStringView);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -2400,3 +2411,177 @@ void UnicodeStringTest::TestLargeAppend() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void UnicodeStringTest::TestU16StringView() {
|
||||
IcuTestErrorCode status(*this, "TestU16StringView");
|
||||
// ICU-22843 Test ICU 76 new UnicodeString APIs that take or return a std::u16string_view
|
||||
// or something convertible to it.
|
||||
// NOTE: Keep this function very parallel with TestWStringView()!
|
||||
const char16_t *p16 = u"p16";
|
||||
std::u16string_view sv16 = u"sv16";
|
||||
std::u16string str16 = u"str16";
|
||||
|
||||
// These copy the string contents.
|
||||
UnicodeString fromPtr(p16); // pointer is convertible to std::u16string_view
|
||||
UnicodeString fromSV(sv16); // std::u16string_view itself
|
||||
UnicodeString fromSV2(u"sv16_2"sv); // std::u16string_view literal
|
||||
UnicodeString fromStr(str16); // std::u16string is convertible to std::u16string_view
|
||||
assertEquals("UnicodeString(const char16_t *pointer)", UnicodeString(u"p16", 3), fromPtr);
|
||||
assertEquals("UnicodeString(std::u16string_view)", UnicodeString(u"sv16", 4), fromSV);
|
||||
assertEquals("UnicodeString(std::u16string_view literal)", UnicodeString(u"sv16_2", 6), fromSV2);
|
||||
assertEquals("UnicodeString(std::u16string)", UnicodeString(u"str16", 5), fromStr);
|
||||
|
||||
// Read-only aliases
|
||||
UnicodeString aliasFromPtr = UnicodeString::readOnlyAlias(p16);
|
||||
assertTrue("aliasFromPtr pointer alias", aliasFromPtr.getBuffer() == p16);
|
||||
assertEquals("aliasFromPtr length", 3, aliasFromPtr.length());
|
||||
|
||||
UnicodeString aliasFromSV = UnicodeString::readOnlyAlias(sv16);
|
||||
assertTrue("aliasFromSV pointer alias", aliasFromSV.getBuffer() == sv16.data());
|
||||
assertEquals("aliasFromSV length", (int32_t)sv16.length(), aliasFromSV.length());
|
||||
|
||||
UnicodeString aliasFromStr = UnicodeString::readOnlyAlias(str16);
|
||||
assertTrue("aliasFromStr pointer alias", aliasFromStr.getBuffer() == str16.data());
|
||||
assertEquals("aliasFromStr length", (int32_t)str16.length(), aliasFromStr.length());
|
||||
|
||||
// operator==
|
||||
UnicodeString any(true, u"any", 3);
|
||||
assertFalse("any == pointer-p16", any == p16);
|
||||
assertTrue("any == pointer-any", any == u"any");
|
||||
assertFalse("any == string_view-sv16", any == sv16);
|
||||
assertTrue("any == string_view-any", any == u"any"sv);
|
||||
assertFalse("any == string-str16", any == str16);
|
||||
assertTrue("any == string-any", any == u"any"s);
|
||||
|
||||
// Assignment copies the string contents.
|
||||
UnicodeString x;
|
||||
x = p16;
|
||||
assertEquals("x = p16", UnicodeString(true, u"p16", 3), x);
|
||||
x = sv16;
|
||||
assertEquals("x = sv16", UnicodeString(true, u"sv16", 4), x);
|
||||
x = str16;
|
||||
assertEquals("x = str16", UnicodeString(true, u"str16", 5), x);
|
||||
|
||||
// Append
|
||||
x += p16;
|
||||
assertEquals("+= p16", UnicodeString(true, u"str16p16", 8), x);
|
||||
x += sv16;
|
||||
assertEquals("+= sv16", UnicodeString(true, u"str16p16sv16", 12), x);
|
||||
x += str16;
|
||||
assertEquals("+= str16", UnicodeString(true, u"str16p16sv16str16", 17), x);
|
||||
|
||||
x = u"x"sv;
|
||||
x.append(p16);
|
||||
assertEquals("append(p16)", UnicodeString(true, u"xp16", 4), x);
|
||||
x.append(sv16);
|
||||
assertEquals("append(sv16)", UnicodeString(true, u"xp16sv16", 8), x);
|
||||
x.append(str16);
|
||||
assertEquals("append(str16)", UnicodeString(true, u"xp16sv16str16", 13), x);
|
||||
|
||||
// Convert UnicodeString to string view.
|
||||
std::u16string_view sv16FromUniStr(any);
|
||||
assertTrue("sv16FromUniStr buffer alias", sv16FromUniStr.data() == any.getBuffer());
|
||||
assertEquals("sv16FromUniStr length", any.length(), (int32_t)sv16FromUniStr.length());
|
||||
|
||||
// Just to show convenience: Convert UnicodeString to string view, then to std string.
|
||||
std::u16string str16FromUniStr(any);
|
||||
assertTrue("str16FromUniStr contents", str16FromUniStr == u"any"s);
|
||||
|
||||
// operator+
|
||||
x = any + p16;
|
||||
assertEquals("any + p16", UnicodeString(true, u"anyp16", 6), x);
|
||||
x = any + sv16;
|
||||
assertEquals("any + sv16", UnicodeString(true, u"anysv16", 7), x);
|
||||
x = any + str16;
|
||||
assertEquals("any + str16", UnicodeString(true, u"anystr16", 8), x);
|
||||
}
|
||||
|
||||
void UnicodeStringTest::TestWStringView() {
    // The whole body is compiled only where wchar_t is 16 bits wide;
    // elsewhere this test is an empty function.
#if U_SIZEOF_WCHAR_T==2
    // The label must name this test, so that any error reported through
    // `status` is attributed to TestWStringView and not to its sibling.
    IcuTestErrorCode status(*this, "TestWStringView");
    // ICU-22843: covers the UnicodeString APIs, new in ICU 76, that accept or
    // produce a std::wstring_view or anything convertible to one.
    // NOTE: Keep this function very parallel with TestU16StringView()!

    // The three flavors of input used throughout:
    // raw pointer, string view, and owning std string.
    const wchar_t *p16 = L"p16";
    std::wstring_view sv16 = L"sv16";
    std::wstring str16 = L"str16";

    // Constructors: each of these copies the string contents.
    // Every construction is checked right away against an explicit-length UnicodeString.
    UnicodeString fromPtr(p16);  // pointer is convertible to std::wstring_view
    assertEquals("UnicodeString(const wchar_t *pointer)", UnicodeString(L"p16", 3), fromPtr);

    UnicodeString fromSV(sv16);  // std::wstring_view itself
    assertEquals("UnicodeString(std::wstring_view)", UnicodeString(L"sv16", 4), fromSV);

    UnicodeString fromSV2(L"sv16_2"sv);  // std::wstring_view literal
    assertEquals("UnicodeString(std::wstring_view literal)", UnicodeString(L"sv16_2", 6), fromSV2);

    UnicodeString fromStr(str16);  // std::wstring is convertible to std::wstring_view
    assertEquals("UnicodeString(std::wstring)", UnicodeString(L"str16", 5), fromStr);

    // Read-only aliases: the result must expose the caller's own buffer
    // (same pointer, same length) instead of a copy.
    // getBuffer() yields char16_t pointers, hence the reinterpret_casts for comparison.
    UnicodeString aliasFromPtr = UnicodeString::readOnlyAlias(p16);
    assertTrue("aliasFromPtr pointer alias",
               aliasFromPtr.getBuffer() == reinterpret_cast<const char16_t *>(p16));
    assertEquals("aliasFromPtr length", 3, aliasFromPtr.length());

    UnicodeString aliasFromSV = UnicodeString::readOnlyAlias(sv16);
    assertTrue("aliasFromSV pointer alias",
               aliasFromSV.getBuffer() == reinterpret_cast<const char16_t *>(sv16.data()));
    assertEquals("aliasFromSV length", static_cast<int32_t>(sv16.length()), aliasFromSV.length());

    UnicodeString aliasFromStr = UnicodeString::readOnlyAlias(str16);
    assertTrue("aliasFromStr pointer alias",
               aliasFromStr.getBuffer() == reinterpret_cast<const char16_t *>(str16.data()));
    assertEquals("aliasFromStr length", static_cast<int32_t>(str16.length()), aliasFromStr.length());

    // operator== against each input flavor: one mismatch and one match per flavor.
    UnicodeString any(true, L"any", 3);
    assertFalse("any == pointer-p16", any == p16);
    assertTrue("any == pointer-any", any == L"any");
    assertFalse("any == string_view-sv16", any == sv16);
    assertTrue("any == string_view-any", any == L"any"sv);
    assertFalse("any == string-str16", any == str16);
    assertTrue("any == string-any", any == L"any"s);

    // operator=: assignment copies the string contents.
    UnicodeString x;
    x = p16;
    assertEquals("x = p16", UnicodeString(true, L"p16", 3), x);
    x = sv16;
    assertEquals("x = sv16", UnicodeString(true, L"sv16", 4), x);
    x = str16;
    assertEquals("x = str16", UnicodeString(true, L"str16", 5), x);

    // operator+=: x still holds "str16" from the last assignment and grows from there.
    x += p16;
    assertEquals("+= p16", UnicodeString(true, L"str16p16", 8), x);
    x += sv16;
    assertEquals("+= sv16", UnicodeString(true, L"str16p16sv16", 12), x);
    x += str16;
    assertEquals("+= str16", UnicodeString(true, L"str16p16sv16str16", 17), x);

    // append(): restart from a short known value, then append each flavor in turn.
    x = L"x"sv;
    x.append(p16);
    assertEquals("append(p16)", UnicodeString(true, L"xp16", 4), x);
    x.append(sv16);
    assertEquals("append(sv16)", UnicodeString(true, L"xp16sv16", 8), x);
    x.append(str16);
    assertEquals("append(str16)", UnicodeString(true, L"xp16sv16str16", 13), x);

    // UnicodeString -> string view: the view must alias the UnicodeString's buffer.
    std::wstring_view sv16FromUniStr(any);
    assertTrue("sv16FromUniStr buffer alias",
               reinterpret_cast<const char16_t *>(sv16FromUniStr.data()) == any.getBuffer());
    assertEquals("sv16FromUniStr length", any.length(), static_cast<int32_t>(sv16FromUniStr.length()));

    // Convenience only: UnicodeString -> string view -> std string.
    std::wstring str16FromUniStr(any);
    assertTrue("str16FromUniStr contents", str16FromUniStr == L"any"s);

    // operator+ with each input flavor on the right-hand side.
    x = any + p16;
    assertEquals("any + p16", UnicodeString(true, L"anyp16", 6), x);
    x = any + sv16;
    assertEquals("any + sv16", UnicodeString(true, L"anysv16", 7), x);
    x = any + str16;
    assertEquals("any + str16", UnicodeString(true, L"anystr16", 8), x);
#endif
}
|
||||
|
|
|
@ -98,6 +98,8 @@ public:
|
|||
void TestNullPointers();
|
||||
void TestUnicodeStringInsertAppendToSelf();
|
||||
void TestLargeAppend();
|
||||
void TestU16StringView();
|
||||
void TestWStringView();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue