From 7dfefa55f5247e3827ccde43082e9670b390ded2 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Sat, 25 Feb 2017 01:08:35 +0000 Subject: [PATCH 01/26] ICU-12992 experimental UnicodeString with Char16Ptr and ConstChar16Ptr in constructors and extract() X-SVN-Rev: 39706 --- icu4c/source/common/unicode/unistr.h | 297 ++++++++++++++++++++++++++- icu4c/source/common/unistr.cpp | 7 +- 2 files changed, 295 insertions(+), 9 deletions(-) diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 270229b849a..70b87ac553e 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -28,6 +28,7 @@ * \brief C++ API: Unicode String */ +#include #include "unicode/utypes.h" #include "unicode/rep.h" #include "unicode/std_string.h" @@ -56,6 +57,173 @@ u_strlen(const UChar *s); U_NAMESPACE_BEGIN +// TODO begin experiment --------------- + +/** + * \def U_ALIASING_BARRIER + * Barrier for pointer anti-aliasing optimizations even across function boundaries. + * @internal + */ +#ifdef U_ALIASING_BARRIER + // Use the predefined value. +#elif defined(__clang__) || defined(__GNUC__) +# define U_ALIASING_BARRIER(ptr) asm volatile("" : "+rm"(ptr)) +#endif + +/** + * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types, + * and from NULL. + * @draft ICU 59 + */ +class U_COMMON_API Char16Ptr { +public: + /** + * Copies the pointer. + * @draft ICU 59 + */ + inline Char16Ptr(char16_t *p); + /** + * Converts the pointer to char16_t *. + * @draft ICU 59 + */ + inline Char16Ptr(uint16_t *p); +#if U_SIZEOF_WCHAR_T==2 + /** + * Converts the pointer to char16_t *. + * @draft ICU 59 + */ + inline Char16Ptr(wchar_t *p); +#endif + /** + * nullptr constructor. + * @draft ICU 59 + */ + inline Char16Ptr(std::nullptr_t p); + /** + * Pointer access. 
+ * @draft ICU 59 + */ + inline operator char16_t *(); + +private: +#ifdef U_ALIASING_BARRIER + template char16_t *cast(T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast(t); + } + + char16_t *p; +#else + union { + char16_t *cp; + uint16_t *up; + wchar_t *wp; + } u; +#endif +}; + +#ifdef U_ALIASING_BARRIER + +Char16Ptr::Char16Ptr(char16_t *p) : p(p) {} +Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {} +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {} +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {} + +Char16Ptr::operator char16_t *() { return p; } + +#else + +Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; } +Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; } +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; } +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; } + +Char16Ptr::operator char16_t *() { return u.cp; } + +#endif + +/** + * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types, + * and from NULL. + * @draft ICU 59 + */ +class U_COMMON_API ConstChar16Ptr { +public: + /** + * Copies the pointer. + * @draft ICU 59 + */ + inline ConstChar16Ptr(const char16_t *p); + /** + * Converts the pointer to char16_t *. + * @draft ICU 59 + */ + inline ConstChar16Ptr(const uint16_t *p); +#if U_SIZEOF_WCHAR_T==2 + /** + * Converts the pointer to char16_t *. + * @draft ICU 59 + */ + inline ConstChar16Ptr(const wchar_t *p); +#endif + /** + * nullptr constructor. + * @draft ICU 59 + */ + inline ConstChar16Ptr(const std::nullptr_t p); + /** + * Pointer access. 
+ * @draft ICU 59 + */ + inline operator const char16_t *() const; + +private: +#ifdef U_ALIASING_BARRIER + template const char16_t *cast(const T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast(t); + } + + const char16_t *p; +#else + union { + const char16_t *cp; + const uint16_t *up; + const wchar_t *wp; + } u; +#endif +}; + +#ifdef U_ALIASING_BARRIER + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {} +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {} +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {} +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {} + +ConstChar16Ptr::operator const char16_t *() const { return p; } + +#else + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; } +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; } +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; } +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; } + +ConstChar16Ptr::operator const char16_t *() const { return u.cp; } + +#endif + +// TODO end experiment ----------------- + #if !UCONFIG_NO_BREAK_ITERATION class BreakIterator; // unicode/brkiter.h #endif @@ -1454,7 +1622,7 @@ public: */ inline void extract(int32_t start, int32_t length, - UChar *dst, + Char16Ptr dst, int32_t dstStart = 0) const; /** @@ -1479,7 +1647,7 @@ public: * @stable ICU 2.0 */ int32_t - extract(UChar *dest, int32_t destCapacity, + extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const; /** @@ -2070,7 +2238,7 @@ public: * a new buffer will be allocated and the contents copied as with regularly * constructed strings. * In an assignment to another UnicodeString, the buffer will be copied. 
- * The extract(UChar *dst) function detects whether the dst pointer is the same + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same * as the string buffer itself and will in this case not copy the contents. * * @param buffer The characters to alias for the UnicodeString. @@ -3004,6 +3172,46 @@ public: */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); + /** + * uint16_t * constructor. + * Delegates to UnicodeString(const UChar *). + * + * It is recommended to mark this constructor "explicit" by + * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @draft ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : + UnicodeString(static_cast(ConstChar16Ptr(text))) {} + +#if U_SIZEOF_WCHAR_T==2 + /** + * wchar_t * constructor. + * Delegates to UnicodeString(const UChar *). + * + * It is recommended to mark this constructor "explicit" by + * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @draft ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : + UnicodeString(static_cast(ConstChar16Ptr(text))) {} +#endif + + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * + * It is recommended to mark this constructor "explicit" by + * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * on the compiler command line or similar. + * @param text nullptr + * @draft ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); + /** * UChar* constructor. * @param text The characters to place in the UnicodeString. @@ -3014,6 +3222,37 @@ public: UnicodeString(const UChar *text, int32_t textLength); + /** + * uint16_t * constructor. + * Delegates to UnicodeString(const UChar *, int32_t). 
+ * @param text UTF-16 string + * @param length string length + * @draft ICU 59 + */ + UnicodeString(const uint16_t *text, int32_t length) : + UnicodeString(static_cast(ConstChar16Ptr(text)), length) {} + +#if U_SIZEOF_WCHAR_T==2 + /** + * wchar_t * constructor. + * Delegates to UnicodeString(const UChar *, int32_t). + * @param text NUL-terminated UTF-16 string + * @param length string length + * @draft ICU 59 + */ + UnicodeString(const wchar_t *text, int32_t length) : + UnicodeString(static_cast(ConstChar16Ptr(text)), length) {} +#endif + + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param text nullptr + * @param length ignored + * @draft ICU 59 + */ + inline UnicodeString(const std::nullptr_t text, int32_t length); + /** * Readonly-aliasing UChar* constructor. * The text will be used for the UnicodeString object, but @@ -3037,7 +3276,7 @@ public: * @stable ICU 2.0 */ UnicodeString(UBool isTerminated, - const UChar *text, + ConstChar16Ptr text, int32_t textLength); /** @@ -3050,7 +3289,7 @@ public: * a new buffer will be allocated and the contents copied as with regularly * constructed strings. * In an assignment to another UnicodeString, the buffer will be copied. - * The extract(UChar *dst) function detects whether the dst pointer is the same + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same * as the string buffer itself and will in this case not copy the contents. * * @param buffer The characters to alias for the UnicodeString. @@ -3060,6 +3299,40 @@ public: */ UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); + /** + * Writable-aliasing uint16_t * constructor. + * Delegates to UnicodeString(const UChar *, int32_t, int32_t). 
+ * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @draft ICU 59 + */ + UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(static_cast(Char16Ptr(buffer)), buffLength, buffCapacity) {} + +#if U_SIZEOF_WCHAR_T==2 + /** + * Writable-aliasing wchar_t * constructor. + * Delegates to UnicodeString(const UChar *, int32_t, int32_t). + * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @draft ICU 59 + */ + UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(static_cast(Char16Ptr(buffer)), buffLength, buffCapacity) {} +#endif + + /** + * Writable-aliasing nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param buffer nullptr + * @param buffLength ignored + * @param buffCapacity ignored + * @draft ICU 59 + */ + inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); + #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION /** @@ -3772,6 +4045,18 @@ UnicodeString::UnicodeString() { fUnion.fStackFields.fLengthAndFlags=kShortString; } +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + //======================================== // Read-only implementation methods //======================================== @@ -4364,7 +4649,7 @@ UnicodeString::doExtract(int32_t start, inline void UnicodeString::extract(int32_t start, int32_t 
_length, - UChar *target, + Char16Ptr target, int32_t targetStart) const { doExtract(start, _length, target, targetStart); } diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index bb4de3afa7e..a4d921948f5 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -218,7 +218,7 @@ UnicodeString::UnicodeString(const UChar *text, } UnicodeString::UnicodeString(UBool isTerminated, - const UChar *text, + ConstChar16Ptr text, int32_t textLength) { fUnion.fFields.fLengthAndFlags = kReadonlyAlias; if(text == NULL) { @@ -234,7 +234,8 @@ UnicodeString::UnicodeString(UBool isTerminated, // text is terminated, or else it would have failed the above test textLength = u_strlen(text); } - setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); + setArray(const_cast(static_cast(text)), textLength, + isTerminated ? textLength + 1 : textLength); } } @@ -873,7 +874,7 @@ UnicodeString::doExtract(int32_t start, } int32_t -UnicodeString::extract(UChar *dest, int32_t destCapacity, +UnicodeString::extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const { int32_t len = length(); if(U_SUCCESS(errorCode)) { From 9634351bd768ed76752e06dceab4e2fcebb72f53 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 28 Feb 2017 06:50:27 +0000 Subject: [PATCH 02/26] ICU-12992 test overloads with pointer-wrapper class, add constructor(int null) to match NULL==0 X-SVN-Rev: 39713 --- icu4c/source/common/unicode/unistr.h | 69 +++++++++++++++------ icu4c/source/common/unistr.cpp | 44 +++++++++++++- icu4c/source/test/intltest/ustrtest.cpp | 80 +++++++++++++++++++++++++ icu4c/source/test/intltest/ustrtest.h | 5 ++ 4 files changed, 178 insertions(+), 20 deletions(-) diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 70b87ac553e..7b325f2df0f 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -75,7 +75,7 @@ U_NAMESPACE_BEGIN * 
and from NULL. * @draft ICU 59 */ -class U_COMMON_API Char16Ptr { +class U_COMMON_API Char16Ptr final { public: /** * Copies the pointer. @@ -87,9 +87,10 @@ public: * @draft ICU 59 */ inline Char16Ptr(uint16_t *p); -#if U_SIZEOF_WCHAR_T==2 +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) * @draft ICU 59 */ inline Char16Ptr(wchar_t *p); @@ -99,13 +100,26 @@ public: * @draft ICU 59 */ inline Char16Ptr(std::nullptr_t p); + /** + * NULL constructor. + * Must only be used for 0 which is usually the value of NULL. + * @draft ICU 59 + */ + Char16Ptr(int null); /** * Pointer access. * @draft ICU 59 */ - inline operator char16_t *(); + inline char16_t *get(); + /** + * Pointer access via type conversion (e.g., static_cast). + * @draft ICU 59 + */ + operator char16_t *() { return get(); } private: + Char16Ptr() = delete; + #ifdef U_ALIASING_BARRIER template char16_t *cast(T *t) { U_ALIASING_BARRIER(t); @@ -131,7 +145,7 @@ Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {} #endif Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {} -Char16Ptr::operator char16_t *() { return p; } +char16_t *Char16Ptr::get() { return p; } #else @@ -142,7 +156,7 @@ Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; } #endif Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; } -Char16Ptr::operator char16_t *() { return u.cp; } +char16_t *Char16Ptr::get() { return u.cp; } #endif @@ -151,7 +165,7 @@ Char16Ptr::operator char16_t *() { return u.cp; } * and from NULL. * @draft ICU 59 */ -class U_COMMON_API ConstChar16Ptr { +class U_COMMON_API ConstChar16Ptr final { public: /** * Copies the pointer. @@ -163,9 +177,10 @@ public: * @draft ICU 59 */ inline ConstChar16Ptr(const uint16_t *p); -#if U_SIZEOF_WCHAR_T==2 +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) 
* @draft ICU 59 */ inline ConstChar16Ptr(const wchar_t *p); @@ -175,13 +190,26 @@ public: * @draft ICU 59 */ inline ConstChar16Ptr(const std::nullptr_t p); + /** + * NULL constructor. + * Must only be used for 0 which is usually the value of NULL. + * @draft ICU 59 + */ + ConstChar16Ptr(int null); /** * Pointer access. * @draft ICU 59 */ - inline operator const char16_t *() const; + inline const char16_t *get() const; + /** + * Pointer access via type conversion (e.g., static_cast). + * @draft ICU 59 + */ + operator const char16_t *() { return get(); } private: + ConstChar16Ptr() = delete; + #ifdef U_ALIASING_BARRIER template const char16_t *cast(const T *t) { U_ALIASING_BARRIER(t); @@ -207,7 +235,7 @@ ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {} #endif ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {} -ConstChar16Ptr::operator const char16_t *() const { return p; } +const char16_t *ConstChar16Ptr::get() const { return p; } #else @@ -218,7 +246,7 @@ ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; } #endif ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; } -ConstChar16Ptr::operator const char16_t *() const { return u.cp; } +const char16_t *ConstChar16Ptr::get() const { return u.cp; } #endif @@ -3183,11 +3211,12 @@ public: * @draft ICU 59 */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : - UnicodeString(static_cast(ConstChar16Ptr(text))) {} + UnicodeString(ConstChar16Ptr(text).get()) {} -#if U_SIZEOF_WCHAR_T==2 +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) * Delegates to UnicodeString(const UChar *). 
* * It is recommended to mark this constructor "explicit" by @@ -3197,7 +3226,7 @@ public: * @draft ICU 59 */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : - UnicodeString(static_cast(ConstChar16Ptr(text))) {} + UnicodeString(ConstChar16Ptr(text).get()) {} #endif /** @@ -3230,18 +3259,19 @@ public: * @draft ICU 59 */ UnicodeString(const uint16_t *text, int32_t length) : - UnicodeString(static_cast(ConstChar16Ptr(text)), length) {} + UnicodeString(ConstChar16Ptr(text).get(), length) {} -#if U_SIZEOF_WCHAR_T==2 +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) * Delegates to UnicodeString(const UChar *, int32_t). * @param text NUL-terminated UTF-16 string * @param length string length * @draft ICU 59 */ UnicodeString(const wchar_t *text, int32_t length) : - UnicodeString(static_cast(ConstChar16Ptr(text)), length) {} + UnicodeString(ConstChar16Ptr(text).get(), length) {} #endif /** @@ -3308,11 +3338,12 @@ public: * @draft ICU 59 */ UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : - UnicodeString(static_cast(Char16Ptr(buffer)), buffLength, buffCapacity) {} + UnicodeString(Char16Ptr(buffer).get(), buffLength, buffCapacity) {} -#if U_SIZEOF_WCHAR_T==2 +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * Writable-aliasing wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) * Delegates to UnicodeString(const UChar *, int32_t, int32_t). 
* @param buffer writable buffer of/for UTF-16 text * @param buffLength length of the current buffer contents @@ -3320,7 +3351,7 @@ public: * @draft ICU 59 */ UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : - UnicodeString(static_cast(Char16Ptr(buffer)), buffLength, buffCapacity) {} + UnicodeString(Char16Ptr(buffer).get(), buffLength, buffCapacity) {} #endif /** diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index a4d921948f5..c24d8b7f0a9 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -98,6 +98,48 @@ U_CDECL_END U_NAMESPACE_BEGIN +#ifdef U_ALIASING_BARRIER + +Char16Ptr::Char16Ptr(int null) : p(nullptr) { + U_ASSERT(null == 0); + if (null != 0) { + // Try to provoke a crash. + p = reinterpret_cast(1); + } +} + +ConstChar16Ptr::ConstChar16Ptr(int null) : p(nullptr) { + U_ASSERT(null == 0); + if (null != 0) { + // Try to provoke a crash. + p = reinterpret_cast(1); + } +} + +#else + +Char16Ptr::Char16Ptr(int null) { + U_ASSERT(null == 0); + if (null == 0) { + u.cp = nullptr; + } else { + // Try to provoke a crash. + u.cp = reinterpret_cast(1); + } +} + +ConstChar16Ptr::ConstChar16Ptr(int null) { + U_ASSERT(null == 0); + if (null == 0) { + u.cp = nullptr; + } else { + // Try to provoke a crash. + u.cp = reinterpret_cast(1); + } +} + +#endif + /* The Replaceable virtual destructor can't be defined in the header due to how AIX works with multiple definitions of virtual functions. */ @@ -234,7 +276,7 @@ UnicodeString::UnicodeString(UBool isTerminated, // text is terminated, or else it would have failed the above test textLength = u_strlen(text); } - setArray(const_cast(static_cast(text)), textLength, + setArray(const_cast(text.get()), textLength, isTerminated ? 
textLength + 1 : textLength); } } diff --git a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp index df579456515..afa503bcbf3 100644 --- a/icu4c/source/test/intltest/ustrtest.cpp +++ b/icu4c/source/test/intltest/ustrtest.cpp @@ -61,6 +61,10 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* & TESTCASE_AUTO(TestSizeofUnicodeString); TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated); TESTCASE_AUTO(TestMoveSwap); + TESTCASE_AUTO(TestUInt16Pointers); + TESTCASE_AUTO(TestWCharPointers); + TESTCASE_AUTO(TestNullPointers); + TESTCASE_AUTO(TestZeroPointers); TESTCASE_AUTO_END; } @@ -2190,3 +2194,79 @@ UnicodeStringTest::TestMoveSwap() { errln("UnicodeString copy after self-move did not work"); } } + +void +UnicodeStringTest::TestUInt16Pointers() { + static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 }; + uint16_t arr[4]; + + UnicodeString expected(u"abc"); + assertEquals("abc from pointer", expected, UnicodeString(carr)); + assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3)); + assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3)); + + UnicodeString alias(arr, 0, 4); + alias.append(u'a').append(u'b').append(u'c'); + assertEquals("abc from writable alias", expected, alias); + assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3)); + + UErrorCode errorCode = U_ZERO_ERROR; + int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode); + TEST_ASSERT_STATUS(errorCode); + assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length)); +} + +void +UnicodeStringTest::TestWCharPointers() { +#if U_SIZEOF_WCHAR_T==2 + static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 }; + wchar_t arr[4]; + + UnicodeString expected(u"abc"); + assertEquals("abc from pointer", expected, UnicodeString(carr)); + assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3)); + assertEquals("abc from 
read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3)); + + UnicodeString alias(arr, 0, 4); + alias.append(u'a').append(u'b').append(u'c'); + assertEquals("abc from writable alias", expected, alias); + assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3)); + + UErrorCode errorCode = U_ZERO_ERROR; + int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode); + TEST_ASSERT_STATUS(errorCode); + assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length)); +#endif +} + +void +UnicodeStringTest::TestNullPointers() { + assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty()); + assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty()); + assertTrue("empty from read-only-alias nullptr", UnicodeString(TRUE, nullptr, 3).isEmpty()); + + UnicodeString alias(nullptr, 4, 4); // empty, no alias + assertTrue("empty from writable alias", alias.isEmpty()); + alias.append(u'a').append(u'b').append(u'c'); + UnicodeString expected(u"abc"); + assertEquals("abc from writable alias", expected, alias); + + UErrorCode errorCode = U_ZERO_ERROR; + UnicodeString(u"def").extract(nullptr, 0, errorCode); + assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode); +} + +void +UnicodeStringTest::TestZeroPointers() { + // There are constructor overloads with one and three integer parameters + // which match passing 0, so we cannot test using 0 for UnicodeString(pointer) + // or UnicodeString(read-only or writable alias). + // There are multiple two-parameter constructors that make using 0 + // for the first parameter ambiguous already, + // so we cannot test using 0 for UnicodeString(pointer, length). + + // extract() also has enough overloads to be ambiguous with 0. + // Test the pointer wrapper directly. 
+ assertTrue("0 --> nullptr", Char16Ptr(0).get() == nullptr); + assertTrue("0 --> const nullptr", ConstChar16Ptr(0).get() == nullptr); +} diff --git a/icu4c/source/test/intltest/ustrtest.h b/icu4c/source/test/intltest/ustrtest.h index 0c449350f01..a2e2fbd4b71 100644 --- a/icu4c/source/test/intltest/ustrtest.h +++ b/icu4c/source/test/intltest/ustrtest.h @@ -92,6 +92,11 @@ public: void TestUnicodeStringImplementsAppendable(); void TestSizeofUnicodeString(); void TestMoveSwap(); + + void TestUInt16Pointers(); + void TestWCharPointers(); + void TestNullPointers(); + void TestZeroPointers(); }; #endif From d924dda84bca1bdba455225ee0d5210e122f2725 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 28 Feb 2017 22:07:03 +0000 Subject: [PATCH 03/26] ICU-12992 return pointer-wrapper class from UnicodeString::getBuffer() and siblings, add wrapper class overloads to make this mostly work as is, fix a few call sites X-SVN-Rev: 39715 --- icu4c/source/common/ucurr.cpp | 3 +- icu4c/source/common/unicode/unistr.h | 168 ++++++++++++++++-- icu4c/source/common/unistr.cpp | 14 +- icu4c/source/extra/uconv/uconv.cpp | 42 ++--- icu4c/source/i18n/collationfastlatinbuilder.h | 2 +- icu4c/source/test/intltest/dtfmtrtts.cpp | 4 +- icu4c/source/test/intltest/ustrtest.cpp | 7 +- icu4c/source/tools/genrb/reslist.cpp | 4 +- 8 files changed, 194 insertions(+), 50 deletions(-) diff --git a/icu4c/source/common/ucurr.cpp b/icu4c/source/common/ucurr.cpp index c63e6ca593f..8cc817ade9e 100644 --- a/icu4c/source/common/ucurr.cpp +++ b/icu4c/source/common/ucurr.cpp @@ -1030,7 +1030,8 @@ collectCurrencyNames(const char* locale, const UnicodeString *symbol; while ((symbol = iter.next()) != NULL) { (*currencySymbols)[*total_currency_symbol_count].IsoCode = iso; - (*currencySymbols)[*total_currency_symbol_count].currencyName = (UChar*) symbol->getBuffer(); + (*currencySymbols)[*total_currency_symbol_count].currencyName = + const_cast(symbol->getBuffer().get()); 
(*currencySymbols)[*total_currency_symbol_count].flag = 0; (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = symbol->length(); } diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 7b325f2df0f..3ed6b7f41cc 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -79,6 +79,7 @@ class U_COMMON_API Char16Ptr final { public: /** * Copies the pointer. + * TODO: @param p ... * @draft ICU 59 */ inline Char16Ptr(char16_t *p); @@ -107,21 +108,70 @@ public: */ Char16Ptr(int null); /** - * Pointer access. + * Destructor. * @draft ICU 59 */ - inline char16_t *get(); + inline ~Char16Ptr(); + /** - * Pointer access via type conversion (e.g., static_cast). + * Pointer access. + * TODO @return ... * @draft ICU 59 */ - operator char16_t *() { return get(); } + inline char16_t *get() const; + /** + * char16_t pointer access via type conversion (e.g., static_cast). + * @draft ICU 59 + */ + operator char16_t *() const { return get(); } + /** + * uint16_t pointer access via type conversion (e.g., static_cast). + * @draft ICU 59 + */ + inline operator uint16_t *() const; +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t pointer access via type conversion (e.g., static_cast). 
+ * @draft ICU 59 + */ + inline operator wchar_t *() const; +#endif + operator void *() const { return get(); } + + char16_t operator[](size_t offset) const { return get()[offset]; } + + UBool operator==(const Char16Ptr &other) const { return get() == other.get(); } + UBool operator!=(const Char16Ptr &other) const { return !operator==(other); } + UBool operator==(const char16_t *other) const { return get() == other; } + UBool operator!=(const char16_t *other) const { return !operator==(other); } + UBool operator==(const uint16_t *other) const { return static_cast(*this) == other; } + UBool operator!=(const uint16_t *other) const { return !operator==(other); } +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + UBool operator==(const wchar_t *other) const { return static_cast(*this) == other; } + UBool operator!=(const wchar_t *other) const { return !operator==(other); } +#endif + UBool operator==(const std::nullptr_t null) const { return get() == null; } + UBool operator!=(const std::nullptr_t null) const { return !operator==(null); } + /** + * Comparison with NULL. + * @return TRUE if the pointer is nullptr and null==0 + * @draft ICU 59 + */ + UBool operator==(int null) const { return get() == nullptr && null == 0; } + /** + * Comparison with NULL. 
+ * @return TRUE if the pointer is not nullptr and null==0 + * @draft ICU 59 + */ + UBool operator!=(int null) const { return get() != nullptr && null == 0; } + + Char16Ptr operator+(size_t offset) const { return Char16Ptr(get() + offset); } private: Char16Ptr() = delete; #ifdef U_ALIASING_BARRIER - template char16_t *cast(T *t) { + template static char16_t *cast(T *t) { U_ALIASING_BARRIER(t); return reinterpret_cast(t); } @@ -144,8 +194,22 @@ Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {} Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {} #endif Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {} +Char16Ptr::~Char16Ptr() { + U_ALIASING_BARRIER(p); +} -char16_t *Char16Ptr::get() { return p; } +char16_t *Char16Ptr::get() const { return p; } + +Char16Ptr::operator uint16_t *() const { + U_ALIASING_BARRIER(p); + return reinterpret_cast(p); +} +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::operator wchar_t *() const { + U_ALIASING_BARRIER(p); + return reinterpret_cast(p); +} +#endif #else @@ -155,8 +219,18 @@ Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; } Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; } #endif Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; } +Char16Ptr::~Char16Ptr() {} -char16_t *Char16Ptr::get() { return u.cp; } +char16_t *Char16Ptr::get() const { return u.cp; } + +Char16Ptr::operator uint16_t *() const { + return u.up; +} +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::operator wchar_t *() const { + return u.wp; +} +#endif #endif @@ -196,22 +270,60 @@ public: * @draft ICU 59 */ ConstChar16Ptr(int null); + /** + * Destructor. + * @draft ICU 59 + */ + inline ~ConstChar16Ptr(); + /** * Pointer access. * @draft ICU 59 */ inline const char16_t *get() const; /** - * Pointer access via type conversion (e.g., static_cast). + * char16_t pointer access via type conversion (e.g., static_cast). 
* @draft ICU 59 */ - operator const char16_t *() { return get(); } + operator const char16_t *() const { return get(); } + /** + * uint16_t pointer access via type conversion (e.g., static_cast). + * @draft ICU 59 + */ + inline operator const uint16_t *() const; +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t pointer access via type conversion (e.g., static_cast). + * @draft ICU 59 + */ + inline operator const wchar_t *() const; +#endif + operator const void *() const { return get(); } + + char16_t operator[](size_t offset) const { return get()[offset]; } + + UBool operator==(const ConstChar16Ptr &other) const { return get() == other.get(); } + UBool operator!=(const ConstChar16Ptr &other) const { return !operator==(other); } + UBool operator==(const char16_t *other) const { return get() == other; } + UBool operator!=(const char16_t *other) const { return !operator==(other); } + UBool operator==(const uint16_t *other) const { return static_cast(*this) == other; } + UBool operator!=(const uint16_t *other) const { return !operator==(other); } +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + UBool operator==(const wchar_t *other) const { return static_cast(*this) == other; } + UBool operator!=(const wchar_t *other) const { return !operator==(other); } +#endif + UBool operator==(const std::nullptr_t null) const { return get() == null; } + UBool operator!=(const std::nullptr_t null) const { return !operator==(null); } + UBool operator==(int null) const { return get() == nullptr && null == 0; } + UBool operator!=(int null) const { return get() != nullptr && null == 0; } + + ConstChar16Ptr operator+(size_t offset) { return ConstChar16Ptr(get() + offset); } private: ConstChar16Ptr() = delete; #ifdef U_ALIASING_BARRIER - template const char16_t *cast(const T *t) { + template static const char16_t *cast(const T *t) { U_ALIASING_BARRIER(t); return reinterpret_cast(t); } @@ -234,9 +346,23 @@ ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) 
{} ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {} #endif ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {} +ConstChar16Ptr::~ConstChar16Ptr() { + U_ALIASING_BARRIER(p); +} const char16_t *ConstChar16Ptr::get() const { return p; } +ConstChar16Ptr::operator const uint16_t *() const { + U_ALIASING_BARRIER(p); + return reinterpret_cast(p); +} +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::operator const wchar_t *() const { + U_ALIASING_BARRIER(p); + return reinterpret_cast(p); +} +#endif + #else ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; } @@ -245,9 +371,19 @@ ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; } ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; } #endif ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; } +ConstChar16Ptr::~ConstChar16Ptr() {} const char16_t *ConstChar16Ptr::get() const { return u.cp; } +ConstChar16Ptr::operator const uint16_t *() const { + return u.up; +} +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::operator const wchar_t *() const { + return u.wp; +} +#endif + #endif // TODO end experiment ----------------- @@ -3047,13 +3183,13 @@ public: * in the buffer, starting at the returned pointer; * default to the current string capacity if minCapacity==-1 * @return a writable pointer to the internal string buffer, - * or 0 if an error occurs (nested calls, out of memory) + * or nullptr if an error occurs (nested calls, out of memory) * * @see releaseBuffer * @see getTerminatedBuffer() * @stable ICU 2.0 */ - UChar *getBuffer(int32_t minCapacity); + Char16Ptr getBuffer(int32_t minCapacity); /** * Release a read/write buffer on a UnicodeString object with an @@ -3101,13 +3237,13 @@ public: * be modified. 
* * @return a read-only pointer to the internal string buffer, - * or 0 if the string is empty or bogus + * or nullptr if the string is empty or bogus * * @see getBuffer(int32_t minCapacity) * @see getTerminatedBuffer() * @stable ICU 2.0 */ - inline const UChar *getBuffer() const; + inline ConstChar16Ptr getBuffer() const; /** * Get a read-only pointer to the internal buffer, @@ -3142,7 +3278,7 @@ public: * @see getBuffer() * @stable ICU 2.2 */ - const UChar *getTerminatedBuffer(); + ConstChar16Ptr getTerminatedBuffer(); //======================================== // Constructors @@ -4134,7 +4270,7 @@ UnicodeString::isBufferWritable() const (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); } -inline const UChar * +inline ConstChar16Ptr UnicodeString::getBuffer() const { if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { return 0; diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index c24d8b7f0a9..a891dec799b 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -260,9 +260,10 @@ UnicodeString::UnicodeString(const UChar *text, } UnicodeString::UnicodeString(UBool isTerminated, - ConstChar16Ptr text, + ConstChar16Ptr textPtr, int32_t textLength) { fUnion.fFields.fLengthAndFlags = kReadonlyAlias; + const UChar *text = textPtr; if(text == NULL) { // treat as an empty string, do not alias setToEmpty(); @@ -276,7 +277,7 @@ UnicodeString::UnicodeString(UBool isTerminated, // text is terminated, or else it would have failed the above test textLength = u_strlen(text); } - setArray(const_cast(text.get()), textLength, + setArray(const_cast(text), textLength, isTerminated ? 
textLength + 1 : textLength); } } @@ -916,10 +917,11 @@ UnicodeString::doExtract(int32_t start, } int32_t -UnicodeString::extract(Char16Ptr dest, int32_t destCapacity, +UnicodeString::extract(Char16Ptr destPtr, int32_t destCapacity, UErrorCode &errorCode) const { int32_t len = length(); if(U_SUCCESS(errorCode)) { + UChar *dest = destPtr; if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { errorCode=U_ILLEGAL_ARGUMENT_ERROR; } else { @@ -1258,7 +1260,7 @@ UnicodeString::unBogus() { } } -const UChar * +ConstChar16Ptr UnicodeString::getTerminatedBuffer() { if(!isWritable()) { return 0; @@ -1292,7 +1294,7 @@ UnicodeString::getTerminatedBuffer() { array[len] = 0; return array; } else { - return NULL; + return nullptr; } } @@ -1756,7 +1758,7 @@ UnicodeString::doHashCode() const // External Buffer //======================================== -UChar * +Char16Ptr UnicodeString::getBuffer(int32_t minCapacity) { if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer; diff --git a/icu4c/source/extra/uconv/uconv.cpp b/icu4c/source/extra/uconv/uconv.cpp index 3bc807c819d..6aad36b5a9c 100644 --- a/icu4c/source/extra/uconv/uconv.cpp +++ b/icu4c/source/extra/uconv/uconv.cpp @@ -290,7 +290,7 @@ static int printConverters(const char *pname, const char *lookfor, UnicodeString str(name, ""); putchar('\t'); - u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(), + u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer().get(), u_wmsg_errorName(err)); goto error_cleanup; } else { @@ -304,7 +304,7 @@ static int printConverters(const char *pname, const char *lookfor, if (U_FAILURE(err)) { UnicodeString str(name, ""); putchar('\t'); - u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(), + u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer().get(), u_wmsg_errorName(err)); goto error_cleanup; } @@ -627,7 +627,7 @@ ConvertFile::convertFile(const char *pname, UnicodeString str2(strerror(errno), ""); 
str2.append((UChar32) 0); initMsg(pname); - u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer()); + u_wmsg(stderr, "cantOpenInputF", str1.getBuffer().get(), str2.getBuffer().get()); return FALSE; } closeFile = TRUE; @@ -672,10 +672,10 @@ ConvertFile::convertFile(const char *pname, UChar linebuf[20], offsetbuf[20]; uprv_itou(linebuf, 20, parse.line, 10, 0); uprv_itou(offsetbuf, 20, parse.offset, 10, 0); - u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(), + u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer().get(), u_wmsg_errorName(err), linebuf, offsetbuf); } else { - u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(), + u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer().get(), u_wmsg_errorName(err)); } @@ -698,7 +698,7 @@ ConvertFile::convertFile(const char *pname, if (U_FAILURE(err)) { UnicodeString str(fromcpage, ""); initMsg(pname); - u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(), + u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer().get(), u_wmsg_errorName(err)); goto error_exit; } @@ -713,7 +713,7 @@ ConvertFile::convertFile(const char *pname, if (U_FAILURE(err)) { UnicodeString str(tocpage, ""); initMsg(pname); - u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(), + u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer().get(), u_wmsg_errorName(err)); goto error_exit; } @@ -742,7 +742,7 @@ ConvertFile::convertFile(const char *pname, if (ferror(infile) != 0) { UnicodeString str(strerror(errno)); initMsg(pname); - u_wmsg(stderr, "cantRead", str.getTerminatedBuffer()); + u_wmsg(stderr, "cantRead", str.getTerminatedBuffer().get()); goto error_exit; } @@ -819,8 +819,8 @@ ConvertFile::convertFile(const char *pname, initMsg(pname); u_wmsg(stderr, "problemCvtToU", - UnicodeString(pos, length, "").getTerminatedBuffer(), - str.getTerminatedBuffer(), + UnicodeString(pos, length, "").getTerminatedBuffer().get(), + 
str.getTerminatedBuffer().get(), u_wmsg_errorName(err)); willexit = TRUE; @@ -1008,10 +1008,10 @@ ConvertFile::convertFile(const char *pname, initMsg(pname); u_wmsg(stderr, errtag, - UnicodeString(pos, length, "").getTerminatedBuffer(), - str.getTerminatedBuffer(), + UnicodeString(pos, length, "").getTerminatedBuffer().get(), + str.getTerminatedBuffer().get(), u_wmsg_errorName(err)); - u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer()); + u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer().get()); willexit = TRUE; err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */ @@ -1026,7 +1026,7 @@ ConvertFile::convertFile(const char *pname, if (wr != outlen) { UnicodeString str(strerror(errno)); initMsg(pname); - u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer()); + u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer().get()); willexit = TRUE; } @@ -1075,7 +1075,7 @@ static void usage(const char *pname, int ecode) { UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1)); UnicodeString mname(msg, msgLen + 1); - res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer()); + res = u_wmsg(fp, "usage", mname.getBuffer().get(), upname.getBuffer().get()); if (!ecode) { if (!res) { fputc('\n', fp); @@ -1184,7 +1184,7 @@ main(int argc, char **argv) initMsg(pname); UnicodeString str(*iter); initMsg(pname); - u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer()); + u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer().get()); return 3; } } else { @@ -1212,7 +1212,7 @@ main(int argc, char **argv) if (U_FAILURE(e) || !printName) { UnicodeString str(*iter); initMsg(pname); - u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer()); + u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer().get()); return 2; } } else @@ -1240,7 +1240,7 @@ main(int argc, char **argv) } else { UnicodeString str(*iter); initMsg(pname); - u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); + u_wmsg(stderr, "unknownCallback", 
str.getTerminatedBuffer().get()); return 4; } } else { @@ -1256,7 +1256,7 @@ main(int argc, char **argv) } else { UnicodeString str(*iter); initMsg(pname); - u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); + u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer().get()); return 4; } } else { @@ -1276,7 +1276,7 @@ main(int argc, char **argv) } else { UnicodeString str(*iter); initMsg(pname); - u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); + u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer().get()); return 4; } } else { @@ -1329,7 +1329,7 @@ main(int argc, char **argv) UnicodeString str2(strerror(errno), ""); initMsg(pname); u_wmsg(stderr, "cantCreateOutputF", - str1.getBuffer(), str2.getBuffer()); + str1.getBuffer().get(), str2.getBuffer().get()); return 1; } } else { diff --git a/icu4c/source/i18n/collationfastlatinbuilder.h b/icu4c/source/i18n/collationfastlatinbuilder.h index 8b63b86815f..ad64d03b75f 100644 --- a/icu4c/source/i18n/collationfastlatinbuilder.h +++ b/icu4c/source/i18n/collationfastlatinbuilder.h @@ -37,7 +37,7 @@ public: UBool forData(const CollationData &data, UErrorCode &errorCode); const uint16_t *getTable() const { - return reinterpret_cast(result.getBuffer()); + return result.getBuffer(); } int32_t lengthOfTable() const { return result.length(); } diff --git a/icu4c/source/test/intltest/dtfmtrtts.cpp b/icu4c/source/test/intltest/dtfmtrtts.cpp index 5bac60ed7db..32f4715df37 100644 --- a/icu4c/source/test/intltest/dtfmtrtts.cpp +++ b/icu4c/source/test/intltest/dtfmtrtts.cpp @@ -121,7 +121,9 @@ void DateFormatRoundTripTest::TestCentury() */ //if (date[1] != date[2] || result[0] != result[1]) { if (date[1] != date[2]) { - errln("Round trip failure: \"%S\" (%f), \"%S\" (%f)", result[0].getBuffer(), date[1], result[1].getBuffer(), date[2]); + errln("Round trip failure: \"%S\" (%f), \"%S\" (%f)", + static_cast(result[0].getBuffer()), date[1], + static_cast(result[1].getBuffer()), date[2]); } } diff --git 
a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp index afa503bcbf3..429ae325b0c 100644 --- a/icu4c/source/test/intltest/ustrtest.cpp +++ b/icu4c/source/test/intltest/ustrtest.cpp @@ -1087,7 +1087,7 @@ UnicodeStringTest::TestMiscellaneous() } // test releaseBuffer() with a NUL-terminated buffer - test1.getBuffer(20)[2]=0; + test1.getBuffer(20).get()[2]=0; test1.releaseBuffer(); // implicit -1 if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) { errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString"); @@ -1558,7 +1558,10 @@ UnicodeStringTest::TestBogus() { // writable alias to another string's buffer: very bad idea, just convenient for this test test3.setToBogus(); - if(!test3.isBogus() || test3.setTo((UChar *)test1.getBuffer(), test1.length(), test1.getCapacity()).isBogus() || test3!=test1) { + if(!test3.isBogus() || + test3.setTo(const_cast(test1.getBuffer().get()), + test1.length(), test1.getCapacity()).isBogus() || + test3!=test1) { errln("bogus.setTo(writable alias) failed"); } diff --git a/icu4c/source/tools/genrb/reslist.cpp b/icu4c/source/tools/genrb/reslist.cpp index 86c577654f8..8c8ed4162da 100644 --- a/icu4c/source/tools/genrb/reslist.cpp +++ b/icu4c/source/tools/genrb/reslist.cpp @@ -1031,7 +1031,7 @@ void SRBRoot::write(const char *outputDir, const char *outputPkg, if (f16BitUnits.length() <= 1) { // no pool strings to checksum } else if (U_IS_BIG_ENDIAN) { - checksum = computeCRC((const char *)f16BitUnits.getBuffer(), + checksum = computeCRC(reinterpret_cast(f16BitUnits.getBuffer().get()), (uint32_t)f16BitUnits.length() * 2, checksum); } else { // Swap to big-endian so we get the same checksum on all platforms @@ -1039,7 +1039,7 @@ void SRBRoot::write(const char *outputDir, const char *outputPkg, UnicodeString s(f16BitUnits); s.append((UChar)1); // Ensure that we own this buffer. 
assert(!s.isBogus()); - uint16_t *p = (uint16_t *)s.getBuffer(); + uint16_t *p = const_cast(static_cast(s.getBuffer())); for (int32_t count = f16BitUnits.length(); count > 0; --count) { uint16_t x = *p; *p++ = (uint16_t)((x << 8) | (x >> 8)); From 031be519111a7bf86368ffd6cad96dd67ef9f720 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 1 Mar 2017 05:52:24 +0000 Subject: [PATCH 04/26] ICU-12992 move Char16Ptr to new char16ptr.h; change non-UnicodeString C++ functions from raw pointers to Char16Ptr where possible X-SVN-Rev: 39716 --- icu4c/source/common/Makefile.in | 2 +- icu4c/source/common/char16ptr.cpp | 55 +++ icu4c/source/common/common.vcxproj | 15 + icu4c/source/common/common.vcxproj.filters | 6 + icu4c/source/common/normlzr.cpp | 4 +- icu4c/source/common/ucharstrie.cpp | 3 +- icu4c/source/common/ucharstrieiterator.cpp | 2 +- icu4c/source/common/uchriter.cpp | 8 +- icu4c/source/common/unicode/casemap.h | 17 +- icu4c/source/common/unicode/char16ptr.h | 350 ++++++++++++++++++ icu4c/source/common/unicode/normlzr.h | 4 +- icu4c/source/common/unicode/ucharstrie.h | 6 +- icu4c/source/common/unicode/uchriter.h | 8 +- icu4c/source/common/unicode/unistr.h | 332 +---------------- icu4c/source/common/unistr.cpp | 42 --- .../source/common/ustr_titlecase_brkiter.cpp | 4 +- icu4c/source/common/ustrcase.cpp | 4 +- icu4c/source/common/ustrcase_locale.cpp | 8 +- icu4c/source/i18n/curramt.cpp | 4 +- icu4c/source/i18n/currunit.cpp | 4 +- icu4c/source/i18n/dtfmtsym.cpp | 2 +- icu4c/source/i18n/numfmt.cpp | 2 +- icu4c/source/i18n/smpdtfmt.cpp | 4 +- icu4c/source/i18n/unicode/curramt.h | 8 +- icu4c/source/i18n/unicode/currunit.h | 6 +- icu4c/source/i18n/unicode/dtfmtsym.h | 2 +- icu4c/source/i18n/unicode/numfmt.h | 2 +- icu4c/source/i18n/unum.cpp | 2 +- icu4c/source/test/intltest/dtfmttst.cpp | 2 +- icu4c/source/test/intltest/intltest.cpp | 2 +- icu4c/source/test/intltest/measfmttest.cpp | 2 +- icu4c/source/test/intltest/numfmtst.cpp | 8 +- 
icu4c/source/test/intltest/tsmthred.cpp | 4 +- 33 files changed, 490 insertions(+), 434 deletions(-) create mode 100644 icu4c/source/common/char16ptr.cpp create mode 100644 icu4c/source/common/unicode/char16ptr.h diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in index 10fa8de38eb..5a82fbf7262 100644 --- a/icu4c/source/common/Makefile.in +++ b/icu4c/source/common/Makefile.in @@ -95,7 +95,7 @@ bytestrie.o bytestrieiterator.o \ ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \ dictionarydata.o \ edits.o \ -appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \ +char16ptr.o appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \ utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \ unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \ normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o loadednormalizer2impl.o \ diff --git a/icu4c/source/common/char16ptr.cpp b/icu4c/source/common/char16ptr.cpp new file mode 100644 index 00000000000..1da9cd8490c --- /dev/null +++ b/icu4c/source/common/char16ptr.cpp @@ -0,0 +1,55 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// char16ptr.cpp +// created: 2017feb28 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/char16ptr.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +#ifdef U_ALIASING_BARRIER + +Char16Ptr::Char16Ptr(int null) : p(nullptr) { + U_ASSERT(null == 0); + if (null != 0) { + // Try to provoke a crash. + p = reinterpret_cast(1); + } +} + +ConstChar16Ptr::ConstChar16Ptr(int null) : p(nullptr) { + U_ASSERT(null == 0); + if (null != 0) { + // Try to provoke a crash. 
+ p = reinterpret_cast(1); + } +} + +#else + +Char16Ptr::Char16Ptr(int null) { + U_ASSERT(null == 0); + if (null == 0) { + u.cp = nullptr; + } else { + // Try to provoke a crash. + u.cp = reinterpret_cast(1); + } +} + +ConstChar16Ptr::ConstChar16Ptr(int null) { + U_ASSERT(null == 0); + if (null == 0) { + u.cp = nullptr; + } else { + // Try to provoke a crash. + u.cp = reinterpret_cast(1); + } +} + +#endif + +U_NAMESPACE_END diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj index b735ee9398e..9e760180666 100644 --- a/icu4c/source/common/common.vcxproj +++ b/icu4c/source/common/common.vcxproj @@ -448,6 +448,7 @@ + @@ -1530,6 +1531,20 @@ ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) copy "%(FullPath)" ..\..\include\unicode + + ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) + + + copy "%(FullPath)" ..\..\include\unicode + + ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) + copy "%(FullPath)" ..\..\include\unicode + + ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) + copy "%(FullPath)" ..\..\include\unicode + + ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) + copy "%(FullPath)" ..\..\include\unicode ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters index 21387cd7508..b29fe2fc4f2 100644 --- a/icu4c/source/common/common.vcxproj.filters +++ b/icu4c/source/common/common.vcxproj.filters @@ -463,6 +463,9 @@ strings + + strings + strings @@ -1105,6 +1108,9 @@ strings + + strings + strings diff --git a/icu4c/source/common/normlzr.cpp b/icu4c/source/common/normlzr.cpp index 4569d742e2d..a1a4949d30a 100644 --- a/icu4c/source/common/normlzr.cpp +++ b/icu4c/source/common/normlzr.cpp @@ -40,7 +40,7 @@ Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) : init(); } -Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) : 
+Normalizer::Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode) : UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), text(new UCharCharacterIterator(str, length)), currentIndex(0), nextIndex(0), @@ -435,7 +435,7 @@ Normalizer::setText(const CharacterIterator& newText, } void -Normalizer::setText(const UChar* newText, +Normalizer::setText(ConstChar16Ptr newText, int32_t length, UErrorCode &status) { diff --git a/icu4c/source/common/ucharstrie.cpp b/icu4c/source/common/ucharstrie.cpp index b83aec51b2c..e0b33af5194 100644 --- a/icu4c/source/common/ucharstrie.cpp +++ b/icu4c/source/common/ucharstrie.cpp @@ -175,7 +175,8 @@ UCharsTrie::next(int32_t uchar) { } UStringTrieResult -UCharsTrie::next(const UChar *s, int32_t sLength) { +UCharsTrie::next(ConstChar16Ptr ptr, int32_t sLength) { + const UChar *s=ptr; if(sLength<0 ? *s==0 : sLength==0) { // Empty input. return current(); diff --git a/icu4c/source/common/ucharstrieiterator.cpp b/icu4c/source/common/ucharstrieiterator.cpp index 56729951b40..b3132241fe2 100644 --- a/icu4c/source/common/ucharstrieiterator.cpp +++ b/icu4c/source/common/ucharstrieiterator.cpp @@ -21,7 +21,7 @@ U_NAMESPACE_BEGIN -UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength, +UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode) : uchars_(trieUChars), pos_(uchars_), initialPos_(uchars_), diff --git a/icu4c/source/common/uchriter.cpp b/icu4c/source/common/uchriter.cpp index 0b3868e67d7..822168f5c8e 100644 --- a/icu4c/source/common/uchriter.cpp +++ b/icu4c/source/common/uchriter.cpp @@ -25,14 +25,14 @@ UCharCharacterIterator::UCharCharacterIterator() // never default construct! } -UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr, +UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length) : CharacterIterator(textPtr != 0 ? (length>=0 ? 
length : u_strlen(textPtr)) : 0), text(textPtr) { } -UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr, +UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, int32_t position) : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0, position), @@ -40,7 +40,7 @@ UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr, { } -UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr, +UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, int32_t textBegin, int32_t textEnd, @@ -349,7 +349,7 @@ UCharCharacterIterator::move32(int32_t delta, CharacterIterator::EOrigin origin) return pos; } -void UCharCharacterIterator::setText(const UChar* newText, +void UCharCharacterIterator::setText(ConstChar16Ptr newText, int32_t newTextLength) { text = newText; if(newText == 0 || newTextLength < 0) { diff --git a/icu4c/source/common/unicode/casemap.h b/icu4c/source/common/unicode/casemap.h index 2224172118c..1050f52d7e8 100644 --- a/icu4c/source/common/unicode/casemap.h +++ b/icu4c/source/common/unicode/casemap.h @@ -8,6 +8,7 @@ #define __CASEMAP_H__ #include "unicode/utypes.h" +#include "unicode/char16ptr.h" #include "unicode/uobject.h" /** @@ -59,8 +60,8 @@ public: */ static int32_t toLower( const char *locale, uint32_t options, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, + ConstChar16Ptr src, int32_t srcLength, + Char16Ptr dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); /** @@ -93,8 +94,8 @@ public: */ static int32_t toUpper( const char *locale, uint32_t options, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, + ConstChar16Ptr src, int32_t srcLength, + Char16Ptr dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); #if !UCONFIG_NO_BREAK_ITERATION @@ -140,8 +141,8 @@ public: */ static int32_t toTitle( const char *locale, uint32_t options, 
BreakIterator *iter, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, + ConstChar16Ptr src, int32_t srcLength, + Char16Ptr dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); #endif // UCONFIG_NO_BREAK_ITERATION @@ -180,8 +181,8 @@ public: */ static int32_t fold( uint32_t options, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, + ConstChar16Ptr src, int32_t srcLength, + Char16Ptr dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); private: diff --git a/icu4c/source/common/unicode/char16ptr.h b/icu4c/source/common/unicode/char16ptr.h new file mode 100644 index 00000000000..a949b7d4f54 --- /dev/null +++ b/icu4c/source/common/unicode/char16ptr.h @@ -0,0 +1,350 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// char16ptr.h +// created: 2017feb28 Markus W. Scherer + +#ifndef __CHAR16PTR_H__ +#define __CHAR16PTR_H__ + +#include +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: char16_t pointer wrappers with + * implicit conversion to/from bit-compatible raw pointer types. + */ + +U_NAMESPACE_BEGIN + +/** + * \def U_ALIASING_BARRIER + * Barrier for pointer anti-aliasing optimizations even across function boundaries. + * @internal + */ +#ifdef U_ALIASING_BARRIER + // Use the predefined value. +#elif defined(__clang__) || defined(__GNUC__) +# define U_ALIASING_BARRIER(ptr) asm volatile("" : "+rm"(ptr)) +#endif + +/** + * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types, + * and from NULL. + * @draft ICU 59 + */ +class U_COMMON_API Char16Ptr final { +public: + /** + * Copies the pointer. + * TODO: @param p ... + * @draft ICU 59 + */ + inline Char16Ptr(char16_t *p); + /** + * Converts the pointer to char16_t *. 
+ * @draft ICU 59 + */ + inline Char16Ptr(uint16_t *p); +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @draft ICU 59 + */ + inline Char16Ptr(wchar_t *p); +#endif + /** + * nullptr constructor. + * @draft ICU 59 + */ + inline Char16Ptr(std::nullptr_t p); + /** + * NULL constructor. + * Must only be used for 0 which is usually the value of NULL. + * @draft ICU 59 + */ + Char16Ptr(int null); + /** + * Destructor. + * @draft ICU 59 + */ + inline ~Char16Ptr(); + + /** + * Pointer access. + * TODO @return ... + * @draft ICU 59 + */ + inline char16_t *get() const; + /** + * char16_t pointer access via type conversion (e.g., static_cast). + * @draft ICU 59 + */ + operator char16_t *() const { return get(); } + /** + * uint16_t pointer access via type conversion (e.g., static_cast). + * @draft ICU 59 + */ + inline operator uint16_t *() const; +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t pointer access via type conversion (e.g., static_cast). 
+ * @draft ICU 59 + */ + inline operator wchar_t *() const; +#endif + operator void *() const { return get(); } + + char16_t operator[](size_t offset) const { return get()[offset]; } + + UBool operator==(const Char16Ptr &other) const { return get() == other.get(); } + UBool operator!=(const Char16Ptr &other) const { return !operator==(other); } + UBool operator==(const char16_t *other) const { return get() == other; } + UBool operator!=(const char16_t *other) const { return !operator==(other); } + UBool operator==(const uint16_t *other) const { return static_cast(*this) == other; } + UBool operator!=(const uint16_t *other) const { return !operator==(other); } +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + UBool operator==(const wchar_t *other) const { return static_cast(*this) == other; } + UBool operator!=(const wchar_t *other) const { return !operator==(other); } +#endif + UBool operator==(const std::nullptr_t null) const { return get() == null; } + UBool operator!=(const std::nullptr_t null) const { return !operator==(null); } + /** + * Comparison with NULL. + * @return TRUE if the pointer is nullptr and null==0 + * @draft ICU 59 + */ + UBool operator==(int null) const { return get() == nullptr && null == 0; } + /** + * Comparison with NULL. 
+ * @return TRUE if the pointer is not nullptr and null==0 + * @draft ICU 59 + */ + UBool operator!=(int null) const { return get() != nullptr && null == 0; } + + Char16Ptr operator+(size_t offset) const { return Char16Ptr(get() + offset); } + +private: + Char16Ptr() = delete; + +#ifdef U_ALIASING_BARRIER + template static char16_t *cast(T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast(t); + } + + char16_t *p; +#else + union { + char16_t *cp; + uint16_t *up; + wchar_t *wp; + } u; +#endif +}; + +#ifdef U_ALIASING_BARRIER + +Char16Ptr::Char16Ptr(char16_t *p) : p(p) {} +Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {} +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {} +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {} +Char16Ptr::~Char16Ptr() { + U_ALIASING_BARRIER(p); +} + +char16_t *Char16Ptr::get() const { return p; } + +Char16Ptr::operator uint16_t *() const { + U_ALIASING_BARRIER(p); + return reinterpret_cast(p); +} +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::operator wchar_t *() const { + U_ALIASING_BARRIER(p); + return reinterpret_cast(p); +} +#endif + +#else + +Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; } +Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; } +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; } +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; } +Char16Ptr::~Char16Ptr() {} + +char16_t *Char16Ptr::get() const { return u.cp; } + +Char16Ptr::operator uint16_t *() const { + return u.up; +} +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::operator wchar_t *() const { + return u.wp; +} +#endif + +#endif + +/** + * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types, + * and from NULL. + * @draft ICU 59 + */ +class U_COMMON_API ConstChar16Ptr final { +public: + /** + * Copies the pointer. + * @draft ICU 59 + */ + inline ConstChar16Ptr(const char16_t *p); + /** + * Converts the pointer to char16_t *. 
+ * @draft ICU 59 + */ + inline ConstChar16Ptr(const uint16_t *p); +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @draft ICU 59 + */ + inline ConstChar16Ptr(const wchar_t *p); +#endif + /** + * nullptr constructor. + * @draft ICU 59 + */ + inline ConstChar16Ptr(const std::nullptr_t p); + /** + * NULL constructor. + * Must only be used for 0 which is usually the value of NULL. + * @draft ICU 59 + */ + ConstChar16Ptr(int null); + /** + * Destructor. + * @draft ICU 59 + */ + inline ~ConstChar16Ptr(); + + /** + * Pointer access. + * @draft ICU 59 + */ + inline const char16_t *get() const; + /** + * char16_t pointer access via type conversion (e.g., static_cast). + * @draft ICU 59 + */ + operator const char16_t *() const { return get(); } + /** + * uint16_t pointer access via type conversion (e.g., static_cast). + * @draft ICU 59 + */ + inline operator const uint16_t *() const; +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t pointer access via type conversion (e.g., static_cast). 
+ * @draft ICU 59 + */ + inline operator const wchar_t *() const; +#endif + operator const void *() const { return get(); } + + char16_t operator[](size_t offset) const { return get()[offset]; } + + UBool operator==(const ConstChar16Ptr &other) const { return get() == other.get(); } + UBool operator!=(const ConstChar16Ptr &other) const { return !operator==(other); } + UBool operator==(const char16_t *other) const { return get() == other; } + UBool operator!=(const char16_t *other) const { return !operator==(other); } + UBool operator==(const uint16_t *other) const { return static_cast(*this) == other; } + UBool operator!=(const uint16_t *other) const { return !operator==(other); } +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + UBool operator==(const wchar_t *other) const { return static_cast(*this) == other; } + UBool operator!=(const wchar_t *other) const { return !operator==(other); } +#endif + UBool operator==(const std::nullptr_t null) const { return get() == null; } + UBool operator!=(const std::nullptr_t null) const { return !operator==(null); } + UBool operator==(int null) const { return get() == nullptr && null == 0; } + UBool operator!=(int null) const { return get() != nullptr && null == 0; } + + ConstChar16Ptr operator+(size_t offset) { return ConstChar16Ptr(get() + offset); } + +private: + ConstChar16Ptr() = delete; + +#ifdef U_ALIASING_BARRIER + template static const char16_t *cast(const T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast(t); + } + + const char16_t *p; +#else + union { + const char16_t *cp; + const uint16_t *up; + const wchar_t *wp; + } u; +#endif +}; + +#ifdef U_ALIASING_BARRIER + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {} +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {} +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {} +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {} +ConstChar16Ptr::~ConstChar16Ptr() { + 
U_ALIASING_BARRIER(p); +} + +const char16_t *ConstChar16Ptr::get() const { return p; } + +ConstChar16Ptr::operator const uint16_t *() const { + U_ALIASING_BARRIER(p); + return reinterpret_cast(p); +} +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::operator const wchar_t *() const { + U_ALIASING_BARRIER(p); + return reinterpret_cast(p); +} +#endif + +#else + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; } +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; } +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; } +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; } +ConstChar16Ptr::~ConstChar16Ptr() {} + +const char16_t *ConstChar16Ptr::get() const { return u.cp; } + +ConstChar16Ptr::operator const uint16_t *() const { + return u.up; +} +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::operator const wchar_t *() const { + return u.wp; +} +#endif + +#endif + +U_NAMESPACE_END + +#endif // __CHAR16PTR_H__ diff --git a/icu4c/source/common/unicode/normlzr.h b/icu4c/source/common/unicode/normlzr.h index 53665e97842..31af07b7cc1 100644 --- a/icu4c/source/common/unicode/normlzr.h +++ b/icu4c/source/common/unicode/normlzr.h @@ -168,7 +168,7 @@ public: * @param mode The normalization mode. * @deprecated ICU 56 Use Normalizer2 instead. */ - Normalizer(const UChar* str, int32_t length, UNormalizationMode mode); + Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode); /** * Creates a new Normalizer object for iterating over the @@ -704,7 +704,7 @@ public: * @param status a UErrorCode * @deprecated ICU 56 Use Normalizer2 instead. 
*/ - void setText(const UChar* newText, + void setText(ConstChar16Ptr newText, int32_t length, UErrorCode &status); /** diff --git a/icu4c/source/common/unicode/ucharstrie.h b/icu4c/source/common/unicode/ucharstrie.h index 91c5ba1c2c2..fdff53f4d33 100644 --- a/icu4c/source/common/unicode/ucharstrie.h +++ b/icu4c/source/common/unicode/ucharstrie.h @@ -63,7 +63,7 @@ public: * @param trieUChars The UChar array that contains the serialized trie. * @stable ICU 4.8 */ - UCharsTrie(const UChar *trieUChars) + UCharsTrie(ConstChar16Ptr trieUChars) : ownedArray_(NULL), uchars_(trieUChars), pos_(uchars_), remainingMatchLength_(-1) {} @@ -208,7 +208,7 @@ public: * @return The match/value Result. * @stable ICU 4.8 */ - UStringTrieResult next(const UChar *s, int32_t length); + UStringTrieResult next(ConstChar16Ptr s, int32_t length); /** * Returns a matching string's value if called immediately after @@ -268,7 +268,7 @@ public: * function chaining. (See User Guide for details.) * @stable ICU 4.8 */ - Iterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode); + Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode); /** * Iterates from the current state of the specified UCharsTrie. diff --git a/icu4c/source/common/unicode/uchriter.h b/icu4c/source/common/unicode/uchriter.h index 3408a555071..644fbd8a2a9 100644 --- a/icu4c/source/common/unicode/uchriter.h +++ b/icu4c/source/common/unicode/uchriter.h @@ -43,7 +43,7 @@ public: * @param length The length of the UChar array * @stable ICU 2.0 */ - UCharCharacterIterator(const UChar* textPtr, int32_t length); + UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length); /** * Create an iterator over the UChar array referred to by "textPtr". 
@@ -58,7 +58,7 @@ public: * @param position The starting position of the iteration * @stable ICU 2.0 */ - UCharCharacterIterator(const UChar* textPtr, int32_t length, + UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, int32_t position); /** @@ -77,7 +77,7 @@ public: * @param position The starting position of the iteration * @stable ICU 2.0 */ - UCharCharacterIterator(const UChar* textPtr, int32_t length, + UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, int32_t textBegin, int32_t textEnd, int32_t position); @@ -340,7 +340,7 @@ public: * Sets the iterator to iterate over a new range of text * @stable ICU 2.0 */ - void setText(const UChar* newText, int32_t newTextLength); + void setText(ConstChar16Ptr newText, int32_t newTextLength); /** * Copies the UChar array under iteration into the UnicodeString diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 3ed6b7f41cc..331b01281ad 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -30,6 +30,7 @@ #include #include "unicode/utypes.h" +#include "unicode/char16ptr.h" #include "unicode/rep.h" #include "unicode/std_string.h" #include "unicode/stringpiece.h" @@ -57,337 +58,6 @@ u_strlen(const UChar *s); U_NAMESPACE_BEGIN -// TODO begin experiment --------------- - -/** - * \def U_ALIASING_BARRIER - * Barrier for pointer anti-aliasing optimizations even across function boundaries. - * @internal - */ -#ifdef U_ALIASING_BARRIER - // Use the predefined value. -#elif defined(__clang__) || defined(__GNUC__) -# define U_ALIASING_BARRIER(ptr) asm volatile("" : "+rm"(ptr)) -#endif - -/** - * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types, - * and from NULL. - * @draft ICU 59 - */ -class U_COMMON_API Char16Ptr final { -public: - /** - * Copies the pointer. - * TODO: @param p ... - * @draft ICU 59 - */ - inline Char16Ptr(char16_t *p); - /** - * Converts the pointer to char16_t *. 
- * @draft ICU 59 - */ - inline Char16Ptr(uint16_t *p); -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - /** - * Converts the pointer to char16_t *. - * (Only defined if U_SIZEOF_WCHAR_T==2.) - * @draft ICU 59 - */ - inline Char16Ptr(wchar_t *p); -#endif - /** - * nullptr constructor. - * @draft ICU 59 - */ - inline Char16Ptr(std::nullptr_t p); - /** - * NULL constructor. - * Must only be used for 0 which is usually the value of NULL. - * @draft ICU 59 - */ - Char16Ptr(int null); - /** - * Destructor. - * @draft ICU 59 - */ - inline ~Char16Ptr(); - - /** - * Pointer access. - * TODO @return ... - * @draft ICU 59 - */ - inline char16_t *get() const; - /** - * char16_t pointer access via type conversion (e.g., static_cast). - * @draft ICU 59 - */ - operator char16_t *() const { return get(); } - /** - * uint16_t pointer access via type conversion (e.g., static_cast). - * @draft ICU 59 - */ - inline operator uint16_t *() const; -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - /** - * wchar_t pointer access via type conversion (e.g., static_cast). 
- * @draft ICU 59 - */ - inline operator wchar_t *() const; -#endif - operator void *() const { return get(); } - - char16_t operator[](size_t offset) const { return get()[offset]; } - - UBool operator==(const Char16Ptr &other) const { return get() == other.get(); } - UBool operator!=(const Char16Ptr &other) const { return !operator==(other); } - UBool operator==(const char16_t *other) const { return get() == other; } - UBool operator!=(const char16_t *other) const { return !operator==(other); } - UBool operator==(const uint16_t *other) const { return static_cast(*this) == other; } - UBool operator!=(const uint16_t *other) const { return !operator==(other); } -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - UBool operator==(const wchar_t *other) const { return static_cast(*this) == other; } - UBool operator!=(const wchar_t *other) const { return !operator==(other); } -#endif - UBool operator==(const std::nullptr_t null) const { return get() == null; } - UBool operator!=(const std::nullptr_t null) const { return !operator==(null); } - /** - * Comparison with NULL. - * @return TRUE if the pointer is nullptr and null==0 - * @draft ICU 59 - */ - UBool operator==(int null) const { return get() == nullptr && null == 0; } - /** - * Comparison with NULL. 
- * @return TRUE if the pointer is not nullptr and null==0 - * @draft ICU 59 - */ - UBool operator!=(int null) const { return get() != nullptr && null == 0; } - - Char16Ptr operator+(size_t offset) const { return Char16Ptr(get() + offset); } - -private: - Char16Ptr() = delete; - -#ifdef U_ALIASING_BARRIER - template static char16_t *cast(T *t) { - U_ALIASING_BARRIER(t); - return reinterpret_cast(t); - } - - char16_t *p; -#else - union { - char16_t *cp; - uint16_t *up; - wchar_t *wp; - } u; -#endif -}; - -#ifdef U_ALIASING_BARRIER - -Char16Ptr::Char16Ptr(char16_t *p) : p(p) {} -Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {} -#if U_SIZEOF_WCHAR_T==2 -Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {} -#endif -Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {} -Char16Ptr::~Char16Ptr() { - U_ALIASING_BARRIER(p); -} - -char16_t *Char16Ptr::get() const { return p; } - -Char16Ptr::operator uint16_t *() const { - U_ALIASING_BARRIER(p); - return reinterpret_cast(p); -} -#if U_SIZEOF_WCHAR_T==2 -Char16Ptr::operator wchar_t *() const { - U_ALIASING_BARRIER(p); - return reinterpret_cast(p); -} -#endif - -#else - -Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; } -Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; } -#if U_SIZEOF_WCHAR_T==2 -Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; } -#endif -Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; } -Char16Ptr::~Char16Ptr() {} - -char16_t *Char16Ptr::get() const { return u.cp; } - -Char16Ptr::operator uint16_t *() const { - return u.up; -} -#if U_SIZEOF_WCHAR_T==2 -Char16Ptr::operator wchar_t *() const { - return u.wp; -} -#endif - -#endif - -/** - * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types, - * and from NULL. - * @draft ICU 59 - */ -class U_COMMON_API ConstChar16Ptr final { -public: - /** - * Copies the pointer. - * @draft ICU 59 - */ - inline ConstChar16Ptr(const char16_t *p); - /** - * Converts the pointer to char16_t *. 
- * @draft ICU 59 - */ - inline ConstChar16Ptr(const uint16_t *p); -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - /** - * Converts the pointer to char16_t *. - * (Only defined if U_SIZEOF_WCHAR_T==2.) - * @draft ICU 59 - */ - inline ConstChar16Ptr(const wchar_t *p); -#endif - /** - * nullptr constructor. - * @draft ICU 59 - */ - inline ConstChar16Ptr(const std::nullptr_t p); - /** - * NULL constructor. - * Must only be used for 0 which is usually the value of NULL. - * @draft ICU 59 - */ - ConstChar16Ptr(int null); - /** - * Destructor. - * @draft ICU 59 - */ - inline ~ConstChar16Ptr(); - - /** - * Pointer access. - * @draft ICU 59 - */ - inline const char16_t *get() const; - /** - * char16_t pointer access via type conversion (e.g., static_cast). - * @draft ICU 59 - */ - operator const char16_t *() const { return get(); } - /** - * uint16_t pointer access via type conversion (e.g., static_cast). - * @draft ICU 59 - */ - inline operator const uint16_t *() const; -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - /** - * wchar_t pointer access via type conversion (e.g., static_cast). 
- * @draft ICU 59 - */ - inline operator const wchar_t *() const; -#endif - operator const void *() const { return get(); } - - char16_t operator[](size_t offset) const { return get()[offset]; } - - UBool operator==(const ConstChar16Ptr &other) const { return get() == other.get(); } - UBool operator!=(const ConstChar16Ptr &other) const { return !operator==(other); } - UBool operator==(const char16_t *other) const { return get() == other; } - UBool operator!=(const char16_t *other) const { return !operator==(other); } - UBool operator==(const uint16_t *other) const { return static_cast(*this) == other; } - UBool operator!=(const uint16_t *other) const { return !operator==(other); } -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - UBool operator==(const wchar_t *other) const { return static_cast(*this) == other; } - UBool operator!=(const wchar_t *other) const { return !operator==(other); } -#endif - UBool operator==(const std::nullptr_t null) const { return get() == null; } - UBool operator!=(const std::nullptr_t null) const { return !operator==(null); } - UBool operator==(int null) const { return get() == nullptr && null == 0; } - UBool operator!=(int null) const { return get() != nullptr && null == 0; } - - ConstChar16Ptr operator+(size_t offset) { return ConstChar16Ptr(get() + offset); } - -private: - ConstChar16Ptr() = delete; - -#ifdef U_ALIASING_BARRIER - template static const char16_t *cast(const T *t) { - U_ALIASING_BARRIER(t); - return reinterpret_cast(t); - } - - const char16_t *p; -#else - union { - const char16_t *cp; - const uint16_t *up; - const wchar_t *wp; - } u; -#endif -}; - -#ifdef U_ALIASING_BARRIER - -ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {} -ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {} -#if U_SIZEOF_WCHAR_T==2 -ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {} -#endif -ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {} -ConstChar16Ptr::~ConstChar16Ptr() { - 
U_ALIASING_BARRIER(p); -} - -const char16_t *ConstChar16Ptr::get() const { return p; } - -ConstChar16Ptr::operator const uint16_t *() const { - U_ALIASING_BARRIER(p); - return reinterpret_cast(p); -} -#if U_SIZEOF_WCHAR_T==2 -ConstChar16Ptr::operator const wchar_t *() const { - U_ALIASING_BARRIER(p); - return reinterpret_cast(p); -} -#endif - -#else - -ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; } -ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; } -#if U_SIZEOF_WCHAR_T==2 -ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; } -#endif -ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; } -ConstChar16Ptr::~ConstChar16Ptr() {} - -const char16_t *ConstChar16Ptr::get() const { return u.cp; } - -ConstChar16Ptr::operator const uint16_t *() const { - return u.up; -} -#if U_SIZEOF_WCHAR_T==2 -ConstChar16Ptr::operator const wchar_t *() const { - return u.wp; -} -#endif - -#endif - -// TODO end experiment ----------------- - #if !UCONFIG_NO_BREAK_ITERATION class BreakIterator; // unicode/brkiter.h #endif diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index a891dec799b..10c781746e1 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -98,48 +98,6 @@ U_CDECL_END U_NAMESPACE_BEGIN -#ifdef U_ALIASING_BARRIER - -Char16Ptr::Char16Ptr(int null) : p(nullptr) { - U_ASSERT(null == 0); - if (null != 0) { - // Try to provoke a crash. - p = reinterpret_cast(1); - } -} - -ConstChar16Ptr::ConstChar16Ptr(int null) : p(nullptr) { - U_ASSERT(null == 0); - if (null != 0) { - // Try to provoke a crash. - p = reinterpret_cast(1); - } -} - -#else - -Char16Ptr::Char16Ptr(int null) { - U_ASSERT(null == 0); - if (null == 0) { - u.cp = nullptr; - } else { - // Try to provoke a crash. - u.cp = reinterpret_cast(1); - } -} - -ConstChar16Ptr::ConstChar16Ptr(int null) { - U_ASSERT(null == 0); - if (null == 0) { - u.cp = nullptr; - } else { - // Try to provoke a crash. 
- u.cp = reinterpret_cast(1); - } -} - -#endif - /* The Replaceable virtual destructor can't be defined in the header due to how AIX works with multiple definitions of virtual functions. */ diff --git a/icu4c/source/common/ustr_titlecase_brkiter.cpp b/icu4c/source/common/ustr_titlecase_brkiter.cpp index 0b2ba02064b..ac46bc9359f 100644 --- a/icu4c/source/common/ustr_titlecase_brkiter.cpp +++ b/icu4c/source/common/ustr_titlecase_brkiter.cpp @@ -66,8 +66,8 @@ U_NAMESPACE_BEGIN int32_t CaseMap::toTitle( const char *locale, uint32_t options, BreakIterator *iter, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, + ConstChar16Ptr src, int32_t srcLength, + Char16Ptr dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode) { LocalPointer ownedIter; if(iter==NULL) { diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp index 0e38a42e103..d0bdd1511ab 100644 --- a/icu4c/source/common/ustrcase.cpp +++ b/icu4c/source/common/ustrcase.cpp @@ -1198,8 +1198,8 @@ U_NAMESPACE_BEGIN int32_t CaseMap::fold( uint32_t options, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, + ConstChar16Ptr src, int32_t srcLength, + Char16Ptr dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode) { return ustrcase_map( UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL diff --git a/icu4c/source/common/ustrcase_locale.cpp b/icu4c/source/common/ustrcase_locale.cpp index 2ecd24f03ec..0c6d095d367 100644 --- a/icu4c/source/common/ustrcase_locale.cpp +++ b/icu4c/source/common/ustrcase_locale.cpp @@ -69,8 +69,8 @@ U_NAMESPACE_BEGIN int32_t CaseMap::toLower( const char *locale, uint32_t options, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, + ConstChar16Ptr src, int32_t srcLength, + Char16Ptr dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode) { return ustrcase_map( ustrcase_getCaseLocale(locale), options, 
UCASEMAP_BREAK_ITERATOR_NULL @@ -81,8 +81,8 @@ int32_t CaseMap::toLower( int32_t CaseMap::toUpper( const char *locale, uint32_t options, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, + ConstChar16Ptr src, int32_t srcLength, + Char16Ptr dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode) { return ustrcase_map( ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL diff --git a/icu4c/source/i18n/curramt.cpp b/icu4c/source/i18n/curramt.cpp index 51f23b437ab..d2242707a1e 100644 --- a/icu4c/source/i18n/curramt.cpp +++ b/icu4c/source/i18n/curramt.cpp @@ -19,12 +19,12 @@ U_NAMESPACE_BEGIN -CurrencyAmount::CurrencyAmount(const Formattable& amount, const UChar* isoCode, +CurrencyAmount::CurrencyAmount(const Formattable& amount, ConstChar16Ptr isoCode, UErrorCode& ec) : Measure(amount, new CurrencyUnit(isoCode, ec), ec) { } -CurrencyAmount::CurrencyAmount(double amount, const UChar* isoCode, +CurrencyAmount::CurrencyAmount(double amount, ConstChar16Ptr isoCode, UErrorCode& ec) : Measure(Formattable(amount), new CurrencyUnit(isoCode, ec), ec) { } diff --git a/icu4c/source/i18n/currunit.cpp b/icu4c/source/i18n/currunit.cpp index 3c74c1c1d99..197885452f5 100644 --- a/icu4c/source/i18n/currunit.cpp +++ b/icu4c/source/i18n/currunit.cpp @@ -19,10 +19,10 @@ U_NAMESPACE_BEGIN -CurrencyUnit::CurrencyUnit(const UChar* _isoCode, UErrorCode& ec) { +CurrencyUnit::CurrencyUnit(ConstChar16Ptr _isoCode, UErrorCode& ec) { *isoCode = 0; if (U_SUCCESS(ec)) { - if (_isoCode && u_strlen(_isoCode)==3) { + if (_isoCode != nullptr && u_strlen(_isoCode)==3) { u_strcpy(isoCode, _isoCode); char simpleIsoCode[4]; u_UCharsToChars(isoCode, simpleIsoCode, 4); diff --git a/icu4c/source/i18n/dtfmtsym.cpp b/icu4c/source/i18n/dtfmtsym.cpp index 333c85c8fe0..960971980bc 100644 --- a/icu4c/source/i18n/dtfmtsym.cpp +++ b/icu4c/source/i18n/dtfmtsym.cpp @@ -1368,7 +1368,7 @@ DateFormatSymbols::setZoneStrings(const UnicodeString* const 
*strings, int32_t r //------------------------------------------------------ -const UChar * U_EXPORT2 +ConstChar16Ptr U_EXPORT2 DateFormatSymbols::getPatternUChars(void) { return gPatternChars; diff --git a/icu4c/source/i18n/numfmt.cpp b/icu4c/source/i18n/numfmt.cpp index 951146725af..e84f9fdd8d2 100644 --- a/icu4c/source/i18n/numfmt.cpp +++ b/icu4c/source/i18n/numfmt.cpp @@ -1188,7 +1188,7 @@ void NumberFormat::setCurrency(const UChar* theCurrency, UErrorCode& ec) { } } -const UChar* NumberFormat::getCurrency() const { +ConstChar16Ptr NumberFormat::getCurrency() const { return fCurrency; } diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp index 3c0670446b3..69a13450a68 100644 --- a/icu4c/source/i18n/smpdtfmt.cpp +++ b/icu4c/source/i18n/smpdtfmt.cpp @@ -3789,7 +3789,7 @@ SimpleDateFormat::toLocalizedPattern(UnicodeString& result, UErrorCode& status) const { translatePattern(fPattern, result, - UnicodeString(DateFormatSymbols::getPatternUChars()), + UnicodeString(DateFormatSymbols::getPatternUChars().get()), fSymbols->fLocalPatternChars, status); return result; } @@ -3811,7 +3811,7 @@ SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern, { translatePattern(pattern, fPattern, fSymbols->fLocalPatternChars, - UnicodeString(DateFormatSymbols::getPatternUChars()), status); + UnicodeString(DateFormatSymbols::getPatternUChars().get()), status); } //---------------------------------------------------------------------- diff --git a/icu4c/source/i18n/unicode/curramt.h b/icu4c/source/i18n/unicode/curramt.h index 9071f11bd10..a645667d9d0 100644 --- a/icu4c/source/i18n/unicode/curramt.h +++ b/icu4c/source/i18n/unicode/curramt.h @@ -46,7 +46,7 @@ class U_I18N_API CurrencyAmount: public Measure { * is invalid, then this will be set to a failing value. 
* @stable ICU 3.0 */ - CurrencyAmount(const Formattable& amount, const UChar* isoCode, + CurrencyAmount(const Formattable& amount, ConstChar16Ptr isoCode, UErrorCode &ec); /** @@ -59,7 +59,7 @@ class U_I18N_API CurrencyAmount: public Measure { * then this will be set to a failing value. * @stable ICU 3.0 */ - CurrencyAmount(double amount, const UChar* isoCode, + CurrencyAmount(double amount, ConstChar16Ptr isoCode, UErrorCode &ec); /** @@ -115,14 +115,14 @@ class U_I18N_API CurrencyAmount: public Measure { * Return the ISO currency code of this object. * @stable ICU 3.0 */ - inline const UChar* getISOCurrency() const; + inline ConstChar16Ptr getISOCurrency() const; }; inline const CurrencyUnit& CurrencyAmount::getCurrency() const { return (const CurrencyUnit&) getUnit(); } -inline const UChar* CurrencyAmount::getISOCurrency() const { +inline ConstChar16Ptr CurrencyAmount::getISOCurrency() const { return getCurrency().getISOCurrency(); } diff --git a/icu4c/source/i18n/unicode/currunit.h b/icu4c/source/i18n/unicode/currunit.h index 61f9201ebb2..857d9ceafe0 100644 --- a/icu4c/source/i18n/unicode/currunit.h +++ b/icu4c/source/i18n/unicode/currunit.h @@ -44,7 +44,7 @@ class U_I18N_API CurrencyUnit: public MeasureUnit { * then this will be set to a failing value. * @stable ICU 3.0 */ - CurrencyUnit(const UChar* isoCode, UErrorCode &ec); + CurrencyUnit(ConstChar16Ptr isoCode, UErrorCode &ec); /** * Copy constructor @@ -93,7 +93,7 @@ class U_I18N_API CurrencyUnit: public MeasureUnit { * Return the ISO currency code of this object. 
* @stable ICU 3.0 */ - inline const UChar* getISOCurrency() const; + inline ConstChar16Ptr getISOCurrency() const; private: /** @@ -102,7 +102,7 @@ class U_I18N_API CurrencyUnit: public MeasureUnit { UChar isoCode[4]; }; -inline const UChar* CurrencyUnit::getISOCurrency() const { +inline ConstChar16Ptr CurrencyUnit::getISOCurrency() const { return isoCode; } diff --git a/icu4c/source/i18n/unicode/dtfmtsym.h b/icu4c/source/i18n/unicode/dtfmtsym.h index 512ebb8351f..8374de3c207 100644 --- a/icu4c/source/i18n/unicode/dtfmtsym.h +++ b/icu4c/source/i18n/unicode/dtfmtsym.h @@ -566,7 +566,7 @@ public: * @return the non-localized date-time pattern characters * @stable ICU 2.0 */ - static const UChar * U_EXPORT2 getPatternUChars(void); + static ConstChar16Ptr U_EXPORT2 getPatternUChars(void); /** * Gets localized date-time pattern characters. For example: 'u', 't', etc. diff --git a/icu4c/source/i18n/unicode/numfmt.h b/icu4c/source/i18n/unicode/numfmt.h index 1907476ff30..009d59f0916 100644 --- a/icu4c/source/i18n/unicode/numfmt.h +++ b/icu4c/source/i18n/unicode/numfmt.h @@ -940,7 +940,7 @@ public: * the currency in use, or a pointer to the empty string. 
* @stable ICU 2.6 */ - const UChar* getCurrency() const; + ConstChar16Ptr getCurrency() const; /** * Set a particular UDisplayContext value in the formatter, such as diff --git a/icu4c/source/i18n/unum.cpp b/icu4c/source/i18n/unum.cpp index b8d26612ff2..458a79c7fde 100644 --- a/icu4c/source/i18n/unum.cpp +++ b/icu4c/source/i18n/unum.cpp @@ -609,7 +609,7 @@ unum_getTextAttribute(const UNumberFormat* fmt, break; case UNUM_CURRENCY_CODE: - res = UnicodeString(df->getCurrency()); + res = UnicodeString(df->getCurrency().get()); break; default: diff --git a/icu4c/source/test/intltest/dtfmttst.cpp b/icu4c/source/test/intltest/dtfmttst.cpp index 63d127346da..f9461739257 100644 --- a/icu4c/source/test/intltest/dtfmttst.cpp +++ b/icu4c/source/test/intltest/dtfmttst.cpp @@ -514,7 +514,7 @@ void DateFormatTest::TestFieldPosition() { // local pattern chars data is not longer loaded // from icu locale bundle assertEquals("patternChars", PATTERN_CHARS, rootSyms.getLocalPatternChars(buf)); - assertEquals("patternChars", PATTERN_CHARS, DateFormatSymbols::getPatternUChars()); + assertEquals("patternChars", PATTERN_CHARS, DateFormatSymbols::getPatternUChars().get()); assertTrue("DATEFORMAT_FIELD_NAMES", DATEFORMAT_FIELD_NAMES_LENGTH == UDAT_FIELD_COUNT); #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR assertTrue("Data", UDAT_FIELD_COUNT == uprv_strlen(PATTERN_CHARS)); diff --git a/icu4c/source/test/intltest/intltest.cpp b/icu4c/source/test/intltest/intltest.cpp index ab5ce8c1272..bfefe3f8570 100644 --- a/icu4c/source/test/intltest/intltest.cpp +++ b/icu4c/source/test/intltest/intltest.cpp @@ -187,7 +187,7 @@ UnicodeString _toString(const Formattable& f) { case Formattable::kObject: { const CurrencyAmount* c = dynamic_cast(f.getObject()); if (c != NULL) { - s = _toString(c->getNumber()) + " " + UnicodeString(c->getISOCurrency()); + s = _toString(c->getNumber()) + " " + UnicodeString(c->getISOCurrency().get()); } else { s = UnicodeString("Unknown UObject"); } diff --git 
a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index f34d8397c01..10fa1c74950 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -1856,7 +1856,7 @@ void MeasureFormatTest::TestCurrencies() { u_uastrcpy(USD, "USD"); UErrorCode status = U_ZERO_ERROR; CurrencyAmount USD_1(1.0, USD, status); - assertEquals("Currency Code", USD, USD_1.getISOCurrency()); + assertEquals("Currency Code", USD, USD_1.getISOCurrency().get()); CurrencyAmount USD_2(2.0, USD, status); CurrencyAmount USD_NEG_1(-1.0, USD, status); if (!assertSuccess("Error creating currencies", status)) { diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index a8553f57802..ea67aa2d47c 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -440,7 +440,7 @@ UBool NumberFormatTestDataDriven::isParseCurrencyPass( } return TRUE; } - UnicodeString currStr(currAmt->getISOCurrency()); + UnicodeString currStr(currAmt->getISOCurrency().get()); Formattable resultFormattable(currAmt->getNumber()); UnicodeString resultStr(UnicodeString::fromUTF8(resultFormattable.getDecimalNumber(status))); if (tuple.output == "fail") { @@ -3168,7 +3168,7 @@ void NumberFormatTest::expectParseCurrency(const NumberFormat &fmt, const UChar* uprv_strcpy(theOperation, theInfo); uprv_strcat(theOperation, ", check currency:"); - assertEquals(theOperation, currency, currencyAmount->getISOCurrency()); + assertEquals(theOperation, currency, currencyAmount->getISOCurrency().get()); } @@ -3763,14 +3763,14 @@ NumberFormatTest::TestCurrencyFormatForMixParsing() { } else if (result.getType() != Formattable::kObject || (curramt = dynamic_cast(result.getObject())) == NULL || curramt->getNumber().getDouble() != 1234.56 || - UnicodeString(curramt->getISOCurrency()).compare(ISO_CURRENCY_USD) + UnicodeString(curramt->getISOCurrency().get()).compare(ISO_CURRENCY_USD) ) { 
errln("FAIL: getCurrencyFormat of default locale (en_US) failed roundtripping the number "); if (curramt->getNumber().getDouble() != 1234.56) { errln((UnicodeString)"wong number, expect: 1234.56" + ", got: " + curramt->getNumber().getDouble()); } if (curramt->getISOCurrency() != ISO_CURRENCY_USD) { - errln((UnicodeString)"wong currency, expect: USD" + ", got: " + curramt->getISOCurrency()); + errln((UnicodeString)"wong currency, expect: USD" + ", got: " + curramt->getISOCurrency().get()); } } } diff --git a/icu4c/source/test/intltest/tsmthred.cpp b/icu4c/source/test/intltest/tsmthred.cpp index 036d5e1d355..5cff8ee51d0 100644 --- a/icu4c/source/test/intltest/tsmthred.cpp +++ b/icu4c/source/test/intltest/tsmthred.cpp @@ -547,7 +547,7 @@ UBool ThreadSafeFormat::doStuff(int32_t offset, UnicodeString &appendErr, UError appendErr.append("fFormat currency != ") .append(kUSD) .append(", =") - .append(fFormat->getCurrency()) + .append(fFormat->getCurrency().get()) .append("! "); okay = FALSE; } @@ -556,7 +556,7 @@ UBool ThreadSafeFormat::doStuff(int32_t offset, UnicodeString &appendErr, UError appendErr.append("gFormat currency != ") .append(kUSD) .append(", =") - .append(gSharedData->fFormat->getCurrency()) + .append(gSharedData->fFormat->getCurrency().get()) .append("! 
"); okay = FALSE; } From 4c1c730f5757ba1ea72f9442a9d5cda1879279fc Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 1 Mar 2017 21:11:54 +0000 Subject: [PATCH 05/26] ICU-12992 remove conversion from int; NULL is often nullptr or __null not simply 0 X-SVN-Rev: 39717 --- icu4c/source/common/char16ptr.cpp | 42 ------------------------- icu4c/source/common/unicode/char16ptr.h | 42 +++++++++---------------- icu4c/source/common/unicode/unistr.h | 2 +- icu4c/source/common/unistr.cpp | 4 +-- icu4c/source/test/intltest/ustrtest.cpp | 16 ---------- icu4c/source/test/intltest/ustrtest.h | 1 - 6 files changed, 17 insertions(+), 90 deletions(-) diff --git a/icu4c/source/common/char16ptr.cpp b/icu4c/source/common/char16ptr.cpp index 1da9cd8490c..5afec2af303 100644 --- a/icu4c/source/common/char16ptr.cpp +++ b/icu4c/source/common/char16ptr.cpp @@ -10,46 +10,4 @@ U_NAMESPACE_BEGIN -#ifdef U_ALIASING_BARRIER - -Char16Ptr::Char16Ptr(int null) : p(nullptr) { - U_ASSERT(null == 0); - if (null != 0) { - // Try to provoke a crash. - p = reinterpret_cast(1); - } -} - -ConstChar16Ptr::ConstChar16Ptr(int null) : p(nullptr) { - U_ASSERT(null == 0); - if (null != 0) { - // Try to provoke a crash. - p = reinterpret_cast(1); - } -} - -#else - -Char16Ptr::Char16Ptr(int null) { - U_ASSERT(null == 0); - if (null == 0) { - u.cp = nullptr; - } else { - // Try to provoke a crash. - u.cp = reinterpret_cast(1); - } -} - -ConstChar16Ptr::ConstChar16Ptr(int null) { - U_ASSERT(null == 0); - if (null == 0) { - u.cp = nullptr; - } else { - // Try to provoke a crash. 
- u.cp = reinterpret_cast(1); - } -} - -#endif - U_NAMESPACE_END diff --git a/icu4c/source/common/unicode/char16ptr.h b/icu4c/source/common/unicode/char16ptr.h index a949b7d4f54..672fa46ed27 100644 --- a/icu4c/source/common/unicode/char16ptr.h +++ b/icu4c/source/common/unicode/char16ptr.h @@ -30,8 +30,7 @@ U_NAMESPACE_BEGIN #endif /** - * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types, - * and from NULL. + * char16_t * wrapper with implicit conversion from/to distinct but bit-compatible pointer types. * @draft ICU 59 */ class U_COMMON_API Char16Ptr final { @@ -60,12 +59,6 @@ public: * @draft ICU 59 */ inline Char16Ptr(std::nullptr_t p); - /** - * NULL constructor. - * Must only be used for 0 which is usually the value of NULL. - * @draft ICU 59 - */ - Char16Ptr(int null); /** * Destructor. * @draft ICU 59 @@ -97,7 +90,7 @@ public: #endif operator void *() const { return get(); } - char16_t operator[](size_t offset) const { return get()[offset]; } + char16_t operator[](std::ptrdiff_t offset) const { return get()[offset]; } UBool operator==(const Char16Ptr &other) const { return get() == other.get(); } UBool operator!=(const Char16Ptr &other) const { return !operator==(other); } @@ -112,19 +105,19 @@ public: UBool operator==(const std::nullptr_t null) const { return get() == null; } UBool operator!=(const std::nullptr_t null) const { return !operator==(null); } /** - * Comparison with NULL. - * @return TRUE if the pointer is nullptr and null==0 + * Comparison with 0. + * @return TRUE if the pointer is nullptr and zero==0 * @draft ICU 59 */ - UBool operator==(int null) const { return get() == nullptr && null == 0; } + UBool operator==(int zero) const { return get() == nullptr && zero == 0; } /** - * Comparison with NULL. - * @return TRUE if the pointer is not nullptr and null==0 + * Comparison with 0. 
+ * @return TRUE if the pointer is not nullptr and zero==0 * @draft ICU 59 */ - UBool operator!=(int null) const { return get() != nullptr && null == 0; } + UBool operator!=(int zero) const { return get() != nullptr && zero == 0; } - Char16Ptr operator+(size_t offset) const { return Char16Ptr(get() + offset); } + Char16Ptr operator+(std::ptrdiff_t offset) const { return Char16Ptr(get() + offset); } private: Char16Ptr() = delete; @@ -194,8 +187,7 @@ Char16Ptr::operator wchar_t *() const { #endif /** - * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types, - * and from NULL. + * const char16_t * wrapper with implicit conversion from/to distinct but bit-compatible pointer types. * @draft ICU 59 */ class U_COMMON_API ConstChar16Ptr final { @@ -223,12 +215,6 @@ public: * @draft ICU 59 */ inline ConstChar16Ptr(const std::nullptr_t p); - /** - * NULL constructor. - * Must only be used for 0 which is usually the value of NULL. - * @draft ICU 59 - */ - ConstChar16Ptr(int null); /** * Destructor. 
* @draft ICU 59 @@ -259,7 +245,7 @@ public: #endif operator const void *() const { return get(); } - char16_t operator[](size_t offset) const { return get()[offset]; } + char16_t operator[](std::ptrdiff_t offset) const { return get()[offset]; } UBool operator==(const ConstChar16Ptr &other) const { return get() == other.get(); } UBool operator!=(const ConstChar16Ptr &other) const { return !operator==(other); } @@ -273,10 +259,10 @@ public: #endif UBool operator==(const std::nullptr_t null) const { return get() == null; } UBool operator!=(const std::nullptr_t null) const { return !operator==(null); } - UBool operator==(int null) const { return get() == nullptr && null == 0; } - UBool operator!=(int null) const { return get() != nullptr && null == 0; } + UBool operator==(int zero) const { return get() == nullptr && zero == 0; } + UBool operator!=(int zero) const { return get() != nullptr && zero == 0; } - ConstChar16Ptr operator+(size_t offset) { return ConstChar16Ptr(get() + offset); } + ConstChar16Ptr operator+(std::ptrdiff_t offset) { return ConstChar16Ptr(get() + offset); } private: ConstChar16Ptr() = delete; diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 331b01281ad..3dd2fa6c2a5 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -3943,7 +3943,7 @@ UnicodeString::isBufferWritable() const inline ConstChar16Ptr UnicodeString::getBuffer() const { if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { - return 0; + return nullptr; } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { return fUnion.fStackFields.fBuffer; } else { diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index 10c781746e1..3657d15e0f3 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -1221,7 +1221,7 @@ UnicodeString::unBogus() { ConstChar16Ptr UnicodeString::getTerminatedBuffer() { if(!isWritable()) { - return 0; + return 
nullptr; } UChar *array = getArrayStart(); int32_t len = length(); @@ -1723,7 +1723,7 @@ UnicodeString::getBuffer(int32_t minCapacity) { setZeroLength(); return getArrayStart(); } else { - return 0; + return nullptr; } } diff --git a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp index 429ae325b0c..ca031b1f423 100644 --- a/icu4c/source/test/intltest/ustrtest.cpp +++ b/icu4c/source/test/intltest/ustrtest.cpp @@ -64,7 +64,6 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* & TESTCASE_AUTO(TestUInt16Pointers); TESTCASE_AUTO(TestWCharPointers); TESTCASE_AUTO(TestNullPointers); - TESTCASE_AUTO(TestZeroPointers); TESTCASE_AUTO_END; } @@ -2258,18 +2257,3 @@ UnicodeStringTest::TestNullPointers() { UnicodeString(u"def").extract(nullptr, 0, errorCode); assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode); } - -void -UnicodeStringTest::TestZeroPointers() { - // There are constructor overloads with one and three integer parameters - // which match passing 0, so we cannot test using 0 for UnicodeString(pointer) - // or UnicodeString(read-only or writable alias). - // There are multiple two-parameter constructors that make using 0 - // for the first parameter ambiguous already, - // so we cannot test using 0 for UnicodeString(pointer, length). - - // extract() also has enough overloads to be ambiguous with 0. - // Test the pointer wrapper directly. 
- assertTrue("0 --> nullptr", Char16Ptr(0).get() == nullptr); - assertTrue("0 --> const nullptr", ConstChar16Ptr(0).get() == nullptr); -} diff --git a/icu4c/source/test/intltest/ustrtest.h b/icu4c/source/test/intltest/ustrtest.h index a2e2fbd4b71..4ba348c431f 100644 --- a/icu4c/source/test/intltest/ustrtest.h +++ b/icu4c/source/test/intltest/ustrtest.h @@ -96,7 +96,6 @@ public: void TestUInt16Pointers(); void TestWCharPointers(); void TestNullPointers(); - void TestZeroPointers(); }; #endif From b9ef7bcac4f63da4e6e949490a102d395a6b4b9d Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 1 Mar 2017 22:06:25 +0000 Subject: [PATCH 06/26] ICU-12992 change UnicodeString functions from const UChar * to ConstChar16Ptr where possible X-SVN-Rev: 39719 --- icu4c/source/common/caniter.cpp | 4 +-- icu4c/source/common/unicode/unistr.h | 42 ++++++++++++------------- icu4c/source/common/utext.cpp | 4 +-- icu4c/source/test/intltest/srchtest.cpp | 2 +- icu4c/source/test/intltest/ustrtest.cpp | 4 +-- 5 files changed, 28 insertions(+), 28 deletions(-) diff --git a/icu4c/source/common/caniter.cpp b/icu4c/source/common/caniter.cpp index e7779a1a9fe..aee9f4ee31a 100644 --- a/icu4c/source/common/caniter.cpp +++ b/icu4c/source/common/caniter.cpp @@ -311,12 +311,12 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros // see what the permutations of the characters before and after this one are //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp))); - permute(subPermuteString.replace(i, U16_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status); + permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status); /* Test for buffer overflows */ if(U_FAILURE(status)) { return; } - // The upper replace is destructive. The question is do we have to make a copy, or we don't care about the contents + // The upper remove is destructive. 
The question is do we have to make a copy, or we don't care about the contents // of source at this point. // prefix this character to all of them diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 3dd2fa6c2a5..0d1bbb6f49e 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -119,7 +119,7 @@ class UnicodeStringAppendable; // unicode/appendable.h * NUL, must be specified as a constant. * @stable ICU 2.0 */ -#define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)u ## cs, _length) +#define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length) /** * Unicode String literals in C++. @@ -436,7 +436,7 @@ public: * in srcChars. * @stable ICU 2.0 */ - inline int8_t compare(const UChar *srcChars, + inline int8_t compare(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -590,7 +590,7 @@ public: * in code point order * @stable ICU 2.0 */ - inline int8_t compareCodePointOrder(const UChar *srcChars, + inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -763,7 +763,7 @@ public: * @return A negative, zero, or positive integer indicating the comparison result. * @stable ICU 2.0 */ - inline int8_t caseCompare(const UChar *srcChars, + inline int8_t caseCompare(ConstChar16Ptr srcChars, int32_t srcLength, uint32_t options) const; @@ -881,7 +881,7 @@ public: * FALSE otherwise * @stable ICU 2.0 */ - inline UBool startsWith(const UChar *srcChars, + inline UBool startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -928,7 +928,7 @@ public: * FALSE otherwise * @stable ICU 2.0 */ - inline UBool endsWith(const UChar *srcChars, + inline UBool endsWith(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -1034,7 +1034,7 @@ public: * or -1 if not found. 
* @stable ICU 2.0 */ - inline int32_t indexOf(const UChar *srcChars, + inline int32_t indexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t length) const; @@ -1217,7 +1217,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(const UChar *srcChars, + inline int32_t lastIndexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t length) const; @@ -2220,7 +2220,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& append(const UChar *srcChars, + inline UnicodeString& append(ConstChar16Ptr srcChars, int32_t srcLength); /** @@ -2299,7 +2299,7 @@ public: * @stable ICU 2.0 */ inline UnicodeString& insert(int32_t start, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength); /** @@ -2403,7 +2403,7 @@ public: */ inline UnicodeString& replace(int32_t start, int32_t length, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength); /** @@ -4011,7 +4011,7 @@ UnicodeString::compare(int32_t start, { return doCompare(start, _length, srcText, 0, srcText.length()); } inline int8_t -UnicodeString::compare(const UChar *srcChars, +UnicodeString::compare(ConstChar16Ptr srcChars, int32_t srcLength) const { return doCompare(0, length(), srcChars, 0, srcLength); } @@ -4072,7 +4072,7 @@ UnicodeString::compareCodePointOrder(int32_t start, { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } inline int8_t -UnicodeString::compareCodePointOrder(const UChar *srcChars, +UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars, int32_t srcLength) const { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } @@ -4137,7 +4137,7 @@ UnicodeString::caseCompare(int32_t start, } inline int8_t -UnicodeString::caseCompare(const UChar *srcChars, +UnicodeString::caseCompare(ConstChar16Ptr srcChars, int32_t srcLength, uint32_t options) const { return doCaseCompare(0, length(), srcChars, 0, srcLength, options); @@ -4223,7 +4223,7 
@@ UnicodeString::indexOf(const UChar *srcChars, } inline int32_t -UnicodeString::indexOf(const UChar *srcChars, +UnicodeString::indexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t _length) const @@ -4264,7 +4264,7 @@ UnicodeString::indexOf(UChar32 c, } inline int32_t -UnicodeString::lastIndexOf(const UChar *srcChars, +UnicodeString::lastIndexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t _length) const @@ -4358,7 +4358,7 @@ UnicodeString::startsWith(const UnicodeString& srcText, { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } inline UBool -UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { +UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { srcLength = u_strlen(srcChars); } @@ -4388,7 +4388,7 @@ UnicodeString::endsWith(const UnicodeString& srcText, } inline UBool -UnicodeString::endsWith(const UChar *srcChars, +UnicodeString::endsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { srcLength = u_strlen(srcChars); @@ -4428,7 +4428,7 @@ UnicodeString::replace(int32_t start, inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength) { return doReplace(start, _length, srcChars, 0, srcLength); } @@ -4658,7 +4658,7 @@ UnicodeString::append(const UChar *srcChars, { return doAppend(srcChars, srcStart, srcLength); } inline UnicodeString& -UnicodeString::append(const UChar *srcChars, +UnicodeString::append(ConstChar16Ptr srcChars, int32_t srcLength) { return doAppend(srcChars, 0, srcLength); } @@ -4700,7 +4700,7 @@ UnicodeString::insert(int32_t start, inline UnicodeString& UnicodeString::insert(int32_t start, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength) { return doReplace(start, 0, srcChars, 0, srcLength); } diff --git a/icu4c/source/common/utext.cpp b/icu4c/source/common/utext.cpp index 
d68939429ce..52ae7ff9787 100644 --- a/icu4c/source/common/utext.cpp +++ b/icu4c/source/common/utext.cpp @@ -2242,13 +2242,13 @@ unistrTextCopy(UText *ut, } if(move) { - // move: copy to destIndex, then replace original with nothing + // move: copy to destIndex, then remove original int32_t segLength=limit32-start32; us->copy(start32, limit32, destIndex32); if(destIndex32<start32) { start32+=segLength; } - us->replace(start32, segLength, NULL, 0); + us->remove(start32, segLength); } else { // copy us->copy(start32, limit32, destIndex32); diff --git a/icu4c/source/test/intltest/srchtest.cpp b/icu4c/source/test/intltest/srchtest.cpp index b5ad2da92f4..a7534ddc64d 100644 --- a/icu4c/source/test/intltest/srchtest.cpp +++ b/icu4c/source/test/intltest/srchtest.cpp @@ -644,7 +644,7 @@ void StringSearchTest::TestOpenClose() } delete result; - text.append(0, 0x1); + // No-op: text.append(0, 0x1); -- what was intended here? status = U_ZERO_ERROR; result = new StringSearch(pattern, text, NULL, NULL, status); if (U_SUCCESS(status)) { diff --git a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp index ca031b1f423..d1f2bfe6ff6 100644 --- a/icu4c/source/test/intltest/ustrtest.cpp +++ b/icu4c/source/test/intltest/ustrtest.cpp @@ -1615,8 +1615,8 @@ UnicodeStringTest::TestBogus() { // test that NULL primitive input string values are treated like // empty strings, not errors (bogus) test2.setTo((UChar32)0x10005); - if(test2.insert(1, NULL, 1).length()!=2) { - errln("UniStr.insert(...NULL...) should not modify the string but does"); + if(test2.insert(1, nullptr, 1).length()!=2) { + errln("UniStr.insert(...nullptr...)
should not modify the string but does"); } UErrorCode errorCode=U_ZERO_ERROR; From 5df9572e9a850addde1e579e0778b6d0625833bc Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 1 Mar 2017 23:05:52 +0000 Subject: [PATCH 07/26] ICU-12992 UChar=char16_t inside ICU, configurable, defaults to char16_t/C++ uint16_t/C X-SVN-Rev: 39720 --- icu4c/source/common/unicode/umachine.h | 32 +++++++++++-------- icu4c/source/common/unicode/utypes.h | 4 +-- icu4c/source/test/intltest/Makefile.in | 2 +- icu4c/source/test/intltest/intltest.vcxproj | 2 +- .../test/intltest/intltest.vcxproj.filters | 2 +- .../test/intltest/{punyref.c => punyref.cpp} | 2 +- 6 files changed, 25 insertions(+), 19 deletions(-) rename icu4c/source/test/intltest/{punyref.c => punyref.cpp} (99%) diff --git a/icu4c/source/common/unicode/umachine.h b/icu4c/source/common/unicode/umachine.h index 3dab79a825f..e0fedfe9579 100644 --- a/icu4c/source/common/unicode/umachine.h +++ b/icu4c/source/common/unicode/umachine.h @@ -293,28 +293,34 @@ typedef int8_t UBool; /** * \var UChar * - * For C++, UChar is always defined to be char16_t. + * The base type for UTF-16 code units and pointers. + * Unsigned 16-bit integer. + * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar. * - * For plain C, define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), - * or wchar_t if that is 16 bits wide; always assumed to be unsigned. - * If neither is available, then define UChar to be uint16_t. + * UChar is configurable by defining the macro UCHAR_TYPE + * on the preprocessor or compiler command line: + * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. + * (The UCHAR_TYPE can also be #defined earlier in this file, for outside the ICU library code.) + * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16. 
* - * This makes the definition of UChar platform-dependent - * but allows direct string type compatibility with platforms with - * 16-bit wchar_t types. + * The default is UChar=char16_t. + * + * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type. + * + * In C, char16_t is a simple typedef of uint_least16_t. + * ICU requires uint_least16_t=uint16_t for data memory mapping. + * On macOS, char16_t is not available because the uchar.h standard header is missing. * * @stable ICU 4.4 */ -#ifdef __cplusplus +#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ + defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) + // Inside the ICU library code, never configurable. typedef char16_t UChar; #elif defined(UCHAR_TYPE) typedef UCHAR_TYPE UChar; -#elif U_SIZEOF_WCHAR_T==2 - typedef wchar_t UChar; -#elif U_HAVE_CHAR16_T +#elif defined(__cplusplus) typedef char16_t UChar; -#elif defined(__CHAR16_TYPE__) - typedef __CHAR16_TYPE__ UChar; #else typedef uint16_t UChar; #endif diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h index 6b61c4c1646..d60450b5a56 100644 --- a/icu4c/source/common/unicode/utypes.h +++ b/icu4c/source/common/unicode/utypes.h @@ -178,12 +178,12 @@ /** * \def NULL - * Define NULL if necessary, to 0 for C++ and to ((void *)0) for C. + * Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C. 
* @stable ICU 2.0 */ #ifndef NULL #ifdef __cplusplus -#define NULL 0 +#define NULL nullptr #else #define NULL ((void *)0) #endif diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in index 073935fcd9f..d040871ebea 100644 --- a/icu4c/source/test/intltest/Makefile.in +++ b/icu4c/source/test/intltest/Makefile.in @@ -35,7 +35,7 @@ BUILDDIR := $(BUILDDIR:test\\intltest/../../=) BUILDDIR := $(BUILDDIR:TEST\\INTLTEST/../../=) CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/tools/toolutil -I$(top_srcdir)/tools/ctestfw -CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT= -DUNISTR_FROM_STRING_EXPLICIT= +CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT= -DUNISTR_FROM_STRING_EXPLICIT= -DUCHAR_TYPE=char16_t DEFS += -D'U_TOPSRCDIR="$(top_srcdir)/"' -D'U_TOPBUILDDIR="$(BUILDDIR)"' LIBS = $(LIBCTESTFW) $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M) $(LIB_THREAD) diff --git a/icu4c/source/test/intltest/intltest.vcxproj b/icu4c/source/test/intltest/intltest.vcxproj index 3779ffa2e22..315adfd51f4 100644 --- a/icu4c/source/test/intltest/intltest.vcxproj +++ b/icu4c/source/test/intltest/intltest.vcxproj @@ -368,7 +368,7 @@ - + false diff --git a/icu4c/source/test/intltest/intltest.vcxproj.filters b/icu4c/source/test/intltest/intltest.vcxproj.filters index c490d90fdfb..39a3e4ef251 100644 --- a/icu4c/source/test/intltest/intltest.vcxproj.filters +++ b/icu4c/source/test/intltest/intltest.vcxproj.filters @@ -352,7 +352,7 @@ idna - + idna diff --git a/icu4c/source/test/intltest/punyref.c b/icu4c/source/test/intltest/punyref.cpp similarity index 99% rename from icu4c/source/test/intltest/punyref.c rename to icu4c/source/test/intltest/punyref.cpp index 5e0f8422cf4..77c3e27fa6e 100644 --- a/icu4c/source/test/intltest/punyref.c +++ b/icu4c/source/test/intltest/punyref.cpp @@ -7,7 +7,7 @@ * Corporation and others. All Rights Reserved. 
* ******************************************************************************* - * file name: punyref.h + * file name: punyref.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 From 4b154b832864d770511b07ac84a86d46d969bc18 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 1 Mar 2017 23:55:58 +0000 Subject: [PATCH 08/26] ICU-12992 s/UChar/char16_t/g in public C++ header files X-SVN-Rev: 39721 --- icu4c/source/common/unicode/appendable.h | 46 ++-- .../source/common/unicode/bytestriebuilder.h | 4 +- icu4c/source/common/unicode/caniter.h | 4 +- icu4c/source/common/unicode/casemap.h | 8 +- icu4c/source/common/unicode/chariter.h | 32 +-- icu4c/source/common/unicode/localpointer.h | 8 +- icu4c/source/common/unicode/normalizer2.h | 2 +- icu4c/source/common/unicode/rep.h | 6 +- icu4c/source/common/unicode/resbund.h | 2 +- icu4c/source/common/unicode/schriter.h | 2 +- icu4c/source/common/unicode/simpleformatter.h | 8 +- icu4c/source/common/unicode/strenum.h | 8 +- .../source/common/unicode/stringtriebuilder.h | 16 +- icu4c/source/common/unicode/ucharstrie.h | 86 +++--- .../source/common/unicode/ucharstriebuilder.h | 20 +- icu4c/source/common/unicode/uchriter.h | 48 ++-- icu4c/source/common/unicode/unifilt.h | 2 +- icu4c/source/common/unicode/uniset.h | 6 +- icu4c/source/common/unicode/unistr.h | 254 +++++++++--------- icu4c/source/i18n/unicode/coll.h | 24 +- icu4c/source/i18n/unicode/currunit.h | 4 +- icu4c/source/i18n/unicode/dcfmtsym.h | 6 +- icu4c/source/i18n/unicode/decimfmt.h | 26 +- icu4c/source/i18n/unicode/dtfmtsym.h | 8 +- icu4c/source/i18n/unicode/dtitvfmt.h | 2 +- icu4c/source/i18n/unicode/dtptngen.h | 2 +- icu4c/source/i18n/unicode/msgfmt.h | 2 +- icu4c/source/i18n/unicode/numfmt.h | 6 +- icu4c/source/i18n/unicode/regex.h | 30 +-- icu4c/source/i18n/unicode/smpdtfmt.h | 12 +- icu4c/source/i18n/unicode/tblcoll.h | 24 +- icu4c/source/i18n/unicode/timezone.h | 8 +- icu4c/source/i18n/unicode/translit.h | 2 +- icu4c/source/i18n/unicode/tzfmt.h | 
6 +- 34 files changed, 362 insertions(+), 362 deletions(-) diff --git a/icu4c/source/common/unicode/appendable.h b/icu4c/source/common/unicode/appendable.h index d611dc008af..8512c2f3031 100644 --- a/icu4c/source/common/unicode/appendable.h +++ b/icu4c/source/common/unicode/appendable.h @@ -19,7 +19,7 @@ /** * \file - * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (UChars). + * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (char16_ts). */ #include "unicode/utypes.h" @@ -34,10 +34,10 @@ class UnicodeString; * Combines elements of Java Appendable and ICU4C ByteSink. * * This class can be used in APIs where it does not matter whether the actual destination is - * a UnicodeString, a UChar[] array, a UnicodeSet, or any other object + * a UnicodeString, a char16_t[] array, a UnicodeSet, or any other object * that receives and processes characters and/or strings. * - * Implementation classes must implement at least appendCodeUnit(UChar). + * Implementation classes must implement at least appendCodeUnit(char16_t). * The base class provides default implementations for the other methods. * * The methods do not take UErrorCode parameters. @@ -62,11 +62,11 @@ public: * @return TRUE if the operation succeeded * @stable ICU 4.8 */ - virtual UBool appendCodeUnit(UChar c) = 0; + virtual UBool appendCodeUnit(char16_t c) = 0; /** * Appends a code point. - * The default implementation calls appendCodeUnit(UChar) once or twice. + * The default implementation calls appendCodeUnit(char16_t) once or twice. * @param c code point 0..0x10ffff * @return TRUE if the operation succeeded * @stable ICU 4.8 @@ -75,20 +75,20 @@ public: /** * Appends a string. - * The default implementation calls appendCodeUnit(UChar) for each code unit. + * The default implementation calls appendCodeUnit(char16_t) for each code unit. 
* @param s string, must not be NULL if length!=0 * @param length string length, or -1 if NUL-terminated * @return TRUE if the operation succeeded * @stable ICU 4.8 */ - virtual UBool appendString(const UChar *s, int32_t length); + virtual UBool appendString(const char16_t *s, int32_t length); /** * Tells the object that the caller is going to append roughly - * appendCapacity UChars. A subclass might use this to pre-allocate + * appendCapacity char16_ts. A subclass might use this to pre-allocate * a larger buffer if necessary. * The default implementation does nothing. (It always returns TRUE.) - * @param appendCapacity estimated number of UChars that will be appended + * @param appendCapacity estimated number of char16_ts that will be appended * @return TRUE if the operation succeeded * @stable ICU 4.8 */ @@ -102,19 +102,19 @@ public: * The returned buffer is only valid until the next operation * on this Appendable. * - * After writing at most *resultCapacity UChars, call appendString() with the - * pointer returned from this function and the number of UChars written. - * Many appendString() implementations will avoid copying UChars if this function + * After writing at most *resultCapacity char16_ts, call appendString() with the + * pointer returned from this function and the number of char16_ts written. + * Many appendString() implementations will avoid copying char16_ts if this function * returned an internal buffer. * * Partial usage example: * \code * int32_t capacity; - * UChar* buffer = app.getAppendBuffer(..., &capacity); - * ... Write n UChars into buffer, with n <= capacity. + * char16_t* buffer = app.getAppendBuffer(..., &capacity); + * ... Write n char16_ts into buffer, with n <= capacity. * app.appendString(buffer, n); * \endcode - * In many implementations, that call to append will avoid copying UChars. + * In many implementations, that call to append will avoid copying char16_ts. 
* * If the Appendable allocates or reallocates an internal buffer, it should use * the desiredCapacityHint if appropriate. @@ -138,9 +138,9 @@ public: * @return a buffer with *resultCapacity>=minCapacity * @stable ICU 4.8 */ - virtual UChar *getAppendBuffer(int32_t minCapacity, + virtual char16_t *getAppendBuffer(int32_t minCapacity, int32_t desiredCapacityHint, - UChar *scratch, int32_t scratchCapacity, + char16_t *scratch, int32_t scratchCapacity, int32_t *resultCapacity); }; @@ -171,7 +171,7 @@ public: * @return TRUE if the operation succeeded * @stable ICU 4.8 */ - virtual UBool appendCodeUnit(UChar c); + virtual UBool appendCodeUnit(char16_t c); /** * Appends a code point to the string. @@ -188,12 +188,12 @@ public: * @return TRUE if the operation succeeded * @stable ICU 4.8 */ - virtual UBool appendString(const UChar *s, int32_t length); + virtual UBool appendString(const char16_t *s, int32_t length); /** * Tells the UnicodeString that the caller is going to append roughly - * appendCapacity UChars. - * @param appendCapacity estimated number of UChars that will be appended + * appendCapacity char16_ts. 
+ * @param appendCapacity estimated number of char16_ts that will be appended * @return TRUE if the operation succeeded * @stable ICU 4.8 */ @@ -220,9 +220,9 @@ public: * @return a buffer with *resultCapacity>=minCapacity * @stable ICU 4.8 */ - virtual UChar *getAppendBuffer(int32_t minCapacity, + virtual char16_t *getAppendBuffer(int32_t minCapacity, int32_t desiredCapacityHint, - UChar *scratch, int32_t scratchCapacity, + char16_t *scratch, int32_t scratchCapacity, int32_t *resultCapacity); private: diff --git a/icu4c/source/common/unicode/bytestriebuilder.h b/icu4c/source/common/unicode/bytestriebuilder.h index 888ba8bdb59..a8412d3d6e8 100644 --- a/icu4c/source/common/unicode/bytestriebuilder.h +++ b/icu4c/source/common/unicode/bytestriebuilder.h @@ -127,14 +127,14 @@ private: void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); virtual int32_t getElementStringLength(int32_t i) const; - virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const; + virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const; virtual int32_t getElementValue(int32_t i) const; virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const; virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const; virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const; - virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const; + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const; virtual UBool matchNodesCanHaveValues() const { return FALSE; } diff --git a/icu4c/source/common/unicode/caniter.h b/icu4c/source/common/unicode/caniter.h index 7e83a4f6cb7..5a882fb3fb8 100644 --- a/icu4c/source/common/unicode/caniter.h +++ b/icu4c/source/common/unicode/caniter.h @@ -187,7 +187,7 @@ private: UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); 
//private String[] getEquivalents(String segment) //Set getEquivalents2(String segment); - Hashtable *getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status); + Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status); //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status); /** @@ -196,7 +196,7 @@ private: * If so, take the remainder, and return the equivalents */ //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer); - Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); + Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); void cleanPieces(); diff --git a/icu4c/source/common/unicode/casemap.h b/icu4c/source/common/unicode/casemap.h index 1050f52d7e8..8401867b8ab 100644 --- a/icu4c/source/common/unicode/casemap.h +++ b/icu4c/source/common/unicode/casemap.h @@ -43,7 +43,7 @@ public: * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, @@ -77,7 +77,7 @@ public: * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. 
- * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, @@ -123,7 +123,7 @@ public: * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, @@ -164,7 +164,7 @@ public: * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, diff --git a/icu4c/source/common/unicode/chariter.h b/icu4c/source/common/unicode/chariter.h index 17ab178e6c2..76f01402e7b 100644 --- a/icu4c/source/common/unicode/chariter.h +++ b/icu4c/source/common/unicode/chariter.h @@ -78,7 +78,7 @@ U_NAMESPACE_BEGIN * } * * void function1(ForwardCharacterIterator &it) { - * UChar c; + * char16_t c; * while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) { * // use c * } @@ -149,7 +149,7 @@ public: * @return the current code unit. 
* @stable ICU 2.0 */ - virtual UChar nextPostInc(void) = 0; + virtual char16_t nextPostInc(void) = 0; /** * Gets the current code point for returning and advances to the next code point @@ -230,7 +230,7 @@ protected: * showing a way to convert simple for() loops: * \code * void forward2(CharacterIterator &it) { - * UChar c; + * char16_t c; * for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) { * // use c * } @@ -249,7 +249,7 @@ protected: * Backward iteration with a more traditional for() loop: * \code * void backward2(CharacterIterator &it) { - * UChar c; + * char16_t c; * for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) { * // use c * } @@ -266,7 +266,7 @@ protected: * // get the position * int32_t pos=it.getIndex(); * // get the previous code unit - * UChar u=it.previous(); + * char16_t u=it.previous(); * // move back one more code unit * it.move(-1, CharacterIterator::kCurrent); * // set the position back to where it was @@ -283,7 +283,7 @@ protected: * Function processing characters, in this example simple output *
  * \code
- *  void processChar( UChar c )
+ *  void processChar( char16_t c )
  *  {
  *      cout << " " << c;
  *  }
@@ -294,7 +294,7 @@ protected:
  * \code
  *  void traverseForward(CharacterIterator& iter)
  *  {
- *      for(UChar c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
+ *      for(char16_t c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
  *          processChar(c);
  *      }
  *  }
@@ -305,7 +305,7 @@ protected:
  * \code
  *  void traverseBackward(CharacterIterator& iter)
  *  {
- *      for(UChar c = iter.last(); c != CharacterIterator.DONE; c = iter.previous()) {
+ *      for(char16_t c = iter.last(); c != CharacterIterator.DONE; c = iter.previous()) {
  *          processChar(c);
  *      }
  *  }
@@ -317,7 +317,7 @@ protected:
  * \code
  * void traverseOut(CharacterIterator& iter, int32_t pos)
  * {
- *      UChar c;
+ *      char16_t c;
  *      for (c = iter.setIndex(pos);
  *      c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
  *          c = iter.next()) {}
@@ -386,7 +386,7 @@ public:
      * @return the first code unit in its iteration range.
      * @stable ICU 2.0
      */
-    virtual UChar         first(void) = 0;
+    virtual char16_t         first(void) = 0;
 
     /**
      * Sets the iterator to refer to the first code unit in its
@@ -396,7 +396,7 @@ public:
      * @return the first code unit in its iteration range.
      * @stable ICU 2.0
      */
-    virtual UChar         firstPostInc(void);
+    virtual char16_t         firstPostInc(void);
 
     /**
      * Sets the iterator to refer to the first code point in its
@@ -435,7 +435,7 @@ public:
      * @return the last code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         last(void) = 0;
+    virtual char16_t         last(void) = 0;
         
     /**
      * Sets the iterator to refer to the last code point in its
@@ -463,7 +463,7 @@ public:
      * @return the "position"-th code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         setIndex(int32_t position) = 0;
+    virtual char16_t         setIndex(int32_t position) = 0;
 
     /**
      * Sets the iterator to refer to the beginning of the code point
@@ -483,7 +483,7 @@ public:
      * @return the current code unit. 
      * @stable ICU 2.0
      */
-    virtual UChar         current(void) const = 0;
+    virtual char16_t         current(void) const = 0;
         
     /**
      * Returns the code point the iterator currently refers to.  
@@ -499,7 +499,7 @@ public:
      * @return the next code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         next(void) = 0;
+    virtual char16_t         next(void) = 0;
         
     /**
      * Advances to the next code point in the iteration range
@@ -520,7 +520,7 @@ public:
      * @return the previous code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         previous(void) = 0;
+    virtual char16_t         previous(void) = 0;
 
     /**
      * Advances to the previous code point in the iteration range
diff --git a/icu4c/source/common/unicode/localpointer.h b/icu4c/source/common/unicode/localpointer.h
index aa374f83b86..3ab820188f7 100644
--- a/icu4c/source/common/unicode/localpointer.h
+++ b/icu4c/source/common/unicode/localpointer.h
@@ -174,9 +174,9 @@ private:
  * \code
  * LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005));
  * int32_t length=s->length();  // 2
- * UChar lead=s->charAt(0);  // 0xd900
+ * char16_t lead=s->charAt(0);  // 0xd900
  * if(some condition) { return; }  // no need to explicitly delete the pointer
- * s.adoptInstead(new UnicodeString((UChar)0xfffc));
+ * s.adoptInstead(new UnicodeString((char16_t)0xfffc));
  * length=s->length();  // 1
  * // no need to explicitly delete the pointer
  * \endcode
@@ -323,10 +323,10 @@ public:
  * Usage example:
  * \code
  * LocalArray<UnicodeString> a(new UnicodeString[2]);
- * a[0].append((UChar)0x61);
+ * a[0].append((char16_t)0x61);
  * if(some condition) { return; }  // no need to explicitly delete the array
  * a.adoptInstead(new UnicodeString[4]);
- * a[3].append((UChar)0x62).append((UChar)0x63).reverse();
+ * a[3].append((char16_t)0x62).append((char16_t)0x63).reverse();
  * // no need to explicitly delete the array
  * \endcode
  *
diff --git a/icu4c/source/common/unicode/normalizer2.h b/icu4c/source/common/unicode/normalizer2.h
index 8995c8a56d1..d326da948a3 100644
--- a/icu4c/source/common/unicode/normalizer2.h
+++ b/icu4c/source/common/unicode/normalizer2.h
@@ -282,7 +282,7 @@ public:
      *
      * When used on a standard NFC Normalizer2 instance,
      * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
-     * in this case, the result contains either one or two code points (=1..4 UChars).
+     * in this case, the result contains either one or two code points (=1..4 char16_ts).
      *
      * This function is independent of the mode of the Normalizer2.
      * The default implementation returns FALSE.
diff --git a/icu4c/source/common/unicode/rep.h b/icu4c/source/common/unicode/rep.h
index cf414967afb..41fdc040497 100644
--- a/icu4c/source/common/unicode/rep.h
+++ b/icu4c/source/common/unicode/rep.h
@@ -93,7 +93,7 @@ public:
      * @return 16-bit code unit of text at given offset
      * @stable ICU 1.8
      */
-    inline UChar charAt(int32_t offset) const;
+    inline char16_t charAt(int32_t offset) const;
 
     /**
      * Returns the 32-bit code point at the given 16-bit offset into
@@ -230,7 +230,7 @@ protected:
      * Virtual version of charAt().
      * @stable ICU 2.4
      */
-    virtual UChar getCharAt(int32_t offset) const = 0;
+    virtual char16_t getCharAt(int32_t offset) const = 0;
 
     /**
      * Virtual version of char32At().
@@ -246,7 +246,7 @@ Replaceable::length() const {
     return getLength();
 }
 
-inline UChar
+inline char16_t
 Replaceable::charAt(int32_t offset) const {
     return getCharAt(offset);
 }
diff --git a/icu4c/source/common/unicode/resbund.h b/icu4c/source/common/unicode/resbund.h
index 355df474178..b522a7a6154 100644
--- a/icu4c/source/common/unicode/resbund.h
+++ b/icu4c/source/common/unicode/resbund.h
@@ -216,7 +216,7 @@ public:
      *                could be U_MISSING_RESOURCE_ERROR if the key is not found
      *                could be a warning
      *                e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING 
-     * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+     * @return a pointer to a zero-terminated char16_t array which lives in a memory mapped/DLL file.
      * @stable ICU 2.0
      */
     UnicodeString
diff --git a/icu4c/source/common/unicode/schriter.h b/icu4c/source/common/unicode/schriter.h
index f9d8117b3cc..b1dc939bd66 100644
--- a/icu4c/source/common/unicode/schriter.h
+++ b/icu4c/source/common/unicode/schriter.h
@@ -175,7 +175,7 @@ protected:
    * @param newTextLength The length of the String
    * @stable ICU 2.0
    */
-  void setText(const UChar* newText, int32_t newTextLength);
+  void setText(const char16_t* newText, int32_t newTextLength);
 
   /**
    * Copy of the iterated string object.
diff --git a/icu4c/source/common/unicode/simpleformatter.h b/icu4c/source/common/unicode/simpleformatter.h
index 16c8ad046e8..d307b4728c0 100644
--- a/icu4c/source/common/unicode/simpleformatter.h
+++ b/icu4c/source/common/unicode/simpleformatter.h
@@ -57,7 +57,7 @@ public:
      * Default constructor.
      * @draft ICU 57
      */
-    SimpleFormatter() : compiledPattern((UChar)0) {}
+    SimpleFormatter() : compiledPattern((char16_t)0) {}
 
     /**
      * Constructs a formatter from the pattern string.
@@ -275,15 +275,15 @@ private:
      */
     UnicodeString compiledPattern;
 
-    static inline int32_t getArgumentLimit(const UChar *compiledPattern,
+    static inline int32_t getArgumentLimit(const char16_t *compiledPattern,
                                               int32_t compiledPatternLength) {
         return compiledPatternLength == 0 ? 0 : compiledPattern[0];
     }
 
-    static UnicodeString getTextWithNoArguments(const UChar *compiledPattern, int32_t compiledPatternLength);
+    static UnicodeString getTextWithNoArguments(const char16_t *compiledPattern, int32_t compiledPatternLength);
 
     static UnicodeString &format(
-            const UChar *compiledPattern, int32_t compiledPatternLength,
+            const char16_t *compiledPattern, int32_t compiledPatternLength,
             const UnicodeString *const *values,
             UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
             int32_t *offsets, int32_t offsetsLength,
diff --git a/icu4c/source/common/unicode/strenum.h b/icu4c/source/common/unicode/strenum.h
index 368188a4aa8..61d514813b9 100644
--- a/icu4c/source/common/unicode/strenum.h
+++ b/icu4c/source/common/unicode/strenum.h
@@ -35,7 +35,7 @@ U_NAMESPACE_BEGIN
  * call, so the returned string still might not be 'valid' on
  * subsequent use.</p>
  *
- * <p>Strings may take the form of const char*, const UChar*, or const
+ * <p>Strings may take the form of const char*, const char16_t*, or const
  * UnicodeString*. The type you get is determine by the variant of
  * 'next' that you call. In general the StringEnumeration is
  * optimized for one of these types, but all StringEnumerations can
@@ -112,7 +112,7 @@ public:
      * <p>If the iterator is out of sync with its service, status is set
      * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
      *
-     * <p>If the native service string is a UChar* string, it is
+     * <p>If the native service string is a char16_t* string, it is
      * converted to char* with the invariant converter. If the
      * conversion fails (because a character cannot be converted) then
      * status is set to U_INVARIANT_CONVERSION_ERROR and the return
@@ -131,7 +131,7 @@ public:
     virtual const char* next(int32_t *resultLength, UErrorCode& status);
 
     /**
-     * <p>Returns the next element as a NUL-terminated UChar*. If there
+     * <p>Returns the next element as a NUL-terminated char16_t*. If there
      * are no more elements, returns NULL. If the resultLength pointer
      * is not NULL, the length of the string (not counting the
      * terminating NUL) is returned at that address. If an error
@@ -153,7 +153,7 @@ public:
      *
      * @stable ICU 2.4
      */
-    virtual const UChar* unext(int32_t *resultLength, UErrorCode& status);
+    virtual const char16_t* unext(int32_t *resultLength, UErrorCode& status);
 
     /**
      *

Returns the next element a UnicodeString*. If there are no diff --git a/icu4c/source/common/unicode/stringtriebuilder.h b/icu4c/source/common/unicode/stringtriebuilder.h index 91f1bd24dab..bcad2484e7b 100644 --- a/icu4c/source/common/unicode/stringtriebuilder.h +++ b/icu4c/source/common/unicode/stringtriebuilder.h @@ -105,7 +105,7 @@ protected: /** @internal */ virtual int32_t getElementStringLength(int32_t i) const = 0; /** @internal */ - virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0; + virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0; /** @internal */ virtual int32_t getElementValue(int32_t i) const = 0; @@ -120,7 +120,7 @@ protected: /** @internal */ virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0; /** @internal */ - virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0; + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0; /** @internal */ virtual UBool matchNodesCanHaveValues() const = 0; @@ -137,7 +137,7 @@ protected: /** @internal */ static const int32_t kMaxBranchLinearSubNodeLength=5; - // Maximum number of nested split-branch levels for a branch on all 2^16 possible UChar units. + // Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units. // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up. /** @internal */ static const int32_t kMaxSplitBranchLevels=14; @@ -338,7 +338,7 @@ protected: virtual void write(StringTrieBuilder &builder); // Adds a unit with a final value. void add(int32_t c, int32_t value) { - units[length]=(UChar)c; + units[length]=(char16_t)c; equal[length]=NULL; values[length]=value; ++length; @@ -346,7 +346,7 @@ protected: } // Adds a unit which leads to another match node. 
void add(int32_t c, Node *node) { - units[length]=(UChar)c; + units[length]=(char16_t)c; equal[length]=node; values[length]=0; ++length; @@ -356,7 +356,7 @@ protected: Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value". int32_t length; int32_t values[kMaxBranchLinearSubNodeLength]; - UChar units[kMaxBranchLinearSubNodeLength]; + char16_t units[kMaxBranchLinearSubNodeLength]; }; /** @@ -364,7 +364,7 @@ protected: */ class SplitBranchNode : public BranchNode { public: - SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) + SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) : BranchNode(((0x555555*37+middleUnit)*37+ hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)), unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {} @@ -372,7 +372,7 @@ protected: virtual int32_t markRightEdgesFirst(int32_t edgeNumber); virtual void write(StringTrieBuilder &builder); protected: - UChar unit; + char16_t unit; Node *lessThan; Node *greaterOrEqual; }; diff --git a/icu4c/source/common/unicode/ucharstrie.h b/icu4c/source/common/unicode/ucharstrie.h index fdff53f4d33..dfc93f6d0ba 100644 --- a/icu4c/source/common/unicode/ucharstrie.h +++ b/icu4c/source/common/unicode/ucharstrie.h @@ -36,7 +36,7 @@ class UVector32; /** * Light-weight, non-const reader class for a UCharsTrie. - * Traverses a UChar-serialized data structure with minimal state, + * Traverses a char16_t-serialized data structure with minimal state, * for mapping strings (16-bit-unit sequences) to non-negative integer values. * * This class owns the serialized trie data only if it was constructed by @@ -52,15 +52,15 @@ public: /** * Constructs a UCharsTrie reader instance. * - * The trieUChars must contain a copy of a UChar sequence from the UCharsTrieBuilder, - * starting with the first UChar of that sequence. 
- * The UCharsTrie object will not read more UChars than + * The trieUChars must contain a copy of a char16_t sequence from the UCharsTrieBuilder, + * starting with the first char16_t of that sequence. + * The UCharsTrie object will not read more char16_ts than * the UCharsTrieBuilder generated in the corresponding build() call. * * The array is not copied/cloned and must not be modified while * the UCharsTrie object is in use. * - * @param trieUChars The UChar array that contains the serialized trie. + * @param trieUChars The char16_t array that contains the serialized trie. * @stable ICU 4.8 */ UCharsTrie(ConstChar16Ptr trieUChars) @@ -75,7 +75,7 @@ public: /** * Copy constructor, copies the other trie reader object and its state, - * but not the UChar array which will be shared. (Shallow copy.) + * but not the char16_t array which will be shared. (Shallow copy.) * @param other Another UCharsTrie object. * @stable ICU 4.8 */ @@ -109,8 +109,8 @@ public: private: friend class UCharsTrie; - const UChar *uchars; - const UChar *pos; + const char16_t *uchars; + const char16_t *pos; int32_t remainingMatchLength; }; @@ -148,14 +148,14 @@ public: /** * Determines whether the string so far matches, whether it has a value, - * and whether another input UChar can continue a matching string. + * and whether another input char16_t can continue a matching string. * @return The match/value Result. * @stable ICU 4.8 */ UStringTrieResult current() const; /** - * Traverses the trie from the initial state for this input UChar. + * Traverses the trie from the initial state for this input char16_t. * Equivalent to reset().next(uchar). * @param uchar Input char value. Values below 0 and above 0xffff will never match. * @return The match/value Result. @@ -177,7 +177,7 @@ public: UStringTrieResult firstForCodePoint(UChar32 cp); /** - * Traverses the trie from the current state for this input UChar. + * Traverses the trie from the current state for this input char16_t. 
* @param uchar Input char value. Values below 0 and above 0xffff will never match. * @return The match/value Result. * @stable ICU 4.8 @@ -220,7 +220,7 @@ public: * @stable ICU 4.8 */ inline int32_t getValue() const { - const UChar *pos=pos_; + const char16_t *pos=pos_; int32_t leadUnit=*pos++; // U_ASSERT(leadUnit>=kMinValueLead); return leadUnit&kValueIsFinal ? @@ -237,16 +237,16 @@ public: * @stable ICU 4.8 */ inline UBool hasUniqueValue(int32_t &uniqueValue) const { - const UChar *pos=pos_; + const char16_t *pos=pos_; // Skip the rest of a pending linear-match node. return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue); } /** - * Finds each UChar which continues the string from the current state. - * That is, each UChar c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now. - * @param out Each next UChar is appended to this object. - * @return the number of UChars which continue the string from here + * Finds each char16_t which continues the string from the current state. + * That is, each char16_t c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now. + * @param out Each next char16_t is appended to this object. + * @return the number of char16_ts which continue the string from here * @stable ICU 4.8 */ int32_t getNextUChars(Appendable &out) const; @@ -258,8 +258,8 @@ public: class U_COMMON_API Iterator : public UMemory { public: /** - * Iterates from the root of a UChar-serialized UCharsTrie. - * @param trieUChars The trie UChars. + * Iterates from the root of a char16_t-serialized UCharsTrie. + * @param trieUChars The trie char16_ts. * @param maxStringLength If 0, the iterator returns full strings. * Otherwise, the iterator returns strings with this maximum length. * @param errorCode Standard ICU error code. 
Its input value must @@ -336,11 +336,11 @@ public: return TRUE; } - const UChar *branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode); + const char16_t *branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode); - const UChar *uchars_; - const UChar *pos_; - const UChar *initialPos_; + const char16_t *uchars_; + const char16_t *pos_; + const char16_t *initialPos_; int32_t remainingMatchLength_; int32_t initialRemainingMatchLength_; UBool skipValue_; // Skip intermediate value which was already delivered. @@ -368,7 +368,7 @@ private: * this constructor adopts the builder's array. * This constructor is only called by the builder. */ - UCharsTrie(UChar *adoptUChars, const UChar *trieUChars) + UCharsTrie(char16_t *adoptUChars, const char16_t *trieUChars) : ownedArray_(adoptUChars), uchars_(trieUChars), pos_(uchars_), remainingMatchLength_(-1) {} @@ -381,7 +381,7 @@ private: // Reads a compact 32-bit integer. // pos is already after the leadUnit, and the lead unit has bit 15 reset. - static inline int32_t readValue(const UChar *pos, int32_t leadUnit) { + static inline int32_t readValue(const char16_t *pos, int32_t leadUnit) { int32_t value; if(leadUnit=kMinTwoUnitValueLead) { if(leadUnit=kMinTwoUnitNodeValueLead) { if(leadUnit=kMinTwoUnitDeltaLead) { if(delta==kThreeUnitDeltaLead) { @@ -444,7 +444,7 @@ private: return pos+delta; } - static const UChar *skipDelta(const UChar *pos) { + static const char16_t *skipDelta(const char16_t *pos) { int32_t delta=*pos++; if(delta>=kMinTwoUnitDeltaLead) { if(delta==kThreeUnitDeltaLead) { @@ -461,28 +461,28 @@ private: } // Handles a branch node for both next(uchar) and next(string). - UStringTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar); + UStringTrieResult branchNext(const char16_t *pos, int32_t length, int32_t uchar); // Requires remainingLength_<0. 
- UStringTrieResult nextImpl(const UChar *pos, int32_t uchar); + UStringTrieResult nextImpl(const char16_t *pos, int32_t uchar); // Helper functions for hasUniqueValue(). // Recursively finds a unique value (or whether there is not a unique one) // from a branch. - static const UChar *findUniqueValueFromBranch(const UChar *pos, int32_t length, + static const char16_t *findUniqueValueFromBranch(const char16_t *pos, int32_t length, UBool haveUniqueValue, int32_t &uniqueValue); // Recursively finds a unique value (or whether there is not a unique one) // starting from a position on a node lead unit. - static UBool findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue); + static UBool findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue); // Helper functions for getNextUChars(). // getNextUChars() when pos is on a branch node. - static void getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out); + static void getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out); // UCharsTrie data structure // - // The trie consists of a series of UChar-serialized nodes for incremental - // Unicode string/UChar sequence matching. (UChar=16-bit unsigned integer) + // The trie consists of a series of char16_t-serialized nodes for incremental + // Unicode string/char16_t sequence matching. (char16_t=16-bit unsigned integer) // The root node is at the beginning of the trie data. // // Types of nodes are distinguished by their node lead unit ranges. @@ -491,9 +491,9 @@ private: // // Node types: // - Final-value node: Stores a 32-bit integer in a compact, variable-length format. - // The value is for the string/UChar sequence so far. + // The value is for the string/char16_t sequence so far. // - Match node, optionally with an intermediate value in a different compact format. - // The value, if present, is for the string/UChar sequence so far. 
+ // The value, if present, is for the string/char16_t sequence so far. // // Aside from the value, which uses the node lead unit's high bits: // @@ -560,15 +560,15 @@ private: static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff - UChar *ownedArray_; + char16_t *ownedArray_; // Fixed value referencing the UCharsTrie words. - const UChar *uchars_; + const char16_t *uchars_; // Iterator variables. // Pointer to next trie unit to read. NULL if no more matches. - const UChar *pos_; + const char16_t *pos_; // Remaining length of a linear-match node, minus 1. Negative if not in such a node. int32_t remainingMatchLength_; }; diff --git a/icu4c/source/common/unicode/ucharstriebuilder.h b/icu4c/source/common/unicode/ucharstriebuilder.h index a8b75697de7..2aa4757e52c 100644 --- a/icu4c/source/common/unicode/ucharstriebuilder.h +++ b/icu4c/source/common/unicode/ucharstriebuilder.h @@ -89,21 +89,21 @@ public: UCharsTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); /** - * Builds a UCharsTrie for the add()ed data and UChar-serializes it. + * Builds a UCharsTrie for the add()ed data and char16_t-serializes it. * Once built, no further data can be add()ed until clear() is called. * * A UCharsTrie cannot be empty. At least one (string, value) pair * must have been add()ed. * * Multiple calls to buildUnicodeString() set the UnicodeStrings to the - * builder's same UChar array, without rebuilding. + * builder's same char16_t array, without rebuilding. * If buildUnicodeString() is called after build(), the trie will be * re-serialized into a new array. * If build() is called after buildUnicodeString(), the trie object will become * the owner of the previously returned array. * After clear() has been called, a new array will be used as well. * @param buildOption Build option, see UStringTrieBuildOption. 
- * @param result A UnicodeString which will be set to the UChar-serialized + * @param result A UnicodeString which will be set to the char16_t-serialized * UCharsTrie for the add()ed data. * @param errorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns @@ -135,14 +135,14 @@ private: void buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode); virtual int32_t getElementStringLength(int32_t i) const; - virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const; + virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const; virtual int32_t getElementValue(int32_t i) const; virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const; virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const; virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const; - virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const; + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const; virtual UBool matchNodesCanHaveValues() const { return TRUE; } @@ -152,11 +152,11 @@ private: class UCTLinearMatchNode : public LinearMatchNode { public: - UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode); + UCTLinearMatchNode(const char16_t *units, int32_t len, Node *nextNode); virtual UBool operator==(const Node &other) const; virtual void write(StringTrieBuilder &builder); private: - const UChar *s; + const char16_t *s; }; virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, @@ -164,7 +164,7 @@ private: UBool ensureCapacity(int32_t length); virtual int32_t write(int32_t unit); - int32_t write(const UChar *s, int32_t length); + int32_t write(const char16_t *s, int32_t length); virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length); virtual int32_t writeValueAndFinal(int32_t i, 
UBool isFinal); virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); @@ -175,9 +175,9 @@ private: int32_t elementsCapacity; int32_t elementsLength; - // UChar serialization of the trie. + // char16_t serialization of the trie. // Grows from the back: ucharsLength measures from the end of the buffer! - UChar *uchars; + char16_t *uchars; int32_t ucharsCapacity; int32_t ucharsLength; }; diff --git a/icu4c/source/common/unicode/uchriter.h b/icu4c/source/common/unicode/uchriter.h index 644fbd8a2a9..21c93154141 100644 --- a/icu4c/source/common/unicode/uchriter.h +++ b/icu4c/source/common/unicode/uchriter.h @@ -15,18 +15,18 @@ /** * \file - * \brief C++ API: UChar Character Iterator + * \brief C++ API: char16_t Character Iterator */ U_NAMESPACE_BEGIN /** * A concrete subclass of CharacterIterator that iterates over the - * characters (code units or code points) in a UChar array. + * characters (code units or code points) in a char16_t array. * It's possible not only to create an - * iterator that iterates over an entire UChar array, but also to - * create one that iterates over only a subrange of a UChar array - * (iterators over different subranges of the same UChar array don't + * iterator that iterates over an entire char16_t array, but also to + * create one that iterates over only a subrange of a char16_t array + * (iterators over different subranges of the same char16_t array don't * compare equal). * @see CharacterIterator * @see ForwardCharacterIterator @@ -35,26 +35,26 @@ U_NAMESPACE_BEGIN class U_COMMON_API UCharCharacterIterator : public CharacterIterator { public: /** - * Create an iterator over the UChar array referred to by "textPtr". + * Create an iterator over the char16_t array referred to by "textPtr". * The iteration range is 0 to length-1. * text is only aliased, not adopted (the * destructor will not delete it). 
- * @param textPtr The UChar array to be iterated over - * @param length The length of the UChar array + * @param textPtr The char16_t array to be iterated over + * @param length The length of the char16_t array * @stable ICU 2.0 */ UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length); /** - * Create an iterator over the UChar array referred to by "textPtr". + * Create an iterator over the char16_t array referred to by "textPtr". * The iteration range is 0 to length-1. * text is only aliased, not adopted (the * destructor will not delete it). * The starting * position is specified by "position". If "position" is outside the valid * iteration range, the behavior of this object is undefined. - * @param textPtr The UChar array to be iteratd over - * @param length The length of the UChar array + * @param textPtr The char16_t array to be iteratd over + * @param length The length of the char16_t array * @param position The starting position of the iteration * @stable ICU 2.0 */ @@ -62,7 +62,7 @@ public: int32_t position); /** - * Create an iterator over the UChar array referred to by "textPtr". + * Create an iterator over the char16_t array referred to by "textPtr". * The iteration range is 0 to end-1. * text is only aliased, not adopted (the * destructor will not delete it). @@ -70,8 +70,8 @@ public: * position is specified by "position". If begin and end do not * form a valid iteration range or "position" is outside the valid * iteration range, the behavior of this object is undefined. - * @param textPtr The UChar array to be iterated over - * @param length The length of the UChar array + * @param textPtr The char16_t array to be iterated over + * @param length The length of the char16_t array * @param textBegin The begin position of the iteration range * @param textEnd The end position of the iteration range * @param position The starting position of the iteration @@ -141,7 +141,7 @@ public: * @return the first code unit in its iteration range. 
* @stable ICU 2.0 */ - virtual UChar first(void); + virtual char16_t first(void); /** * Sets the iterator to refer to the first code unit in its @@ -151,7 +151,7 @@ public: * @return the first code unit in its iteration range * @stable ICU 2.0 */ - virtual UChar firstPostInc(void); + virtual char16_t firstPostInc(void); /** * Sets the iterator to refer to the first code point in its @@ -181,7 +181,7 @@ public: * @return the last code unit in its iteration range. * @stable ICU 2.0 */ - virtual UChar last(void); + virtual char16_t last(void); /** * Sets the iterator to refer to the last code point in its @@ -200,7 +200,7 @@ public: * @return the code unit * @stable ICU 2.0 */ - virtual UChar setIndex(int32_t position); + virtual char16_t setIndex(int32_t position); /** * Sets the iterator to refer to the beginning of the code point @@ -220,7 +220,7 @@ public: * @return the code unit the iterator currently refers to. * @stable ICU 2.0 */ - virtual UChar current(void) const; + virtual char16_t current(void) const; /** * Returns the code point the iterator currently refers to. @@ -236,7 +236,7 @@ public: * @return the next code unit in the iteration range. * @stable ICU 2.0 */ - virtual UChar next(void); + virtual char16_t next(void); /** * Gets the current code unit for returning and advances to the next code unit @@ -246,7 +246,7 @@ public: * @return the current code unit. * @stable ICU 2.0 */ - virtual UChar nextPostInc(void); + virtual char16_t nextPostInc(void); /** * Advances to the next code point in the iteration range (toward @@ -288,7 +288,7 @@ public: * @return the previous code unit in the iteration range. 
* @stable ICU 2.0 */ - virtual UChar previous(void); + virtual char16_t previous(void); /** * Advances to the previous code point in the iteration range (toward @@ -343,7 +343,7 @@ public: void setText(ConstChar16Ptr newText, int32_t newTextLength); /** - * Copies the UChar array under iteration into the UnicodeString + * Copies the char16_t array under iteration into the UnicodeString * referred to by "result". Even if this iterator iterates across * only a part of this string, the whole string is copied. * @param result Receives a copy of the text under iteration. @@ -375,7 +375,7 @@ protected: * Protected member text * @stable ICU 2.0 */ - const UChar* text; + const char16_t* text; }; diff --git a/icu4c/source/common/unicode/unifilt.h b/icu4c/source/common/unicode/unifilt.h index a23d871a151..e10527154b6 100644 --- a/icu4c/source/common/unicode/unifilt.h +++ b/icu4c/source/common/unicode/unifilt.h @@ -30,7 +30,7 @@ U_NAMESPACE_BEGIN * defined range. * @stable ICU 3.0 */ -#define U_ETHER ((UChar)0xFFFF) +#define U_ETHER ((char16_t)0xFFFF) /** * diff --git a/icu4c/source/common/unicode/uniset.h b/icu4c/source/common/unicode/uniset.h index 133c2cf7f8d..4a4ce193b64 100644 --- a/icu4c/source/common/unicode/uniset.h +++ b/icu4c/source/common/unicode/uniset.h @@ -294,7 +294,7 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { * indicating that toPattern() must generate a pattern * representation from the inversion list. */ - UChar *pat; + char16_t *pat; UVector* strings; // maintained in sorted order UnicodeSetStringSpan *stringSpan; @@ -891,7 +891,7 @@ public: * @stable ICU 3.8 * @see USetSpanCondition */ - int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; + int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const; /** * Returns the end of the substring of the input string according to the USetSpanCondition. 
@@ -924,7 +924,7 @@ public: * @stable ICU 3.8 * @see USetSpanCondition */ - int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; + int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const; /** * Returns the start of the substring of the input string according to the USetSpanCondition. diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 0d1bbb6f49e..4f1f824e9cb 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -77,8 +77,8 @@ UStringCaseMapper(int32_t caseLocale, uint32_t options, #if !UCONFIG_NO_BREAK_ITERATION icu::BreakIterator *iter, #endif - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, + const char16_t *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode); @@ -139,7 +139,7 @@ class UnicodeStringAppendable; // unicode/appendable.h /** * \def UNISTR_FROM_CHAR_EXPLICIT * This can be defined to be empty or "explicit". - * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32) + * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32) * constructors are marked as explicit, preventing their inadvertent use. * @stable ICU 49 */ @@ -156,7 +156,7 @@ class UnicodeStringAppendable; // unicode/appendable.h /** * \def UNISTR_FROM_STRING_EXPLICIT * This can be defined to be empty or "explicit". - * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *) + * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *) * constructors are marked as explicit, preventing their inadvertent use. 
* * In particular, this helps prevent accidentally depending on ICU conversion code @@ -190,18 +190,18 @@ class UnicodeStringAppendable; // unicode/appendable.h * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models), * to hold the fields for heap-allocated strings. * Such a minimum size also ensures that the object is easily large enough - * to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH). + * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH). * * sizeof(UnicodeString) >= 48 should work for all known platforms. * * For example, on a 64-bit machine where sizeof(vtable pointer) is 8, * sizeof(UnicodeString) = 64 would leave space for * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27 - * UChars stored inside the object. + * char16_ts stored inside the object. * * The minimum object size on a 64-bit machine would be * 4 * sizeof(pointer) = 4 * 8 = 32 bytes, - * and the internal buffer would hold up to 11 UChars in that case. + * and the internal buffer would hold up to 11 char16_ts in that case. * * @see U16_MAX_LENGTH * @stable ICU 56 @@ -233,7 +233,7 @@ class UnicodeStringAppendable; // unicode/appendable.h *

In ICU, a Unicode string consists of 16-bit Unicode code units. * A Unicode character may be stored with either one code unit * (the most common case) or with a matched pair of special code units - * ("surrogates"). The data type for code units is UChar. + * ("surrogates"). The data type for code units is char16_t. * For single-character handling, a Unicode character code point is a value * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.

* @@ -249,7 +249,7 @@ class UnicodeStringAppendable; // unicode/appendable.h * than other ICU APIs. In particular: * - If indexes are out of bounds for a UnicodeString object * (<0 or >length()) then they are "pinned" to the nearest boundary. - * - If primitive string pointer values (e.g., const UChar * or char *) + * - If primitive string pointer values (e.g., const char16_t * or char *) * for input strings are NULL, then those input string parameters are treated * as if they pointed to an empty string. * However, this is not the case for char * parameters for charset names @@ -455,7 +455,7 @@ public: */ inline int8_t compare(int32_t start, int32_t length, - const UChar *srcChars) const; + const char16_t *srcChars) const; /** * Compare the characters bitwise in the range @@ -476,7 +476,7 @@ public: */ inline int8_t compare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -614,7 +614,7 @@ public: */ inline int8_t compareCodePointOrder(int32_t start, int32_t length, - const UChar *srcChars) const; + const char16_t *srcChars) const; /** * Compare two Unicode strings in code point order. 
@@ -639,7 +639,7 @@ public: */ inline int8_t compareCodePointOrder(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -789,7 +789,7 @@ public: */ inline int8_t caseCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, uint32_t options) const; /** @@ -816,7 +816,7 @@ public: */ inline int8_t caseCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const; @@ -893,7 +893,7 @@ public: * @return TRUE if this ends with the characters in srcChars, FALSE otherwise * @stable ICU 2.0 */ - inline UBool startsWith(const UChar *srcChars, + inline UBool startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -941,7 +941,7 @@ public: * FALSE otherwise * @stable ICU 2.0 */ - inline UBool endsWith(const UChar *srcChars, + inline UBool endsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -1018,7 +1018,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(const UChar *srcChars, + inline int32_t indexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const; @@ -1055,7 +1055,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - int32_t indexOf(const UChar *srcChars, + int32_t indexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, @@ -1068,7 +1068,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(UChar c) const; + inline int32_t indexOf(char16_t c) const; /** * Locate in this the first occurrence of the code point c, @@ -1088,7 +1088,7 @@ public: * @return The offset into this of c, or -1 if not found. 
* @stable ICU 2.0 */ - inline int32_t indexOf(UChar c, + inline int32_t indexOf(char16_t c, int32_t start) const; /** @@ -1113,7 +1113,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(UChar c, + inline int32_t indexOf(char16_t c, int32_t start, int32_t length) const; @@ -1201,7 +1201,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(const UChar *srcChars, + inline int32_t lastIndexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const; @@ -1238,7 +1238,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - int32_t lastIndexOf(const UChar *srcChars, + int32_t lastIndexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, @@ -1251,7 +1251,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(UChar c) const; + inline int32_t lastIndexOf(char16_t c) const; /** * Locate in this the last occurrence of the code point c, @@ -1271,7 +1271,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(UChar c, + inline int32_t lastIndexOf(char16_t c, int32_t start) const; /** @@ -1296,7 +1296,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(UChar c, + inline int32_t lastIndexOf(char16_t c, int32_t start, int32_t length) const; @@ -1326,7 +1326,7 @@ public: * or 0xffff if the offset is not valid for this string * @stable ICU 2.0 */ - inline UChar charAt(int32_t offset) const; + inline char16_t charAt(int32_t offset) const; /** * Return the code unit at offset offset. 
@@ -1335,7 +1335,7 @@ public: * @return the code unit at offset offset * @stable ICU 2.0 */ - inline UChar operator[] (int32_t offset) const; + inline char16_t operator[] (int32_t offset) const; /** * Return the code point that contains the code unit @@ -1475,7 +1475,7 @@ public: * then extract() will not copy the contents. * * @param dest Destination string buffer. - * @param destCapacity Number of UChars available at dest. + * @param destCapacity Number of char16_ts available at dest. * @param errorCode ICU error code. * @return length() * @stable ICU 2.0 @@ -1511,7 +1511,7 @@ public: */ inline void extractBetween(int32_t start, int32_t limit, - UChar *dst, + char16_t *dst, int32_t dstStart = 0) const; /** @@ -1756,7 +1756,7 @@ public: /** * Return the length of the UnicodeString object. - * The length is the number of UChar code units are in the UnicodeString. + * The length is the number of char16_t code units are in the UnicodeString. * If you want the number of code points, please use countChar32(). * @return the length of the UnicodeString object * @see countChar32 @@ -1765,14 +1765,14 @@ public: inline int32_t length(void) const; /** - * Count Unicode code points in the length UChar code units of the string. - * A code point may occupy either one or two UChar code units. + * Count Unicode code points in the length char16_t code units of the string. + * A code point may occupy either one or two char16_t code units. * Counting code points involves reading all code units. * * This functions is basically the inverse of moveIndex32(). 
* * @param start the index of the first code unit to check - * @param length the number of UChar code units to check + * @param length the number of char16_t code units to check * @return the number of code points in the specified code units * @see length * @stable ICU 2.0 @@ -1781,7 +1781,7 @@ public: countChar32(int32_t start=0, int32_t length=INT32_MAX) const; /** - * Check if the length UChar code units of the string + * Check if the length char16_t code units of the string * contain more Unicode code points than a certain number. * This is more efficient than counting all code points in this part of the string * and comparing that number with a threshold. @@ -1789,10 +1789,10 @@ public: * falls within a certain range, and * never needs to count more than 'number+1' code points. * Logically equivalent to (countChar32(start, length)>number). - * A Unicode code point may occupy either one or two UChar code units. + * A Unicode code point may occupy either one or two char16_t code units. * * @param start the index of the first code unit to check (0 for the entire string) - * @param length the number of UChar code units to check + * @param length the number of char16_t code units to check * (use INT32_MAX for the entire string; remember that start/length * values are pinned) * @param number The number of code points in the (sub)string is compared against @@ -1818,7 +1818,7 @@ public: * This is useful together with the getBuffer functions. * See there for details. * - * @return the number of UChars available in the internal buffer + * @return the number of char16_ts available in the internal buffer * @see getBuffer * @stable ICU 2.0 */ @@ -1952,7 +1952,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& operator= (UChar ch); + inline UnicodeString& operator= (char16_t ch); /** * Assignment operator. 
Replace the characters in this UnicodeString @@ -2012,7 +2012,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& setTo(const UChar *srcChars, + inline UnicodeString& setTo(const char16_t *srcChars, int32_t srcLength); /** @@ -2023,7 +2023,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - UnicodeString& setTo(UChar srcChar); + UnicodeString& setTo(char16_t srcChar); /** * Set the characters in the UnicodeString object to the code point @@ -2036,7 +2036,7 @@ public: UnicodeString& setTo(UChar32 srcChar); /** - * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. + * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has copy-on-write semantics: @@ -2059,11 +2059,11 @@ public: * @stable ICU 2.0 */ UnicodeString &setTo(UBool isTerminated, - const UChar *text, + const char16_t *text, int32_t textLength); /** - * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. + * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has write-through semantics: @@ -2077,11 +2077,11 @@ public: * * @param buffer The characters to alias for the UnicodeString. * @param buffLength The number of Unicode characters in buffer to alias. - * @param buffCapacity The size of buffer in UChars. + * @param buffCapacity The size of buffer in char16_ts. 
* @return a reference to this * @stable ICU 2.0 */ - UnicodeString &setTo(UChar *buffer, + UnicodeString &setTo(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); @@ -2117,7 +2117,7 @@ public: * s.truncate(0); // set to an empty string (complete truncation), or * s=UnicodeString(); // assign an empty string, or * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or - * static const UChar nul=0; + * static const char16_t nul=0; * s.setTo(&nul, 0); // set to an empty C Unicode string * } * \endcode @@ -2135,7 +2135,7 @@ public: * @stable ICU 2.0 */ UnicodeString& setCharAt(int32_t offset, - UChar ch); + char16_t ch); /* Append operations */ @@ -2147,7 +2147,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& operator+= (UChar ch); + inline UnicodeString& operator+= (char16_t ch); /** * Append operator. Append the code point ch to the UnicodeString @@ -2207,7 +2207,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& append(const UChar *srcChars, + inline UnicodeString& append(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); @@ -2229,7 +2229,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& append(UChar srcChar); + inline UnicodeString& append(char16_t srcChar); /** * Append the code point srcChar to the UnicodeString object. 
@@ -2285,7 +2285,7 @@ public: * @stable ICU 2.0 */ inline UnicodeString& insert(int32_t start, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength); @@ -2311,7 +2311,7 @@ public: * @stable ICU 2.0 */ inline UnicodeString& insert(int32_t start, - UChar srcChar); + char16_t srcChar); /** * Insert the code point srcChar into the UnicodeString object at @@ -2385,7 +2385,7 @@ public: */ UnicodeString& replace(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength); @@ -2419,7 +2419,7 @@ public: */ inline UnicodeString& replace(int32_t start, int32_t length, - UChar srcChar); + char16_t srcChar); /** * Replace the characters in the range @@ -2617,7 +2617,7 @@ public: * @stable ICU 2.0 */ UBool padLeading(int32_t targetLength, - UChar padChar = 0x0020); + char16_t padChar = 0x0020); /** * Pad the end of this UnicodeString with the character padChar. @@ -2631,7 +2631,7 @@ public: * @stable ICU 2.0 */ UBool padTrailing(int32_t targetLength, - UChar padChar = 0x0020); + char16_t padChar = 0x0020); /** * Truncate this UnicodeString to the targetLength. @@ -2818,7 +2818,7 @@ public: /** * Get a read/write pointer to the internal buffer. - * The buffer is guaranteed to be large enough for at least minCapacity UChars, + * The buffer is guaranteed to be large enough for at least minCapacity char16_ts, * writable, and is still owned by the UnicodeString object. * Calls to getBuffer(minCapacity) must not be nested, and * must be matched with calls to releaseBuffer(newLength). @@ -2849,7 +2849,7 @@ public: * - You must call releaseBuffer(newLength) before and in order to * return to normal UnicodeString operation. 
* - * @param minCapacity the minimum number of UChars that are to be available + * @param minCapacity the minimum number of char16_ts that are to be available * in the buffer, starting at the returned pointer; * default to the current string capacity if minCapacity==-1 * @return a writable pointer to the internal string buffer, @@ -2960,8 +2960,8 @@ public: inline UnicodeString(); /** - * Construct a UnicodeString with capacity to hold capacity UChars - * @param capacity the number of UChars this UnicodeString should hold + * Construct a UnicodeString with capacity to hold capacity char16_ts + * @param capacity the number of char16_ts this UnicodeString should hold * before a resize is necessary; if count is greater than 0 and count * code points c take up more space than capacity, then capacity is adjusted * accordingly. @@ -2973,7 +2973,7 @@ public: UnicodeString(int32_t capacity, UChar32 c, int32_t count); /** - * Single UChar (code unit) constructor. + * Single char16_t (code unit) constructor. * * It is recommended to mark this constructor "explicit" by * -DUNISTR_FROM_CHAR_EXPLICIT=explicit @@ -2981,7 +2981,7 @@ public: * @param ch the character to place in the UnicodeString * @stable ICU 2.0 */ - UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); + UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch); /** * Single UChar32 (code point) constructor. @@ -2995,7 +2995,7 @@ public: UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); /** - * UChar* constructor. + * char16_t* constructor. * * It is recommended to mark this constructor "explicit" by * -DUNISTR_FROM_STRING_EXPLICIT=explicit @@ -3004,11 +3004,11 @@ public: * must be NULL (U+0000) terminated. * @stable ICU 2.0 */ - UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); /** * uint16_t * constructor. - * Delegates to UnicodeString(const UChar *). + * Delegates to UnicodeString(const char16_t *). 
* * It is recommended to mark this constructor "explicit" by * -DUNISTR_FROM_STRING_EXPLICIT=explicit @@ -3023,7 +3023,7 @@ public: /** * wchar_t * constructor. * (Only defined if U_SIZEOF_WCHAR_T==2.) - * Delegates to UnicodeString(const UChar *). + * Delegates to UnicodeString(const char16_t *). * * It is recommended to mark this constructor "explicit" by * -DUNISTR_FROM_STRING_EXPLICIT=explicit @@ -3048,18 +3048,18 @@ public: UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); /** - * UChar* constructor. + * char16_t* constructor. * @param text The characters to place in the UnicodeString. * @param textLength The number of Unicode characters in text * to copy. * @stable ICU 2.0 */ - UnicodeString(const UChar *text, + UnicodeString(const char16_t *text, int32_t textLength); /** * uint16_t * constructor. - * Delegates to UnicodeString(const UChar *, int32_t). + * Delegates to UnicodeString(const char16_t *, int32_t). * @param text UTF-16 string * @param length string length * @draft ICU 59 @@ -3071,7 +3071,7 @@ public: /** * wchar_t * constructor. * (Only defined if U_SIZEOF_WCHAR_T==2.) - * Delegates to UnicodeString(const UChar *, int32_t). + * Delegates to UnicodeString(const char16_t *, int32_t). * @param text NUL-terminated UTF-16 string * @param length string length * @draft ICU 59 @@ -3090,7 +3090,7 @@ public: inline UnicodeString(const std::nullptr_t text, int32_t length); /** - * Readonly-aliasing UChar* constructor. + * Readonly-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has copy-on-write semantics: @@ -3116,7 +3116,7 @@ public: int32_t textLength); /** - * Writable-aliasing UChar* constructor. + * Writable-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. 
* This has write-through semantics: @@ -3130,14 +3130,14 @@ public: * * @param buffer The characters to alias for the UnicodeString. * @param buffLength The number of Unicode characters in buffer to alias. - * @param buffCapacity The size of buffer in UChars. + * @param buffCapacity The size of buffer in char16_ts. * @stable ICU 2.0 */ - UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); + UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); /** * Writable-aliasing uint16_t * constructor. - * Delegates to UnicodeString(const UChar *, int32_t, int32_t). + * Delegates to UnicodeString(char16_t *, int32_t, int32_t). * @param buffer writable buffer of/for UTF-16 text * @param buffLength length of the current buffer contents * @param buffCapacity buffer capacity @@ -3150,7 +3150,7 @@ public: /** * Writable-aliasing wchar_t * constructor. * (Only defined if U_SIZEOF_WCHAR_T==2.) - * Delegates to UnicodeString(const UChar *, int32_t, int32_t). + * Delegates to UnicodeString(char16_t *, int32_t, int32_t). * @param buffer writable buffer of/for UTF-16 text * @param buffLength length of the current buffer contents * @param buffCapacity buffer capacity @@ -3485,7 +3485,7 @@ protected: * UnicodeString::charAt() to be inline again (see jitterbug 709). 
* @stable ICU 2.4 */ - virtual UChar getCharAt(int32_t offset) const; + virtual char16_t getCharAt(int32_t offset) const; /** * The change in Replaceable to use virtual getChar32At() allows @@ -3521,7 +3521,7 @@ private: int8_t doCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -3534,7 +3534,7 @@ private: int8_t doCompareCodePointOrder(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -3549,12 +3549,12 @@ private: int8_t doCaseCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const; - int32_t doIndexOf(UChar c, + int32_t doIndexOf(char16_t c, int32_t start, int32_t length) const; @@ -3562,7 +3562,7 @@ private: int32_t start, int32_t length) const; - int32_t doLastIndexOf(UChar c, + int32_t doLastIndexOf(char16_t c, int32_t start, int32_t length) const; @@ -3572,14 +3572,14 @@ private: void doExtract(int32_t start, int32_t length, - UChar *dst, + char16_t *dst, int32_t dstStart) const; inline void doExtract(int32_t start, int32_t length, UnicodeString& target) const; - inline UChar doCharAt(int32_t offset) const; + inline char16_t doCharAt(int32_t offset) const; UnicodeString& doReplace(int32_t start, int32_t length, @@ -3589,12 +3589,12 @@ private: UnicodeString& doReplace(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength); UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength); - UnicodeString& doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength); + UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); UnicodeString& doReverse(int32_t start, int32_t length); @@ -3604,8 +3604,8 @@ private: // get pointer to start of array // these do not check for 
kOpenGetBuffer, unlike the public getBuffer() function - inline UChar* getArrayStart(void); - inline const UChar* getArrayStart(void) const; + inline char16_t* getArrayStart(void); + inline const char16_t* getArrayStart(void) const; inline UBool hasShortLength() const; inline int32_t getShortLength() const; @@ -3622,7 +3622,7 @@ private: inline void setShortLength(int32_t len); inline void setLength(int32_t len); inline void setToEmpty(); - inline void setArray(UChar *array, int32_t len, int32_t capacity); // sets length but not flags + inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags // allocate the array; result may be the stack buffer // sets refCount to 1 if appropriate @@ -3800,15 +3800,15 @@ private: // Each struct of the union must begin with fLengthAndFlags. struct { int16_t fLengthAndFlags; // bit fields: see constants above - UChar fBuffer[US_STACKBUF_SIZE]; // buffer for short strings + char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings } fStackFields; struct { int16_t fLengthAndFlags; // bit fields: see constants above int32_t fLength; // number of characters in fArray if >127; else undefined - int32_t fCapacity; // capacity of fArray (in UChars) + int32_t fCapacity; // capacity of fArray (in char16_ts) // array pointer last to minimize padding for machines with P128 data model // or pointer sizes that are not a power of 2 - UChar *fArray; // the Unicode data + char16_t *fArray; // the Unicode data } fFields; } fUnion; }; @@ -3861,13 +3861,13 @@ UnicodeString::pinIndices(int32_t& start, } } -inline UChar* +inline char16_t* UnicodeString::getArrayStart() { return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; } -inline const UChar* +inline const char16_t* UnicodeString::getArrayStart() const { return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; @@ -4026,13 +4026,13 @@ UnicodeString::compare(int32_t start, inline int8_t UnicodeString::compare(int32_t start, int32_t _length, - const UChar *srcChars) const + const char16_t *srcChars) const { return doCompare(start, _length, srcChars, 0, _length); } inline int8_t UnicodeString::compare(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { return doCompare(start, _length, srcChars, srcStart, srcLength); } @@ -4087,13 +4087,13 @@ UnicodeString::compareCodePointOrder(int32_t start, inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length, - const UChar *srcChars) const + const char16_t *srcChars) const { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } @@ -4156,7 +4156,7 @@ UnicodeString::caseCompare(int32_t start, inline int8_t UnicodeString::caseCompare(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, uint32_t options) const { return doCaseCompare(start, _length, srcChars, 0, _length, options); } @@ -4164,7 +4164,7 @@ UnicodeString::caseCompare(int32_t start, inline int8_t UnicodeString::caseCompare(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const { @@ -4215,7 +4215,7 @@ UnicodeString::indexOf(const UnicodeString& text, { return indexOf(text, 0, text.length(), start, _length); } inline int32_t -UnicodeString::indexOf(const UChar *srcChars, +UnicodeString::indexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const { pinIndex(start); @@ -4230,7 +4230,7 @@ 
UnicodeString::indexOf(ConstChar16Ptr srcChars, { return indexOf(srcChars, 0, srcLength, start, _length); } inline int32_t -UnicodeString::indexOf(UChar c, +UnicodeString::indexOf(char16_t c, int32_t start, int32_t _length) const { return doIndexOf(c, start, _length); } @@ -4242,7 +4242,7 @@ UnicodeString::indexOf(UChar32 c, { return doIndexOf(c, start, _length); } inline int32_t -UnicodeString::indexOf(UChar c) const +UnicodeString::indexOf(char16_t c) const { return doIndexOf(c, 0, length()); } inline int32_t @@ -4250,7 +4250,7 @@ UnicodeString::indexOf(UChar32 c) const { return indexOf(c, 0, length()); } inline int32_t -UnicodeString::indexOf(UChar c, +UnicodeString::indexOf(char16_t c, int32_t start) const { pinIndex(start); return doIndexOf(c, start, length() - start); @@ -4271,7 +4271,7 @@ UnicodeString::lastIndexOf(ConstChar16Ptr srcChars, { return lastIndexOf(srcChars, 0, srcLength, start, _length); } inline int32_t -UnicodeString::lastIndexOf(const UChar *srcChars, +UnicodeString::lastIndexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const { pinIndex(start); @@ -4312,7 +4312,7 @@ UnicodeString::lastIndexOf(const UnicodeString& text) const { return lastIndexOf(text, 0, text.length(), 0, length()); } inline int32_t -UnicodeString::lastIndexOf(UChar c, +UnicodeString::lastIndexOf(char16_t c, int32_t start, int32_t _length) const { return doLastIndexOf(c, start, _length); } @@ -4325,7 +4325,7 @@ UnicodeString::lastIndexOf(UChar32 c, } inline int32_t -UnicodeString::lastIndexOf(UChar c) const +UnicodeString::lastIndexOf(char16_t c) const { return doLastIndexOf(c, 0, length()); } inline int32_t @@ -4334,7 +4334,7 @@ UnicodeString::lastIndexOf(UChar32 c) const { } inline int32_t -UnicodeString::lastIndexOf(UChar c, +UnicodeString::lastIndexOf(char16_t c, int32_t start) const { pinIndex(start); return doLastIndexOf(c, start, length() - start); @@ -4366,7 +4366,7 @@ UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { } 
inline UBool -UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { +UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { srcLength = u_strlen(srcChars); } @@ -4398,7 +4398,7 @@ UnicodeString::endsWith(ConstChar16Ptr srcChars, } inline UBool -UnicodeString::endsWith(const UChar *srcChars, +UnicodeString::endsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { @@ -4435,7 +4435,7 @@ UnicodeString::replace(int32_t start, inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) { return doReplace(start, _length, srcChars, srcStart, srcLength); } @@ -4443,7 +4443,7 @@ UnicodeString::replace(int32_t start, inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, - UChar srcChar) + char16_t srcChar) { return doReplace(start, _length, &srcChar, 0, 1); } inline UnicodeString& @@ -4514,7 +4514,7 @@ UnicodeString::extract(int32_t start, inline void UnicodeString::extractBetween(int32_t start, int32_t limit, - UChar *dst, + char16_t *dst, int32_t dstStart) const { pinIndex(start); pinIndex(limit); @@ -4526,7 +4526,7 @@ UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { return tempSubString(start, limit - start); } -inline UChar +inline char16_t UnicodeString::doCharAt(int32_t offset) const { if((uint32_t)offset < (uint32_t)length()) { @@ -4536,11 +4536,11 @@ UnicodeString::doCharAt(int32_t offset) const } } -inline UChar +inline char16_t UnicodeString::charAt(int32_t offset) const { return doCharAt(offset); } -inline UChar +inline char16_t UnicodeString::operator[] (int32_t offset) const { return doCharAt(offset); } @@ -4581,14 +4581,14 @@ UnicodeString::setToEmpty() { } inline void -UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { 
+UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) { setLength(len); fUnion.fFields.fArray = array; fUnion.fFields.fCapacity = capacity; } inline UnicodeString& -UnicodeString::operator= (UChar ch) +UnicodeString::operator= (char16_t ch) { return doReplace(0, length(), &ch, 0, 1); } inline UnicodeString& @@ -4620,7 +4620,7 @@ UnicodeString::setTo(const UnicodeString& srcText) } inline UnicodeString& -UnicodeString::setTo(const UChar *srcChars, +UnicodeString::setTo(const char16_t *srcChars, int32_t srcLength) { unBogus(); @@ -4628,7 +4628,7 @@ UnicodeString::setTo(const UChar *srcChars, } inline UnicodeString& -UnicodeString::setTo(UChar srcChar) +UnicodeString::setTo(char16_t srcChar) { unBogus(); return doReplace(0, length(), &srcChar, 0, 1); @@ -4652,7 +4652,7 @@ UnicodeString::append(const UnicodeString& srcText) { return doAppend(srcText, 0, srcText.length()); } inline UnicodeString& -UnicodeString::append(const UChar *srcChars, +UnicodeString::append(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) { return doAppend(srcChars, srcStart, srcLength); } @@ -4663,11 +4663,11 @@ UnicodeString::append(ConstChar16Ptr srcChars, { return doAppend(srcChars, 0, srcLength); } inline UnicodeString& -UnicodeString::append(UChar srcChar) +UnicodeString::append(char16_t srcChar) { return doAppend(&srcChar, 0, 1); } inline UnicodeString& -UnicodeString::operator+= (UChar ch) +UnicodeString::operator+= (char16_t ch) { return doAppend(&ch, 0, 1); } inline UnicodeString& @@ -4693,7 +4693,7 @@ UnicodeString::insert(int32_t start, inline UnicodeString& UnicodeString::insert(int32_t start, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) { return doReplace(start, 0, srcChars, srcStart, srcLength); } @@ -4706,7 +4706,7 @@ UnicodeString::insert(int32_t start, inline UnicodeString& UnicodeString::insert(int32_t start, - UChar srcChar) + char16_t srcChar) { return doReplace(start, 0, &srcChar, 0, 1); } inline 
UnicodeString& diff --git a/icu4c/source/i18n/unicode/coll.h b/icu4c/source/i18n/unicode/coll.h index f7b2b52d1c5..a53df5deebd 100644 --- a/icu4c/source/i18n/unicode/coll.h +++ b/icu4c/source/i18n/unicode/coll.h @@ -393,8 +393,8 @@ public: * is less than, greater than or equal to another string array. *

Example of use: *

-     * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
-     * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
+     * .       char16_t ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
+     * .       char16_t abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
      * .       UErrorCode status = U_ZERO_ERROR;
      * .       Collator *myCollation =
      * .                         Collator::createInstance(Locale::getUS(), status);
@@ -420,8 +420,8 @@ public:
      *         target
      * @deprecated ICU 2.6 use the overload with UErrorCode &
      */
-    virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
-                                      const UChar* target, int32_t targetLength)
+    virtual EComparisonResult compare(const char16_t* source, int32_t sourceLength,
+                                      const char16_t* target, int32_t targetLength)
                                       const;
 
     /**
@@ -440,8 +440,8 @@ public:
      * than target
      * @stable ICU 2.6
      */
-    virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
-                                      const UChar* target, int32_t targetLength,
+    virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
+                                      const char16_t* target, int32_t targetLength,
                                       UErrorCode &status) const = 0;
 
     /**
@@ -517,7 +517,7 @@ public:
      * @see CollationKey#compare
      * @stable ICU 2.0
      */
-    virtual CollationKey& getCollationKey(const UChar*source,
+    virtual CollationKey& getCollationKey(const char16_t*source,
                                           int32_t sourceLength,
                                           CollationKey& key,
                                           UErrorCode& status) const = 0;
@@ -911,7 +911,7 @@ public:
      * the top of one of the supported reordering groups,
      * and it must not be beyond the last of those groups.
      * See setMaxVariable().
-     * @param varTop one or more (if contraction) UChars to which the variable top should be set
+     * @param varTop one or more (if contraction) char16_ts to which the variable top should be set
      * @param len length of variable top string. If -1 it is considered to be zero terminated.
      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: 
* U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
@@ -920,7 +920,7 @@ public: * @return variable top primary weight * @deprecated ICU 53 Call setMaxVariable() instead. */ - virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0; + virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status) = 0; /** * Sets the variable top to the primary weight of the specified string. @@ -929,7 +929,7 @@ public: * the top of one of the supported reordering groups, * and it must not be beyond the last of those groups. * See setMaxVariable(). - * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set + * @param varTop a UnicodeString size 1 or more (if contraction) of char16_ts to which the variable top should be set * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
* U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
* U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond @@ -1002,7 +1002,7 @@ public: int32_t resultLength) const = 0; /** - * Get the sort key as an array of bytes from a UChar buffer. + * Get the sort key as an array of bytes from a char16_t buffer. * Sort key byte arrays are zero-terminated and can be compared using * strcmp(). * @@ -1020,7 +1020,7 @@ public: * @return Number of bytes needed for storing the sort key * @stable ICU 2.2 */ - virtual int32_t getSortKey(const UChar*source, int32_t sourceLength, + virtual int32_t getSortKey(const char16_t*source, int32_t sourceLength, uint8_t*result, int32_t resultLength) const = 0; /** diff --git a/icu4c/source/i18n/unicode/currunit.h b/icu4c/source/i18n/unicode/currunit.h index 857d9ceafe0..cefea536eb0 100644 --- a/icu4c/source/i18n/unicode/currunit.h +++ b/icu4c/source/i18n/unicode/currunit.h @@ -28,7 +28,7 @@ U_NAMESPACE_BEGIN /** * A unit of currency, such as USD (U.S. dollars) or JPY (Japanese - * yen). This class is a thin wrapper over a UChar string that + * yen). This class is a thin wrapper over a char16_t string that * subclasses MeasureUnit, for use with Measure and MeasureFormat. * * @author Alan Liu @@ -99,7 +99,7 @@ class U_I18N_API CurrencyUnit: public MeasureUnit { /** * The ISO 4217 code of this object. */ - UChar isoCode[4]; + char16_t isoCode[4]; }; inline ConstChar16Ptr CurrencyUnit::getISOCurrency() const { diff --git a/icu4c/source/i18n/unicode/dcfmtsym.h b/icu4c/source/i18n/unicode/dcfmtsym.h index 6e6615ebd9f..3a502d0ec03 100644 --- a/icu4c/source/i18n/unicode/dcfmtsym.h +++ b/icu4c/source/i18n/unicode/dcfmtsym.h @@ -393,7 +393,7 @@ public: * Returns that pattern stored in currecy info. Internal API for use by NumberFormat API. 
* @internal */ - inline const UChar* getCurrencyPattern(void) const; + inline const char16_t* getCurrencyPattern(void) const; #endif /* U_HIDE_INTERNAL_API */ private: @@ -424,7 +424,7 @@ private: char actualLocale[ULOC_FULLNAME_CAPACITY]; char validLocale[ULOC_FULLNAME_CAPACITY]; - const UChar* currPattern; + const char16_t* currPattern; UnicodeString currencySpcBeforeSym[UNUM_CURRENCY_SPACING_COUNT]; UnicodeString currencySpcAfterSym[UNUM_CURRENCY_SPACING_COUNT]; @@ -492,7 +492,7 @@ DecimalFormatSymbols::getLocale() const { } #ifndef U_HIDE_INTERNAL_API -inline const UChar* +inline const char16_t* DecimalFormatSymbols::getCurrencyPattern() const { return currPattern; } diff --git a/icu4c/source/i18n/unicode/decimfmt.h b/icu4c/source/i18n/unicode/decimfmt.h index 7ba34dd4a94..1deff5bf921 100644 --- a/icu4c/source/i18n/unicode/decimfmt.h +++ b/icu4c/source/i18n/unicode/decimfmt.h @@ -604,7 +604,7 @@ template class U_I18N_API EnumSet"* #0 o''clock", the format width is 10. * - *
  • The width is counted in 16-bit code units (UChars). + *
  • The width is counted in 16-bit code units (char16_ts). * *
  • Some parameters which usually do not matter have meaning when padding is * used, because the pattern width is significant with padding. In the pattern @@ -1961,14 +1961,14 @@ public: * @param ec input-output error code * @stable ICU 3.0 */ - virtual void setCurrency(const UChar* theCurrency, UErrorCode& ec); + virtual void setCurrency(const char16_t* theCurrency, UErrorCode& ec); /** * Sets the currency used to display currency amounts. See - * setCurrency(const UChar*, UErrorCode&). - * @deprecated ICU 3.0. Use setCurrency(const UChar*, UErrorCode&). + * setCurrency(const char16_t*, UErrorCode&). + * @deprecated ICU 3.0. Use setCurrency(const char16_t*, UErrorCode&). */ - virtual void setCurrency(const UChar* theCurrency); + virtual void setCurrency(const char16_t* theCurrency); /** * Sets the Currency Context object used to display currency. @@ -2108,7 +2108,7 @@ private: void parse(const UnicodeString& text, Formattable& result, ParsePosition& pos, - UChar* currency) const; + char16_t* currency) const; enum { fgStatusInfinite, @@ -2124,7 +2124,7 @@ private: int8_t type, ParsePosition& parsePosition, DigitList& digits, UBool* status, - UChar* currency) const; + char16_t* currency) const; // Mixed style parsing for currency. 
// It parses against the current currency pattern @@ -2135,7 +2135,7 @@ private: ParsePosition& parsePosition, DigitList& digits, UBool* status, - UChar* currency) const; + char16_t* currency) const; int32_t skipPadding(const UnicodeString& text, int32_t position) const; @@ -2146,7 +2146,7 @@ private: const UnicodeString* affixPat, UBool complexCurrencyParsing, int8_t type, - UChar* currency) const; + char16_t* currency) const; static UnicodeString& trimMarksFromAffix(const UnicodeString& affix, UnicodeString& trimmedAffix); @@ -2169,7 +2169,7 @@ private: const UnicodeString& input, int32_t pos, int8_t type, - UChar* currency) const; + char16_t* currency) const; static int32_t match(const UnicodeString& text, int32_t pos, UChar32 ch); @@ -2195,11 +2195,11 @@ private: void setupCurrencyAffixPatterns(UErrorCode& status); // get the currency rounding with respect to currency usage - double getCurrencyRounding(const UChar* currency, + double getCurrencyRounding(const char16_t* currency, UErrorCode* ec) const; // get the currency fraction with respect to currency usage - int getCurrencyFractionDigits(const UChar* currency, + int getCurrencyFractionDigits(const char16_t* currency, UErrorCode* ec) const; // hashtable operations @@ -2271,7 +2271,7 @@ protected: * have a capacity of at least 4 * @internal */ - virtual void getEffectiveCurrency(UChar* result, UErrorCode& ec) const; + virtual void getEffectiveCurrency(char16_t* result, UErrorCode& ec) const; /** number of integer digits * @stable ICU 2.4 diff --git a/icu4c/source/i18n/unicode/dtfmtsym.h b/icu4c/source/i18n/unicode/dtfmtsym.h index 8374de3c207..e5f4d5c36c8 100644 --- a/icu4c/source/i18n/unicode/dtfmtsym.h +++ b/icu4c/source/i18n/unicode/dtfmtsym.h @@ -426,13 +426,13 @@ public: * doesn't specify any time separator, and always recognized when parsing. 
* @internal */ - static const UChar DEFAULT_TIME_SEPARATOR = 0x003a; // ':' + static const char16_t DEFAULT_TIME_SEPARATOR = 0x003a; // ':' /** * This alternate time separator is always recognized when parsing. * @internal */ - static const UChar ALTERNATE_TIME_SEPARATOR = 0x002e; // '.' + static const char16_t ALTERNATE_TIME_SEPARATOR = 0x002e; // '.' /** * Gets the time separator string. For example: ":". @@ -977,7 +977,7 @@ private: * Returns the date format field index of the pattern character c, * or UDAT_FIELD_COUNT if c is not a pattern character. */ - static UDateFormatField U_EXPORT2 getPatternCharIndex(UChar c); + static UDateFormatField U_EXPORT2 getPatternCharIndex(char16_t c); /** * Returns TRUE if f (with its pattern character repeated count times) is a numeric field. @@ -987,7 +987,7 @@ private: /** * Returns TRUE if c (repeated count times) is the pattern character for a numeric field. */ - static UBool U_EXPORT2 isNumericPatternChar(UChar c, int32_t count); + static UBool U_EXPORT2 isNumericPatternChar(char16_t c, int32_t count); public: #ifndef U_HIDE_INTERNAL_API /** diff --git a/icu4c/source/i18n/unicode/dtitvfmt.h b/icu4c/source/i18n/unicode/dtitvfmt.h index 3c42e15e675..5eaa559d0ea 100644 --- a/icu4c/source/i18n/unicode/dtitvfmt.h +++ b/icu4c/source/i18n/unicode/dtitvfmt.h @@ -996,7 +996,7 @@ private: // from calendar field to pattern letter - static const UChar fgCalendarFieldToPatternLetter[]; + static const char16_t fgCalendarFieldToPatternLetter[]; /** diff --git a/icu4c/source/i18n/unicode/dtptngen.h b/icu4c/source/i18n/unicode/dtptngen.h index 6748e57ff85..5f991db6838 100644 --- a/icu4c/source/i18n/unicode/dtptngen.h +++ b/icu4c/source/i18n/unicode/dtptngen.h @@ -517,7 +517,7 @@ private: DateTimeMatcher *skipMatcher; Hashtable *fAvailableFormatKeyHash; UnicodeString emptyString; - UChar fDefaultHourFormatChar; + char16_t fDefaultHourFormatChar; int32_t fAllowedHourFormats[7]; // Actually an array of AllowedHourFormat enum type, ending 
with UNKNOWN. diff --git a/icu4c/source/i18n/unicode/msgfmt.h b/icu4c/source/i18n/unicode/msgfmt.h index 55cbee35247..fef80107747 100644 --- a/icu4c/source/i18n/unicode/msgfmt.h +++ b/icu4c/source/i18n/unicode/msgfmt.h @@ -939,7 +939,7 @@ private: * @return the index of the list which matches the keyword s. */ static int32_t findKeyword( const UnicodeString& s, - const UChar * const *list); + const char16_t * const *list); /** * Thin wrapper around the format(... AppendableWrapper ...) variant. diff --git a/icu4c/source/i18n/unicode/numfmt.h b/icu4c/source/i18n/unicode/numfmt.h index 009d59f0916..6d83f43f520 100644 --- a/icu4c/source/i18n/unicode/numfmt.h +++ b/icu4c/source/i18n/unicode/numfmt.h @@ -931,7 +931,7 @@ public: * @param ec input-output error code * @stable ICU 3.0 */ - virtual void setCurrency(const UChar* theCurrency, UErrorCode& ec); + virtual void setCurrency(const char16_t* theCurrency, UErrorCode& ec); /** * Gets the currency used to display currency @@ -1018,7 +1018,7 @@ protected: * have a capacity of at least 4 * @internal */ - virtual void getEffectiveCurrency(UChar* result, UErrorCode& ec) const; + virtual void getEffectiveCurrency(char16_t* result, UErrorCode& ec) const; #ifndef U_HIDE_INTERNAL_API /** @@ -1065,7 +1065,7 @@ private: UBool fLenient; // TRUE => lenient parse is enabled // ISO currency code - UChar fCurrency[4]; + char16_t fCurrency[4]; UDisplayContext fCapitalizationContext; diff --git a/icu4c/source/i18n/unicode/regex.h b/icu4c/source/i18n/unicode/regex.h index 4f2562466a0..6829f7ba472 100644 --- a/icu4c/source/i18n/unicode/regex.h +++ b/icu4c/source/i18n/unicode/regex.h @@ -350,17 +350,17 @@ public: private: /** * Cause a compilation error if an application accidentally attempts to - * create a matcher with a (UChar *) string as input rather than + * create a matcher with a (char16_t *) string as input rather than * a UnicodeString. Avoids a dangling reference to a temporary string. *

    - * To efficiently work with UChar *strings, wrap the data in a UnicodeString + * To efficiently work with char16_t *strings, wrap the data in a UnicodeString * using one of the aliasing constructors, such as - * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength); + * UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength); * or in a UText, using - * utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status); + * utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status); * */ - RegexMatcher *matcher(const UChar *input, + RegexMatcher *matcher(const char16_t *input, UErrorCode &status) const; public: @@ -748,17 +748,17 @@ public: private: /** * Cause a compilation error if an application accidentally attempts to - * create a matcher with a (UChar *) string as input rather than + * create a matcher with a (char16_t *) string as input rather than * a UnicodeString. Avoids a dangling reference to a temporary string. *

    - * To efficiently work with UChar *strings, wrap the data in a UnicodeString + * To efficiently work with char16_t *strings, wrap the data in a UnicodeString * using one of the aliasing constructors, such as - * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength); + * UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength); * or in a UText, using - * utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status); + * utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status); * */ - RegexMatcher(const UnicodeString &regexp, const UChar *input, + RegexMatcher(const UnicodeString &regexp, const char16_t *input, uint32_t flags, UErrorCode &status); public: @@ -1156,17 +1156,17 @@ public: private: /** * Cause a compilation error if an application accidentally attempts to - * reset a matcher with a (UChar *) string as input rather than + * reset a matcher with a (char16_t *) string as input rather than * a UnicodeString. Avoids a dangling reference to a temporary string. *

    - * To efficiently work with UChar *strings, wrap the data in a UnicodeString + * To efficiently work with char16_t *strings, wrap the data in a UnicodeString * using one of the aliasing constructors, such as - * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength); + * UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength); * or in a UText, using - * utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status); + * utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status); * */ - RegexMatcher &reset(const UChar *input); + RegexMatcher &reset(const char16_t *input); public: /** diff --git a/icu4c/source/i18n/unicode/smpdtfmt.h b/icu4c/source/i18n/unicode/smpdtfmt.h index e0ab9702ced..4733e759aa7 100644 --- a/icu4c/source/i18n/unicode/smpdtfmt.h +++ b/icu4c/source/i18n/unicode/smpdtfmt.h @@ -1170,7 +1170,7 @@ public: * @param field The UDateFormatField to get * @stable ICU 54 */ - const NumberFormat * getNumberFormatForField(UChar field) const; + const NumberFormat * getNumberFormatForField(char16_t field) const; #ifndef U_HIDE_INTERNAL_API /** @@ -1262,7 +1262,7 @@ private: * succeeds. */ void subFormat(UnicodeString &appendTo, - UChar ch, + char16_t ch, int32_t count, UDisplayContext capitalizationContext, int32_t fieldNum, @@ -1294,7 +1294,7 @@ private: * Return true if the given format character, occuring count * times, represents a numeric field. */ - static UBool isNumeric(UChar formatChar, int32_t count); + static UBool isNumeric(char16_t formatChar, int32_t count); /** * Returns TRUE if the patternOffset is at the start of a numeric field. @@ -1412,7 +1412,7 @@ private: * @return the new start position if matching succeeded; a negative number * indicating matching failure, otherwise. 
*/ - int32_t subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count, + int32_t subParse(const UnicodeString& text, int32_t& start, char16_t ch, int32_t count, UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal, int32_t patLoc, MessageFormat * numericLeapMonthFormatter, UTimeZoneFormatTimeType *tzTimeType, SimpleDateFormatMutableNFs &mutableNFs, int32_t *dayPeriod=NULL) const; @@ -1523,12 +1523,12 @@ private: /** * Map calendar field letter into calendar field level. */ - static int32_t getLevelFromChar(UChar ch); + static int32_t getLevelFromChar(char16_t ch); /** * Tell if a character can be used to define a field in a format string. */ - static UBool isSyntaxChar(UChar ch); + static UBool isSyntaxChar(char16_t ch); /** * The formatting pattern for this formatter. diff --git a/icu4c/source/i18n/unicode/tblcoll.h b/icu4c/source/i18n/unicode/tblcoll.h index 6767ee93fc0..24ba213b41e 100644 --- a/icu4c/source/i18n/unicode/tblcoll.h +++ b/icu4c/source/i18n/unicode/tblcoll.h @@ -308,8 +308,8 @@ public: * than target * @stable ICU 2.6 */ - virtual UCollationResult compare(const UChar* source, int32_t sourceLength, - const UChar* target, int32_t targetLength, + virtual UCollationResult compare(const char16_t* source, int32_t sourceLength, + const char16_t* target, int32_t targetLength, UErrorCode &status) const; /** @@ -377,7 +377,7 @@ public: * @see CollationKey * @stable ICU 2.0 */ - virtual CollationKey& getCollationKey(const UChar *source, + virtual CollationKey& getCollationKey(const char16_t *source, int32_t sourceLength, CollationKey& key, UErrorCode& status) const; @@ -552,7 +552,7 @@ public: * the top of one of the supported reordering groups, * and it must not be beyond the last of those groups. * See setMaxVariable(). 
- * @param varTop one or more (if contraction) UChars to which the variable top should be set + * @param varTop one or more (if contraction) char16_ts to which the variable top should be set * @param len length of variable top string. If -1 it is considered to be zero terminated. * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
    * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
    @@ -561,7 +561,7 @@ public: * @return variable top primary weight * @deprecated ICU 53 Call setMaxVariable() instead. */ - virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status); + virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status); /** * Sets the variable top to the primary weight of the specified string. @@ -570,7 +570,7 @@ public: * the top of one of the supported reordering groups, * and it must not be beyond the last of those groups. * See setMaxVariable(). - * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set + * @param varTop a UnicodeString size 1 or more (if contraction) of char16_ts to which the variable top should be set * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
    * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
    * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond @@ -631,7 +631,7 @@ public: int32_t resultLength) const; /** - * Get the sort key as an array of bytes from a UChar buffer. + * Get the sort key as an array of bytes from a char16_t buffer. * * Note that sort keys are often less efficient than simply doing comparison. * For more details, see the ICU User Guide. @@ -646,7 +646,7 @@ public: * @return Number of bytes needed for storing the sort key * @stable ICU 2.2 */ - virtual int32_t getSortKey(const UChar *source, int32_t sourceLength, + virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) const; /** @@ -821,17 +821,17 @@ private: void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode); // Both lengths must be <0 or else both must be >=0. - UCollationResult doCompare(const UChar *left, int32_t leftLength, - const UChar *right, int32_t rightLength, + UCollationResult doCompare(const char16_t *left, int32_t leftLength, + const char16_t *right, int32_t rightLength, UErrorCode &errorCode) const; UCollationResult doCompare(const uint8_t *left, int32_t leftLength, const uint8_t *right, int32_t rightLength, UErrorCode &errorCode) const; - void writeSortKey(const UChar *s, int32_t length, + void writeSortKey(const char16_t *s, int32_t length, SortKeyByteSink &sink, UErrorCode &errorCode) const; - void writeIdenticalLevel(const UChar *s, const UChar *limit, + void writeIdenticalLevel(const char16_t *s, const char16_t *limit, SortKeyByteSink &sink, UErrorCode &errorCode) const; const CollationSettings &getDefaultSettings() const; diff --git a/icu4c/source/i18n/unicode/timezone.h b/icu4c/source/i18n/unicode/timezone.h index 3d570d23bde..83dee317784 100644 --- a/icu4c/source/i18n/unicode/timezone.h +++ b/icu4c/source/i18n/unicode/timezone.h @@ -863,7 +863,7 @@ private: * @param id zone id string * @return the pointer of the ID resource, or NULL. 
*/ - static const UChar* findID(const UnicodeString& id); + static const char16_t* findID(const UnicodeString& id); /** * Resolve a link in Olson tzdata. When the given id is known and it's not a link, @@ -873,7 +873,7 @@ private: * @param id zone id string * @return the dereferenced zone or NULL */ - static const UChar* dereferOlsonLink(const UnicodeString& id); + static const char16_t* dereferOlsonLink(const UnicodeString& id); /** * Returns the region code associated with the given zone, @@ -881,7 +881,7 @@ private: * @param id zone id string * @return the region associated with the given zone */ - static const UChar* getRegion(const UnicodeString& id); + static const char16_t* getRegion(const UnicodeString& id); public: #ifndef U_HIDE_INTERNAL_API @@ -893,7 +893,7 @@ private: * @return the region associated with the given zone * @internal */ - static const UChar* getRegion(const UnicodeString& id, UErrorCode& status); + static const char16_t* getRegion(const UnicodeString& id, UErrorCode& status); #endif /* U_HIDE_INTERNAL_API */ private: diff --git a/icu4c/source/i18n/unicode/translit.h b/icu4c/source/i18n/unicode/translit.h index f4ea9ae8014..bccba548024 100644 --- a/icu4c/source/i18n/unicode/translit.h +++ b/icu4c/source/i18n/unicode/translit.h @@ -1319,7 +1319,7 @@ inline int32_t Transliterator::getMaximumContextLength(void) const { inline void Transliterator::setID(const UnicodeString& id) { ID = id; // NUL-terminate the ID string, which is a non-aliased copy. - ID.append((UChar)0); + ID.append((char16_t)0); ID.truncate(ID.length()-1); } diff --git a/icu4c/source/i18n/unicode/tzfmt.h b/icu4c/source/i18n/unicode/tzfmt.h index 6d2de5bcf06..724ff4d85bb 100644 --- a/icu4c/source/i18n/unicode/tzfmt.h +++ b/icu4c/source/i18n/unicode/tzfmt.h @@ -942,7 +942,7 @@ private: * @param parsedLen the parsed length, or 0 on failure. * @return the parsed offset in milliseconds. 
*/ - int32_t parseDefaultOffsetFields(const UnicodeString& text, int32_t start, UChar separator, + int32_t parseDefaultOffsetFields(const UnicodeString& text, int32_t start, char16_t separator, int32_t& parsedLen) const; /** @@ -982,7 +982,7 @@ private: * @param maxFields The maximum fields * @return The offset string */ - static UnicodeString& formatOffsetWithAsciiDigits(int32_t offset, UChar sep, + static UnicodeString& formatOffsetWithAsciiDigits(int32_t offset, char16_t sep, OffsetFields minFields, OffsetFields maxFields, UnicodeString& result); /** @@ -1012,7 +1012,7 @@ private: * @param maxFields The maximum Fields to be parsed * @return Parsed offset, 0 or positive number. */ - static int32_t parseAsciiOffsetFields(const UnicodeString& text, ParsePosition& pos, UChar sep, + static int32_t parseAsciiOffsetFields(const UnicodeString& text, ParsePosition& pos, char16_t sep, OffsetFields minFields, OffsetFields maxFields); /** From 8c7d1b2deb2d86a023ce2d03ca14754fdd5dd667 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 1 Mar 2017 23:58:37 +0000 Subject: [PATCH 09/26] ICU-12992 disable U_ALIASING_BARRIER for now X-SVN-Rev: 39722 --- icu4c/source/common/unicode/char16ptr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu4c/source/common/unicode/char16ptr.h b/icu4c/source/common/unicode/char16ptr.h index 672fa46ed27..d58426a0296 100644 --- a/icu4c/source/common/unicode/char16ptr.h +++ b/icu4c/source/common/unicode/char16ptr.h @@ -25,7 +25,7 @@ U_NAMESPACE_BEGIN */ #ifdef U_ALIASING_BARRIER // Use the predefined value. -#elif defined(__clang__) || defined(__GNUC__) +#elif 0 // TODO: only some versions of clang?? 
defined(__clang__) || defined(__GNUC__) # define U_ALIASING_BARRIER(ptr) asm volatile("" : "+rm"(ptr)) #endif From 5a13a8ca5819bf0a14ce40c6036d4655e7739d53 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 3 Mar 2017 22:42:52 +0000 Subject: [PATCH 10/26] ICU-12992 return raw pointers not pointer wrappers X-SVN-Rev: 39729 --- icu4c/source/common/ucurr.cpp | 2 +- icu4c/source/common/unicode/char16ptr.h | 2 + icu4c/source/common/unicode/unistr.h | 8 ++-- icu4c/source/common/unistr.cpp | 4 +- icu4c/source/extra/uconv/uconv.cpp | 42 +++++++++---------- icu4c/source/i18n/collationfastlatinbuilder.h | 2 +- icu4c/source/i18n/dtfmtsym.cpp | 2 +- icu4c/source/i18n/numfmt.cpp | 2 +- icu4c/source/i18n/smpdtfmt.cpp | 4 +- icu4c/source/i18n/unicode/curramt.h | 4 +- icu4c/source/i18n/unicode/currunit.h | 4 +- icu4c/source/i18n/unicode/dtfmtsym.h | 2 +- icu4c/source/i18n/unicode/numfmt.h | 2 +- icu4c/source/i18n/unum.cpp | 2 +- icu4c/source/test/intltest/dtfmtrtts.cpp | 4 +- icu4c/source/test/intltest/dtfmttst.cpp | 2 +- icu4c/source/test/intltest/intltest.cpp | 2 +- icu4c/source/test/intltest/measfmttest.cpp | 2 +- icu4c/source/test/intltest/numfmtst.cpp | 8 ++-- icu4c/source/test/intltest/tsmthred.cpp | 4 +- icu4c/source/test/intltest/ustrtest.cpp | 4 +- icu4c/source/tools/genrb/reslist.cpp | 4 +- 22 files changed, 56 insertions(+), 56 deletions(-) diff --git a/icu4c/source/common/ucurr.cpp b/icu4c/source/common/ucurr.cpp index 8cc817ade9e..885ca3a9221 100644 --- a/icu4c/source/common/ucurr.cpp +++ b/icu4c/source/common/ucurr.cpp @@ -1031,7 +1031,7 @@ collectCurrencyNames(const char* locale, while ((symbol = iter.next()) != NULL) { (*currencySymbols)[*total_currency_symbol_count].IsoCode = iso; (*currencySymbols)[*total_currency_symbol_count].currencyName = - const_cast(symbol->getBuffer().get()); + const_cast(symbol->getBuffer()); (*currencySymbols)[*total_currency_symbol_count].flag = 0; (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = 
symbol->length(); } diff --git a/icu4c/source/common/unicode/char16ptr.h b/icu4c/source/common/unicode/char16ptr.h index d58426a0296..1d75f694f31 100644 --- a/icu4c/source/common/unicode/char16ptr.h +++ b/icu4c/source/common/unicode/char16ptr.h @@ -76,6 +76,8 @@ public: * @draft ICU 59 */ operator char16_t *() const { return get(); } + // TODO: do we need output conversion and other operator overloads + // if we do not change return values to pointer wrappers? /** * uint16_t pointer access via type conversion (e.g., static_cast). * @draft ICU 59 diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 4f1f824e9cb..3de890666aa 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -2859,7 +2859,7 @@ public: * @see getTerminatedBuffer() * @stable ICU 2.0 */ - Char16Ptr getBuffer(int32_t minCapacity); + char16_t *getBuffer(int32_t minCapacity); /** * Release a read/write buffer on a UnicodeString object with an @@ -2913,7 +2913,7 @@ public: * @see getTerminatedBuffer() * @stable ICU 2.0 */ - inline ConstChar16Ptr getBuffer() const; + inline const char16_t *getBuffer() const; /** * Get a read-only pointer to the internal buffer, @@ -2948,7 +2948,7 @@ public: * @see getBuffer() * @stable ICU 2.2 */ - ConstChar16Ptr getTerminatedBuffer(); + const char16_t *getTerminatedBuffer(); //======================================== // Constructors @@ -3940,7 +3940,7 @@ UnicodeString::isBufferWritable() const (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); } -inline ConstChar16Ptr +inline const char16_t * UnicodeString::getBuffer() const { if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { return nullptr; diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index 3657d15e0f3..29ea82df7f1 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -1218,7 +1218,7 @@ UnicodeString::unBogus() { } } -ConstChar16Ptr +const char16_t 
* UnicodeString::getTerminatedBuffer() { if(!isWritable()) { return nullptr; @@ -1716,7 +1716,7 @@ UnicodeString::doHashCode() const // External Buffer //======================================== -Char16Ptr +char16_t * UnicodeString::getBuffer(int32_t minCapacity) { if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer; diff --git a/icu4c/source/extra/uconv/uconv.cpp b/icu4c/source/extra/uconv/uconv.cpp index 6aad36b5a9c..3bc807c819d 100644 --- a/icu4c/source/extra/uconv/uconv.cpp +++ b/icu4c/source/extra/uconv/uconv.cpp @@ -290,7 +290,7 @@ static int printConverters(const char *pname, const char *lookfor, UnicodeString str(name, ""); putchar('\t'); - u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer().get(), + u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(), u_wmsg_errorName(err)); goto error_cleanup; } else { @@ -304,7 +304,7 @@ static int printConverters(const char *pname, const char *lookfor, if (U_FAILURE(err)) { UnicodeString str(name, ""); putchar('\t'); - u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer().get(), + u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(), u_wmsg_errorName(err)); goto error_cleanup; } @@ -627,7 +627,7 @@ ConvertFile::convertFile(const char *pname, UnicodeString str2(strerror(errno), ""); str2.append((UChar32) 0); initMsg(pname); - u_wmsg(stderr, "cantOpenInputF", str1.getBuffer().get(), str2.getBuffer().get()); + u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer()); return FALSE; } closeFile = TRUE; @@ -672,10 +672,10 @@ ConvertFile::convertFile(const char *pname, UChar linebuf[20], offsetbuf[20]; uprv_itou(linebuf, 20, parse.line, 10, 0); uprv_itou(offsetbuf, 20, parse.offset, 10, 0); - u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer().get(), + u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(), u_wmsg_errorName(err), linebuf, offsetbuf); } else { - u_wmsg(stderr, "cantCreateTranslit", 
str.getTerminatedBuffer().get(), + u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(), u_wmsg_errorName(err)); } @@ -698,7 +698,7 @@ ConvertFile::convertFile(const char *pname, if (U_FAILURE(err)) { UnicodeString str(fromcpage, ""); initMsg(pname); - u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer().get(), + u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(), u_wmsg_errorName(err)); goto error_exit; } @@ -713,7 +713,7 @@ ConvertFile::convertFile(const char *pname, if (U_FAILURE(err)) { UnicodeString str(tocpage, ""); initMsg(pname); - u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer().get(), + u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(), u_wmsg_errorName(err)); goto error_exit; } @@ -742,7 +742,7 @@ ConvertFile::convertFile(const char *pname, if (ferror(infile) != 0) { UnicodeString str(strerror(errno)); initMsg(pname); - u_wmsg(stderr, "cantRead", str.getTerminatedBuffer().get()); + u_wmsg(stderr, "cantRead", str.getTerminatedBuffer()); goto error_exit; } @@ -819,8 +819,8 @@ ConvertFile::convertFile(const char *pname, initMsg(pname); u_wmsg(stderr, "problemCvtToU", - UnicodeString(pos, length, "").getTerminatedBuffer().get(), - str.getTerminatedBuffer().get(), + UnicodeString(pos, length, "").getTerminatedBuffer(), + str.getTerminatedBuffer(), u_wmsg_errorName(err)); willexit = TRUE; @@ -1008,10 +1008,10 @@ ConvertFile::convertFile(const char *pname, initMsg(pname); u_wmsg(stderr, errtag, - UnicodeString(pos, length, "").getTerminatedBuffer().get(), - str.getTerminatedBuffer().get(), + UnicodeString(pos, length, "").getTerminatedBuffer(), + str.getTerminatedBuffer(), u_wmsg_errorName(err)); - u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer().get()); + u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer()); willexit = TRUE; err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. 
*/ @@ -1026,7 +1026,7 @@ ConvertFile::convertFile(const char *pname, if (wr != outlen) { UnicodeString str(strerror(errno)); initMsg(pname); - u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer().get()); + u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer()); willexit = TRUE; } @@ -1075,7 +1075,7 @@ static void usage(const char *pname, int ecode) { UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1)); UnicodeString mname(msg, msgLen + 1); - res = u_wmsg(fp, "usage", mname.getBuffer().get(), upname.getBuffer().get()); + res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer()); if (!ecode) { if (!res) { fputc('\n', fp); @@ -1184,7 +1184,7 @@ main(int argc, char **argv) initMsg(pname); UnicodeString str(*iter); initMsg(pname); - u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer().get()); + u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer()); return 3; } } else { @@ -1212,7 +1212,7 @@ main(int argc, char **argv) if (U_FAILURE(e) || !printName) { UnicodeString str(*iter); initMsg(pname); - u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer().get()); + u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer()); return 2; } } else @@ -1240,7 +1240,7 @@ main(int argc, char **argv) } else { UnicodeString str(*iter); initMsg(pname); - u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer().get()); + u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); return 4; } } else { @@ -1256,7 +1256,7 @@ main(int argc, char **argv) } else { UnicodeString str(*iter); initMsg(pname); - u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer().get()); + u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); return 4; } } else { @@ -1276,7 +1276,7 @@ main(int argc, char **argv) } else { UnicodeString str(*iter); initMsg(pname); - u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer().get()); + u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); return 4; } } else { @@ -1329,7 +1329,7 @@ main(int argc, char **argv) 
UnicodeString str2(strerror(errno), ""); initMsg(pname); u_wmsg(stderr, "cantCreateOutputF", - str1.getBuffer().get(), str2.getBuffer().get()); + str1.getBuffer(), str2.getBuffer()); return 1; } } else { diff --git a/icu4c/source/i18n/collationfastlatinbuilder.h b/icu4c/source/i18n/collationfastlatinbuilder.h index ad64d03b75f..8b63b86815f 100644 --- a/icu4c/source/i18n/collationfastlatinbuilder.h +++ b/icu4c/source/i18n/collationfastlatinbuilder.h @@ -37,7 +37,7 @@ public: UBool forData(const CollationData &data, UErrorCode &errorCode); const uint16_t *getTable() const { - return result.getBuffer(); + return reinterpret_cast(result.getBuffer()); } int32_t lengthOfTable() const { return result.length(); } diff --git a/icu4c/source/i18n/dtfmtsym.cpp b/icu4c/source/i18n/dtfmtsym.cpp index 960971980bc..6dd4380a4df 100644 --- a/icu4c/source/i18n/dtfmtsym.cpp +++ b/icu4c/source/i18n/dtfmtsym.cpp @@ -1368,7 +1368,7 @@ DateFormatSymbols::setZoneStrings(const UnicodeString* const *strings, int32_t r //------------------------------------------------------ -ConstChar16Ptr U_EXPORT2 +const char16_t * U_EXPORT2 DateFormatSymbols::getPatternUChars(void) { return gPatternChars; diff --git a/icu4c/source/i18n/numfmt.cpp b/icu4c/source/i18n/numfmt.cpp index e84f9fdd8d2..ea9aed1a359 100644 --- a/icu4c/source/i18n/numfmt.cpp +++ b/icu4c/source/i18n/numfmt.cpp @@ -1188,7 +1188,7 @@ void NumberFormat::setCurrency(const UChar* theCurrency, UErrorCode& ec) { } } -ConstChar16Ptr NumberFormat::getCurrency() const { +const char16_t* NumberFormat::getCurrency() const { return fCurrency; } diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp index 69a13450a68..3c0670446b3 100644 --- a/icu4c/source/i18n/smpdtfmt.cpp +++ b/icu4c/source/i18n/smpdtfmt.cpp @@ -3789,7 +3789,7 @@ SimpleDateFormat::toLocalizedPattern(UnicodeString& result, UErrorCode& status) const { translatePattern(fPattern, result, - UnicodeString(DateFormatSymbols::getPatternUChars().get()), + 
UnicodeString(DateFormatSymbols::getPatternUChars()), fSymbols->fLocalPatternChars, status); return result; } @@ -3811,7 +3811,7 @@ SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern, { translatePattern(pattern, fPattern, fSymbols->fLocalPatternChars, - UnicodeString(DateFormatSymbols::getPatternUChars().get()), status); + UnicodeString(DateFormatSymbols::getPatternUChars()), status); } //---------------------------------------------------------------------- diff --git a/icu4c/source/i18n/unicode/curramt.h b/icu4c/source/i18n/unicode/curramt.h index a645667d9d0..e321df861d2 100644 --- a/icu4c/source/i18n/unicode/curramt.h +++ b/icu4c/source/i18n/unicode/curramt.h @@ -115,14 +115,14 @@ class U_I18N_API CurrencyAmount: public Measure { * Return the ISO currency code of this object. * @stable ICU 3.0 */ - inline ConstChar16Ptr getISOCurrency() const; + inline const char16_t* getISOCurrency() const; }; inline const CurrencyUnit& CurrencyAmount::getCurrency() const { return (const CurrencyUnit&) getUnit(); } -inline ConstChar16Ptr CurrencyAmount::getISOCurrency() const { +inline const char16_t* CurrencyAmount::getISOCurrency() const { return getCurrency().getISOCurrency(); } diff --git a/icu4c/source/i18n/unicode/currunit.h b/icu4c/source/i18n/unicode/currunit.h index cefea536eb0..fd0f9f2bcce 100644 --- a/icu4c/source/i18n/unicode/currunit.h +++ b/icu4c/source/i18n/unicode/currunit.h @@ -93,7 +93,7 @@ class U_I18N_API CurrencyUnit: public MeasureUnit { * Return the ISO currency code of this object. 
* @stable ICU 3.0 */ - inline ConstChar16Ptr getISOCurrency() const; + inline const char16_t* getISOCurrency() const; private: /** @@ -102,7 +102,7 @@ class U_I18N_API CurrencyUnit: public MeasureUnit { char16_t isoCode[4]; }; -inline ConstChar16Ptr CurrencyUnit::getISOCurrency() const { +inline const char16_t* CurrencyUnit::getISOCurrency() const { return isoCode; } diff --git a/icu4c/source/i18n/unicode/dtfmtsym.h b/icu4c/source/i18n/unicode/dtfmtsym.h index e5f4d5c36c8..0cf3ce20ac6 100644 --- a/icu4c/source/i18n/unicode/dtfmtsym.h +++ b/icu4c/source/i18n/unicode/dtfmtsym.h @@ -566,7 +566,7 @@ public: * @return the non-localized date-time pattern characters * @stable ICU 2.0 */ - static ConstChar16Ptr U_EXPORT2 getPatternUChars(void); + static const char16_t * U_EXPORT2 getPatternUChars(void); /** * Gets localized date-time pattern characters. For example: 'u', 't', etc. diff --git a/icu4c/source/i18n/unicode/numfmt.h b/icu4c/source/i18n/unicode/numfmt.h index 6d83f43f520..b8dec36be64 100644 --- a/icu4c/source/i18n/unicode/numfmt.h +++ b/icu4c/source/i18n/unicode/numfmt.h @@ -940,7 +940,7 @@ public: * the currency in use, or a pointer to the empty string. 
* @stable ICU 2.6 */ - ConstChar16Ptr getCurrency() const; + const char16_t* getCurrency() const; /** * Set a particular UDisplayContext value in the formatter, such as diff --git a/icu4c/source/i18n/unum.cpp b/icu4c/source/i18n/unum.cpp index 458a79c7fde..b8d26612ff2 100644 --- a/icu4c/source/i18n/unum.cpp +++ b/icu4c/source/i18n/unum.cpp @@ -609,7 +609,7 @@ unum_getTextAttribute(const UNumberFormat* fmt, break; case UNUM_CURRENCY_CODE: - res = UnicodeString(df->getCurrency().get()); + res = UnicodeString(df->getCurrency()); break; default: diff --git a/icu4c/source/test/intltest/dtfmtrtts.cpp b/icu4c/source/test/intltest/dtfmtrtts.cpp index 32f4715df37..5bac60ed7db 100644 --- a/icu4c/source/test/intltest/dtfmtrtts.cpp +++ b/icu4c/source/test/intltest/dtfmtrtts.cpp @@ -121,9 +121,7 @@ void DateFormatRoundTripTest::TestCentury() */ //if (date[1] != date[2] || result[0] != result[1]) { if (date[1] != date[2]) { - errln("Round trip failure: \"%S\" (%f), \"%S\" (%f)", - static_cast(result[0].getBuffer()), date[1], - static_cast(result[1].getBuffer()), date[2]); + errln("Round trip failure: \"%S\" (%f), \"%S\" (%f)", result[0].getBuffer(), date[1], result[1].getBuffer(), date[2]); } } diff --git a/icu4c/source/test/intltest/dtfmttst.cpp b/icu4c/source/test/intltest/dtfmttst.cpp index f9461739257..63d127346da 100644 --- a/icu4c/source/test/intltest/dtfmttst.cpp +++ b/icu4c/source/test/intltest/dtfmttst.cpp @@ -514,7 +514,7 @@ void DateFormatTest::TestFieldPosition() { // local pattern chars data is not longer loaded // from icu locale bundle assertEquals("patternChars", PATTERN_CHARS, rootSyms.getLocalPatternChars(buf)); - assertEquals("patternChars", PATTERN_CHARS, DateFormatSymbols::getPatternUChars().get()); + assertEquals("patternChars", PATTERN_CHARS, DateFormatSymbols::getPatternUChars()); assertTrue("DATEFORMAT_FIELD_NAMES", DATEFORMAT_FIELD_NAMES_LENGTH == UDAT_FIELD_COUNT); #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR assertTrue("Data", UDAT_FIELD_COUNT == 
uprv_strlen(PATTERN_CHARS)); diff --git a/icu4c/source/test/intltest/intltest.cpp b/icu4c/source/test/intltest/intltest.cpp index bfefe3f8570..ab5ce8c1272 100644 --- a/icu4c/source/test/intltest/intltest.cpp +++ b/icu4c/source/test/intltest/intltest.cpp @@ -187,7 +187,7 @@ UnicodeString _toString(const Formattable& f) { case Formattable::kObject: { const CurrencyAmount* c = dynamic_cast(f.getObject()); if (c != NULL) { - s = _toString(c->getNumber()) + " " + UnicodeString(c->getISOCurrency().get()); + s = _toString(c->getNumber()) + " " + UnicodeString(c->getISOCurrency()); } else { s = UnicodeString("Unknown UObject"); } diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 10fa1c74950..f34d8397c01 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -1856,7 +1856,7 @@ void MeasureFormatTest::TestCurrencies() { u_uastrcpy(USD, "USD"); UErrorCode status = U_ZERO_ERROR; CurrencyAmount USD_1(1.0, USD, status); - assertEquals("Currency Code", USD, USD_1.getISOCurrency().get()); + assertEquals("Currency Code", USD, USD_1.getISOCurrency()); CurrencyAmount USD_2(2.0, USD, status); CurrencyAmount USD_NEG_1(-1.0, USD, status); if (!assertSuccess("Error creating currencies", status)) { diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index ea67aa2d47c..a8553f57802 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -440,7 +440,7 @@ UBool NumberFormatTestDataDriven::isParseCurrencyPass( } return TRUE; } - UnicodeString currStr(currAmt->getISOCurrency().get()); + UnicodeString currStr(currAmt->getISOCurrency()); Formattable resultFormattable(currAmt->getNumber()); UnicodeString resultStr(UnicodeString::fromUTF8(resultFormattable.getDecimalNumber(status))); if (tuple.output == "fail") { @@ -3168,7 +3168,7 @@ void NumberFormatTest::expectParseCurrency(const NumberFormat &fmt, 
const UChar* uprv_strcpy(theOperation, theInfo); uprv_strcat(theOperation, ", check currency:"); - assertEquals(theOperation, currency, currencyAmount->getISOCurrency().get()); + assertEquals(theOperation, currency, currencyAmount->getISOCurrency()); } @@ -3763,14 +3763,14 @@ NumberFormatTest::TestCurrencyFormatForMixParsing() { } else if (result.getType() != Formattable::kObject || (curramt = dynamic_cast(result.getObject())) == NULL || curramt->getNumber().getDouble() != 1234.56 || - UnicodeString(curramt->getISOCurrency().get()).compare(ISO_CURRENCY_USD) + UnicodeString(curramt->getISOCurrency()).compare(ISO_CURRENCY_USD) ) { errln("FAIL: getCurrencyFormat of default locale (en_US) failed roundtripping the number "); if (curramt->getNumber().getDouble() != 1234.56) { errln((UnicodeString)"wong number, expect: 1234.56" + ", got: " + curramt->getNumber().getDouble()); } if (curramt->getISOCurrency() != ISO_CURRENCY_USD) { - errln((UnicodeString)"wong currency, expect: USD" + ", got: " + curramt->getISOCurrency().get()); + errln((UnicodeString)"wong currency, expect: USD" + ", got: " + curramt->getISOCurrency()); } } } diff --git a/icu4c/source/test/intltest/tsmthred.cpp b/icu4c/source/test/intltest/tsmthred.cpp index 5cff8ee51d0..036d5e1d355 100644 --- a/icu4c/source/test/intltest/tsmthred.cpp +++ b/icu4c/source/test/intltest/tsmthred.cpp @@ -547,7 +547,7 @@ UBool ThreadSafeFormat::doStuff(int32_t offset, UnicodeString &appendErr, UError appendErr.append("fFormat currency != ") .append(kUSD) .append(", =") - .append(fFormat->getCurrency().get()) + .append(fFormat->getCurrency()) .append("! "); okay = FALSE; } @@ -556,7 +556,7 @@ UBool ThreadSafeFormat::doStuff(int32_t offset, UnicodeString &appendErr, UError appendErr.append("gFormat currency != ") .append(kUSD) .append(", =") - .append(gSharedData->fFormat->getCurrency().get()) + .append(gSharedData->fFormat->getCurrency()) .append("! 
"); okay = FALSE; } diff --git a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp index d1f2bfe6ff6..cdd519b9bb6 100644 --- a/icu4c/source/test/intltest/ustrtest.cpp +++ b/icu4c/source/test/intltest/ustrtest.cpp @@ -1086,7 +1086,7 @@ UnicodeStringTest::TestMiscellaneous() } // test releaseBuffer() with a NUL-terminated buffer - test1.getBuffer(20).get()[2]=0; + test1.getBuffer(20)[2]=0; test1.releaseBuffer(); // implicit -1 if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) { errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString"); @@ -1558,7 +1558,7 @@ UnicodeStringTest::TestBogus() { // writable alias to another string's buffer: very bad idea, just convenient for this test test3.setToBogus(); if(!test3.isBogus() || - test3.setTo(const_cast(test1.getBuffer().get()), + test3.setTo(const_cast(test1.getBuffer()), test1.length(), test1.getCapacity()).isBogus() || test3!=test1) { errln("bogus.setTo(writable alias) failed"); diff --git a/icu4c/source/tools/genrb/reslist.cpp b/icu4c/source/tools/genrb/reslist.cpp index 8c8ed4162da..abeaf0e49fc 100644 --- a/icu4c/source/tools/genrb/reslist.cpp +++ b/icu4c/source/tools/genrb/reslist.cpp @@ -1031,7 +1031,7 @@ void SRBRoot::write(const char *outputDir, const char *outputPkg, if (f16BitUnits.length() <= 1) { // no pool strings to checksum } else if (U_IS_BIG_ENDIAN) { - checksum = computeCRC(reinterpret_cast(f16BitUnits.getBuffer().get()), + checksum = computeCRC(reinterpret_cast(f16BitUnits.getBuffer()), (uint32_t)f16BitUnits.length() * 2, checksum); } else { // Swap to big-endian so we get the same checksum on all platforms @@ -1039,7 +1039,7 @@ void SRBRoot::write(const char *outputDir, const char *outputPkg, UnicodeString s(f16BitUnits); s.append((UChar)1); // Ensure that we own this buffer. 
assert(!s.isBogus()); - uint16_t *p = const_cast(static_cast(s.getBuffer())); + uint16_t *p = const_cast(reinterpret_cast(s.getBuffer())); for (int32_t count = f16BitUnits.length(); count > 0; --count) { uint16_t x = *p; *p++ = (uint16_t)((x << 8) | (x >> 8)); From 568486c964aedd75e64b9cdc2b3c941a4caee508 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 3 Mar 2017 23:55:13 +0000 Subject: [PATCH 11/26] ICU-12992 remove Char16Ptr output conversion overloads, remove wrapper-as-pointer operators, some cleanup X-SVN-Rev: 39730 --- icu4c/source/common/Makefile.in | 2 +- icu4c/source/common/char16ptr.cpp | 13 -- icu4c/source/common/common.vcxproj | 1 - icu4c/source/common/common.vcxproj.filters | 3 - icu4c/source/common/unicode/char16ptr.h | 138 +++------------------ icu4c/source/common/unicode/unistr.h | 12 +- icu4c/source/common/unistr.cpp | 3 +- 7 files changed, 24 insertions(+), 148 deletions(-) delete mode 100644 icu4c/source/common/char16ptr.cpp diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in index 5a82fbf7262..10fa8de38eb 100644 --- a/icu4c/source/common/Makefile.in +++ b/icu4c/source/common/Makefile.in @@ -95,7 +95,7 @@ bytestrie.o bytestrieiterator.o \ ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \ dictionarydata.o \ edits.o \ -char16ptr.o appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \ +appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \ utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \ unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \ normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o loadednormalizer2impl.o \ diff --git a/icu4c/source/common/char16ptr.cpp b/icu4c/source/common/char16ptr.cpp deleted file mode 100644 index 5afec2af303..00000000000 --- a/icu4c/source/common/char16ptr.cpp +++ /dev/null @@ -1,13 +0,0 
@@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// char16ptr.cpp -// created: 2017feb28 Markus W. Scherer - -#include "unicode/utypes.h" -#include "unicode/char16ptr.h" -#include "uassert.h" - -U_NAMESPACE_BEGIN - -U_NAMESPACE_END diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj index 9e760180666..33ef60e2b99 100644 --- a/icu4c/source/common/common.vcxproj +++ b/icu4c/source/common/common.vcxproj @@ -448,7 +448,6 @@ - diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters index b29fe2fc4f2..0542a8b1384 100644 --- a/icu4c/source/common/common.vcxproj.filters +++ b/icu4c/source/common/common.vcxproj.filters @@ -463,9 +463,6 @@ strings - - strings - strings diff --git a/icu4c/source/common/unicode/char16ptr.h b/icu4c/source/common/unicode/char16ptr.h index 1d75f694f31..780a1502738 100644 --- a/icu4c/source/common/unicode/char16ptr.h +++ b/icu4c/source/common/unicode/char16ptr.h @@ -30,19 +30,20 @@ U_NAMESPACE_BEGIN #endif /** - * char16_t * wrapper with implicit conversion from/to distinct but bit-compatible pointer types. + * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. * @draft ICU 59 */ class U_COMMON_API Char16Ptr final { public: /** * Copies the pointer. - * TODO: @param p ... + * @param p pointer * @draft ICU 59 */ inline Char16Ptr(char16_t *p); /** * Converts the pointer to char16_t *. + * @param p pointer to be converted * @draft ICU 59 */ inline Char16Ptr(uint16_t *p); @@ -50,12 +51,14 @@ public: /** * Converts the pointer to char16_t *. * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @param p pointer to be converted * @draft ICU 59 */ inline Char16Ptr(wchar_t *p); #endif /** * nullptr constructor. + * @param p nullptr * @draft ICU 59 */ inline Char16Ptr(std::nullptr_t p); @@ -67,59 +70,16 @@ public: /** * Pointer access. - * TODO @return ... 
+ * @return the wrapped pointer * @draft ICU 59 */ inline char16_t *get() const; /** * char16_t pointer access via type conversion (e.g., static_cast). + * @return the wrapped pointer * @draft ICU 59 */ - operator char16_t *() const { return get(); } - // TODO: do we need output conversion and other operator overloads - // if we do not change return values to pointer wrappers? - /** - * uint16_t pointer access via type conversion (e.g., static_cast). - * @draft ICU 59 - */ - inline operator uint16_t *() const; -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - /** - * wchar_t pointer access via type conversion (e.g., static_cast). - * @draft ICU 59 - */ - inline operator wchar_t *() const; -#endif - operator void *() const { return get(); } - - char16_t operator[](std::ptrdiff_t offset) const { return get()[offset]; } - - UBool operator==(const Char16Ptr &other) const { return get() == other.get(); } - UBool operator!=(const Char16Ptr &other) const { return !operator==(other); } - UBool operator==(const char16_t *other) const { return get() == other; } - UBool operator!=(const char16_t *other) const { return !operator==(other); } - UBool operator==(const uint16_t *other) const { return static_cast(*this) == other; } - UBool operator!=(const uint16_t *other) const { return !operator==(other); } -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - UBool operator==(const wchar_t *other) const { return static_cast(*this) == other; } - UBool operator!=(const wchar_t *other) const { return !operator==(other); } -#endif - UBool operator==(const std::nullptr_t null) const { return get() == null; } - UBool operator!=(const std::nullptr_t null) const { return !operator==(null); } - /** - * Comparison with 0. - * @return TRUE if the pointer is nullptr and zero==0 - * @draft ICU 59 - */ - UBool operator==(int zero) const { return get() == nullptr && zero == 0; } - /** - * Comparison with 0. 
- * @return TRUE if the pointer is not nullptr and zero==0 - * @draft ICU 59 - */ - UBool operator!=(int zero) const { return get() != nullptr && zero == 0; } - - Char16Ptr operator+(std::ptrdiff_t offset) const { return Char16Ptr(get() + offset); } + inline operator char16_t *() const { return get(); } private: Char16Ptr() = delete; @@ -154,17 +114,6 @@ Char16Ptr::~Char16Ptr() { char16_t *Char16Ptr::get() const { return p; } -Char16Ptr::operator uint16_t *() const { - U_ALIASING_BARRIER(p); - return reinterpret_cast(p); -} -#if U_SIZEOF_WCHAR_T==2 -Char16Ptr::operator wchar_t *() const { - U_ALIASING_BARRIER(p); - return reinterpret_cast(p); -} -#endif - #else Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; } @@ -177,30 +126,23 @@ Char16Ptr::~Char16Ptr() {} char16_t *Char16Ptr::get() const { return u.cp; } -Char16Ptr::operator uint16_t *() const { - return u.up; -} -#if U_SIZEOF_WCHAR_T==2 -Char16Ptr::operator wchar_t *() const { - return u.wp; -} -#endif - #endif /** - * const char16_t * wrapper with implicit conversion from/to distinct but bit-compatible pointer types. + * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. * @draft ICU 59 */ class U_COMMON_API ConstChar16Ptr final { public: /** * Copies the pointer. + * @param p pointer * @draft ICU 59 */ inline ConstChar16Ptr(const char16_t *p); /** * Converts the pointer to char16_t *. + * @param p pointer to be converted * @draft ICU 59 */ inline ConstChar16Ptr(const uint16_t *p); @@ -208,12 +150,14 @@ public: /** * Converts the pointer to char16_t *. * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @param p pointer to be converted * @draft ICU 59 */ inline ConstChar16Ptr(const wchar_t *p); #endif /** * nullptr constructor. + * @param p nullptr * @draft ICU 59 */ inline ConstChar16Ptr(const std::nullptr_t p); @@ -225,46 +169,16 @@ public: /** * Pointer access. 
+ * @return the wrapped pointer * @draft ICU 59 */ inline const char16_t *get() const; /** * char16_t pointer access via type conversion (e.g., static_cast). + * @return the wrapped pointer * @draft ICU 59 */ - operator const char16_t *() const { return get(); } - /** - * uint16_t pointer access via type conversion (e.g., static_cast). - * @draft ICU 59 - */ - inline operator const uint16_t *() const; -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - /** - * wchar_t pointer access via type conversion (e.g., static_cast). - * @draft ICU 59 - */ - inline operator const wchar_t *() const; -#endif - operator const void *() const { return get(); } - - char16_t operator[](std::ptrdiff_t offset) const { return get()[offset]; } - - UBool operator==(const ConstChar16Ptr &other) const { return get() == other.get(); } - UBool operator!=(const ConstChar16Ptr &other) const { return !operator==(other); } - UBool operator==(const char16_t *other) const { return get() == other; } - UBool operator!=(const char16_t *other) const { return !operator==(other); } - UBool operator==(const uint16_t *other) const { return static_cast(*this) == other; } - UBool operator!=(const uint16_t *other) const { return !operator==(other); } -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - UBool operator==(const wchar_t *other) const { return static_cast(*this) == other; } - UBool operator!=(const wchar_t *other) const { return !operator==(other); } -#endif - UBool operator==(const std::nullptr_t null) const { return get() == null; } - UBool operator!=(const std::nullptr_t null) const { return !operator==(null); } - UBool operator==(int zero) const { return get() == nullptr && zero == 0; } - UBool operator!=(int zero) const { return get() != nullptr && zero == 0; } - - ConstChar16Ptr operator+(std::ptrdiff_t offset) { return ConstChar16Ptr(get() + offset); } + inline operator const char16_t *() const { return get(); } private: ConstChar16Ptr() = delete; @@ -299,17 +213,6 @@ 
ConstChar16Ptr::~ConstChar16Ptr() { const char16_t *ConstChar16Ptr::get() const { return p; } -ConstChar16Ptr::operator const uint16_t *() const { - U_ALIASING_BARRIER(p); - return reinterpret_cast(p); -} -#if U_SIZEOF_WCHAR_T==2 -ConstChar16Ptr::operator const wchar_t *() const { - U_ALIASING_BARRIER(p); - return reinterpret_cast(p); -} -#endif - #else ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; } @@ -322,15 +225,6 @@ ConstChar16Ptr::~ConstChar16Ptr() {} const char16_t *ConstChar16Ptr::get() const { return u.cp; } -ConstChar16Ptr::operator const uint16_t *() const { - return u.up; -} -#if U_SIZEOF_WCHAR_T==2 -ConstChar16Ptr::operator const wchar_t *() const { - return u.wp; -} -#endif - #endif U_NAMESPACE_END diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 3de890666aa..9eedd2f52df 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -3017,7 +3017,7 @@ public: * @draft ICU 59 */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : - UnicodeString(ConstChar16Ptr(text).get()) {} + UnicodeString(ConstChar16Ptr(text)) {} #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** @@ -3032,7 +3032,7 @@ public: * @draft ICU 59 */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : - UnicodeString(ConstChar16Ptr(text).get()) {} + UnicodeString(ConstChar16Ptr(text)) {} #endif /** @@ -3065,7 +3065,7 @@ public: * @draft ICU 59 */ UnicodeString(const uint16_t *text, int32_t length) : - UnicodeString(ConstChar16Ptr(text).get(), length) {} + UnicodeString(ConstChar16Ptr(text), length) {} #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** @@ -3077,7 +3077,7 @@ public: * @draft ICU 59 */ UnicodeString(const wchar_t *text, int32_t length) : - UnicodeString(ConstChar16Ptr(text).get(), length) {} + UnicodeString(ConstChar16Ptr(text), length) {} #endif /** @@ -3144,7 +3144,7 @@ public: * @draft ICU 59 */ UnicodeString(uint16_t *buffer, int32_t buffLength, 
int32_t buffCapacity) : - UnicodeString(Char16Ptr(buffer).get(), buffLength, buffCapacity) {} + UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** @@ -3157,7 +3157,7 @@ public: * @draft ICU 59 */ UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : - UnicodeString(Char16Ptr(buffer).get(), buffLength, buffCapacity) {} + UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} #endif /** diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index 29ea82df7f1..416b909917d 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -875,11 +875,10 @@ UnicodeString::doExtract(int32_t start, } int32_t -UnicodeString::extract(Char16Ptr destPtr, int32_t destCapacity, +UnicodeString::extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const { int32_t len = length(); if(U_SUCCESS(errorCode)) { - UChar *dest = destPtr; if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { errorCode=U_ILLEGAL_ARGUMENT_ERROR; } else { From 85e291a4ec2ec49ce34b892c13d679a2c44bc9f9 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Mon, 6 Mar 2017 22:47:58 +0000 Subject: [PATCH 12/26] ICU-12992 new version of the aliasing barrier X-SVN-Rev: 39739 --- icu4c/source/common/unicode/char16ptr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icu4c/source/common/unicode/char16ptr.h b/icu4c/source/common/unicode/char16ptr.h index 780a1502738..d484cfdf8ac 100644 --- a/icu4c/source/common/unicode/char16ptr.h +++ b/icu4c/source/common/unicode/char16ptr.h @@ -25,8 +25,8 @@ U_NAMESPACE_BEGIN */ #ifdef U_ALIASING_BARRIER // Use the predefined value. -#elif 0 // TODO: only some versions of clang?? 
defined(__clang__) || defined(__GNUC__) -# define U_ALIASING_BARRIER(ptr) asm volatile("" : "+rm"(ptr)) +#elif defined(__clang__) || defined(__GNUC__) +# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory") #endif /** From b5b52ad027462d11000e1cbf7a4a5a597d6833b2 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 8 Mar 2017 01:07:20 +0000 Subject: [PATCH 13/26] ICU-12992 make tools & tests work with configured UChar=uint16_t X-SVN-Rev: 39742 --- icu4c/source/common/normalizer2impl.h | 2 +- icu4c/source/common/uinvchar.h | 6 +++++- icu4c/source/common/unicode/umachine.h | 8 ++++++++ icu4c/source/common/unicode/unistr.h | 17 ++++++++++++----- icu4c/source/common/unistr.cpp | 3 ++- icu4c/source/test/iotest/stream.cpp | 11 +++++++++-- icu4c/source/tools/ctestfw/datamap.cpp | 3 ++- icu4c/source/tools/gennorm2/n2builder.cpp | 10 +++++----- icu4c/source/tools/genrb/reslist.cpp | 2 +- icu4c/source/tools/genrb/reslist.h | 3 ++- icu4c/source/tools/genrb/wrtxml.cpp | 5 +++-- icu4c/source/tools/toolutil/dbgutil.cpp | 5 +++-- icu4c/source/tools/toolutil/ppucd.cpp | 5 +++-- icu4c/source/tools/toolutil/toolutil.h | 14 ++++++++++++++ icu4c/source/tools/toolutil/xmlparser.cpp | 5 +++-- 15 files changed, 73 insertions(+), 26 deletions(-) diff --git a/icu4c/source/common/normalizer2impl.h b/icu4c/source/common/normalizer2impl.h index 6dba0eab5c4..946abee98f3 100644 --- a/icu4c/source/common/normalizer2impl.h +++ b/icu4c/source/common/normalizer2impl.h @@ -176,7 +176,7 @@ public: lastCC=0; } void copyReorderableSuffixTo(UnicodeString &s) const { - s.setTo(reorderStart, (int32_t)(limit-reorderStart)); + s.setTo(ConstChar16Ptr(reorderStart), (int32_t)(limit-reorderStart)); } private: /* diff --git a/icu4c/source/common/uinvchar.h b/icu4c/source/common/uinvchar.h index 0bb5e73d04a..2a960bdfca4 100644 --- a/icu4c/source/common/uinvchar.h +++ b/icu4c/source/common/uinvchar.h @@ -64,7 +64,11 @@ uprv_isInvariantUString(const UChar *s, int32_t length); */ U_INTERNAL 
inline UBool U_EXPORT2 uprv_isInvariantUnicodeString(const icu::UnicodeString &s) { - return uprv_isInvariantUString(s.getBuffer(), s.length()); + const char16_t *p = s.getBuffer(); +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return uprv_isInvariantUString(reinterpret_cast(p), s.length()); } #endif /* __cplusplus */ diff --git a/icu4c/source/common/unicode/umachine.h b/icu4c/source/common/unicode/umachine.h index e0fedfe9579..4c8b5c1f1e2 100644 --- a/icu4c/source/common/unicode/umachine.h +++ b/icu4c/source/common/unicode/umachine.h @@ -313,6 +313,14 @@ typedef int8_t UBool; * * @stable ICU 4.4 */ +#if 1 + // #if 1 is normal. UChar defaults to char16_t in C++. + // For configuration testing of UChar=uint16_t temporarily change this to #if 0. + // The intltest Makefile #defines UCHAR_TYPE=char16_t, + // so we only #define it to uint16_t if it is undefined so far. +#elif !defined(UCHAR_TYPE) +# define UCHAR_TYPE uint16_t +#endif #if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) // Inside the ICU library code, never configurable. diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 9eedd2f52df..835e64ff774 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -2059,7 +2059,7 @@ public: * @stable ICU 2.0 */ UnicodeString &setTo(UBool isTerminated, - const char16_t *text, + ConstChar16Ptr text, int32_t textLength); /** @@ -3495,6 +3495,13 @@ protected: virtual UChar32 getChar32At(int32_t offset) const; private: + static inline const UChar *constUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); + } + // For char* constructors. Could be made public. UnicodeString &setToUTF8(StringPiece utf8); // For extract(char*). 
@@ -4360,7 +4367,7 @@ UnicodeString::startsWith(const UnicodeString& srcText, inline UBool UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars); + srcLength = u_strlen(constUCharPtr(srcChars)); } return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; } @@ -4368,7 +4375,7 @@ UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { inline UBool UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars); + srcLength = u_strlen(constUCharPtr(srcChars)); } return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; } @@ -4391,7 +4398,7 @@ inline UBool UnicodeString::endsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars); + srcLength = u_strlen(constUCharPtr(srcChars)); } return doCompare(length() - srcLength, srcLength, srcChars, 0, srcLength) == 0; @@ -4402,7 +4409,7 @@ UnicodeString::endsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars + srcStart); + srcLength = u_strlen(constUCharPtr(srcChars + srcStart)); } return doCompare(length() - srcLength, srcLength, srcChars, srcStart, srcLength) == 0; diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index 416b909917d..1bfb71aa107 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -1258,7 +1258,7 @@ UnicodeString::getTerminatedBuffer() { // setTo() analogous to the readonly-aliasing constructor with the same signature UnicodeString & UnicodeString::setTo(UBool isTerminated, - const UChar *text, + ConstChar16Ptr textPtr, int32_t textLength) { if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) { @@ -1266,6 +1266,7 @@ UnicodeString::setTo(UBool isTerminated, return *this; } + const UChar *text = textPtr; if(text == NULL) { // treat 
as an empty string, do not alias releaseArray(); diff --git a/icu4c/source/test/iotest/stream.cpp b/icu4c/source/test/iotest/stream.cpp index 427276efb26..892e0d7d58a 100644 --- a/icu4c/source/test/iotest/stream.cpp +++ b/icu4c/source/test/iotest/stream.cpp @@ -49,6 +49,13 @@ const char C_NEW_LINE[] = {'\n',0}; #endif U_CDECL_END +inline const UChar *constUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + U_CDECL_BEGIN static void U_CALLCONV TestStream(void) { @@ -106,12 +113,12 @@ static void U_CALLCONV TestStream(void) inTestStream >> inStr >> inStr2; if (inStr.compare(thisMu) != 0) { - u_austrncpy(inStrC, inStr.getBuffer(), inStr.length()); + u_austrncpy(inStrC, constUCharPtr(inStr.getBuffer()), inStr.length()); inStrC[inStr.length()] = 0; log_err("Got: \"%s\", Expected: \"tHis\\u03BC\"\n", inStrC); } if (inStr2.compare(mu) != 0) { - u_austrncpy(inStrC, inStr.getBuffer(), inStr.length()); + u_austrncpy(inStrC, constUCharPtr(inStr.getBuffer()), inStr.length()); inStrC[inStr.length()] = 0; log_err("Got: \"%s\", Expected: \"mu\"\n", inStrC); } diff --git a/icu4c/source/tools/ctestfw/datamap.cpp b/icu4c/source/tools/ctestfw/datamap.cpp index ded0c7b609f..d85341ac40c 100644 --- a/icu4c/source/tools/ctestfw/datamap.cpp +++ b/icu4c/source/tools/ctestfw/datamap.cpp @@ -11,6 +11,7 @@ #include "unicode/datamap.h" #include "unicode/resbund.h" #include "hash.h" +#include "toolutil.h" #include DataMap::~DataMap() {} @@ -20,7 +21,7 @@ int32_t DataMap::utoi(const UnicodeString &s) const { char ch[256]; - const UChar *u = s.getBuffer(); + const UChar *u = constUCharPtr(s.getBuffer()); int32_t len = s.length(); u_UCharsToChars(u, ch, len); ch[len] = 0; /* include terminating \0 */ diff --git a/icu4c/source/tools/gennorm2/n2builder.cpp b/icu4c/source/tools/gennorm2/n2builder.cpp index 940db3b13c4..5c7c9c0a016 100644 --- a/icu4c/source/tools/gennorm2/n2builder.cpp +++ 
b/icu4c/source/tools/gennorm2/n2builder.cpp @@ -282,7 +282,7 @@ uint8_t Normalizer2DataBuilder::getCC(UChar32 c) const { static UBool isWellFormed(const UnicodeString &s) { UErrorCode errorCode=U_ZERO_ERROR; - u_strToUTF8(NULL, 0, NULL, s.getBuffer(), s.length(), &errorCode); + u_strToUTF8(NULL, 0, NULL, constUCharPtr(s.getBuffer()), s.length(), &errorCode); return U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR; } @@ -315,7 +315,7 @@ void Normalizer2DataBuilder::setRoundTripMapping(UChar32 c, const UnicodeString (int)phase, (long)c); exit(U_INVALID_FORMAT_ERROR); } - int32_t numCP=u_countChar32(m.getBuffer(), m.length()); + int32_t numCP=u_countChar32(constUCharPtr(m.getBuffer()), m.length()); if(numCP!=2) { fprintf(stderr, "error in gennorm2 phase %d: " @@ -452,7 +452,7 @@ Normalizer2DataBuilder::decompose(UChar32 start, UChar32 end, uint32_t value) { Norm &norm=norms[value]; const UnicodeString &m=*norm.mapping; UnicodeString *decomposed=NULL; - const UChar *s=m.getBuffer(); + const UChar *s=constUCharPtr(m.getBuffer()); int32_t length=m.length(); int32_t prev, i=0; UChar32 c; @@ -607,7 +607,7 @@ Normalizer2DataBuilder::reorder(Norm *p, BuilderReorderingBuffer &buffer) { if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) { return; // writeMapping() will complain about it and print the code point. } - const UChar *s=m.getBuffer(); + const UChar *s=constUCharPtr(m.getBuffer()); int32_t i=0; UChar32 c; while(i(p); +} + +inline UChar *UCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + /** * ErrorCode subclass for use in ICU command-line tools. * The destructor calls handleFailure() which calls exit(errorCode) when isFailure(). 
diff --git a/icu4c/source/tools/toolutil/xmlparser.cpp b/icu4c/source/tools/toolutil/xmlparser.cpp index 3fbf1a96f3c..baf9a73bc3e 100644 --- a/icu4c/source/tools/toolutil/xmlparser.cpp +++ b/icu4c/source/tools/toolutil/xmlparser.cpp @@ -21,6 +21,7 @@ #include "unicode/ucnv.h" #include "unicode/regex.h" #include "filestrm.h" +#include "toolutil.h" #include "xmlparser.h" #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION @@ -209,7 +210,7 @@ UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) { goto exit; } - buffer=src.getBuffer(bytesLength); + buffer=UCharPtr(src.getBuffer(bytesLength)); if(buffer==NULL) { // unexpected failure to reserve some string capacity errorCode=U_MEMORY_ALLOCATION_ERROR; @@ -278,7 +279,7 @@ UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) { pb=bytes; for(;;) { length=src.length(); - buffer=src.getBuffer(capacity); + buffer=UCharPtr(src.getBuffer(capacity)); if(buffer==NULL) { // unexpected failure to reserve some string capacity errorCode=U_MEMORY_ALLOCATION_ERROR; From a8bc4d76826c0f9221180a7095a1c219fca32871 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 8 Mar 2017 05:59:25 +0000 Subject: [PATCH 14/26] ICU-12992 make the old normlzr.h work with configured UChar=uint16_t, and do not include it from coll.h any more (it only needs unorm.h) X-SVN-Rev: 39744 --- icu4c/source/common/unicode/normlzr.h | 10 ++++++++-- icu4c/source/i18n/coleitr.cpp | 1 + icu4c/source/i18n/unicode/coll.h | 4 ++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/icu4c/source/common/unicode/normlzr.h b/icu4c/source/common/unicode/normlzr.h index 31af07b7cc1..02ef13f2fd3 100644 --- a/icu4c/source/common/unicode/normlzr.h +++ b/icu4c/source/common/unicode/normlzr.h @@ -795,9 +795,15 @@ inline int32_t Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options, UErrorCode &errorCode) { + const char16_t *p1 = s1.getBuffer(); + const char16_t *p2 = s2.getBuffer(); +#ifdef 
U_ALIASING_BARRIER + U_ALIASING_BARRIER(p1); + U_ALIASING_BARRIER(p2); +#endif // all argument checking is done in unorm_compare - return unorm_compare(s1.getBuffer(), s1.length(), - s2.getBuffer(), s2.length(), + return unorm_compare(reinterpret_cast(p1), s1.length(), + reinterpret_cast(p2), s2.length(), options, &errorCode); } diff --git a/icu4c/source/i18n/coleitr.cpp b/icu4c/source/i18n/coleitr.cpp index 5668097a8ce..64d3ab4d2bf 100644 --- a/icu4c/source/i18n/coleitr.cpp +++ b/icu4c/source/i18n/coleitr.cpp @@ -29,6 +29,7 @@ #if !UCONFIG_NO_COLLATION +#include "unicode/chariter.h" #include "unicode/coleitr.h" #include "unicode/tblcoll.h" #include "unicode/ustring.h" diff --git a/icu4c/source/i18n/unicode/coll.h b/icu4c/source/i18n/unicode/coll.h index a53df5deebd..7e467df80e0 100644 --- a/icu4c/source/i18n/unicode/coll.h +++ b/icu4c/source/i18n/unicode/coll.h @@ -58,7 +58,7 @@ #include "unicode/uobject.h" #include "unicode/ucol.h" -#include "unicode/normlzr.h" +#include "unicode/unorm.h" #include "unicode/locid.h" #include "unicode/uniset.h" #include "unicode/umisc.h" @@ -158,7 +158,7 @@ class CollationKey; * @see CollationKey * @see CollationElementIterator * @see Locale -* @see Normalizer +* @see Normalizer2 * @version 2.0 11/15/01 */ From 14dbfd1d169704f859e7a6690823707e08e12723 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 8 Mar 2017 21:07:27 +0000 Subject: [PATCH 15/26] ICU-12992 revert new C++ API (CaseMap) to raw pointers X-SVN-Rev: 39746 --- icu4c/source/common/unicode/casemap.h | 16 ++++++++-------- icu4c/source/common/ustr_titlecase_brkiter.cpp | 4 ++-- icu4c/source/common/ustrcase.cpp | 4 ++-- icu4c/source/common/ustrcase_locale.cpp | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/icu4c/source/common/unicode/casemap.h b/icu4c/source/common/unicode/casemap.h index 8401867b8ab..e0e4aafcf90 100644 --- a/icu4c/source/common/unicode/casemap.h +++ b/icu4c/source/common/unicode/casemap.h @@ -60,8 +60,8 @@ public: */ 
static int32_t toLower( const char *locale, uint32_t options, - ConstChar16Ptr src, int32_t srcLength, - Char16Ptr dest, int32_t destCapacity, Edits *edits, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); /** @@ -94,8 +94,8 @@ public: */ static int32_t toUpper( const char *locale, uint32_t options, - ConstChar16Ptr src, int32_t srcLength, - Char16Ptr dest, int32_t destCapacity, Edits *edits, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); #if !UCONFIG_NO_BREAK_ITERATION @@ -141,8 +141,8 @@ public: */ static int32_t toTitle( const char *locale, uint32_t options, BreakIterator *iter, - ConstChar16Ptr src, int32_t srcLength, - Char16Ptr dest, int32_t destCapacity, Edits *edits, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); #endif // UCONFIG_NO_BREAK_ITERATION @@ -181,8 +181,8 @@ public: */ static int32_t fold( uint32_t options, - ConstChar16Ptr src, int32_t srcLength, - Char16Ptr dest, int32_t destCapacity, Edits *edits, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); private: diff --git a/icu4c/source/common/ustr_titlecase_brkiter.cpp b/icu4c/source/common/ustr_titlecase_brkiter.cpp index ac46bc9359f..0b2ba02064b 100644 --- a/icu4c/source/common/ustr_titlecase_brkiter.cpp +++ b/icu4c/source/common/ustr_titlecase_brkiter.cpp @@ -66,8 +66,8 @@ U_NAMESPACE_BEGIN int32_t CaseMap::toTitle( const char *locale, uint32_t options, BreakIterator *iter, - ConstChar16Ptr src, int32_t srcLength, - Char16Ptr dest, int32_t destCapacity, Edits *edits, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode) { LocalPointer ownedIter; if(iter==NULL) { diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp index 
d0bdd1511ab..0e38a42e103 100644 --- a/icu4c/source/common/ustrcase.cpp +++ b/icu4c/source/common/ustrcase.cpp @@ -1198,8 +1198,8 @@ U_NAMESPACE_BEGIN int32_t CaseMap::fold( uint32_t options, - ConstChar16Ptr src, int32_t srcLength, - Char16Ptr dest, int32_t destCapacity, Edits *edits, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode) { return ustrcase_map( UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL diff --git a/icu4c/source/common/ustrcase_locale.cpp b/icu4c/source/common/ustrcase_locale.cpp index 0c6d095d367..2ecd24f03ec 100644 --- a/icu4c/source/common/ustrcase_locale.cpp +++ b/icu4c/source/common/ustrcase_locale.cpp @@ -69,8 +69,8 @@ U_NAMESPACE_BEGIN int32_t CaseMap::toLower( const char *locale, uint32_t options, - ConstChar16Ptr src, int32_t srcLength, - Char16Ptr dest, int32_t destCapacity, Edits *edits, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode) { return ustrcase_map( ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL @@ -81,8 +81,8 @@ int32_t CaseMap::toLower( int32_t CaseMap::toUpper( const char *locale, uint32_t options, - ConstChar16Ptr src, int32_t srcLength, - Char16Ptr dest, int32_t destCapacity, Edits *edits, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode) { return ustrcase_map( ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL From bf17d52293a69e08c06d69351bd36706b62444f3 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 8 Mar 2017 22:08:12 +0000 Subject: [PATCH 16/26] ICU-12992 add OldUChar type to match ICU 58 UChar; add public conversion functions from char16_t * to UChar * and OldUChar * with aliasing barrier X-SVN-Rev: 39747 --- icu4c/source/common/uinvchar.h | 6 +-- icu4c/source/common/unicode/char16ptr.h | 57 +++++++++++++++++++++++ icu4c/source/common/unicode/normlzr.h | 10 +--- 
icu4c/source/common/unicode/umachine.h | 29 ++++++++++++ icu4c/source/common/unicode/unistr.h | 15 ++---- icu4c/source/test/iotest/stream.cpp | 11 +---- icu4c/source/tools/ctestfw/datamap.cpp | 4 +- icu4c/source/tools/gennorm2/n2builder.cpp | 10 ++-- icu4c/source/tools/genrb/reslist.h | 3 +- icu4c/source/tools/genrb/wrtxml.cpp | 5 +- icu4c/source/tools/toolutil/dbgutil.cpp | 5 +- icu4c/source/tools/toolutil/ppucd.cpp | 5 +- icu4c/source/tools/toolutil/toolutil.h | 14 ------ icu4c/source/tools/toolutil/xmlparser.cpp | 5 +- 14 files changed, 111 insertions(+), 68 deletions(-) diff --git a/icu4c/source/common/uinvchar.h b/icu4c/source/common/uinvchar.h index 2a960bdfca4..c4f9f88b9ad 100644 --- a/icu4c/source/common/uinvchar.h +++ b/icu4c/source/common/uinvchar.h @@ -64,11 +64,7 @@ uprv_isInvariantUString(const UChar *s, int32_t length); */ U_INTERNAL inline UBool U_EXPORT2 uprv_isInvariantUnicodeString(const icu::UnicodeString &s) { - const char16_t *p = s.getBuffer(); -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p); -#endif - return uprv_isInvariantUString(reinterpret_cast<const UChar *>(p), s.length()); + return uprv_isInvariantUString(icu::toUCharPtr(s.getBuffer()), s.length()); } #endif /* __cplusplus */ diff --git a/icu4c/source/common/unicode/char16ptr.h b/icu4c/source/common/unicode/char16ptr.h index d484cfdf8ac..2e85aa71aba 100644 --- a/icu4c/source/common/unicode/char16ptr.h +++ b/icu4c/source/common/unicode/char16ptr.h @@ -14,6 +14,7 @@ * \file * \brief C++ API: char16_t pointer wrappers with * implicit conversion to/from bit-compatible raw pointer types. + * Also conversion functions from char16_t * to UChar * and OldUChar *. */ U_NAMESPACE_BEGIN @@ -227,6 +228,62 @@ const char16_t *ConstChar16Ptr::get() const { return u.cp; } #endif +/** + * Converts from const char16_t * to const UChar *. + * Includes an aliasing barrier if available. 
+ * @param p pointer + * @return p as const UChar * + * @draft ICU 59 + */ +inline U_COMMON_API const UChar *toUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<const UChar *>(p); +} + +/** + * Converts from char16_t * to UChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as UChar * + * @draft ICU 59 + */ +inline U_COMMON_API UChar *toUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<UChar *>(p); +} + +/** + * Converts from const char16_t * to const OldUChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as const OldUChar * + * @draft ICU 59 + */ +inline U_COMMON_API const OldUChar *toOldUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<const OldUChar *>(p); +} + +/** + * Converts from char16_t * to OldUChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as OldUChar * + * @draft ICU 59 + */ +inline U_COMMON_API OldUChar *toOldUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<OldUChar *>(p); +} + U_NAMESPACE_END #endif // __CHAR16PTR_H__ diff --git a/icu4c/source/common/unicode/normlzr.h b/icu4c/source/common/unicode/normlzr.h index 02ef13f2fd3..0141058fb00 100644 --- a/icu4c/source/common/unicode/normlzr.h +++ b/icu4c/source/common/unicode/normlzr.h @@ -795,15 +795,9 @@ inline int32_t Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options, UErrorCode &errorCode) { - const char16_t *p1 = s1.getBuffer(); - const char16_t *p2 = s2.getBuffer(); -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p1); - U_ALIASING_BARRIER(p2); -#endif // all argument checking is done in unorm_compare - return unorm_compare(reinterpret_cast<const UChar *>(p1), s1.length(), - reinterpret_cast<const UChar *>(p2), s2.length(), + return unorm_compare(toUCharPtr(s1.getBuffer()), 
s1.length(), + toUCharPtr(s2.getBuffer()), s2.length(), options, &errorCode); } diff --git a/icu4c/source/common/unicode/umachine.h b/icu4c/source/common/unicode/umachine.h index 4c8b5c1f1e2..8c96c15cfb6 100644 --- a/icu4c/source/common/unicode/umachine.h +++ b/icu4c/source/common/unicode/umachine.h @@ -313,6 +313,7 @@ typedef int8_t UBool; * * @stable ICU 4.4 */ + #if 1 // #if 1 is normal. UChar defaults to char16_t in C++. // For configuration testing of UChar=uint16_t temporarily change this to #if 0. @@ -321,6 +322,7 @@ typedef int8_t UBool; #elif !defined(UCHAR_TYPE) # define UCHAR_TYPE uint16_t #endif + #if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) // Inside the ICU library code, never configurable. @@ -333,6 +335,33 @@ typedef int8_t UBool; typedef uint16_t UChar; #endif +/** + * \var OldUChar + * Default ICU 58 definition of UChar. + * A base type for UTF-16 code units and pointers. + * Unsigned 16-bit integer. + * + * Define OldUChar to be wchar_t if that is 16 bits wide. + * If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__ if the compiler provides it, or else uint16_t. + * + * This makes the definition of OldUChar platform-dependent + * but allows direct string type compatibility with platforms with + * 16-bit wchar_t types. + * + * This is how UChar was defined in ICU 58, for transition convenience. + * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. + * The current UChar responds to UCHAR_TYPE but OldUChar does not. + * + * @draft ICU 59 + */ +#if U_SIZEOF_WCHAR_T==2 + typedef wchar_t OldUChar; +#elif defined(__CHAR16_TYPE__) + typedef __CHAR16_TYPE__ OldUChar; +#else + typedef uint16_t OldUChar; +#endif + /** * Define UChar32 as a type for single Unicode code points. * UChar32 is a signed 32-bit integer (same as int32_t). 
diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 835e64ff774..2aa5d8e3a34 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -3495,13 +3495,6 @@ protected: virtual UChar32 getChar32At(int32_t offset) const; private: - static inline const UChar *constUCharPtr(const char16_t *p) { -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p); -#endif - return reinterpret_cast<const UChar *>(p); - } - // For char* constructors. Could be made public. UnicodeString &setToUTF8(StringPiece utf8); // For extract(char*). @@ -4367,7 +4360,7 @@ UnicodeString::startsWith(const UnicodeString& srcText, inline UBool UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(constUCharPtr(srcChars)); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; } @@ -4375,7 +4368,7 @@ UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { inline UBool UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(constUCharPtr(srcChars)); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; } @@ -4398,7 +4391,7 @@ inline UBool UnicodeString::endsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(constUCharPtr(srcChars)); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(length() - srcLength, srcLength, srcChars, 0, srcLength) == 0; @@ -4409,7 +4402,7 @@ UnicodeString::endsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(constUCharPtr(srcChars + srcStart)); + srcLength = u_strlen(toUCharPtr(srcChars + srcStart)); } return doCompare(length() - srcLength, srcLength, srcChars, srcStart, srcLength) == 0; diff --git 
a/icu4c/source/test/iotest/stream.cpp b/icu4c/source/test/iotest/stream.cpp index 892e0d7d58a..424d4e33af5 100644 --- a/icu4c/source/test/iotest/stream.cpp +++ b/icu4c/source/test/iotest/stream.cpp @@ -49,13 +49,6 @@ const char C_NEW_LINE[] = {'\n',0}; #endif U_CDECL_END -inline const UChar *constUCharPtr(const char16_t *p) { -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p); -#endif - return reinterpret_cast(p); -} - U_CDECL_BEGIN static void U_CALLCONV TestStream(void) { @@ -113,12 +106,12 @@ static void U_CALLCONV TestStream(void) inTestStream >> inStr >> inStr2; if (inStr.compare(thisMu) != 0) { - u_austrncpy(inStrC, constUCharPtr(inStr.getBuffer()), inStr.length()); + u_austrncpy(inStrC, toUCharPtr(inStr.getBuffer()), inStr.length()); inStrC[inStr.length()] = 0; log_err("Got: \"%s\", Expected: \"tHis\\u03BC\"\n", inStrC); } if (inStr2.compare(mu) != 0) { - u_austrncpy(inStrC, constUCharPtr(inStr.getBuffer()), inStr.length()); + u_austrncpy(inStrC, toUCharPtr(inStr.getBuffer()), inStr.length()); inStrC[inStr.length()] = 0; log_err("Got: \"%s\", Expected: \"mu\"\n", inStrC); } diff --git a/icu4c/source/tools/ctestfw/datamap.cpp b/icu4c/source/tools/ctestfw/datamap.cpp index d85341ac40c..96241a0657b 100644 --- a/icu4c/source/tools/ctestfw/datamap.cpp +++ b/icu4c/source/tools/ctestfw/datamap.cpp @@ -10,8 +10,8 @@ #include "unicode/datamap.h" #include "unicode/resbund.h" +#include "unicode/unistr.h" #include "hash.h" -#include "toolutil.h" #include DataMap::~DataMap() {} @@ -21,7 +21,7 @@ int32_t DataMap::utoi(const UnicodeString &s) const { char ch[256]; - const UChar *u = constUCharPtr(s.getBuffer()); + const UChar *u = toUCharPtr(s.getBuffer()); int32_t len = s.length(); u_UCharsToChars(u, ch, len); ch[len] = 0; /* include terminating \0 */ diff --git a/icu4c/source/tools/gennorm2/n2builder.cpp b/icu4c/source/tools/gennorm2/n2builder.cpp index 5c7c9c0a016..98b10166743 100644 --- a/icu4c/source/tools/gennorm2/n2builder.cpp +++ 
b/icu4c/source/tools/gennorm2/n2builder.cpp @@ -282,7 +282,7 @@ uint8_t Normalizer2DataBuilder::getCC(UChar32 c) const { static UBool isWellFormed(const UnicodeString &s) { UErrorCode errorCode=U_ZERO_ERROR; - u_strToUTF8(NULL, 0, NULL, constUCharPtr(s.getBuffer()), s.length(), &errorCode); + u_strToUTF8(NULL, 0, NULL, toUCharPtr(s.getBuffer()), s.length(), &errorCode); return U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR; } @@ -315,7 +315,7 @@ void Normalizer2DataBuilder::setRoundTripMapping(UChar32 c, const UnicodeString (int)phase, (long)c); exit(U_INVALID_FORMAT_ERROR); } - int32_t numCP=u_countChar32(constUCharPtr(m.getBuffer()), m.length()); + int32_t numCP=u_countChar32(toUCharPtr(m.getBuffer()), m.length()); if(numCP!=2) { fprintf(stderr, "error in gennorm2 phase %d: " @@ -452,7 +452,7 @@ Normalizer2DataBuilder::decompose(UChar32 start, UChar32 end, uint32_t value) { Norm &norm=norms[value]; const UnicodeString &m=*norm.mapping; UnicodeString *decomposed=NULL; - const UChar *s=constUCharPtr(m.getBuffer()); + const UChar *s=toUCharPtr(m.getBuffer()); int32_t length=m.length(); int32_t prev, i=0; UChar32 c; @@ -607,7 +607,7 @@ Normalizer2DataBuilder::reorder(Norm *p, BuilderReorderingBuffer &buffer) { if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) { return; // writeMapping() will complain about it and print the code point. } - const UChar *s=constUCharPtr(m.getBuffer()); + const UChar *s=toUCharPtr(m.getBuffer()); int32_t i=0; UChar32 c; while(i(p); -} - -inline UChar *UCharPtr(char16_t *p) { -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p); -#endif - return reinterpret_cast(p); -} - /** * ErrorCode subclass for use in ICU command-line tools. * The destructor calls handleFailure() which calls exit(errorCode) when isFailure(). 
diff --git a/icu4c/source/tools/toolutil/xmlparser.cpp b/icu4c/source/tools/toolutil/xmlparser.cpp index baf9a73bc3e..1ca111c0756 100644 --- a/icu4c/source/tools/toolutil/xmlparser.cpp +++ b/icu4c/source/tools/toolutil/xmlparser.cpp @@ -21,7 +21,6 @@ #include "unicode/ucnv.h" #include "unicode/regex.h" #include "filestrm.h" -#include "toolutil.h" #include "xmlparser.h" #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION @@ -210,7 +209,7 @@ UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) { goto exit; } - buffer=UCharPtr(src.getBuffer(bytesLength)); + buffer=toUCharPtr(src.getBuffer(bytesLength)); if(buffer==NULL) { // unexpected failure to reserve some string capacity errorCode=U_MEMORY_ALLOCATION_ERROR; @@ -279,7 +278,7 @@ UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) { pb=bytes; for(;;) { length=src.length(); - buffer=UCharPtr(src.getBuffer(capacity)); + buffer=toUCharPtr(src.getBuffer(capacity)); if(buffer==NULL) { // unexpected failure to reserve some string capacity errorCode=U_MEMORY_ALLOCATION_ERROR; From 1fc24f144dff6a806e1e404ec2cddf5f78a3d6ea Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Thu, 9 Mar 2017 00:10:33 +0000 Subject: [PATCH 17/26] ICU-12992 casemap.h does not need to include char16ptr.h any more X-SVN-Rev: 39748 --- icu4c/source/common/unicode/casemap.h | 1 - 1 file changed, 1 deletion(-) diff --git a/icu4c/source/common/unicode/casemap.h b/icu4c/source/common/unicode/casemap.h index e0e4aafcf90..d645a211ee2 100644 --- a/icu4c/source/common/unicode/casemap.h +++ b/icu4c/source/common/unicode/casemap.h @@ -8,7 +8,6 @@ #define __CASEMAP_H__ #include "unicode/utypes.h" -#include "unicode/char16ptr.h" #include "unicode/uobject.h" /** From 153ccb4e2e2fb2a32f3faab63bb5b7bfd229dc7b Mon Sep 17 00:00:00 2001 From: "Steven R. 
Loomis" Date: Fri, 10 Mar 2017 02:58:13 +0000 Subject: [PATCH 18/26] ICU-12766 revert r39756 (mistake) X-SVN-Rev: 39758 --- icu4c/source/configure.ac | 1 - icu4c/source/tools/Makefile.in | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/icu4c/source/configure.ac b/icu4c/source/configure.ac index c858e1fd428..b6065af7997 100644 --- a/icu4c/source/configure.ac +++ b/icu4c/source/configure.ac @@ -1349,7 +1349,6 @@ AC_CONFIG_FILES([icudefs.mk \ tools/pkgdata/Makefile \ tools/tzcode/Makefile \ tools/gencfu/Makefile \ - tools/escapesrc/Makefile \ test/Makefile \ test/compat/Makefile \ test/testdata/Makefile \ diff --git a/icu4c/source/tools/Makefile.in b/icu4c/source/tools/Makefile.in index 8fb7876de76..1cbef0005d4 100644 --- a/icu4c/source/tools/Makefile.in +++ b/icu4c/source/tools/Makefile.in @@ -17,7 +17,7 @@ subdir = tools SUBDIRS = toolutil ctestfw makeconv genrb genbrk \ gencnval gensprep icuinfo genccode gencmn icupkg pkgdata \ -gentest gennorm2 gencfu gendict escapesrc +gentest gennorm2 gencfu gendict ## List of phony targets .PHONY : all all-local all-recursive install install-local \ From b8f912c3c4481ca0ca2d25446c470a5ddca6d190 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 10 Mar 2017 21:22:38 +0000 Subject: [PATCH 19/26] ICU-12992 still use U_FINAL X-SVN-Rev: 39761 --- icu4c/source/common/unicode/char16ptr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icu4c/source/common/unicode/char16ptr.h b/icu4c/source/common/unicode/char16ptr.h index 2e85aa71aba..4035ce9bbbc 100644 --- a/icu4c/source/common/unicode/char16ptr.h +++ b/icu4c/source/common/unicode/char16ptr.h @@ -34,7 +34,7 @@ U_NAMESPACE_BEGIN * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. * @draft ICU 59 */ -class U_COMMON_API Char16Ptr final { +class U_COMMON_API Char16Ptr U_FINAL { public: /** * Copies the pointer. 
@@ -133,7 +133,7 @@ char16_t *Char16Ptr::get() const { return u.cp; } * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. * @draft ICU 59 */ -class U_COMMON_API ConstChar16Ptr final { +class U_COMMON_API ConstChar16Ptr U_FINAL { public: /** * Copies the pointer. From 72c8be3fb6480232414fea67e135d617c0b92c3d Mon Sep 17 00:00:00 2001 From: Shawn Steele Date: Mon, 13 Mar 2017 22:30:03 +0000 Subject: [PATCH 20/26] ICU-12912 - Windows timezone detection - Get rid of deprecated version checking for old OS behavior (ICU supports Win7+, the updated code supports Vista+ (and even some older version) X-SVN-Rev: 39782 --- icu4c/source/common/wintz.cpp | 216 +++++++++++++--------------------- 1 file changed, 79 insertions(+), 137 deletions(-) diff --git a/icu4c/source/common/wintz.cpp b/icu4c/source/common/wintz.cpp index b92f3260f42..e9853d42314 100644 --- a/icu4c/source/common/wintz.cpp +++ b/icu4c/source/common/wintz.cpp @@ -13,6 +13,7 @@ #include "unicode/utypes.h" +// This file contains only desktop windows behavior #if U_PLATFORM_HAS_WIN32_API #include "wintz.h" @@ -46,102 +47,25 @@ typedef struct * Various registry keys and key fragments. */ static const char CURRENT_ZONE_REGKEY[] = "SYSTEM\\CurrentControlSet\\Control\\TimeZoneInformation\\"; -/* static const char STANDARD_NAME_REGKEY[] = "StandardName"; Currently unused constant */ static const char STANDARD_TIME_REGKEY[] = " Standard Time"; static const char TZI_REGKEY[] = "TZI"; static const char STD_REGKEY[] = "Std"; /** - * HKLM subkeys used to probe for the flavor of Windows. Note that we - * specifically check for the "GMT" zone subkey; this is present on - * NT, but on XP has become "GMT Standard Time". We need to - * discriminate between these cases. 
+ * The time zone root keys (under HKLM) for Win7+ */ -static const char* const WIN_TYPE_PROBE_REGKEY[] = { - /* WIN_9X_ME_TYPE */ - "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones", - - /* WIN_NT_TYPE */ - "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\GMT" - - /* otherwise: WIN_2K_XP_TYPE */ -}; - -/** - * The time zone root subkeys (under HKLM) for different flavors of - * Windows. - */ -static const char* const TZ_REGKEY[] = { - /* WIN_9X_ME_TYPE */ - "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones\\", - - /* WIN_NT_TYPE | WIN_2K_XP_TYPE */ - "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\" -}; - -/** - * Flavor of Windows, from our perspective. Not a real OS version, - * but rather the flavor of the layout of the time zone information in - * the registry. - */ -enum { - WIN_9X_ME_TYPE = 1, - WIN_NT_TYPE = 2, - WIN_2K_XP_TYPE = 3 -}; - -static int32_t gWinType = 0; - -static int32_t detectWindowsType() -{ - int32_t winType; - LONG result; - HKEY hkey; - - /* Detect the version of windows by trying to open a sequence of - probe keys. We don't use the OS version API because what we - really want to know is how the registry is laid out. - Specifically, is it 9x/Me or not, and is it "GMT" or "GMT - Standard Time". */ - for (winType = 0; winType < 2; winType++) { - result = RegOpenKeyExA(HKEY_LOCAL_MACHINE, - WIN_TYPE_PROBE_REGKEY[winType], - 0, - KEY_QUERY_VALUE, - &hkey); - RegCloseKey(hkey); - - if (result == ERROR_SUCCESS) { - break; - } - } - - return winType+1; /* +1 to bring it inline with the enum */ -} +static const char TZ_REGKEY[] = "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\"; static LONG openTZRegKey(HKEY *hkey, const char *winid) { - char subKeyName[110]; /* TODO: why 96?? */ + char subKeyName[110]; /* TODO: why 110?? */ char *name; LONG result; - /* This isn't thread safe, but it's good enough because the result should be constant per system. 
*/ - if (gWinType <= 0) { - gWinType = detectWindowsType(); - } - - uprv_strcpy(subKeyName, TZ_REGKEY[(gWinType != WIN_9X_ME_TYPE)]); + uprv_strcpy(subKeyName, TZ_REGKEY); name = &subKeyName[strlen(subKeyName)]; uprv_strcat(subKeyName, winid); - if (gWinType == WIN_9X_ME_TYPE) { - /* Remove " Standard Time" */ - char *pStd = uprv_strstr(subKeyName, STANDARD_TIME_REGKEY); - if (pStd) { - *pStd = 0; - } - } - result = RegOpenKeyExA(HKEY_LOCAL_MACHINE, subKeyName, 0, @@ -158,7 +82,8 @@ static LONG getTZI(const char *winid, TZI *tzi) result = openTZRegKey(&hkey, winid); - if (result == ERROR_SUCCESS) { + if (result == ERROR_SUCCESS) + { result = RegQueryValueExA(hkey, TZI_REGKEY, NULL, @@ -171,14 +96,16 @@ static LONG getTZI(const char *winid, TZI *tzi) return result; } -static LONG getSTDName(const char *winid, char *regStdName, int32_t length) { +static LONG getSTDName(const char *winid, char *regStdName, int32_t length) +{ DWORD cbData = length; LONG result; HKEY hkey; result = openTZRegKey(&hkey, winid); - if (result == ERROR_SUCCESS) { + if (result == ERROR_SUCCESS) + { result = RegQueryValueExA(hkey, STD_REGKEY, NULL, @@ -191,7 +118,8 @@ static LONG getSTDName(const char *winid, char *regStdName, int32_t length) { return result; } -static LONG getTZKeyName(char* tzKeyName, int32_t length) { +static LONG getTZKeyName(char* tzKeyName, int32_t length) +{ HKEY hkey; LONG result = FALSE; DWORD cbData = length; @@ -218,21 +146,19 @@ static LONG getTZKeyName(char* tzKeyName, int32_t length) { } /* - This code attempts to detect the Windows time zone, as set in the - Windows Date and Time control panel. It attempts to work on - multiple flavors of Windows (9x, Me, NT, 2000, XP) and on localized + This code attempts to detect the Windows time zone directly, + as set in the Windows Date and Time control panel. It attempts + to work on windows greater than Windows Vista and on localized installs. 
It works by directly interrogating the registry and comparing the data there with the data returned by the GetTimeZoneInformation API, along with some other strategies. The - registry contains time zone data under one of two keys (depending on - the flavor of Windows): + registry contains time zone data under this key: - HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones\ HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\ Under this key are several subkeys, one for each time zone. These - subkeys are named "Pacific" on Win9x/Me and "Pacific Standard Time" - on WinNT/2k/XP. There are some other wrinkles; see the code for + subkeys are named "Pacific Standard Time" on Vista+. + There are some other wrinkles; see the code for details. The subkey name is NOT LOCALIZED, allowing us to support localized installs. @@ -270,7 +196,8 @@ static LONG getTZKeyName(char* tzKeyName, int32_t length) { * time zone, translated to an ICU time zone, or NULL upon failure. */ U_CFUNC const char* U_EXPORT2 -uprv_detectWindowsTimeZone() { +uprv_detectWindowsTimeZone() +{ UErrorCode status = U_ZERO_ERROR; UResourceBundle* bundle = NULL; char* icuid = NULL; @@ -288,7 +215,6 @@ uprv_detectWindowsTimeZone() { TZI tziReg; TIME_ZONE_INFORMATION apiTZI; - BOOL isVistaOrHigher; BOOL tryPreVistaFallback; OSVERSIONINFO osVerInfo; @@ -325,75 +251,86 @@ uprv_detectWindowsTimeZone() { */ uprv_memset(&osVerInfo, 0, sizeof(osVerInfo)); osVerInfo.dwOSVersionInfoSize = sizeof(osVerInfo); - GetVersionEx(&osVerInfo); - isVistaOrHigher = osVerInfo.dwMajorVersion >= 6; /* actually includes Windows Server 2008 as well, but don't worry about it */ tryPreVistaFallback = TRUE; - if(isVistaOrHigher) { - result = getTZKeyName(regStdName, sizeof(regStdName)); - if(ERROR_SUCCESS == result) { - UResourceBundle* winTZ = ures_getByKey(bundle, regStdName, NULL, &status); - if(U_SUCCESS(status)) { - const UChar* icuTZ = NULL; - if (errorCode != 0) { - icuTZ = ures_getStringByKey(winTZ, ISOcodeA, &len, 
&status); - } - if (errorCode==0 || icuTZ==NULL) { - /* fallback to default "001" and reset status */ - status = U_ZERO_ERROR; - icuTZ = ures_getStringByKey(winTZ, "001", &len, &status); - } - - if(U_SUCCESS(status)) { - int index=0; - while (! (*icuTZ == '\0' || *icuTZ ==' ')) { - tmpid[index++]=(char)(*icuTZ++); /* safe to assume 'char' is ASCII compatible on windows */ - } - tmpid[index]='\0'; - tryPreVistaFallback = FALSE; - } + result = getTZKeyName(regStdName, sizeof(regStdName)); + if(ERROR_SUCCESS == result) + { + UResourceBundle* winTZ = ures_getByKey(bundle, regStdName, NULL, &status); + if(U_SUCCESS(status)) + { + const UChar* icuTZ = NULL; + if (errorCode != 0) + { + icuTZ = ures_getStringByKey(winTZ, ISOcodeA, &len, &status); + } + if (errorCode==0 || icuTZ==NULL) + { + /* fallback to default "001" and reset status */ + status = U_ZERO_ERROR; + icuTZ = ures_getStringByKey(winTZ, "001", &len, &status); + } + + if(U_SUCCESS(status)) + { + int index=0; + while (! (*icuTZ == '\0' || *icuTZ ==' ')) + { + tmpid[index++]=(char)(*icuTZ++); /* safe to assume 'char' is ASCII compatible on windows */ + } + tmpid[index]='\0'; + tryPreVistaFallback = FALSE; } - ures_close(winTZ); } + ures_close(winTZ); } - if(tryPreVistaFallback) { - + if(tryPreVistaFallback) + { /* Note: We get the winid not from static tables but from resource bundle. */ - while (U_SUCCESS(status) && ures_hasNext(bundle)) { + while (U_SUCCESS(status) && ures_hasNext(bundle)) + { UBool idFound = FALSE; const char* winid; UResourceBundle* winTZ = ures_getNextResource(bundle, NULL, &status); - if (U_FAILURE(status)) { + if (U_FAILURE(status)) + { break; } winid = ures_getKey(winTZ); result = getTZI(winid, &tziReg); - if (result == ERROR_SUCCESS) { + if (result == ERROR_SUCCESS) + { /* Windows alters the DaylightBias in some situations. Using the bias and the rules suffices, so overwrite these unreliable fields. 
*/ tziKey.standardBias = tziReg.standardBias; tziKey.daylightBias = tziReg.daylightBias; - if (uprv_memcmp((char *)&tziKey, (char*)&tziReg, sizeof(tziKey)) == 0) { + if (uprv_memcmp((char *)&tziKey, (char*)&tziReg, sizeof(tziKey)) == 0) + { const UChar* icuTZ = NULL; - if (errorCode != 0) { + if (errorCode != 0) + { icuTZ = ures_getStringByKey(winTZ, ISOcodeA, &len, &status); } - if (errorCode==0 || icuTZ==NULL) { + if (errorCode==0 || icuTZ==NULL) + { /* fallback to default "001" and reset status */ status = U_ZERO_ERROR; icuTZ = ures_getStringByKey(winTZ, "001", &len, &status); } - if (U_SUCCESS(status)) { + if (U_SUCCESS(status)) + { /* Get the standard name from the registry key to compare with the one from Windows API call. */ uprv_memset(regStdName, 0, sizeof(regStdName)); result = getSTDName(winid, regStdName, sizeof(regStdName)); - if (result == ERROR_SUCCESS) { - if (uprv_strcmp(apiStdName, regStdName) == 0) { + if (result == ERROR_SUCCESS) + { + if (uprv_strcmp(apiStdName, regStdName) == 0) + { idFound = TRUE; } } @@ -402,10 +339,12 @@ uprv_detectWindowsTimeZone() { * If none is found, tmpid buffer will contain a fallback ID (i.e. the time zone ID matching * the current time zone information) */ - if (idFound || tmpid[0] == 0) { + if (idFound || tmpid[0] == 0) + { /* if icuTZ has more than one city, take only the first (i.e. terminate icuTZ at first space) */ int index=0; - while (! (*icuTZ == '\0' || *icuTZ ==' ')) { + while (! (*icuTZ == '\0' || *icuTZ ==' ')) + { tmpid[index++]=(char)(*icuTZ++); /* safe to assume 'char' is ASCII compatible on windows */ } tmpid[index]='\0'; @@ -414,7 +353,8 @@ uprv_detectWindowsTimeZone() { } } ures_close(winTZ); - if (idFound) { + if (idFound) + { break; } } @@ -423,10 +363,12 @@ uprv_detectWindowsTimeZone() { /* * Copy the timezone ID to icuid to be returned. 
*/ - if (tmpid[0] != 0) { + if (tmpid[0] != 0) + { len = uprv_strlen(tmpid); icuid = (char*)uprv_calloc(len + 1, sizeof(char)); - if (icuid != NULL) { + if (icuid != NULL) + { uprv_strcpy(icuid, tmpid); } } From 170693e51ebe17f38934b0f2514cc4e4d50be28c Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Mon, 13 Mar 2017 23:17:12 +0000 Subject: [PATCH 21/26] ICU-13012 Remove stray ^S character from line_normal_cj.txt. X-SVN-Rev: 39786 --- icu4c/source/test/testdata/break_rules/line_normal_cj.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu4c/source/test/testdata/break_rules/line_normal_cj.txt b/icu4c/source/test/testdata/break_rules/line_normal_cj.txt index d84bd877efe..1895e10a8e5 100644 --- a/icu4c/source/test/testdata/break_rules/line_normal_cj.txt +++ b/icu4c/source/test/testdata/break_rules/line_normal_cj.txt @@ -164,7 +164,7 @@ LB20.1b: CB CM* ÷; # not picking up the continuing match after the BA from 21a. # TODO: For CJ tailorings (with BAX) does this rule want to include BAX? If so, # should "HL BAX" not break when followed by a CB? Thats what the current -# rules do, which is why "[^CM CB]?" includes the ?. +# rules do, which is why "[^CM CB]?" includes the ?. LB21a: HL CM* (HY | BA | BAX) CM* [^CM CB]?; # DO allow breaks here before $BAXcm and $NSXcm, so don't include them From 4bea0e6df846acb86403050b2423cb507fcb9569 Mon Sep 17 00:00:00 2001 From: Jeff Genovy <29107334+jefgen@users.noreply.github.com> Date: Tue, 14 Mar 2017 02:32:46 +0000 Subject: [PATCH 22/26] ICU-12912 Fix a few nits in comments. 
X-SVN-Rev: 39790 --- icu4c/source/common/wintz.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/icu4c/source/common/wintz.cpp b/icu4c/source/common/wintz.cpp index e9853d42314..397e430f730 100644 --- a/icu4c/source/common/wintz.cpp +++ b/icu4c/source/common/wintz.cpp @@ -13,7 +13,7 @@ #include "unicode/utypes.h" -// This file contains only desktop windows behavior +// This file contains only desktop Windows behavior #if U_PLATFORM_HAS_WIN32_API #include "wintz.h" @@ -148,7 +148,7 @@ static LONG getTZKeyName(char* tzKeyName, int32_t length) /* This code attempts to detect the Windows time zone directly, as set in the Windows Date and Time control panel. It attempts - to work on windows greater than Windows Vista and on localized + to work on versions greater than Windows Vista and on localized installs. It works by directly interrogating the registry and comparing the data there with the data returned by the GetTimeZoneInformation API, along with some other strategies. The @@ -156,8 +156,8 @@ static LONG getTZKeyName(char* tzKeyName, int32_t length) HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\ - Under this key are several subkeys, one for each time zone. These - subkeys are named "Pacific Standard Time" on Vista+. + Under this key are several subkeys, one for each time zone. For + example these subkeys are named "Pacific Standard Time" on Vista+. There are some other wrinkles; see the code for details. The subkey name is NOT LOCALIZED, allowing us to support localized installs. From 7d82980d9a4cf7c7fd282f3e298d8047cd9f7652 Mon Sep 17 00:00:00 2001 From: Yoshito Umaoka Date: Tue, 14 Mar 2017 14:15:29 +0000 Subject: [PATCH 23/26] ICU-12882 Removed a section explaining calendar subclass resource data from DateFormatSymbols constructor's API doc. 
X-SVN-Rev: 39792 --- .../com/ibm/icu/text/DateFormatSymbols.java | 101 ------------------ 1 file changed, 101 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java index f5021533586..f4be9febaeb 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java @@ -2147,57 +2147,7 @@ public class DateFormatSymbols implements Serializable, Cloneable { /** * Returns the {@link DateFormatSymbols} object that should be used to format a * calendar system's dates in the given locale. - *

    - * Subclassing:
    - * When creating a new Calendar subclass, you must create the - * {@link ResourceBundle ResourceBundle} - * containing its {@link DateFormatSymbols DateFormatSymbols} in a specific place. - * The resource bundle name is based on the calendar's fully-specified - * class name, with ".resources" inserted at the end of the package name - * (just before the class name) and "Symbols" appended to the end. - * For example, the bundle corresponding to "com.ibm.icu.util.HebrewCalendar" - * is "com.ibm.icu.impl.data.HebrewCalendarSymbols". - *

    - * Within the ResourceBundle, this method searches for five keys: - *

      - *
    • DayNames - - * An array of strings corresponding to each possible - * value of the DAY_OF_WEEK field. Even though - * DAY_OF_WEEK starts with SUNDAY = 1, - * This array is 0-based; the name for Sunday goes in the - * first position, at index 0. If this key is not found - * in the bundle, the day names are inherited from the - * default DateFormatSymbols for the requested locale. * - *
    • DayAbbreviations - - * An array of abbreviated day names corresponding - * to the values in the "DayNames" array. If this key - * is not found in the resource bundle, the "DayNames" - * values are used instead. If neither key is found, - * the day abbreviations are inherited from the default - * DateFormatSymbols for the locale. - * - *
    • MonthNames - - * An array of strings corresponding to each possible - * value of the MONTH field. If this key is not found - * in the bundle, the month names are inherited from the - * default DateFormatSymbols for the requested locale. - * - *
    • MonthAbbreviations - - * An array of abbreviated day names corresponding - * to the values in the "MonthNames" array. If this key - * is not found in the resource bundle, the "MonthNames" - * values are used instead. If neither key is found, - * the day abbreviations are inherited from the default - * DateFormatSymbols for the locale. - * - *
    • Eras - - * An array of strings corresponding to each possible - * value of the ERA field. If this key is not found - * in the bundle, the era names are inherited from the - * default DateFormatSymbols for the requested locale. - *
    - *

    * @param cal The calendar system whose date format symbols are desired. * @param locale The locale whose symbols are desired. * @@ -2211,57 +2161,6 @@ public class DateFormatSymbols implements Serializable, Cloneable { /** * Returns the {@link DateFormatSymbols} object that should be used to format a * calendar system's dates in the given locale. - *

    - * Subclassing:
    - * When creating a new Calendar subclass, you must create the - * {@link ResourceBundle ResourceBundle} - * containing its {@link DateFormatSymbols DateFormatSymbols} in a specific place. - * The resource bundle name is based on the calendar's fully-specified - * class name, with ".resources" inserted at the end of the package name - * (just before the class name) and "Symbols" appended to the end. - * For example, the bundle corresponding to "com.ibm.icu.util.HebrewCalendar" - * is "com.ibm.icu.impl.data.HebrewCalendarSymbols". - *

    - * Within the ResourceBundle, this method searches for five keys: - *

      - *
    • DayNames - - * An array of strings corresponding to each possible - * value of the DAY_OF_WEEK field. Even though - * DAY_OF_WEEK starts with SUNDAY = 1, - * This array is 0-based; the name for Sunday goes in the - * first position, at index 0. If this key is not found - * in the bundle, the day names are inherited from the - * default DateFormatSymbols for the requested locale. - * - *
    • DayAbbreviations - - * An array of abbreviated day names corresponding - * to the values in the "DayNames" array. If this key - * is not found in the resource bundle, the "DayNames" - * values are used instead. If neither key is found, - * the day abbreviations are inherited from the default - * DateFormatSymbols for the locale. - * - *
    • MonthNames - - * An array of strings corresponding to each possible - * value of the MONTH field. If this key is not found - * in the bundle, the month names are inherited from the - * default DateFormatSymbols for the requested locale. - * - *
    • MonthAbbreviations - - * An array of abbreviated day names corresponding - * to the values in the "MonthNames" array. If this key - * is not found in the resource bundle, the "MonthNames" - * values are used instead. If neither key is found, - * the day abbreviations are inherited from the default - * DateFormatSymbols for the locale. - * - *
    • Eras - - * An array of strings corresponding to each possible - * value of the ERA field. If this key is not found - * in the bundle, the era names are inherited from the - * default DateFormatSymbols for the requested locale. - *
    - *

    * @param cal The calendar system whose date format symbols are desired. * @param locale The ulocale whose symbols are desired. * From c026fe4808593aa96d7e30f77e540dd770de5093 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Tue, 14 Mar 2017 18:10:59 +0000 Subject: [PATCH 24/26] ICU-12674 Add missing cleanup registration in Normalizer2; fixes memory leak. X-SVN-Rev: 39794 --- icu4c/source/common/loadednormalizer2impl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/icu4c/source/common/loadednormalizer2impl.cpp b/icu4c/source/common/loadednormalizer2impl.cpp index c995ca1ea71..bd37b95eadc 100644 --- a/icu4c/source/common/loadednormalizer2impl.cpp +++ b/icu4c/source/common/loadednormalizer2impl.cpp @@ -232,6 +232,7 @@ Normalizer2::getInstance(const char *packageName, } } if(allModes==NULL) { + ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); LocalPointer localAllModes( Norm2AllModes::createInstance(packageName, name, errorCode)); if(U_SUCCESS(errorCode)) { From 9b50ba1c2e5a238a9044aed074398ca68e0d87f9 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 14 Mar 2017 18:12:19 +0000 Subject: [PATCH 25/26] ICU-13032 turn off the aliasing barrier for PNaCl which does not allow this specific inline assembler instruction X-SVN-Rev: 39795 --- icu4c/source/common/unicode/char16ptr.h | 2 +- icu4c/source/common/unicode/platform.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/icu4c/source/common/unicode/char16ptr.h b/icu4c/source/common/unicode/char16ptr.h index 4035ce9bbbc..27efa5da2d0 100644 --- a/icu4c/source/common/unicode/char16ptr.h +++ b/icu4c/source/common/unicode/char16ptr.h @@ -26,7 +26,7 @@ U_NAMESPACE_BEGIN */ #ifdef U_ALIASING_BARRIER // Use the predefined value. 
-#elif defined(__clang__) || defined(__GNUC__) +#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT # define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory") #endif diff --git a/icu4c/source/common/unicode/platform.h b/icu4c/source/common/unicode/platform.h index b553b6878da..e6d449b57ab 100644 --- a/icu4c/source/common/unicode/platform.h +++ b/icu4c/source/common/unicode/platform.h @@ -150,7 +150,7 @@ # define U_PLATFORM U_PF_ANDROID /* Android wchar_t support depends on the API level. */ # include -#elif defined(__native_client__) +#elif defined(__pnacl__) || defined(__native_client__) # define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT #elif defined(linux) || defined(__linux__) || defined(__linux) # define U_PLATFORM U_PF_LINUX From f54d25d2867f6ed04a62cd5b5773a1f22ade95f6 Mon Sep 17 00:00:00 2001 From: Yoshito Umaoka Date: Tue, 14 Mar 2017 19:39:30 +0000 Subject: [PATCH 26/26] ICU-12810 Prevent assertion triggered by time zone id including characters other than ASCII invariants initialize/formatting a time zone. 
X-SVN-Rev: 39799 --- icu4c/source/i18n/zonemeta.cpp | 7 +++++ icu4c/source/i18n/zonemeta.h | 6 ++++- icu4c/source/test/intltest/tzfmttst.cpp | 34 +++++++++++++++++++++++++ icu4c/source/test/intltest/tzfmttst.h | 1 + 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/icu4c/source/i18n/zonemeta.cpp b/icu4c/source/i18n/zonemeta.cpp index b80ac3ea379..84a96578029 100644 --- a/icu4c/source/i18n/zonemeta.cpp +++ b/icu4c/source/i18n/zonemeta.cpp @@ -28,6 +28,7 @@ #include "uresimp.h" #include "uhash.h" #include "olsontz.h" +#include "uinvchar.h" static UMutex gZoneMetaLock = U_MUTEX_INITIALIZER; @@ -255,6 +256,12 @@ ZoneMeta::getCanonicalCLDRID(const UnicodeString &tzid, UErrorCode& status) { tzid.extract(utzid, ZID_KEY_MAX + 1, tmpStatus); U_ASSERT(tmpStatus == U_ZERO_ERROR); // we checked the length of tzid already + if (!uprv_isInvariantUString(utzid, -1)) { + // All of known tz IDs are only containing ASCII invariant characters. + status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + // Check if it was already cached umtx_lock(&gZoneMetaLock); { diff --git a/icu4c/source/i18n/zonemeta.h b/icu4c/source/i18n/zonemeta.h index e0f22b779ed..9dbcc878a22 100644 --- a/icu4c/source/i18n/zonemeta.h +++ b/icu4c/source/i18n/zonemeta.h @@ -41,7 +41,11 @@ public: /** * Return the canonical id for this tzid defined by CLDR, which might be the id itself. * This overload method returns a persistent const UChar*, which is guranteed to persist - * (a pointer to a resource). + * (a pointer to a resource). If the given system tzid is not known, U_ILLEGAL_ARGUMENT_ERROR + * is set in the status. 
+ * @param tzid Zone ID + * @param status Receives the status + * @return The canonical ID for the input time zone ID */ static const UChar* U_EXPORT2 getCanonicalCLDRID(const UnicodeString &tzid, UErrorCode& status); diff --git a/icu4c/source/test/intltest/tzfmttst.cpp b/icu4c/source/test/intltest/tzfmttst.cpp index b0d9dd5212d..e673d0aa4ae 100644 --- a/icu4c/source/test/intltest/tzfmttst.cpp +++ b/icu4c/source/test/intltest/tzfmttst.cpp @@ -82,6 +82,7 @@ TimeZoneFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &name TESTCASE(3, TestISOFormat); TESTCASE(4, TestFormat); TESTCASE(5, TestFormatTZDBNames); + TESTCASE(6, TestFormatCustomZone); default: name = ""; break; } } @@ -1213,5 +1214,38 @@ TimeZoneFormatTest::TestFormatTZDBNames(void) { } } +void +TimeZoneFormatTest::TestFormatCustomZone(void) { + struct { + const char* id; + int32_t offset; + const char* expected; + } TESTDATA[] = { + { "abc", 3600000, "GMT+01:00" }, // unknown ID + { "$abc", -3600000, "GMT-01:00" }, // unknown, with ASCII variant char '$' + { "\\u00c1\\u00df\\u00c7", 5400000, "GMT+01:30"}, // unknown, with non-ASCII chars + { 0, 0, 0 } + }; + + UDate now = Calendar::getNow(); + + for (int32_t i = 0; ; i++) { + const char *id = TESTDATA[i].id; + if (id == 0) { + break; + } + UnicodeString tzid = UnicodeString(id, -1, US_INV).unescape(); + SimpleTimeZone tz(TESTDATA[i].offset, tzid); + + UErrorCode status = U_ZERO_ERROR; + LocalPointer tzfmt(TimeZoneFormat::createInstance(Locale("en"), status)); + UnicodeString tzstr; + UnicodeString expected = UnicodeString(TESTDATA[i].expected, -1, US_INV).unescape(); + + tzfmt->format(UTZFMT_STYLE_SPECIFIC_LONG, tz, now, tzstr, NULL); + assertEquals(UnicodeString("Format result for ") + tzid, expected, tzstr); + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/tzfmttst.h b/icu4c/source/test/intltest/tzfmttst.h index 9e70f117da2..0bf91ae0e8e 100644 --- a/icu4c/source/test/intltest/tzfmttst.h +++ 
b/icu4c/source/test/intltest/tzfmttst.h @@ -27,6 +27,7 @@ class TimeZoneFormatTest : public IntlTest { void TestISOFormat(void); void TestFormat(void); void TestFormatTZDBNames(void); + void TestFormatCustomZone(void); void RunTimeRoundTripTests(int32_t threadNumber); };