diff --git a/icu4c/source/common/caniter.cpp b/icu4c/source/common/caniter.cpp index e7779a1a9fe..aee9f4ee31a 100644 --- a/icu4c/source/common/caniter.cpp +++ b/icu4c/source/common/caniter.cpp @@ -311,12 +311,12 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros // see what the permutations of the characters before and after this one are //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp))); - permute(subPermuteString.replace(i, U16_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status); + permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status); /* Test for buffer overflows */ if(U_FAILURE(status)) { return; } - // The upper replace is destructive. The question is do we have to make a copy, or we don't care about the contents + // The upper remove is destructive. The question is do we have to make a copy, or we don't care about the contents // of source at this point. // prefix this character to all of them diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj index b735ee9398e..33ef60e2b99 100644 --- a/icu4c/source/common/common.vcxproj +++ b/icu4c/source/common/common.vcxproj @@ -1530,6 +1530,20 @@ ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) copy "%(FullPath)" ..\..\include\unicode + + ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) + + + copy "%(FullPath)" ..\..\include\unicode + + ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) + copy "%(FullPath)" ..\..\include\unicode + + ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) + copy "%(FullPath)" ..\..\include\unicode + + ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) + copy "%(FullPath)" ..\..\include\unicode ..\..\include\unicode\%(Filename)%(Extension);%(Outputs) diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters index 21387cd7508..0542a8b1384 100644 --- a/icu4c/source/common/common.vcxproj.filters +++ b/icu4c/source/common/common.vcxproj.filters @@ -1105,6 +1105,9 @@ strings + + strings + strings diff --git a/icu4c/source/common/loadednormalizer2impl.cpp b/icu4c/source/common/loadednormalizer2impl.cpp index c995ca1ea71..bd37b95eadc 100644 --- a/icu4c/source/common/loadednormalizer2impl.cpp +++ b/icu4c/source/common/loadednormalizer2impl.cpp @@ -232,6 +232,7 @@ Normalizer2::getInstance(const char *packageName, } } if(allModes==NULL) { + ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); LocalPointer localAllModes( Norm2AllModes::createInstance(packageName, name, errorCode)); if(U_SUCCESS(errorCode)) { diff --git a/icu4c/source/common/normalizer2impl.h b/icu4c/source/common/normalizer2impl.h index 6dba0eab5c4..946abee98f3 100644 --- a/icu4c/source/common/normalizer2impl.h +++ b/icu4c/source/common/normalizer2impl.h @@ -176,7 +176,7 @@ public: lastCC=0; } void copyReorderableSuffixTo(UnicodeString &s) const { - s.setTo(reorderStart, (int32_t)(limit-reorderStart)); + s.setTo(ConstChar16Ptr(reorderStart), (int32_t)(limit-reorderStart)); } private: /* diff --git a/icu4c/source/common/normlzr.cpp b/icu4c/source/common/normlzr.cpp index 4569d742e2d..a1a4949d30a 100644 --- a/icu4c/source/common/normlzr.cpp +++ b/icu4c/source/common/normlzr.cpp @@ -40,7 +40,7 @@ Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) : init(); } -Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) : +Normalizer::Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode) : UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), text(new UCharCharacterIterator(str, length)), currentIndex(0), nextIndex(0), @@ -435,7 +435,7 @@ Normalizer::setText(const CharacterIterator& newText, } void -Normalizer::setText(const UChar* newText, +Normalizer::setText(ConstChar16Ptr newText, int32_t length, UErrorCode &status) { diff --git a/icu4c/source/common/ucharstrie.cpp b/icu4c/source/common/ucharstrie.cpp index b83aec51b2c..e0b33af5194 100644 --- a/icu4c/source/common/ucharstrie.cpp +++ b/icu4c/source/common/ucharstrie.cpp @@ -175,7 +175,8 @@ UCharsTrie::next(int32_t uchar) { } UStringTrieResult -UCharsTrie::next(const UChar *s, int32_t sLength) { +UCharsTrie::next(ConstChar16Ptr ptr, int32_t sLength) { + const UChar *s=ptr; if(sLength<0 ? *s==0 : sLength==0) { // Empty input. return current(); diff --git a/icu4c/source/common/ucharstrieiterator.cpp b/icu4c/source/common/ucharstrieiterator.cpp index 56729951b40..b3132241fe2 100644 --- a/icu4c/source/common/ucharstrieiterator.cpp +++ b/icu4c/source/common/ucharstrieiterator.cpp @@ -21,7 +21,7 @@ U_NAMESPACE_BEGIN -UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength, +UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode) : uchars_(trieUChars), pos_(uchars_), initialPos_(uchars_), diff --git a/icu4c/source/common/uchriter.cpp b/icu4c/source/common/uchriter.cpp index 0b3868e67d7..822168f5c8e 100644 --- a/icu4c/source/common/uchriter.cpp +++ b/icu4c/source/common/uchriter.cpp @@ -25,14 +25,14 @@ UCharCharacterIterator::UCharCharacterIterator() // never default construct! } -UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr, +UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length) : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0), text(textPtr) { } -UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr, +UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, int32_t position) : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0, position), @@ -40,7 +40,7 @@ UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr, { } -UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr, +UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, int32_t textBegin, int32_t textEnd, @@ -349,7 +349,7 @@ UCharCharacterIterator::move32(int32_t delta, CharacterIterator::EOrigin origin) return pos; } -void UCharCharacterIterator::setText(const UChar* newText, +void UCharCharacterIterator::setText(ConstChar16Ptr newText, int32_t newTextLength) { text = newText; if(newText == 0 || newTextLength < 0) { diff --git a/icu4c/source/common/ucurr.cpp b/icu4c/source/common/ucurr.cpp index c63e6ca593f..885ca3a9221 100644 --- a/icu4c/source/common/ucurr.cpp +++ b/icu4c/source/common/ucurr.cpp @@ -1030,7 +1030,8 @@ collectCurrencyNames(const char* locale, const UnicodeString *symbol; while ((symbol = iter.next()) != NULL) { (*currencySymbols)[*total_currency_symbol_count].IsoCode = iso; - (*currencySymbols)[*total_currency_symbol_count].currencyName = (UChar*) symbol->getBuffer(); + (*currencySymbols)[*total_currency_symbol_count].currencyName = + const_cast(symbol->getBuffer()); (*currencySymbols)[*total_currency_symbol_count].flag = 0; (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = symbol->length(); } diff --git a/icu4c/source/common/uinvchar.h b/icu4c/source/common/uinvchar.h index 0bb5e73d04a..c4f9f88b9ad 100644 --- a/icu4c/source/common/uinvchar.h +++ b/icu4c/source/common/uinvchar.h @@ -64,7 +64,7 @@ uprv_isInvariantUString(const UChar *s, int32_t length); */ U_INTERNAL inline UBool U_EXPORT2 uprv_isInvariantUnicodeString(const icu::UnicodeString &s) { - return uprv_isInvariantUString(s.getBuffer(), s.length()); + return uprv_isInvariantUString(icu::toUCharPtr(s.getBuffer()), s.length()); } #endif /* __cplusplus */ diff --git a/icu4c/source/common/unicode/appendable.h b/icu4c/source/common/unicode/appendable.h index d611dc008af..8512c2f3031 100644 --- a/icu4c/source/common/unicode/appendable.h +++ b/icu4c/source/common/unicode/appendable.h @@ -19,7 +19,7 @@ /** * \file - * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (UChars). + * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (char16_ts). */ #include "unicode/utypes.h" @@ -34,10 +34,10 @@ class UnicodeString; * Combines elements of Java Appendable and ICU4C ByteSink. * * This class can be used in APIs where it does not matter whether the actual destination is - * a UnicodeString, a UChar[] array, a UnicodeSet, or any other object + * a UnicodeString, a char16_t[] array, a UnicodeSet, or any other object * that receives and processes characters and/or strings. * - * Implementation classes must implement at least appendCodeUnit(UChar). + * Implementation classes must implement at least appendCodeUnit(char16_t). * The base class provides default implementations for the other methods. * * The methods do not take UErrorCode parameters. @@ -62,11 +62,11 @@ public: * @return TRUE if the operation succeeded * @stable ICU 4.8 */ - virtual UBool appendCodeUnit(UChar c) = 0; + virtual UBool appendCodeUnit(char16_t c) = 0; /** * Appends a code point. - * The default implementation calls appendCodeUnit(UChar) once or twice. + * The default implementation calls appendCodeUnit(char16_t) once or twice. * @param c code point 0..0x10ffff * @return TRUE if the operation succeeded * @stable ICU 4.8 @@ -75,20 +75,20 @@ public: /** * Appends a string. - * The default implementation calls appendCodeUnit(UChar) for each code unit. + * The default implementation calls appendCodeUnit(char16_t) for each code unit. * @param s string, must not be NULL if length!=0 * @param length string length, or -1 if NUL-terminated * @return TRUE if the operation succeeded * @stable ICU 4.8 */ - virtual UBool appendString(const UChar *s, int32_t length); + virtual UBool appendString(const char16_t *s, int32_t length); /** * Tells the object that the caller is going to append roughly - * appendCapacity UChars. A subclass might use this to pre-allocate + * appendCapacity char16_ts. A subclass might use this to pre-allocate * a larger buffer if necessary. * The default implementation does nothing. (It always returns TRUE.) - * @param appendCapacity estimated number of UChars that will be appended + * @param appendCapacity estimated number of char16_ts that will be appended * @return TRUE if the operation succeeded * @stable ICU 4.8 */ @@ -102,19 +102,19 @@ public: * The returned buffer is only valid until the next operation * on this Appendable. * - * After writing at most *resultCapacity UChars, call appendString() with the - * pointer returned from this function and the number of UChars written. - * Many appendString() implementations will avoid copying UChars if this function + * After writing at most *resultCapacity char16_ts, call appendString() with the + * pointer returned from this function and the number of char16_ts written. + * Many appendString() implementations will avoid copying char16_ts if this function * returned an internal buffer. * * Partial usage example: * \code * int32_t capacity; - * UChar* buffer = app.getAppendBuffer(..., &capacity); - * ... Write n UChars into buffer, with n <= capacity. + * char16_t* buffer = app.getAppendBuffer(..., &capacity); + * ... Write n char16_ts into buffer, with n <= capacity. * app.appendString(buffer, n); * \endcode - * In many implementations, that call to append will avoid copying UChars. + * In many implementations, that call to append will avoid copying char16_ts. * * If the Appendable allocates or reallocates an internal buffer, it should use * the desiredCapacityHint if appropriate. @@ -138,9 +138,9 @@ public: * @return a buffer with *resultCapacity>=minCapacity * @stable ICU 4.8 */ - virtual UChar *getAppendBuffer(int32_t minCapacity, + virtual char16_t *getAppendBuffer(int32_t minCapacity, int32_t desiredCapacityHint, - UChar *scratch, int32_t scratchCapacity, + char16_t *scratch, int32_t scratchCapacity, int32_t *resultCapacity); }; @@ -171,7 +171,7 @@ public: * @return TRUE if the operation succeeded * @stable ICU 4.8 */ - virtual UBool appendCodeUnit(UChar c); + virtual UBool appendCodeUnit(char16_t c); /** * Appends a code point to the string. @@ -188,12 +188,12 @@ public: * @return TRUE if the operation succeeded * @stable ICU 4.8 */ - virtual UBool appendString(const UChar *s, int32_t length); + virtual UBool appendString(const char16_t *s, int32_t length); /** * Tells the UnicodeString that the caller is going to append roughly - * appendCapacity UChars. - * @param appendCapacity estimated number of UChars that will be appended + * appendCapacity char16_ts. + * @param appendCapacity estimated number of char16_ts that will be appended * @return TRUE if the operation succeeded * @stable ICU 4.8 */ @@ -220,9 +220,9 @@ public: * @return a buffer with *resultCapacity>=minCapacity * @stable ICU 4.8 */ - virtual UChar *getAppendBuffer(int32_t minCapacity, + virtual char16_t *getAppendBuffer(int32_t minCapacity, int32_t desiredCapacityHint, - UChar *scratch, int32_t scratchCapacity, + char16_t *scratch, int32_t scratchCapacity, int32_t *resultCapacity); private: diff --git a/icu4c/source/common/unicode/bytestriebuilder.h b/icu4c/source/common/unicode/bytestriebuilder.h index 888ba8bdb59..a8412d3d6e8 100644 --- a/icu4c/source/common/unicode/bytestriebuilder.h +++ b/icu4c/source/common/unicode/bytestriebuilder.h @@ -127,14 +127,14 @@ private: void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); virtual int32_t getElementStringLength(int32_t i) const; - virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const; + virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const; virtual int32_t getElementValue(int32_t i) const; virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const; virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const; virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const; - virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const; + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const; virtual UBool matchNodesCanHaveValues() const { return FALSE; } diff --git a/icu4c/source/common/unicode/caniter.h b/icu4c/source/common/unicode/caniter.h index 7e83a4f6cb7..5a882fb3fb8 100644 --- a/icu4c/source/common/unicode/caniter.h +++ b/icu4c/source/common/unicode/caniter.h @@ -187,7 +187,7 @@ private: UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment) //Set getEquivalents2(String segment); - Hashtable *getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status); + Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status); //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status); /** @@ -196,7 +196,7 @@ private: * If so, take the remainder, and return the equivalents */ //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer); - Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); + Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); void cleanPieces(); diff --git a/icu4c/source/common/unicode/casemap.h b/icu4c/source/common/unicode/casemap.h index fe195599cd4..2e685eef7ae 100644 --- a/icu4c/source/common/unicode/casemap.h +++ b/icu4c/source/common/unicode/casemap.h @@ -42,7 +42,7 @@ public: * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, @@ -59,8 +59,8 @@ public: */ static int32_t toLower( const char *locale, uint32_t options, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); /** @@ -76,7 +76,7 @@ public: * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, @@ -93,8 +93,8 @@ public: */ static int32_t toUpper( const char *locale, uint32_t options, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); #if !UCONFIG_NO_BREAK_ITERATION @@ -122,7 +122,7 @@ public: * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, @@ -140,8 +140,8 @@ public: */ static int32_t toTitle( const char *locale, uint32_t options, BreakIterator *iter, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); #endif // UCONFIG_NO_BREAK_ITERATION @@ -163,7 +163,7 @@ public: * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, @@ -180,8 +180,8 @@ public: */ static int32_t fold( uint32_t options, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); private: diff --git a/icu4c/source/common/unicode/char16ptr.h b/icu4c/source/common/unicode/char16ptr.h new file mode 100644 index 00000000000..27efa5da2d0 --- /dev/null +++ b/icu4c/source/common/unicode/char16ptr.h @@ -0,0 +1,289 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// char16ptr.h +// created: 2017feb28 Markus W. Scherer + +#ifndef __CHAR16PTR_H__ +#define __CHAR16PTR_H__ + +#include +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: char16_t pointer wrappers with + * implicit conversion to/from bit-compatible raw pointer types. + * Also conversion functions from char16_t * to UChar * and OldUChar *. + */ + +U_NAMESPACE_BEGIN + +/** + * \def U_ALIASING_BARRIER + * Barrier for pointer anti-aliasing optimizations even across function boundaries. + * @internal + */ +#ifdef U_ALIASING_BARRIER + // Use the predefined value. +#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT +# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory") +#endif + +/** + * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. + * @draft ICU 59 + */ +class U_COMMON_API Char16Ptr U_FINAL { +public: + /** + * Copies the pointer. + * @param p pointer + * @draft ICU 59 + */ + inline Char16Ptr(char16_t *p); + /** + * Converts the pointer to char16_t *. + * @param p pointer to be converted + * @draft ICU 59 + */ + inline Char16Ptr(uint16_t *p); +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @param p pointer to be converted + * @draft ICU 59 + */ + inline Char16Ptr(wchar_t *p); +#endif + /** + * nullptr constructor. + * @param p nullptr + * @draft ICU 59 + */ + inline Char16Ptr(std::nullptr_t p); + /** + * Destructor. + * @draft ICU 59 + */ + inline ~Char16Ptr(); + + /** + * Pointer access. + * @return the wrapped pointer + * @draft ICU 59 + */ + inline char16_t *get() const; + /** + * char16_t pointer access via type conversion (e.g., static_cast). + * @return the wrapped pointer + * @draft ICU 59 + */ + inline operator char16_t *() const { return get(); } + +private: + Char16Ptr() = delete; + +#ifdef U_ALIASING_BARRIER + template static char16_t *cast(T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast(t); + } + + char16_t *p; +#else + union { + char16_t *cp; + uint16_t *up; + wchar_t *wp; + } u; +#endif +}; + +#ifdef U_ALIASING_BARRIER + +Char16Ptr::Char16Ptr(char16_t *p) : p(p) {} +Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {} +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {} +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {} +Char16Ptr::~Char16Ptr() { + U_ALIASING_BARRIER(p); +} + +char16_t *Char16Ptr::get() const { return p; } + +#else + +Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; } +Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; } +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; } +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; } +Char16Ptr::~Char16Ptr() {} + +char16_t *Char16Ptr::get() const { return u.cp; } + +#endif + +/** + * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. + * @draft ICU 59 + */ +class U_COMMON_API ConstChar16Ptr U_FINAL { +public: + /** + * Copies the pointer. + * @param p pointer + * @draft ICU 59 + */ + inline ConstChar16Ptr(const char16_t *p); + /** + * Converts the pointer to char16_t *. + * @param p pointer to be converted + * @draft ICU 59 + */ + inline ConstChar16Ptr(const uint16_t *p); +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @param p pointer to be converted + * @draft ICU 59 + */ + inline ConstChar16Ptr(const wchar_t *p); +#endif + /** + * nullptr constructor. + * @param p nullptr + * @draft ICU 59 + */ + inline ConstChar16Ptr(const std::nullptr_t p); + /** + * Destructor. + * @draft ICU 59 + */ + inline ~ConstChar16Ptr(); + + /** + * Pointer access. + * @return the wrapped pointer + * @draft ICU 59 + */ + inline const char16_t *get() const; + /** + * char16_t pointer access via type conversion (e.g., static_cast). + * @return the wrapped pointer + * @draft ICU 59 + */ + inline operator const char16_t *() const { return get(); } + +private: + ConstChar16Ptr() = delete; + +#ifdef U_ALIASING_BARRIER + template static const char16_t *cast(const T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast(t); + } + + const char16_t *p; +#else + union { + const char16_t *cp; + const uint16_t *up; + const wchar_t *wp; + } u; +#endif +}; + +#ifdef U_ALIASING_BARRIER + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {} +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {} +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {} +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {} +ConstChar16Ptr::~ConstChar16Ptr() { + U_ALIASING_BARRIER(p); +} + +const char16_t *ConstChar16Ptr::get() const { return p; } + +#else + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; } +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; } +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; } +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; } +ConstChar16Ptr::~ConstChar16Ptr() {} + +const char16_t *ConstChar16Ptr::get() const { return u.cp; } + +#endif + +/** + * Converts from const char16_t * to const UChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as const UChar * + * @draft ICU 59 + */ +inline U_COMMON_API const UChar *toUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + +/** + * Converts from char16_t * to UChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as UChar * + * @draft ICU 59 + */ +inline U_COMMON_API UChar *toUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + +/** + * Converts from const char16_t * to const OldUChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as const OldUChar * + * @draft ICU 59 + */ +inline U_COMMON_API const OldUChar *toOldUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + +/** + * Converts from char16_t * to OldUChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as OldUChar * + * @draft ICU 59 + */ +inline U_COMMON_API OldUChar *toOldUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + +U_NAMESPACE_END + +#endif // __CHAR16PTR_H__ diff --git a/icu4c/source/common/unicode/chariter.h b/icu4c/source/common/unicode/chariter.h index 17ab178e6c2..76f01402e7b 100644 --- a/icu4c/source/common/unicode/chariter.h +++ b/icu4c/source/common/unicode/chariter.h @@ -78,7 +78,7 @@ U_NAMESPACE_BEGIN * } * * void function1(ForwardCharacterIterator &it) { - * UChar c; + * char16_t c; * while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) { * // use c * } @@ -149,7 +149,7 @@ public: * @return the current code unit. * @stable ICU 2.0 */ - virtual UChar nextPostInc(void) = 0; + virtual char16_t nextPostInc(void) = 0; /** * Gets the current code point for returning and advances to the next code point @@ -230,7 +230,7 @@ protected: * showing a way to convert simple for() loops: * \code * void forward2(CharacterIterator &it) { - * UChar c; + * char16_t c; * for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) { * // use c * } @@ -249,7 +249,7 @@ protected: * Backward iteration with a more traditional for() loop: * \code * void backward2(CharacterIterator &it) { - * UChar c; + * char16_t c; * for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) { * // use c * } @@ -266,7 +266,7 @@ protected: * // get the position * int32_t pos=it.getIndex(); * // get the previous code unit - * UChar u=it.previous(); + * char16_t u=it.previous(); * // move back one more code unit * it.move(-1, CharacterIterator::kCurrent); * // set the position back to where it was @@ -283,7 +283,7 @@ protected: * Function processing characters, in this example simple output *
  * \code
- *  void processChar( UChar c )
+ *  void processChar( char16_t c )
  *  {
  *      cout << " " << c;
  *  }
@@ -294,7 +294,7 @@ protected:
  * \code
  *  void traverseForward(CharacterIterator& iter)
  *  {
- *      for(UChar c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
+ *      for(char16_t c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
  *          processChar(c);
  *      }
  *  }
@@ -305,7 +305,7 @@ protected:
  * \code
  *  void traverseBackward(CharacterIterator& iter)
  *  {
- *      for(UChar c = iter.last(); c != CharacterIterator.DONE; c = iter.previous()) {
+ *      for(char16_t c = iter.last(); c != CharacterIterator.DONE; c = iter.previous()) {
  *          processChar(c);
  *      }
  *  }
@@ -317,7 +317,7 @@ protected:
  * \code
  * void traverseOut(CharacterIterator& iter, int32_t pos)
  * {
- *      UChar c;
+ *      char16_t c;
  *      for (c = iter.setIndex(pos);
  *      c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
  *          c = iter.next()) {}
@@ -386,7 +386,7 @@ public:
      * @return the first code unit in its iteration range.
      * @stable ICU 2.0
      */
-    virtual UChar         first(void) = 0;
+    virtual char16_t         first(void) = 0;
 
     /**
      * Sets the iterator to refer to the first code unit in its
@@ -396,7 +396,7 @@ public:
      * @return the first code unit in its iteration range.
      * @stable ICU 2.0
      */
-    virtual UChar         firstPostInc(void);
+    virtual char16_t         firstPostInc(void);
 
     /**
      * Sets the iterator to refer to the first code point in its
@@ -435,7 +435,7 @@ public:
      * @return the last code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         last(void) = 0;
+    virtual char16_t         last(void) = 0;
         
     /**
      * Sets the iterator to refer to the last code point in its
@@ -463,7 +463,7 @@ public:
      * @return the "position"-th code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         setIndex(int32_t position) = 0;
+    virtual char16_t         setIndex(int32_t position) = 0;
 
     /**
      * Sets the iterator to refer to the beginning of the code point
@@ -483,7 +483,7 @@ public:
      * @return the current code unit. 
      * @stable ICU 2.0
      */
-    virtual UChar         current(void) const = 0;
+    virtual char16_t         current(void) const = 0;
         
     /**
      * Returns the code point the iterator currently refers to.  
@@ -499,7 +499,7 @@ public:
      * @return the next code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         next(void) = 0;
+    virtual char16_t         next(void) = 0;
         
     /**
      * Advances to the next code point in the iteration range
@@ -520,7 +520,7 @@ public:
      * @return the previous code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         previous(void) = 0;
+    virtual char16_t         previous(void) = 0;
 
     /**
      * Advances to the previous code point in the iteration range
diff --git a/icu4c/source/common/unicode/localpointer.h b/icu4c/source/common/unicode/localpointer.h
index aa374f83b86..3ab820188f7 100644
--- a/icu4c/source/common/unicode/localpointer.h
+++ b/icu4c/source/common/unicode/localpointer.h
@@ -174,9 +174,9 @@ private:
  * \code
  * LocalPointer s(new UnicodeString((UChar32)0x50005));
  * int32_t length=s->length();  // 2
- * UChar lead=s->charAt(0);  // 0xd900
+ * char16_t lead=s->charAt(0);  // 0xd900
  * if(some condition) { return; }  // no need to explicitly delete the pointer
- * s.adoptInstead(new UnicodeString((UChar)0xfffc));
+ * s.adoptInstead(new UnicodeString((char16_t)0xfffc));
  * length=s->length();  // 1
  * // no need to explicitly delete the pointer
  * \endcode
@@ -323,10 +323,10 @@ public:
  * Usage example:
  * \code
  * LocalArray a(new UnicodeString[2]);
- * a[0].append((UChar)0x61);
+ * a[0].append((char16_t)0x61);
  * if(some condition) { return; }  // no need to explicitly delete the array
  * a.adoptInstead(new UnicodeString[4]);
- * a[3].append((UChar)0x62).append((UChar)0x63).reverse();
+ * a[3].append((char16_t)0x62).append((char16_t)0x63).reverse();
  * // no need to explicitly delete the array
  * \endcode
  *
diff --git a/icu4c/source/common/unicode/normalizer2.h b/icu4c/source/common/unicode/normalizer2.h
index 8995c8a56d1..d326da948a3 100644
--- a/icu4c/source/common/unicode/normalizer2.h
+++ b/icu4c/source/common/unicode/normalizer2.h
@@ -282,7 +282,7 @@ public:
      *
      * When used on a standard NFC Normalizer2 instance,
      * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
-     * in this case, the result contains either one or two code points (=1..4 UChars).
+     * in this case, the result contains either one or two code points (=1..4 char16_ts).
      *
      * This function is independent of the mode of the Normalizer2.
      * The default implementation returns FALSE.
diff --git a/icu4c/source/common/unicode/normlzr.h b/icu4c/source/common/unicode/normlzr.h
index 53665e97842..0141058fb00 100644
--- a/icu4c/source/common/unicode/normlzr.h
+++ b/icu4c/source/common/unicode/normlzr.h
@@ -168,7 +168,7 @@ public:
    * @param mode  The normalization mode.
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
+  Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode);
 
   /**
    * Creates a new Normalizer object for iterating over the
@@ -704,7 +704,7 @@ public:
    * @param status a UErrorCode
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  void setText(const UChar* newText,
+  void setText(ConstChar16Ptr newText,
                     int32_t length,
             UErrorCode &status);
   /**
@@ -796,8 +796,8 @@ Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
                     uint32_t options,
                     UErrorCode &errorCode) {
   // all argument checking is done in unorm_compare
-  return unorm_compare(s1.getBuffer(), s1.length(),
-                       s2.getBuffer(), s2.length(),
+  return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(),
+                       toUCharPtr(s2.getBuffer()), s2.length(),
                        options,
                        &errorCode);
 }
diff --git a/icu4c/source/common/unicode/platform.h b/icu4c/source/common/unicode/platform.h
index b553b6878da..e6d449b57ab 100644
--- a/icu4c/source/common/unicode/platform.h
+++ b/icu4c/source/common/unicode/platform.h
@@ -150,7 +150,7 @@
 #   define U_PLATFORM U_PF_ANDROID
     /* Android wchar_t support depends on the API level. */
 #   include 
-#elif defined(__native_client__)
+#elif defined(__pnacl__) || defined(__native_client__)
 #   define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT
 #elif defined(linux) || defined(__linux__) || defined(__linux)
 #   define U_PLATFORM U_PF_LINUX
diff --git a/icu4c/source/common/unicode/rep.h b/icu4c/source/common/unicode/rep.h
index cf414967afb..41fdc040497 100644
--- a/icu4c/source/common/unicode/rep.h
+++ b/icu4c/source/common/unicode/rep.h
@@ -93,7 +93,7 @@ public:
      * @return 16-bit code unit of text at given offset
      * @stable ICU 1.8
      */
-    inline UChar charAt(int32_t offset) const;
+    inline char16_t charAt(int32_t offset) const;
 
     /**
      * Returns the 32-bit code point at the given 16-bit offset into
@@ -230,7 +230,7 @@ protected:
      * Virtual version of charAt().
      * @stable ICU 2.4
      */
-    virtual UChar getCharAt(int32_t offset) const = 0;
+    virtual char16_t getCharAt(int32_t offset) const = 0;
 
     /**
      * Virtual version of char32At().
@@ -246,7 +246,7 @@ Replaceable::length() const {
     return getLength();
 }
 
-inline UChar
+inline char16_t
 Replaceable::charAt(int32_t offset) const {
     return getCharAt(offset);
 }
diff --git a/icu4c/source/common/unicode/resbund.h b/icu4c/source/common/unicode/resbund.h
index 355df474178..b522a7a6154 100644
--- a/icu4c/source/common/unicode/resbund.h
+++ b/icu4c/source/common/unicode/resbund.h
@@ -216,7 +216,7 @@ public:
      *                could be U_MISSING_RESOURCE_ERROR if the key is not found
      *                could be a warning
      *                e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING 
-     * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+     * @return a pointer to a zero-terminated char16_t array which lives in a memory mapped/DLL file.
      * @stable ICU 2.0
      */
     UnicodeString
diff --git a/icu4c/source/common/unicode/schriter.h b/icu4c/source/common/unicode/schriter.h
index f9d8117b3cc..b1dc939bd66 100644
--- a/icu4c/source/common/unicode/schriter.h
+++ b/icu4c/source/common/unicode/schriter.h
@@ -175,7 +175,7 @@ protected:
    * @param newTextLength The length of the String
    * @stable ICU 2.0
    */
-  void setText(const UChar* newText, int32_t newTextLength);
+  void setText(const char16_t* newText, int32_t newTextLength);
 
   /**
    * Copy of the iterated string object.
diff --git a/icu4c/source/common/unicode/simpleformatter.h b/icu4c/source/common/unicode/simpleformatter.h
index 16c8ad046e8..d307b4728c0 100644
--- a/icu4c/source/common/unicode/simpleformatter.h
+++ b/icu4c/source/common/unicode/simpleformatter.h
@@ -57,7 +57,7 @@ public:
      * Default constructor.
      * @draft ICU 57
      */
-    SimpleFormatter() : compiledPattern((UChar)0) {}
+    SimpleFormatter() : compiledPattern((char16_t)0) {}
 
     /**
      * Constructs a formatter from the pattern string.
@@ -275,15 +275,15 @@ private:
      */
     UnicodeString compiledPattern;
 
-    static inline int32_t getArgumentLimit(const UChar *compiledPattern,
+    static inline int32_t getArgumentLimit(const char16_t *compiledPattern,
                                               int32_t compiledPatternLength) {
         return compiledPatternLength == 0 ? 0 : compiledPattern[0];
     }
 
-    static UnicodeString getTextWithNoArguments(const UChar *compiledPattern, int32_t compiledPatternLength);
+    static UnicodeString getTextWithNoArguments(const char16_t *compiledPattern, int32_t compiledPatternLength);
 
     static UnicodeString &format(
-            const UChar *compiledPattern, int32_t compiledPatternLength,
+            const char16_t *compiledPattern, int32_t compiledPatternLength,
             const UnicodeString *const *values,
             UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
             int32_t *offsets, int32_t offsetsLength,
diff --git a/icu4c/source/common/unicode/strenum.h b/icu4c/source/common/unicode/strenum.h
index 368188a4aa8..61d514813b9 100644
--- a/icu4c/source/common/unicode/strenum.h
+++ b/icu4c/source/common/unicode/strenum.h
@@ -35,7 +35,7 @@ U_NAMESPACE_BEGIN
  * call, so the returned string still might not be 'valid' on
  * subsequent use.

* - *

Strings may take the form of const char*, const UChar*, or const + *

Strings may take the form of const char*, const char16_t*, or const * UnicodeString*. The type you get is determine by the variant of * 'next' that you call. In general the StringEnumeration is * optimized for one of these types, but all StringEnumerations can @@ -112,7 +112,7 @@ public: *

If the iterator is out of sync with its service, status is set * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.

* - *

If the native service string is a UChar* string, it is + *

If the native service string is a char16_t* string, it is * converted to char* with the invariant converter. If the * conversion fails (because a character cannot be converted) then * status is set to U_INVARIANT_CONVERSION_ERROR and the return @@ -131,7 +131,7 @@ public: virtual const char* next(int32_t *resultLength, UErrorCode& status); /** - *

Returns the next element as a NUL-terminated UChar*. If there + *

Returns the next element as a NUL-terminated char16_t*. If there * are no more elements, returns NULL. If the resultLength pointer * is not NULL, the length of the string (not counting the * terminating NUL) is returned at that address. If an error @@ -153,7 +153,7 @@ public: * * @stable ICU 2.4 */ - virtual const UChar* unext(int32_t *resultLength, UErrorCode& status); + virtual const char16_t* unext(int32_t *resultLength, UErrorCode& status); /** *

Returns the next element a UnicodeString*. If there are no diff --git a/icu4c/source/common/unicode/stringtriebuilder.h b/icu4c/source/common/unicode/stringtriebuilder.h index 91f1bd24dab..bcad2484e7b 100644 --- a/icu4c/source/common/unicode/stringtriebuilder.h +++ b/icu4c/source/common/unicode/stringtriebuilder.h @@ -105,7 +105,7 @@ protected: /** @internal */ virtual int32_t getElementStringLength(int32_t i) const = 0; /** @internal */ - virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0; + virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0; /** @internal */ virtual int32_t getElementValue(int32_t i) const = 0; @@ -120,7 +120,7 @@ protected: /** @internal */ virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0; /** @internal */ - virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0; + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0; /** @internal */ virtual UBool matchNodesCanHaveValues() const = 0; @@ -137,7 +137,7 @@ protected: /** @internal */ static const int32_t kMaxBranchLinearSubNodeLength=5; - // Maximum number of nested split-branch levels for a branch on all 2^16 possible UChar units. + // Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units. // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up. /** @internal */ static const int32_t kMaxSplitBranchLevels=14; @@ -338,7 +338,7 @@ protected: virtual void write(StringTrieBuilder &builder); // Adds a unit with a final value. void add(int32_t c, int32_t value) { - units[length]=(UChar)c; + units[length]=(char16_t)c; equal[length]=NULL; values[length]=value; ++length; @@ -346,7 +346,7 @@ protected: } // Adds a unit which leads to another match node. void add(int32_t c, Node *node) { - units[length]=(UChar)c; + units[length]=(char16_t)c; equal[length]=node; values[length]=0; ++length; @@ -356,7 +356,7 @@ protected: Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value". int32_t length; int32_t values[kMaxBranchLinearSubNodeLength]; - UChar units[kMaxBranchLinearSubNodeLength]; + char16_t units[kMaxBranchLinearSubNodeLength]; }; /** @@ -364,7 +364,7 @@ protected: */ class SplitBranchNode : public BranchNode { public: - SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) + SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) : BranchNode(((0x555555*37+middleUnit)*37+ hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)), unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {} @@ -372,7 +372,7 @@ protected: virtual int32_t markRightEdgesFirst(int32_t edgeNumber); virtual void write(StringTrieBuilder &builder); protected: - UChar unit; + char16_t unit; Node *lessThan; Node *greaterOrEqual; }; diff --git a/icu4c/source/common/unicode/ucharstrie.h b/icu4c/source/common/unicode/ucharstrie.h index 91c5ba1c2c2..dfc93f6d0ba 100644 --- a/icu4c/source/common/unicode/ucharstrie.h +++ b/icu4c/source/common/unicode/ucharstrie.h @@ -36,7 +36,7 @@ class UVector32; /** * Light-weight, non-const reader class for a UCharsTrie. - * Traverses a UChar-serialized data structure with minimal state, + * Traverses a char16_t-serialized data structure with minimal state, * for mapping strings (16-bit-unit sequences) to non-negative integer values. * * This class owns the serialized trie data only if it was constructed by @@ -52,18 +52,18 @@ public: /** * Constructs a UCharsTrie reader instance. * - * The trieUChars must contain a copy of a UChar sequence from the UCharsTrieBuilder, - * starting with the first UChar of that sequence. - * The UCharsTrie object will not read more UChars than + * The trieUChars must contain a copy of a char16_t sequence from the UCharsTrieBuilder, + * starting with the first char16_t of that sequence. + * The UCharsTrie object will not read more char16_ts than * the UCharsTrieBuilder generated in the corresponding build() call. * * The array is not copied/cloned and must not be modified while * the UCharsTrie object is in use. * - * @param trieUChars The UChar array that contains the serialized trie. + * @param trieUChars The char16_t array that contains the serialized trie. * @stable ICU 4.8 */ - UCharsTrie(const UChar *trieUChars) + UCharsTrie(ConstChar16Ptr trieUChars) : ownedArray_(NULL), uchars_(trieUChars), pos_(uchars_), remainingMatchLength_(-1) {} @@ -75,7 +75,7 @@ public: /** * Copy constructor, copies the other trie reader object and its state, - * but not the UChar array which will be shared. (Shallow copy.) + * but not the char16_t array which will be shared. (Shallow copy.) * @param other Another UCharsTrie object. * @stable ICU 4.8 */ @@ -109,8 +109,8 @@ public: private: friend class UCharsTrie; - const UChar *uchars; - const UChar *pos; + const char16_t *uchars; + const char16_t *pos; int32_t remainingMatchLength; }; @@ -148,14 +148,14 @@ public: /** * Determines whether the string so far matches, whether it has a value, - * and whether another input UChar can continue a matching string. + * and whether another input char16_t can continue a matching string. * @return The match/value Result. * @stable ICU 4.8 */ UStringTrieResult current() const; /** - * Traverses the trie from the initial state for this input UChar. + * Traverses the trie from the initial state for this input char16_t. * Equivalent to reset().next(uchar). * @param uchar Input char value. Values below 0 and above 0xffff will never match. * @return The match/value Result. @@ -177,7 +177,7 @@ public: UStringTrieResult firstForCodePoint(UChar32 cp); /** - * Traverses the trie from the current state for this input UChar. + * Traverses the trie from the current state for this input char16_t. * @param uchar Input char value. Values below 0 and above 0xffff will never match. * @return The match/value Result. * @stable ICU 4.8 @@ -208,7 +208,7 @@ public: * @return The match/value Result. * @stable ICU 4.8 */ - UStringTrieResult next(const UChar *s, int32_t length); + UStringTrieResult next(ConstChar16Ptr s, int32_t length); /** * Returns a matching string's value if called immediately after @@ -220,7 +220,7 @@ public: * @stable ICU 4.8 */ inline int32_t getValue() const { - const UChar *pos=pos_; + const char16_t *pos=pos_; int32_t leadUnit=*pos++; // U_ASSERT(leadUnit>=kMinValueLead); return leadUnit&kValueIsFinal ? @@ -237,16 +237,16 @@ public: * @stable ICU 4.8 */ inline UBool hasUniqueValue(int32_t &uniqueValue) const { - const UChar *pos=pos_; + const char16_t *pos=pos_; // Skip the rest of a pending linear-match node. return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue); } /** - * Finds each UChar which continues the string from the current state. - * That is, each UChar c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now. - * @param out Each next UChar is appended to this object. - * @return the number of UChars which continue the string from here + * Finds each char16_t which continues the string from the current state. + * That is, each char16_t c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now. + * @param out Each next char16_t is appended to this object. + * @return the number of char16_ts which continue the string from here * @stable ICU 4.8 */ int32_t getNextUChars(Appendable &out) const; @@ -258,8 +258,8 @@ public: class U_COMMON_API Iterator : public UMemory { public: /** - * Iterates from the root of a UChar-serialized UCharsTrie. - * @param trieUChars The trie UChars. + * Iterates from the root of a char16_t-serialized UCharsTrie. + * @param trieUChars The trie char16_ts. * @param maxStringLength If 0, the iterator returns full strings. * Otherwise, the iterator returns strings with this maximum length. * @param errorCode Standard ICU error code. Its input value must @@ -268,7 +268,7 @@ public: * function chaining. (See User Guide for details.) * @stable ICU 4.8 */ - Iterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode); + Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode); /** * Iterates from the current state of the specified UCharsTrie. @@ -336,11 +336,11 @@ public: return TRUE; } - const UChar *branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode); + const char16_t *branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode); - const UChar *uchars_; - const UChar *pos_; - const UChar *initialPos_; + const char16_t *uchars_; + const char16_t *pos_; + const char16_t *initialPos_; int32_t remainingMatchLength_; int32_t initialRemainingMatchLength_; UBool skipValue_; // Skip intermediate value which was already delivered. @@ -368,7 +368,7 @@ private: * this constructor adopts the builder's array. * This constructor is only called by the builder. */ - UCharsTrie(UChar *adoptUChars, const UChar *trieUChars) + UCharsTrie(char16_t *adoptUChars, const char16_t *trieUChars) : ownedArray_(adoptUChars), uchars_(trieUChars), pos_(uchars_), remainingMatchLength_(-1) {} @@ -381,7 +381,7 @@ private: // Reads a compact 32-bit integer. // pos is already after the leadUnit, and the lead unit has bit 15 reset. - static inline int32_t readValue(const UChar *pos, int32_t leadUnit) { + static inline int32_t readValue(const char16_t *pos, int32_t leadUnit) { int32_t value; if(leadUnit=kMinTwoUnitValueLead) { if(leadUnit=kMinTwoUnitNodeValueLead) { if(leadUnit=kMinTwoUnitDeltaLead) { if(delta==kThreeUnitDeltaLead) { @@ -444,7 +444,7 @@ private: return pos+delta; } - static const UChar *skipDelta(const UChar *pos) { + static const char16_t *skipDelta(const char16_t *pos) { int32_t delta=*pos++; if(delta>=kMinTwoUnitDeltaLead) { if(delta==kThreeUnitDeltaLead) { @@ -461,28 +461,28 @@ private: } // Handles a branch node for both next(uchar) and next(string). - UStringTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar); + UStringTrieResult branchNext(const char16_t *pos, int32_t length, int32_t uchar); // Requires remainingLength_<0. - UStringTrieResult nextImpl(const UChar *pos, int32_t uchar); + UStringTrieResult nextImpl(const char16_t *pos, int32_t uchar); // Helper functions for hasUniqueValue(). // Recursively finds a unique value (or whether there is not a unique one) // from a branch. - static const UChar *findUniqueValueFromBranch(const UChar *pos, int32_t length, + static const char16_t *findUniqueValueFromBranch(const char16_t *pos, int32_t length, UBool haveUniqueValue, int32_t &uniqueValue); // Recursively finds a unique value (or whether there is not a unique one) // starting from a position on a node lead unit. - static UBool findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue); + static UBool findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue); // Helper functions for getNextUChars(). // getNextUChars() when pos is on a branch node. - static void getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out); + static void getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out); // UCharsTrie data structure // - // The trie consists of a series of UChar-serialized nodes for incremental - // Unicode string/UChar sequence matching. (UChar=16-bit unsigned integer) + // The trie consists of a series of char16_t-serialized nodes for incremental + // Unicode string/char16_t sequence matching. (char16_t=16-bit unsigned integer) // The root node is at the beginning of the trie data. // // Types of nodes are distinguished by their node lead unit ranges. @@ -491,9 +491,9 @@ private: // // Node types: // - Final-value node: Stores a 32-bit integer in a compact, variable-length format. - // The value is for the string/UChar sequence so far. + // The value is for the string/char16_t sequence so far. // - Match node, optionally with an intermediate value in a different compact format. - // The value, if present, is for the string/UChar sequence so far. + // The value, if present, is for the string/char16_t sequence so far. // // Aside from the value, which uses the node lead unit's high bits: // @@ -560,15 +560,15 @@ private: static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff - UChar *ownedArray_; + char16_t *ownedArray_; // Fixed value referencing the UCharsTrie words. - const UChar *uchars_; + const char16_t *uchars_; // Iterator variables. // Pointer to next trie unit to read. NULL if no more matches. - const UChar *pos_; + const char16_t *pos_; // Remaining length of a linear-match node, minus 1. Negative if not in such a node. int32_t remainingMatchLength_; }; diff --git a/icu4c/source/common/unicode/ucharstriebuilder.h b/icu4c/source/common/unicode/ucharstriebuilder.h index a8b75697de7..2aa4757e52c 100644 --- a/icu4c/source/common/unicode/ucharstriebuilder.h +++ b/icu4c/source/common/unicode/ucharstriebuilder.h @@ -89,21 +89,21 @@ public: UCharsTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); /** - * Builds a UCharsTrie for the add()ed data and UChar-serializes it. + * Builds a UCharsTrie for the add()ed data and char16_t-serializes it. * Once built, no further data can be add()ed until clear() is called. * * A UCharsTrie cannot be empty. At least one (string, value) pair * must have been add()ed. * * Multiple calls to buildUnicodeString() set the UnicodeStrings to the - * builder's same UChar array, without rebuilding. + * builder's same char16_t array, without rebuilding. * If buildUnicodeString() is called after build(), the trie will be * re-serialized into a new array. * If build() is called after buildUnicodeString(), the trie object will become * the owner of the previously returned array. * After clear() has been called, a new array will be used as well. * @param buildOption Build option, see UStringTrieBuildOption. - * @param result A UnicodeString which will be set to the UChar-serialized + * @param result A UnicodeString which will be set to the char16_t-serialized * UCharsTrie for the add()ed data. * @param errorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns @@ -135,14 +135,14 @@ private: void buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode); virtual int32_t getElementStringLength(int32_t i) const; - virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const; + virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const; virtual int32_t getElementValue(int32_t i) const; virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const; virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const; virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const; - virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const; + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const; virtual UBool matchNodesCanHaveValues() const { return TRUE; } @@ -152,11 +152,11 @@ private: class UCTLinearMatchNode : public LinearMatchNode { public: - UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode); + UCTLinearMatchNode(const char16_t *units, int32_t len, Node *nextNode); virtual UBool operator==(const Node &other) const; virtual void write(StringTrieBuilder &builder); private: - const UChar *s; + const char16_t *s; }; virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, @@ -164,7 +164,7 @@ private: UBool ensureCapacity(int32_t length); virtual int32_t write(int32_t unit); - int32_t write(const UChar *s, int32_t length); + int32_t write(const char16_t *s, int32_t length); virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length); virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); @@ -175,9 +175,9 @@ private: int32_t elementsCapacity; int32_t elementsLength; - // UChar serialization of the trie. + // char16_t serialization of the trie. // Grows from the back: ucharsLength measures from the end of the buffer! - UChar *uchars; + char16_t *uchars; int32_t ucharsCapacity; int32_t ucharsLength; }; diff --git a/icu4c/source/common/unicode/uchriter.h b/icu4c/source/common/unicode/uchriter.h index 3408a555071..21c93154141 100644 --- a/icu4c/source/common/unicode/uchriter.h +++ b/icu4c/source/common/unicode/uchriter.h @@ -15,18 +15,18 @@ /** * \file - * \brief C++ API: UChar Character Iterator + * \brief C++ API: char16_t Character Iterator */ U_NAMESPACE_BEGIN /** * A concrete subclass of CharacterIterator that iterates over the - * characters (code units or code points) in a UChar array. + * characters (code units or code points) in a char16_t array. * It's possible not only to create an - * iterator that iterates over an entire UChar array, but also to - * create one that iterates over only a subrange of a UChar array - * (iterators over different subranges of the same UChar array don't + * iterator that iterates over an entire char16_t array, but also to + * create one that iterates over only a subrange of a char16_t array + * (iterators over different subranges of the same char16_t array don't * compare equal). * @see CharacterIterator * @see ForwardCharacterIterator @@ -35,34 +35,34 @@ U_NAMESPACE_BEGIN class U_COMMON_API UCharCharacterIterator : public CharacterIterator { public: /** - * Create an iterator over the UChar array referred to by "textPtr". + * Create an iterator over the char16_t array referred to by "textPtr". * The iteration range is 0 to length-1. * text is only aliased, not adopted (the * destructor will not delete it). - * @param textPtr The UChar array to be iterated over - * @param length The length of the UChar array + * @param textPtr The char16_t array to be iterated over + * @param length The length of the char16_t array * @stable ICU 2.0 */ - UCharCharacterIterator(const UChar* textPtr, int32_t length); + UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length); /** - * Create an iterator over the UChar array referred to by "textPtr". + * Create an iterator over the char16_t array referred to by "textPtr". * The iteration range is 0 to length-1. * text is only aliased, not adopted (the * destructor will not delete it). * The starting * position is specified by "position". If "position" is outside the valid * iteration range, the behavior of this object is undefined. - * @param textPtr The UChar array to be iteratd over - * @param length The length of the UChar array + * @param textPtr The char16_t array to be iteratd over + * @param length The length of the char16_t array * @param position The starting position of the iteration * @stable ICU 2.0 */ - UCharCharacterIterator(const UChar* textPtr, int32_t length, + UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, int32_t position); /** - * Create an iterator over the UChar array referred to by "textPtr". + * Create an iterator over the char16_t array referred to by "textPtr". * The iteration range is 0 to end-1. * text is only aliased, not adopted (the * destructor will not delete it). @@ -70,14 +70,14 @@ public: * position is specified by "position". If begin and end do not * form a valid iteration range or "position" is outside the valid * iteration range, the behavior of this object is undefined. - * @param textPtr The UChar array to be iterated over - * @param length The length of the UChar array + * @param textPtr The char16_t array to be iterated over + * @param length The length of the char16_t array * @param textBegin The begin position of the iteration range * @param textEnd The end position of the iteration range * @param position The starting position of the iteration * @stable ICU 2.0 */ - UCharCharacterIterator(const UChar* textPtr, int32_t length, + UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, int32_t textBegin, int32_t textEnd, int32_t position); @@ -141,7 +141,7 @@ public: * @return the first code unit in its iteration range. * @stable ICU 2.0 */ - virtual UChar first(void); + virtual char16_t first(void); /** * Sets the iterator to refer to the first code unit in its @@ -151,7 +151,7 @@ public: * @return the first code unit in its iteration range * @stable ICU 2.0 */ - virtual UChar firstPostInc(void); + virtual char16_t firstPostInc(void); /** * Sets the iterator to refer to the first code point in its @@ -181,7 +181,7 @@ public: * @return the last code unit in its iteration range. * @stable ICU 2.0 */ - virtual UChar last(void); + virtual char16_t last(void); /** * Sets the iterator to refer to the last code point in its @@ -200,7 +200,7 @@ public: * @return the code unit * @stable ICU 2.0 */ - virtual UChar setIndex(int32_t position); + virtual char16_t setIndex(int32_t position); /** * Sets the iterator to refer to the beginning of the code point @@ -220,7 +220,7 @@ public: * @return the code unit the iterator currently refers to. * @stable ICU 2.0 */ - virtual UChar current(void) const; + virtual char16_t current(void) const; /** * Returns the code point the iterator currently refers to. @@ -236,7 +236,7 @@ public: * @return the next code unit in the iteration range. * @stable ICU 2.0 */ - virtual UChar next(void); + virtual char16_t next(void); /** * Gets the current code unit for returning and advances to the next code unit @@ -246,7 +246,7 @@ public: * @return the current code unit. * @stable ICU 2.0 */ - virtual UChar nextPostInc(void); + virtual char16_t nextPostInc(void); /** * Advances to the next code point in the iteration range (toward @@ -288,7 +288,7 @@ public: * @return the previous code unit in the iteration range. * @stable ICU 2.0 */ - virtual UChar previous(void); + virtual char16_t previous(void); /** * Advances to the previous code point in the iteration range (toward @@ -340,10 +340,10 @@ public: * Sets the iterator to iterate over a new range of text * @stable ICU 2.0 */ - void setText(const UChar* newText, int32_t newTextLength); + void setText(ConstChar16Ptr newText, int32_t newTextLength); /** - * Copies the UChar array under iteration into the UnicodeString + * Copies the char16_t array under iteration into the UnicodeString * referred to by "result". Even if this iterator iterates across * only a part of this string, the whole string is copied. * @param result Receives a copy of the text under iteration. @@ -375,7 +375,7 @@ protected: * Protected member text * @stable ICU 2.0 */ - const UChar* text; + const char16_t* text; }; diff --git a/icu4c/source/common/unicode/umachine.h b/icu4c/source/common/unicode/umachine.h index fbd2bfe5686..cc28e4b687e 100644 --- a/icu4c/source/common/unicode/umachine.h +++ b/icu4c/source/common/unicode/umachine.h @@ -301,32 +301,75 @@ typedef int8_t UBool; /** * \var UChar * - * For C++, UChar is always defined to be char16_t. + * The base type for UTF-16 code units and pointers. + * Unsigned 16-bit integer. + * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar. * - * For plain C, define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), - * or wchar_t if that is 16 bits wide; always assumed to be unsigned. - * If neither is available, then define UChar to be uint16_t. + * UChar is configurable by defining the macro UCHAR_TYPE + * on the preprocessor or compiler command line: + * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. + * (The UCHAR_TYPE can also be #defined earlier in this file, for outside the ICU library code.) + * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16. * - * This makes the definition of UChar platform-dependent - * but allows direct string type compatibility with platforms with - * 16-bit wchar_t types. + * The default is UChar=char16_t. + * + * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type. + * + * In C, char16_t is a simple typedef of uint_least16_t. + * ICU requires uint_least16_t=uint16_t for data memory mapping. + * On macOS, char16_t is not available because the uchar.h standard header is missing. * * @stable ICU 4.4 */ -#ifdef __cplusplus + +#if 1 + // #if 1 is normal. UChar defaults to char16_t in C++. + // For configuration testing of UChar=uint16_t temporarily change this to #if 0. + // The intltest Makefile #defines UCHAR_TYPE=char16_t, + // so we only #define it to uint16_t if it is undefined so far. +#elif !defined(UCHAR_TYPE) +# define UCHAR_TYPE uint16_t +#endif + +#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ + defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) + // Inside the ICU library code, never configurable. typedef char16_t UChar; #elif defined(UCHAR_TYPE) typedef UCHAR_TYPE UChar; -#elif U_SIZEOF_WCHAR_T==2 - typedef wchar_t UChar; -#elif U_HAVE_CHAR16_T +#elif defined(__cplusplus) typedef char16_t UChar; -#elif defined(__CHAR16_TYPE__) - typedef __CHAR16_TYPE__ UChar; #else typedef uint16_t UChar; #endif +/** + * \var OldUChar + * Default ICU 58 definition of UChar. + * A base type for UTF-16 code units and pointers. + * Unsigned 16-bit integer. + * + * Define OldUChar to be wchar_t if that is 16 bits wide. + * If wchar_t is not 16 bits wide, then define UChar to be uint16_t. + * + * This makes the definition of OldUChar platform-dependent + * but allows direct string type compatibility with platforms with + * 16-bit wchar_t types. + * + * This is how UChar was defined in ICU 58, for transition convenience. + * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. + * The current UChar responds to UCHAR_TYPE but OldUChar does not. + * + * @draft ICU 59 + */ +#if U_SIZEOF_WCHAR_T==2 + typedef wchar_t OldUChar; +#elif defined(__CHAR16_TYPE__) + typedef __CHAR16_TYPE__ OldUChar; +#else + typedef uint16_t OldUChar; +#endif + /** * Define UChar32 as a type for single Unicode code points. * UChar32 is a signed 32-bit integer (same as int32_t). diff --git a/icu4c/source/common/unicode/unifilt.h b/icu4c/source/common/unicode/unifilt.h index a23d871a151..e10527154b6 100644 --- a/icu4c/source/common/unicode/unifilt.h +++ b/icu4c/source/common/unicode/unifilt.h @@ -30,7 +30,7 @@ U_NAMESPACE_BEGIN * defined range. * @stable ICU 3.0 */ -#define U_ETHER ((UChar)0xFFFF) +#define U_ETHER ((char16_t)0xFFFF) /** * diff --git a/icu4c/source/common/unicode/uniset.h b/icu4c/source/common/unicode/uniset.h index 133c2cf7f8d..4a4ce193b64 100644 --- a/icu4c/source/common/unicode/uniset.h +++ b/icu4c/source/common/unicode/uniset.h @@ -294,7 +294,7 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { * indicating that toPattern() must generate a pattern * representation from the inversion list. */ - UChar *pat; + char16_t *pat; UVector* strings; // maintained in sorted order UnicodeSetStringSpan *stringSpan; @@ -891,7 +891,7 @@ public: * @stable ICU 3.8 * @see USetSpanCondition */ - int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; + int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const; /** * Returns the end of the substring of the input string according to the USetSpanCondition. @@ -924,7 +924,7 @@ public: * @stable ICU 3.8 * @see USetSpanCondition */ - int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; + int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const; /** * Returns the start of the substring of the input string according to the USetSpanCondition. diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 0d215d67bb0..42ef43b293e 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -28,7 +28,9 @@ * \brief C++ API: Unicode String */ +#include #include "unicode/utypes.h" +#include "unicode/char16ptr.h" #include "unicode/rep.h" #include "unicode/std_string.h" #include "unicode/stringpiece.h" @@ -75,8 +77,8 @@ UStringCaseMapper(int32_t caseLocale, uint32_t options, #if !UCONFIG_NO_BREAK_ITERATION icu::BreakIterator *iter, #endif - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, + const char16_t *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode); @@ -117,7 +119,7 @@ class UnicodeStringAppendable; // unicode/appendable.h * NUL, must be specified as a constant. * @stable ICU 2.0 */ -#define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)u ## cs, _length) +#define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length) /** * Unicode String literals in C++. @@ -137,7 +139,7 @@ class UnicodeStringAppendable; // unicode/appendable.h /** * \def UNISTR_FROM_CHAR_EXPLICIT * This can be defined to be empty or "explicit". - * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32) + * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32) * constructors are marked as explicit, preventing their inadvertent use. * @stable ICU 49 */ @@ -154,7 +156,7 @@ class UnicodeStringAppendable; // unicode/appendable.h /** * \def UNISTR_FROM_STRING_EXPLICIT * This can be defined to be empty or "explicit". - * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *) + * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *) * constructors are marked as explicit, preventing their inadvertent use. * * In particular, this helps prevent accidentally depending on ICU conversion code @@ -188,18 +190,18 @@ class UnicodeStringAppendable; // unicode/appendable.h * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models), * to hold the fields for heap-allocated strings. * Such a minimum size also ensures that the object is easily large enough - * to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH). + * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH). * * sizeof(UnicodeString) >= 48 should work for all known platforms. * * For example, on a 64-bit machine where sizeof(vtable pointer) is 8, * sizeof(UnicodeString) = 64 would leave space for * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27 - * UChars stored inside the object. + * char16_ts stored inside the object. * * The minimum object size on a 64-bit machine would be * 4 * sizeof(pointer) = 4 * 8 = 32 bytes, - * and the internal buffer would hold up to 11 UChars in that case. + * and the internal buffer would hold up to 11 char16_ts in that case. * * @see U16_MAX_LENGTH * @stable ICU 56 @@ -231,7 +233,7 @@ class UnicodeStringAppendable; // unicode/appendable.h *

In ICU, a Unicode string consists of 16-bit Unicode code units. * A Unicode character may be stored with either one code unit * (the most common case) or with a matched pair of special code units - * ("surrogates"). The data type for code units is UChar. + * ("surrogates"). The data type for code units is char16_t. * For single-character handling, a Unicode character code point is a value * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.

* @@ -247,7 +249,7 @@ class UnicodeStringAppendable; // unicode/appendable.h * than other ICU APIs. In particular: * - If indexes are out of bounds for a UnicodeString object * (<0 or >length()) then they are "pinned" to the nearest boundary. - * - If primitive string pointer values (e.g., const UChar * or char *) + * - If primitive string pointer values (e.g., const char16_t * or char *) * for input strings are NULL, then those input string parameters are treated * as if they pointed to an empty string. * However, this is not the case for char * parameters for charset names @@ -434,7 +436,7 @@ public: * in srcChars. * @stable ICU 2.0 */ - inline int8_t compare(const UChar *srcChars, + inline int8_t compare(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -453,7 +455,7 @@ public: */ inline int8_t compare(int32_t start, int32_t length, - const UChar *srcChars) const; + const char16_t *srcChars) const; /** * Compare the characters bitwise in the range @@ -474,7 +476,7 @@ public: */ inline int8_t compare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -588,7 +590,7 @@ public: * in code point order * @stable ICU 2.0 */ - inline int8_t compareCodePointOrder(const UChar *srcChars, + inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -612,7 +614,7 @@ public: */ inline int8_t compareCodePointOrder(int32_t start, int32_t length, - const UChar *srcChars) const; + const char16_t *srcChars) const; /** * Compare two Unicode strings in code point order. @@ -637,7 +639,7 @@ public: */ inline int8_t compareCodePointOrder(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -761,7 +763,7 @@ public: * @return A negative, zero, or positive integer indicating the comparison result. * @stable ICU 2.0 */ - inline int8_t caseCompare(const UChar *srcChars, + inline int8_t caseCompare(ConstChar16Ptr srcChars, int32_t srcLength, uint32_t options) const; @@ -787,7 +789,7 @@ public: */ inline int8_t caseCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, uint32_t options) const; /** @@ -814,7 +816,7 @@ public: */ inline int8_t caseCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const; @@ -879,7 +881,7 @@ public: * FALSE otherwise * @stable ICU 2.0 */ - inline UBool startsWith(const UChar *srcChars, + inline UBool startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -891,7 +893,7 @@ public: * @return TRUE if this ends with the characters in srcChars, FALSE otherwise * @stable ICU 2.0 */ - inline UBool startsWith(const UChar *srcChars, + inline UBool startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -926,7 +928,7 @@ public: * FALSE otherwise * @stable ICU 2.0 */ - inline UBool endsWith(const UChar *srcChars, + inline UBool endsWith(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -939,7 +941,7 @@ public: * FALSE otherwise * @stable ICU 2.0 */ - inline UBool endsWith(const UChar *srcChars, + inline UBool endsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -1016,7 +1018,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(const UChar *srcChars, + inline int32_t indexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const; @@ -1032,7 +1034,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(const UChar *srcChars, + inline int32_t indexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t length) const; @@ -1053,7 +1055,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - int32_t indexOf(const UChar *srcChars, + int32_t indexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, @@ -1066,7 +1068,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(UChar c) const; + inline int32_t indexOf(char16_t c) const; /** * Locate in this the first occurrence of the code point c, @@ -1086,7 +1088,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(UChar c, + inline int32_t indexOf(char16_t c, int32_t start) const; /** @@ -1111,7 +1113,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(UChar c, + inline int32_t indexOf(char16_t c, int32_t start, int32_t length) const; @@ -1199,7 +1201,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(const UChar *srcChars, + inline int32_t lastIndexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const; @@ -1215,7 +1217,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(const UChar *srcChars, + inline int32_t lastIndexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t length) const; @@ -1236,7 +1238,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - int32_t lastIndexOf(const UChar *srcChars, + int32_t lastIndexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, @@ -1249,7 +1251,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(UChar c) const; + inline int32_t lastIndexOf(char16_t c) const; /** * Locate in this the last occurrence of the code point c, @@ -1269,7 +1271,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(UChar c, + inline int32_t lastIndexOf(char16_t c, int32_t start) const; /** @@ -1294,7 +1296,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(UChar c, + inline int32_t lastIndexOf(char16_t c, int32_t start, int32_t length) const; @@ -1324,7 +1326,7 @@ public: * or 0xffff if the offset is not valid for this string * @stable ICU 2.0 */ - inline UChar charAt(int32_t offset) const; + inline char16_t charAt(int32_t offset) const; /** * Return the code unit at offset offset. @@ -1333,7 +1335,7 @@ public: * @return the code unit at offset offset * @stable ICU 2.0 */ - inline UChar operator[] (int32_t offset) const; + inline char16_t operator[] (int32_t offset) const; /** * Return the code point that contains the code unit @@ -1454,7 +1456,7 @@ public: */ inline void extract(int32_t start, int32_t length, - UChar *dst, + Char16Ptr dst, int32_t dstStart = 0) const; /** @@ -1473,13 +1475,13 @@ public: * then extract() will not copy the contents. * * @param dest Destination string buffer. - * @param destCapacity Number of UChars available at dest. + * @param destCapacity Number of char16_ts available at dest. * @param errorCode ICU error code. * @return length() * @stable ICU 2.0 */ int32_t - extract(UChar *dest, int32_t destCapacity, + extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const; /** @@ -1509,7 +1511,7 @@ public: */ inline void extractBetween(int32_t start, int32_t limit, - UChar *dst, + char16_t *dst, int32_t dstStart = 0) const; /** @@ -1750,7 +1752,7 @@ public: /** * Return the length of the UnicodeString object. - * The length is the number of UChar code units are in the UnicodeString. + * The length is the number of char16_t code units are in the UnicodeString. * If you want the number of code points, please use countChar32(). * @return the length of the UnicodeString object * @see countChar32 @@ -1759,14 +1761,14 @@ public: inline int32_t length(void) const; /** - * Count Unicode code points in the length UChar code units of the string. - * A code point may occupy either one or two UChar code units. + * Count Unicode code points in the length char16_t code units of the string. + * A code point may occupy either one or two char16_t code units. * Counting code points involves reading all code units. * * This functions is basically the inverse of moveIndex32(). * * @param start the index of the first code unit to check - * @param length the number of UChar code units to check + * @param length the number of char16_t code units to check * @return the number of code points in the specified code units * @see length * @stable ICU 2.0 @@ -1775,7 +1777,7 @@ public: countChar32(int32_t start=0, int32_t length=INT32_MAX) const; /** - * Check if the length UChar code units of the string + * Check if the length char16_t code units of the string * contain more Unicode code points than a certain number. * This is more efficient than counting all code points in this part of the string * and comparing that number with a threshold. @@ -1783,10 +1785,10 @@ public: * falls within a certain range, and * never needs to count more than 'number+1' code points. * Logically equivalent to (countChar32(start, length)>number). - * A Unicode code point may occupy either one or two UChar code units. + * A Unicode code point may occupy either one or two char16_t code units. * * @param start the index of the first code unit to check (0 for the entire string) - * @param length the number of UChar code units to check + * @param length the number of char16_t code units to check * (use INT32_MAX for the entire string; remember that start/length * values are pinned) * @param number The number of code points in the (sub)string is compared against @@ -1812,7 +1814,7 @@ public: * This is useful together with the getBuffer functions. * See there for details. * - * @return the number of UChars available in the internal buffer + * @return the number of char16_ts available in the internal buffer * @see getBuffer * @stable ICU 2.0 */ @@ -1946,7 +1948,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& operator= (UChar ch); + inline UnicodeString& operator= (char16_t ch); /** * Assignment operator. Replace the characters in this UnicodeString @@ -2006,7 +2008,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& setTo(const UChar *srcChars, + inline UnicodeString& setTo(const char16_t *srcChars, int32_t srcLength); /** @@ -2017,7 +2019,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - UnicodeString& setTo(UChar srcChar); + UnicodeString& setTo(char16_t srcChar); /** * Set the characters in the UnicodeString object to the code point @@ -2030,7 +2032,7 @@ public: UnicodeString& setTo(UChar32 srcChar); /** - * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. + * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has copy-on-write semantics: @@ -2053,11 +2055,11 @@ public: * @stable ICU 2.0 */ UnicodeString &setTo(UBool isTerminated, - const UChar *text, + ConstChar16Ptr text, int32_t textLength); /** - * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. + * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has write-through semantics: @@ -2066,16 +2068,16 @@ public: * a new buffer will be allocated and the contents copied as with regularly * constructed strings. * In an assignment to another UnicodeString, the buffer will be copied. - * The extract(UChar *dst) function detects whether the dst pointer is the same + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same * as the string buffer itself and will in this case not copy the contents. * * @param buffer The characters to alias for the UnicodeString. * @param buffLength The number of Unicode characters in buffer to alias. - * @param buffCapacity The size of buffer in UChars. + * @param buffCapacity The size of buffer in char16_ts. * @return a reference to this * @stable ICU 2.0 */ - UnicodeString &setTo(UChar *buffer, + UnicodeString &setTo(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); @@ -2111,7 +2113,7 @@ public: * s.truncate(0); // set to an empty string (complete truncation), or * s=UnicodeString(); // assign an empty string, or * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or - * static const UChar nul=0; + * static const char16_t nul=0; * s.setTo(&nul, 0); // set to an empty C Unicode string * } * \endcode @@ -2129,7 +2131,7 @@ public: * @stable ICU 2.0 */ UnicodeString& setCharAt(int32_t offset, - UChar ch); + char16_t ch); /* Append operations */ @@ -2141,7 +2143,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& operator+= (UChar ch); + inline UnicodeString& operator+= (char16_t ch); /** * Append operator. Append the code point ch to the UnicodeString @@ -2201,7 +2203,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& append(const UChar *srcChars, + inline UnicodeString& append(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); @@ -2214,7 +2216,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& append(const UChar *srcChars, + inline UnicodeString& append(ConstChar16Ptr srcChars, int32_t srcLength); /** @@ -2223,7 +2225,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& append(UChar srcChar); + inline UnicodeString& append(char16_t srcChar); /** * Append the code point srcChar to the UnicodeString object. @@ -2279,7 +2281,7 @@ public: * @stable ICU 2.0 */ inline UnicodeString& insert(int32_t start, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength); @@ -2293,7 +2295,7 @@ public: * @stable ICU 2.0 */ inline UnicodeString& insert(int32_t start, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength); /** @@ -2305,7 +2307,7 @@ public: * @stable ICU 2.0 */ inline UnicodeString& insert(int32_t start, - UChar srcChar); + char16_t srcChar); /** * Insert the code point srcChar into the UnicodeString object at @@ -2379,7 +2381,7 @@ public: */ UnicodeString& replace(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength); @@ -2397,7 +2399,7 @@ public: */ inline UnicodeString& replace(int32_t start, int32_t length, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength); /** @@ -2413,7 +2415,7 @@ public: */ inline UnicodeString& replace(int32_t start, int32_t length, - UChar srcChar); + char16_t srcChar); /** * Replace the characters in the range @@ -2611,7 +2613,7 @@ public: * @stable ICU 2.0 */ UBool padLeading(int32_t targetLength, - UChar padChar = 0x0020); + char16_t padChar = 0x0020); /** * Pad the end of this UnicodeString with the character padChar. @@ -2625,7 +2627,7 @@ public: * @stable ICU 2.0 */ UBool padTrailing(int32_t targetLength, - UChar padChar = 0x0020); + char16_t padChar = 0x0020); /** * Truncate this UnicodeString to the targetLength. @@ -2812,7 +2814,7 @@ public: /** * Get a read/write pointer to the internal buffer. - * The buffer is guaranteed to be large enough for at least minCapacity UChars, + * The buffer is guaranteed to be large enough for at least minCapacity char16_ts, * writable, and is still owned by the UnicodeString object. * Calls to getBuffer(minCapacity) must not be nested, and * must be matched with calls to releaseBuffer(newLength). @@ -2843,17 +2845,17 @@ public: * - You must call releaseBuffer(newLength) before and in order to * return to normal UnicodeString operation. * - * @param minCapacity the minimum number of UChars that are to be available + * @param minCapacity the minimum number of char16_ts that are to be available * in the buffer, starting at the returned pointer; * default to the current string capacity if minCapacity==-1 * @return a writable pointer to the internal string buffer, - * or 0 if an error occurs (nested calls, out of memory) + * or nullptr if an error occurs (nested calls, out of memory) * * @see releaseBuffer * @see getTerminatedBuffer() * @stable ICU 2.0 */ - UChar *getBuffer(int32_t minCapacity); + char16_t *getBuffer(int32_t minCapacity); /** * Release a read/write buffer on a UnicodeString object with an @@ -2901,13 +2903,13 @@ public: * be modified. * * @return a read-only pointer to the internal string buffer, - * or 0 if the string is empty or bogus + * or nullptr if the string is empty or bogus * * @see getBuffer(int32_t minCapacity) * @see getTerminatedBuffer() * @stable ICU 2.0 */ - inline const UChar *getBuffer() const; + inline const char16_t *getBuffer() const; /** * Get a read-only pointer to the internal buffer, @@ -2942,7 +2944,7 @@ public: * @see getBuffer() * @stable ICU 2.2 */ - const UChar *getTerminatedBuffer(); + const char16_t *getTerminatedBuffer(); //======================================== // Constructors @@ -2954,8 +2956,8 @@ public: inline UnicodeString(); /** - * Construct a UnicodeString with capacity to hold capacity UChars - * @param capacity the number of UChars this UnicodeString should hold + * Construct a UnicodeString with capacity to hold capacity char16_ts + * @param capacity the number of char16_ts this UnicodeString should hold * before a resize is necessary; if count is greater than 0 and count * code points c take up more space than capacity, then capacity is adjusted * accordingly. @@ -2967,7 +2969,7 @@ public: UnicodeString(int32_t capacity, UChar32 c, int32_t count); /** - * Single UChar (code unit) constructor. + * Single char16_t (code unit) constructor. * * It is recommended to mark this constructor "explicit" by * -DUNISTR_FROM_CHAR_EXPLICIT=explicit @@ -2975,7 +2977,7 @@ public: * @param ch the character to place in the UnicodeString * @stable ICU 2.0 */ - UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); + UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch); /** * Single UChar32 (code point) constructor. @@ -2989,7 +2991,7 @@ public: UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); /** - * UChar* constructor. + * char16_t* constructor. * * It is recommended to mark this constructor "explicit" by * -DUNISTR_FROM_STRING_EXPLICIT=explicit @@ -2998,20 +3000,93 @@ public: * must be NULL (U+0000) terminated. * @stable ICU 2.0 */ - UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); /** - * UChar* constructor. + * uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *). + * + * It is recommended to mark this constructor "explicit" by + * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @draft ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : + UnicodeString(ConstChar16Ptr(text)) {} + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(const char16_t *). + * + * It is recommended to mark this constructor "explicit" by + * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @draft ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : + UnicodeString(ConstChar16Ptr(text)) {} +#endif + + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * + * It is recommended to mark this constructor "explicit" by + * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * on the compiler command line or similar. + * @param text nullptr + * @draft ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); + + /** + * char16_t* constructor. * @param text The characters to place in the UnicodeString. * @param textLength The number of Unicode characters in text * to copy. * @stable ICU 2.0 */ - UnicodeString(const UChar *text, + UnicodeString(const char16_t *text, int32_t textLength); /** - * Readonly-aliasing UChar* constructor. + * uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *, int32_t). + * @param text UTF-16 string + * @param length string length + * @draft ICU 59 + */ + UnicodeString(const uint16_t *text, int32_t length) : + UnicodeString(ConstChar16Ptr(text), length) {} + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(const char16_t *, int32_t). + * @param text NUL-terminated UTF-16 string + * @param length string length + * @draft ICU 59 + */ + UnicodeString(const wchar_t *text, int32_t length) : + UnicodeString(ConstChar16Ptr(text), length) {} +#endif + + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param text nullptr + * @param length ignored + * @draft ICU 59 + */ + inline UnicodeString(const std::nullptr_t text, int32_t length); + + /** + * Readonly-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has copy-on-write semantics: @@ -3033,11 +3108,11 @@ public: * @stable ICU 2.0 */ UnicodeString(UBool isTerminated, - const UChar *text, + ConstChar16Ptr text, int32_t textLength); /** - * Writable-aliasing UChar* constructor. + * Writable-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has write-through semantics: @@ -3046,15 +3121,50 @@ public: * a new buffer will be allocated and the contents copied as with regularly * constructed strings. * In an assignment to another UnicodeString, the buffer will be copied. - * The extract(UChar *dst) function detects whether the dst pointer is the same + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same * as the string buffer itself and will in this case not copy the contents. * * @param buffer The characters to alias for the UnicodeString. * @param buffLength The number of Unicode characters in buffer to alias. - * @param buffCapacity The size of buffer in UChars. + * @param buffCapacity The size of buffer in char16_ts. * @stable ICU 2.0 */ - UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); + UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); + + /** + * Writable-aliasing uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *, int32_t, int32_t). + * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @draft ICU 59 + */ + UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Writable-aliasing wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(const char16_t *, int32_t, int32_t). + * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @draft ICU 59 + */ + UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} +#endif + + /** + * Writable-aliasing nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param buffer nullptr + * @param buffLength ignored + * @param buffCapacity ignored + * @draft ICU 59 + */ + inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION @@ -3371,7 +3481,7 @@ protected: * UnicodeString::charAt() to be inline again (see jitterbug 709). * @stable ICU 2.4 */ - virtual UChar getCharAt(int32_t offset) const; + virtual char16_t getCharAt(int32_t offset) const; /** * The change in Replaceable to use virtual getChar32At() allows @@ -3407,7 +3517,7 @@ private: int8_t doCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -3420,7 +3530,7 @@ private: int8_t doCompareCodePointOrder(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -3435,12 +3545,12 @@ private: int8_t doCaseCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const; - int32_t doIndexOf(UChar c, + int32_t doIndexOf(char16_t c, int32_t start, int32_t length) const; @@ -3448,7 +3558,7 @@ private: int32_t start, int32_t length) const; - int32_t doLastIndexOf(UChar c, + int32_t doLastIndexOf(char16_t c, int32_t start, int32_t length) const; @@ -3458,14 +3568,14 @@ private: void doExtract(int32_t start, int32_t length, - UChar *dst, + char16_t *dst, int32_t dstStart) const; inline void doExtract(int32_t start, int32_t length, UnicodeString& target) const; - inline UChar doCharAt(int32_t offset) const; + inline char16_t doCharAt(int32_t offset) const; UnicodeString& doReplace(int32_t start, int32_t length, @@ -3475,12 +3585,12 @@ private: UnicodeString& doReplace(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength); UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength); - UnicodeString& doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength); + UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); UnicodeString& doReverse(int32_t start, int32_t length); @@ -3490,8 +3600,8 @@ private: // get pointer to start of array // these do not check for kOpenGetBuffer, unlike the public getBuffer() function - inline UChar* getArrayStart(void); - inline const UChar* getArrayStart(void) const; + inline char16_t* getArrayStart(void); + inline const char16_t* getArrayStart(void) const; inline UBool hasShortLength() const; inline int32_t getShortLength() const; @@ -3508,7 +3618,7 @@ private: inline void setShortLength(int32_t len); inline void setLength(int32_t len); inline void setToEmpty(); - inline void setArray(UChar *array, int32_t len, int32_t capacity); // sets length but not flags + inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags // allocate the array; result may be the stack buffer // sets refCount to 1 if appropriate @@ -3686,15 +3796,15 @@ private: // Each struct of the union must begin with fLengthAndFlags. struct { int16_t fLengthAndFlags; // bit fields: see constants above - UChar fBuffer[US_STACKBUF_SIZE]; // buffer for short strings + char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings } fStackFields; struct { int16_t fLengthAndFlags; // bit fields: see constants above int32_t fLength; // number of characters in fArray if >127; else undefined - int32_t fCapacity; // capacity of fArray (in UChars) + int32_t fCapacity; // capacity of fArray (in char16_ts) // array pointer last to minimize padding for machines with P128 data model // or pointer sizes that are not a power of 2 - UChar *fArray; // the Unicode data + char16_t *fArray; // the Unicode data } fFields; } fUnion; }; @@ -3747,13 +3857,13 @@ UnicodeString::pinIndices(int32_t& start, } } -inline UChar* +inline char16_t* UnicodeString::getArrayStart() { return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; } -inline const UChar* +inline const char16_t* UnicodeString::getArrayStart() const { return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; @@ -3768,6 +3878,18 @@ UnicodeString::UnicodeString() { fUnion.fStackFields.fLengthAndFlags=kShortString; } +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + //======================================== // Read-only implementation methods //======================================== @@ -3814,10 +3936,10 @@ UnicodeString::isBufferWritable() const (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); } -inline const UChar * +inline const char16_t * UnicodeString::getBuffer() const { if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { - return 0; + return nullptr; } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { return fUnion.fStackFields.fBuffer; } else { @@ -3885,7 +4007,7 @@ UnicodeString::compare(int32_t start, { return doCompare(start, _length, srcText, 0, srcText.length()); } inline int8_t -UnicodeString::compare(const UChar *srcChars, +UnicodeString::compare(ConstChar16Ptr srcChars, int32_t srcLength) const { return doCompare(0, length(), srcChars, 0, srcLength); } @@ -3900,13 +4022,13 @@ UnicodeString::compare(int32_t start, inline int8_t UnicodeString::compare(int32_t start, int32_t _length, - const UChar *srcChars) const + const char16_t *srcChars) const { return doCompare(start, _length, srcChars, 0, _length); } inline int8_t UnicodeString::compare(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { return doCompare(start, _length, srcChars, srcStart, srcLength); } @@ -3946,7 +4068,7 @@ UnicodeString::compareCodePointOrder(int32_t start, { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } inline int8_t -UnicodeString::compareCodePointOrder(const UChar *srcChars, +UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars, int32_t srcLength) const { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } @@ -3961,13 +4083,13 @@ UnicodeString::compareCodePointOrder(int32_t start, inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length, - const UChar *srcChars) const + const char16_t *srcChars) const { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } @@ -4011,7 +4133,7 @@ UnicodeString::caseCompare(int32_t start, } inline int8_t -UnicodeString::caseCompare(const UChar *srcChars, +UnicodeString::caseCompare(ConstChar16Ptr srcChars, int32_t srcLength, uint32_t options) const { return doCaseCompare(0, length(), srcChars, 0, srcLength, options); @@ -4030,7 +4152,7 @@ UnicodeString::caseCompare(int32_t start, inline int8_t UnicodeString::caseCompare(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, uint32_t options) const { return doCaseCompare(start, _length, srcChars, 0, _length, options); } @@ -4038,7 +4160,7 @@ UnicodeString::caseCompare(int32_t start, inline int8_t UnicodeString::caseCompare(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const { @@ -4089,7 +4211,7 @@ UnicodeString::indexOf(const UnicodeString& text, { return indexOf(text, 0, text.length(), start, _length); } inline int32_t -UnicodeString::indexOf(const UChar *srcChars, +UnicodeString::indexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const { pinIndex(start); @@ -4097,14 +4219,14 @@ UnicodeString::indexOf(const UChar *srcChars, } inline int32_t -UnicodeString::indexOf(const UChar *srcChars, +UnicodeString::indexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t _length) const { return indexOf(srcChars, 0, srcLength, start, _length); } inline int32_t -UnicodeString::indexOf(UChar c, +UnicodeString::indexOf(char16_t c, int32_t start, int32_t _length) const { return doIndexOf(c, start, _length); } @@ -4116,7 +4238,7 @@ UnicodeString::indexOf(UChar32 c, { return doIndexOf(c, start, _length); } inline int32_t -UnicodeString::indexOf(UChar c) const +UnicodeString::indexOf(char16_t c) const { return doIndexOf(c, 0, length()); } inline int32_t @@ -4124,7 +4246,7 @@ UnicodeString::indexOf(UChar32 c) const { return indexOf(c, 0, length()); } inline int32_t -UnicodeString::indexOf(UChar c, +UnicodeString::indexOf(char16_t c, int32_t start) const { pinIndex(start); return doIndexOf(c, start, length() - start); @@ -4138,14 +4260,14 @@ UnicodeString::indexOf(UChar32 c, } inline int32_t -UnicodeString::lastIndexOf(const UChar *srcChars, +UnicodeString::lastIndexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t _length) const { return lastIndexOf(srcChars, 0, srcLength, start, _length); } inline int32_t -UnicodeString::lastIndexOf(const UChar *srcChars, +UnicodeString::lastIndexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const { pinIndex(start); @@ -4186,7 +4308,7 @@ UnicodeString::lastIndexOf(const UnicodeString& text) const { return lastIndexOf(text, 0, text.length(), 0, length()); } inline int32_t -UnicodeString::lastIndexOf(UChar c, +UnicodeString::lastIndexOf(char16_t c, int32_t start, int32_t _length) const { return doLastIndexOf(c, start, _length); } @@ -4199,7 +4321,7 @@ UnicodeString::lastIndexOf(UChar32 c, } inline int32_t -UnicodeString::lastIndexOf(UChar c) const +UnicodeString::lastIndexOf(char16_t c) const { return doLastIndexOf(c, 0, length()); } inline int32_t @@ -4208,7 +4330,7 @@ UnicodeString::lastIndexOf(UChar32 c) const { } inline int32_t -UnicodeString::lastIndexOf(UChar c, +UnicodeString::lastIndexOf(char16_t c, int32_t start) const { pinIndex(start); return doLastIndexOf(c, start, length() - start); @@ -4232,17 +4354,17 @@ UnicodeString::startsWith(const UnicodeString& srcText, { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } inline UBool -UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { +UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; } inline UBool -UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { +UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; } @@ -4262,21 +4384,21 @@ UnicodeString::endsWith(const UnicodeString& srcText, } inline UBool -UnicodeString::endsWith(const UChar *srcChars, +UnicodeString::endsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(length() - srcLength, srcLength, srcChars, 0, srcLength) == 0; } inline UBool -UnicodeString::endsWith(const UChar *srcChars, +UnicodeString::endsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars + srcStart); + srcLength = u_strlen(toUCharPtr(srcChars + srcStart)); } return doCompare(length() - srcLength, srcLength, srcChars, srcStart, srcLength) == 0; @@ -4302,14 +4424,14 @@ UnicodeString::replace(int32_t start, inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength) { return doReplace(start, _length, srcChars, 0, srcLength); } inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) { return doReplace(start, _length, srcChars, srcStart, srcLength); } @@ -4317,7 +4439,7 @@ UnicodeString::replace(int32_t start, inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, - UChar srcChar) + char16_t srcChar) { return doReplace(start, _length, &srcChar, 0, 1); } inline UnicodeString& @@ -4360,7 +4482,7 @@ UnicodeString::doExtract(int32_t start, inline void UnicodeString::extract(int32_t start, int32_t _length, - UChar *target, + Char16Ptr target, int32_t targetStart) const { doExtract(start, _length, target, targetStart); } @@ -4388,7 +4510,7 @@ UnicodeString::extract(int32_t start, inline void UnicodeString::extractBetween(int32_t start, int32_t limit, - UChar *dst, + char16_t *dst, int32_t dstStart) const { pinIndex(start); pinIndex(limit); @@ -4400,7 +4522,7 @@ UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { return tempSubString(start, limit - start); } -inline UChar +inline char16_t UnicodeString::doCharAt(int32_t offset) const { if((uint32_t)offset < (uint32_t)length()) { @@ -4410,11 +4532,11 @@ UnicodeString::doCharAt(int32_t offset) const } } -inline UChar +inline char16_t UnicodeString::charAt(int32_t offset) const { return doCharAt(offset); } -inline UChar +inline char16_t UnicodeString::operator[] (int32_t offset) const { return doCharAt(offset); } @@ -4455,14 +4577,14 @@ UnicodeString::setToEmpty() { } inline void -UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { +UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) { setLength(len); fUnion.fFields.fArray = array; fUnion.fFields.fCapacity = capacity; } inline UnicodeString& -UnicodeString::operator= (UChar ch) +UnicodeString::operator= (char16_t ch) { return doReplace(0, length(), &ch, 0, 1); } inline UnicodeString& @@ -4494,7 +4616,7 @@ UnicodeString::setTo(const UnicodeString& srcText) } inline UnicodeString& -UnicodeString::setTo(const UChar *srcChars, +UnicodeString::setTo(const char16_t *srcChars, int32_t srcLength) { unBogus(); @@ -4502,7 +4624,7 @@ UnicodeString::setTo(const UChar *srcChars, } inline UnicodeString& -UnicodeString::setTo(UChar srcChar) +UnicodeString::setTo(char16_t srcChar) { unBogus(); return doReplace(0, length(), &srcChar, 0, 1); @@ -4526,22 +4648,22 @@ UnicodeString::append(const UnicodeString& srcText) { return doAppend(srcText, 0, srcText.length()); } inline UnicodeString& -UnicodeString::append(const UChar *srcChars, +UnicodeString::append(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) { return doAppend(srcChars, srcStart, srcLength); } inline UnicodeString& -UnicodeString::append(const UChar *srcChars, +UnicodeString::append(ConstChar16Ptr srcChars, int32_t srcLength) { return doAppend(srcChars, 0, srcLength); } inline UnicodeString& -UnicodeString::append(UChar srcChar) +UnicodeString::append(char16_t srcChar) { return doAppend(&srcChar, 0, 1); } inline UnicodeString& -UnicodeString::operator+= (UChar ch) +UnicodeString::operator+= (char16_t ch) { return doAppend(&ch, 0, 1); } inline UnicodeString& @@ -4567,20 +4689,20 @@ UnicodeString::insert(int32_t start, inline UnicodeString& UnicodeString::insert(int32_t start, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) { return doReplace(start, 0, srcChars, srcStart, srcLength); } inline UnicodeString& UnicodeString::insert(int32_t start, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength) { return doReplace(start, 0, srcChars, 0, srcLength); } inline UnicodeString& UnicodeString::insert(int32_t start, - UChar srcChar) + char16_t srcChar) { return doReplace(start, 0, &srcChar, 0, 1); } inline UnicodeString& diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h index 6b61c4c1646..d60450b5a56 100644 --- a/icu4c/source/common/unicode/utypes.h +++ b/icu4c/source/common/unicode/utypes.h @@ -178,12 +178,12 @@ /** * \def NULL - * Define NULL if necessary, to 0 for C++ and to ((void *)0) for C. + * Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C. * @stable ICU 2.0 */ #ifndef NULL #ifdef __cplusplus -#define NULL 0 +#define NULL nullptr #else #define NULL ((void *)0) #endif diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index bb4de3afa7e..1bfb71aa107 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -218,9 +218,10 @@ UnicodeString::UnicodeString(const UChar *text, } UnicodeString::UnicodeString(UBool isTerminated, - const UChar *text, + ConstChar16Ptr textPtr, int32_t textLength) { fUnion.fFields.fLengthAndFlags = kReadonlyAlias; + const UChar *text = textPtr; if(text == NULL) { // treat as an empty string, do not alias setToEmpty(); @@ -234,7 +235,8 @@ UnicodeString::UnicodeString(UBool isTerminated, // text is terminated, or else it would have failed the above test textLength = u_strlen(text); } - setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); + setArray(const_cast(text), textLength, + isTerminated ? textLength + 1 : textLength); } } @@ -873,7 +875,7 @@ UnicodeString::doExtract(int32_t start, } int32_t -UnicodeString::extract(UChar *dest, int32_t destCapacity, +UnicodeString::extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const { int32_t len = length(); if(U_SUCCESS(errorCode)) { @@ -1215,10 +1217,10 @@ UnicodeString::unBogus() { } } -const UChar * +const char16_t * UnicodeString::getTerminatedBuffer() { if(!isWritable()) { - return 0; + return nullptr; } UChar *array = getArrayStart(); int32_t len = length(); @@ -1249,14 +1251,14 @@ UnicodeString::getTerminatedBuffer() { array[len] = 0; return array; } else { - return NULL; + return nullptr; } } // setTo() analogous to the readonly-aliasing constructor with the same signature UnicodeString & UnicodeString::setTo(UBool isTerminated, - const UChar *text, + ConstChar16Ptr textPtr, int32_t textLength) { if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) { @@ -1264,6 +1266,7 @@ UnicodeString::setTo(UBool isTerminated, return *this; } + const UChar *text = textPtr; if(text == NULL) { // treat as an empty string, do not alias releaseArray(); @@ -1713,14 +1716,14 @@ UnicodeString::doHashCode() const // External Buffer //======================================== -UChar * +char16_t * UnicodeString::getBuffer(int32_t minCapacity) { if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer; setZeroLength(); return getArrayStart(); } else { - return 0; + return nullptr; } } diff --git a/icu4c/source/common/utext.cpp b/icu4c/source/common/utext.cpp index d68939429ce..52ae7ff9787 100644 --- a/icu4c/source/common/utext.cpp +++ b/icu4c/source/common/utext.cpp @@ -2242,13 +2242,13 @@ unistrTextCopy(UText *ut, } if(move) { - // move: copy to destIndex, then replace original with nothing + // move: copy to destIndex, then remove original int32_t segLength=limit32-start32; us->copy(start32, limit32, destIndex32); if(destIndex32replace(start32, segLength, NULL, 0); + us->remove(start32, segLength); } else { // copy us->copy(start32, limit32, destIndex32); diff --git a/icu4c/source/common/wintz.cpp b/icu4c/source/common/wintz.cpp index b92f3260f42..397e430f730 100644 --- a/icu4c/source/common/wintz.cpp +++ b/icu4c/source/common/wintz.cpp @@ -13,6 +13,7 @@ #include "unicode/utypes.h" +// This file contains only desktop Windows behavior #if U_PLATFORM_HAS_WIN32_API #include "wintz.h" @@ -46,102 +47,25 @@ typedef struct * Various registry keys and key fragments. */ static const char CURRENT_ZONE_REGKEY[] = "SYSTEM\\CurrentControlSet\\Control\\TimeZoneInformation\\"; -/* static const char STANDARD_NAME_REGKEY[] = "StandardName"; Currently unused constant */ static const char STANDARD_TIME_REGKEY[] = " Standard Time"; static const char TZI_REGKEY[] = "TZI"; static const char STD_REGKEY[] = "Std"; /** - * HKLM subkeys used to probe for the flavor of Windows. Note that we - * specifically check for the "GMT" zone subkey; this is present on - * NT, but on XP has become "GMT Standard Time". We need to - * discriminate between these cases. + * The time zone root keys (under HKLM) for Win7+ */ -static const char* const WIN_TYPE_PROBE_REGKEY[] = { - /* WIN_9X_ME_TYPE */ - "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones", - - /* WIN_NT_TYPE */ - "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\GMT" - - /* otherwise: WIN_2K_XP_TYPE */ -}; - -/** - * The time zone root subkeys (under HKLM) for different flavors of - * Windows. - */ -static const char* const TZ_REGKEY[] = { - /* WIN_9X_ME_TYPE */ - "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones\\", - - /* WIN_NT_TYPE | WIN_2K_XP_TYPE */ - "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\" -}; - -/** - * Flavor of Windows, from our perspective. Not a real OS version, - * but rather the flavor of the layout of the time zone information in - * the registry. - */ -enum { - WIN_9X_ME_TYPE = 1, - WIN_NT_TYPE = 2, - WIN_2K_XP_TYPE = 3 -}; - -static int32_t gWinType = 0; - -static int32_t detectWindowsType() -{ - int32_t winType; - LONG result; - HKEY hkey; - - /* Detect the version of windows by trying to open a sequence of - probe keys. We don't use the OS version API because what we - really want to know is how the registry is laid out. - Specifically, is it 9x/Me or not, and is it "GMT" or "GMT - Standard Time". */ - for (winType = 0; winType < 2; winType++) { - result = RegOpenKeyExA(HKEY_LOCAL_MACHINE, - WIN_TYPE_PROBE_REGKEY[winType], - 0, - KEY_QUERY_VALUE, - &hkey); - RegCloseKey(hkey); - - if (result == ERROR_SUCCESS) { - break; - } - } - - return winType+1; /* +1 to bring it inline with the enum */ -} +static const char TZ_REGKEY[] = "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\"; static LONG openTZRegKey(HKEY *hkey, const char *winid) { - char subKeyName[110]; /* TODO: why 96?? */ + char subKeyName[110]; /* TODO: why 110?? */ char *name; LONG result; - /* This isn't thread safe, but it's good enough because the result should be constant per system. */ - if (gWinType <= 0) { - gWinType = detectWindowsType(); - } - - uprv_strcpy(subKeyName, TZ_REGKEY[(gWinType != WIN_9X_ME_TYPE)]); + uprv_strcpy(subKeyName, TZ_REGKEY); name = &subKeyName[strlen(subKeyName)]; uprv_strcat(subKeyName, winid); - if (gWinType == WIN_9X_ME_TYPE) { - /* Remove " Standard Time" */ - char *pStd = uprv_strstr(subKeyName, STANDARD_TIME_REGKEY); - if (pStd) { - *pStd = 0; - } - } - result = RegOpenKeyExA(HKEY_LOCAL_MACHINE, subKeyName, 0, @@ -158,7 +82,8 @@ static LONG getTZI(const char *winid, TZI *tzi) result = openTZRegKey(&hkey, winid); - if (result == ERROR_SUCCESS) { + if (result == ERROR_SUCCESS) + { result = RegQueryValueExA(hkey, TZI_REGKEY, NULL, @@ -171,14 +96,16 @@ static LONG getTZI(const char *winid, TZI *tzi) return result; } -static LONG getSTDName(const char *winid, char *regStdName, int32_t length) { +static LONG getSTDName(const char *winid, char *regStdName, int32_t length) +{ DWORD cbData = length; LONG result; HKEY hkey; result = openTZRegKey(&hkey, winid); - if (result == ERROR_SUCCESS) { + if (result == ERROR_SUCCESS) + { result = RegQueryValueExA(hkey, STD_REGKEY, NULL, @@ -191,7 +118,8 @@ static LONG getSTDName(const char *winid, char *regStdName, int32_t length) { return result; } -static LONG getTZKeyName(char* tzKeyName, int32_t length) { +static LONG getTZKeyName(char* tzKeyName, int32_t length) +{ HKEY hkey; LONG result = FALSE; DWORD cbData = length; @@ -218,21 +146,19 @@ static LONG getTZKeyName(char* tzKeyName, int32_t length) { } /* - This code attempts to detect the Windows time zone, as set in the - Windows Date and Time control panel. It attempts to work on - multiple flavors of Windows (9x, Me, NT, 2000, XP) and on localized + This code attempts to detect the Windows time zone directly, + as set in the Windows Date and Time control panel. It attempts + to work on versions greater than Windows Vista and on localized installs. It works by directly interrogating the registry and comparing the data there with the data returned by the GetTimeZoneInformation API, along with some other strategies. The - registry contains time zone data under one of two keys (depending on - the flavor of Windows): + registry contains time zone data under this key: - HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones\ HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\ - Under this key are several subkeys, one for each time zone. These - subkeys are named "Pacific" on Win9x/Me and "Pacific Standard Time" - on WinNT/2k/XP. There are some other wrinkles; see the code for + Under this key are several subkeys, one for each time zone. For + example these subkeys are named "Pacific Standard Time" on Vista+. + There are some other wrinkles; see the code for details. The subkey name is NOT LOCALIZED, allowing us to support localized installs. @@ -270,7 +196,8 @@ static LONG getTZKeyName(char* tzKeyName, int32_t length) { * time zone, translated to an ICU time zone, or NULL upon failure. */ U_CFUNC const char* U_EXPORT2 -uprv_detectWindowsTimeZone() { +uprv_detectWindowsTimeZone() +{ UErrorCode status = U_ZERO_ERROR; UResourceBundle* bundle = NULL; char* icuid = NULL; @@ -288,7 +215,6 @@ uprv_detectWindowsTimeZone() { TZI tziReg; TIME_ZONE_INFORMATION apiTZI; - BOOL isVistaOrHigher; BOOL tryPreVistaFallback; OSVERSIONINFO osVerInfo; @@ -325,75 +251,86 @@ uprv_detectWindowsTimeZone() { */ uprv_memset(&osVerInfo, 0, sizeof(osVerInfo)); osVerInfo.dwOSVersionInfoSize = sizeof(osVerInfo); - GetVersionEx(&osVerInfo); - isVistaOrHigher = osVerInfo.dwMajorVersion >= 6; /* actually includes Windows Server 2008 as well, but don't worry about it */ tryPreVistaFallback = TRUE; - if(isVistaOrHigher) { - result = getTZKeyName(regStdName, sizeof(regStdName)); - if(ERROR_SUCCESS == result) { - UResourceBundle* winTZ = ures_getByKey(bundle, regStdName, NULL, &status); - if(U_SUCCESS(status)) { - const UChar* icuTZ = NULL; - if (errorCode != 0) { - icuTZ = ures_getStringByKey(winTZ, ISOcodeA, &len, &status); - } - if (errorCode==0 || icuTZ==NULL) { - /* fallback to default "001" and reset status */ - status = U_ZERO_ERROR; - icuTZ = ures_getStringByKey(winTZ, "001", &len, &status); - } - - if(U_SUCCESS(status)) { - int index=0; - while (! (*icuTZ == '\0' || *icuTZ ==' ')) { - tmpid[index++]=(char)(*icuTZ++); /* safe to assume 'char' is ASCII compatible on windows */ - } - tmpid[index]='\0'; - tryPreVistaFallback = FALSE; - } + result = getTZKeyName(regStdName, sizeof(regStdName)); + if(ERROR_SUCCESS == result) + { + UResourceBundle* winTZ = ures_getByKey(bundle, regStdName, NULL, &status); + if(U_SUCCESS(status)) + { + const UChar* icuTZ = NULL; + if (errorCode != 0) + { + icuTZ = ures_getStringByKey(winTZ, ISOcodeA, &len, &status); + } + if (errorCode==0 || icuTZ==NULL) + { + /* fallback to default "001" and reset status */ + status = U_ZERO_ERROR; + icuTZ = ures_getStringByKey(winTZ, "001", &len, &status); + } + + if(U_SUCCESS(status)) + { + int index=0; + while (! (*icuTZ == '\0' || *icuTZ ==' ')) + { + tmpid[index++]=(char)(*icuTZ++); /* safe to assume 'char' is ASCII compatible on windows */ + } + tmpid[index]='\0'; + tryPreVistaFallback = FALSE; } - ures_close(winTZ); } + ures_close(winTZ); } - if(tryPreVistaFallback) { - + if(tryPreVistaFallback) + { /* Note: We get the winid not from static tables but from resource bundle. */ - while (U_SUCCESS(status) && ures_hasNext(bundle)) { + while (U_SUCCESS(status) && ures_hasNext(bundle)) + { UBool idFound = FALSE; const char* winid; UResourceBundle* winTZ = ures_getNextResource(bundle, NULL, &status); - if (U_FAILURE(status)) { + if (U_FAILURE(status)) + { break; } winid = ures_getKey(winTZ); result = getTZI(winid, &tziReg); - if (result == ERROR_SUCCESS) { + if (result == ERROR_SUCCESS) + { /* Windows alters the DaylightBias in some situations. Using the bias and the rules suffices, so overwrite these unreliable fields. */ tziKey.standardBias = tziReg.standardBias; tziKey.daylightBias = tziReg.daylightBias; - if (uprv_memcmp((char *)&tziKey, (char*)&tziReg, sizeof(tziKey)) == 0) { + if (uprv_memcmp((char *)&tziKey, (char*)&tziReg, sizeof(tziKey)) == 0) + { const UChar* icuTZ = NULL; - if (errorCode != 0) { + if (errorCode != 0) + { icuTZ = ures_getStringByKey(winTZ, ISOcodeA, &len, &status); } - if (errorCode==0 || icuTZ==NULL) { + if (errorCode==0 || icuTZ==NULL) + { /* fallback to default "001" and reset status */ status = U_ZERO_ERROR; icuTZ = ures_getStringByKey(winTZ, "001", &len, &status); } - if (U_SUCCESS(status)) { + if (U_SUCCESS(status)) + { /* Get the standard name from the registry key to compare with the one from Windows API call. */ uprv_memset(regStdName, 0, sizeof(regStdName)); result = getSTDName(winid, regStdName, sizeof(regStdName)); - if (result == ERROR_SUCCESS) { - if (uprv_strcmp(apiStdName, regStdName) == 0) { + if (result == ERROR_SUCCESS) + { + if (uprv_strcmp(apiStdName, regStdName) == 0) + { idFound = TRUE; } } @@ -402,10 +339,12 @@ uprv_detectWindowsTimeZone() { * If none is found, tmpid buffer will contain a fallback ID (i.e. the time zone ID matching * the current time zone information) */ - if (idFound || tmpid[0] == 0) { + if (idFound || tmpid[0] == 0) + { /* if icuTZ has more than one city, take only the first (i.e. terminate icuTZ at first space) */ int index=0; - while (! (*icuTZ == '\0' || *icuTZ ==' ')) { + while (! (*icuTZ == '\0' || *icuTZ ==' ')) + { tmpid[index++]=(char)(*icuTZ++); /* safe to assume 'char' is ASCII compatible on windows */ } tmpid[index]='\0'; @@ -414,7 +353,8 @@ uprv_detectWindowsTimeZone() { } } ures_close(winTZ); - if (idFound) { + if (idFound) + { break; } } @@ -423,10 +363,12 @@ uprv_detectWindowsTimeZone() { /* * Copy the timezone ID to icuid to be returned. */ - if (tmpid[0] != 0) { + if (tmpid[0] != 0) + { len = uprv_strlen(tmpid); icuid = (char*)uprv_calloc(len + 1, sizeof(char)); - if (icuid != NULL) { + if (icuid != NULL) + { uprv_strcpy(icuid, tmpid); } } diff --git a/icu4c/source/configure.ac b/icu4c/source/configure.ac index c858e1fd428..b6065af7997 100644 --- a/icu4c/source/configure.ac +++ b/icu4c/source/configure.ac @@ -1349,7 +1349,6 @@ AC_CONFIG_FILES([icudefs.mk \ tools/pkgdata/Makefile \ tools/tzcode/Makefile \ tools/gencfu/Makefile \ - tools/escapesrc/Makefile \ test/Makefile \ test/compat/Makefile \ test/testdata/Makefile \ diff --git a/icu4c/source/i18n/coleitr.cpp b/icu4c/source/i18n/coleitr.cpp index 5668097a8ce..64d3ab4d2bf 100644 --- a/icu4c/source/i18n/coleitr.cpp +++ b/icu4c/source/i18n/coleitr.cpp @@ -29,6 +29,7 @@ #if !UCONFIG_NO_COLLATION +#include "unicode/chariter.h" #include "unicode/coleitr.h" #include "unicode/tblcoll.h" #include "unicode/ustring.h" diff --git a/icu4c/source/i18n/curramt.cpp b/icu4c/source/i18n/curramt.cpp index 51f23b437ab..d2242707a1e 100644 --- a/icu4c/source/i18n/curramt.cpp +++ b/icu4c/source/i18n/curramt.cpp @@ -19,12 +19,12 @@ U_NAMESPACE_BEGIN -CurrencyAmount::CurrencyAmount(const Formattable& amount, const UChar* isoCode, +CurrencyAmount::CurrencyAmount(const Formattable& amount, ConstChar16Ptr isoCode, UErrorCode& ec) : Measure(amount, new CurrencyUnit(isoCode, ec), ec) { } -CurrencyAmount::CurrencyAmount(double amount, const UChar* isoCode, +CurrencyAmount::CurrencyAmount(double amount, ConstChar16Ptr isoCode, UErrorCode& ec) : Measure(Formattable(amount), new CurrencyUnit(isoCode, ec), ec) { } diff --git a/icu4c/source/i18n/currunit.cpp b/icu4c/source/i18n/currunit.cpp index 3c74c1c1d99..197885452f5 100644 --- a/icu4c/source/i18n/currunit.cpp +++ b/icu4c/source/i18n/currunit.cpp @@ -19,10 +19,10 @@ U_NAMESPACE_BEGIN -CurrencyUnit::CurrencyUnit(const UChar* _isoCode, UErrorCode& ec) { +CurrencyUnit::CurrencyUnit(ConstChar16Ptr _isoCode, UErrorCode& ec) { *isoCode = 0; if (U_SUCCESS(ec)) { - if (_isoCode && u_strlen(_isoCode)==3) { + if (_isoCode != nullptr && u_strlen(_isoCode)==3) { u_strcpy(isoCode, _isoCode); char simpleIsoCode[4]; u_UCharsToChars(isoCode, simpleIsoCode, 4); diff --git a/icu4c/source/i18n/dtfmtsym.cpp b/icu4c/source/i18n/dtfmtsym.cpp index 333c85c8fe0..6dd4380a4df 100644 --- a/icu4c/source/i18n/dtfmtsym.cpp +++ b/icu4c/source/i18n/dtfmtsym.cpp @@ -1368,7 +1368,7 @@ DateFormatSymbols::setZoneStrings(const UnicodeString* const *strings, int32_t r //------------------------------------------------------ -const UChar * U_EXPORT2 +const char16_t * U_EXPORT2 DateFormatSymbols::getPatternUChars(void) { return gPatternChars; diff --git a/icu4c/source/i18n/numfmt.cpp b/icu4c/source/i18n/numfmt.cpp index 951146725af..ea9aed1a359 100644 --- a/icu4c/source/i18n/numfmt.cpp +++ b/icu4c/source/i18n/numfmt.cpp @@ -1188,7 +1188,7 @@ void NumberFormat::setCurrency(const UChar* theCurrency, UErrorCode& ec) { } } -const UChar* NumberFormat::getCurrency() const { +const char16_t* NumberFormat::getCurrency() const { return fCurrency; } diff --git a/icu4c/source/i18n/unicode/coll.h b/icu4c/source/i18n/unicode/coll.h index f7b2b52d1c5..7e467df80e0 100644 --- a/icu4c/source/i18n/unicode/coll.h +++ b/icu4c/source/i18n/unicode/coll.h @@ -58,7 +58,7 @@ #include "unicode/uobject.h" #include "unicode/ucol.h" -#include "unicode/normlzr.h" +#include "unicode/unorm.h" #include "unicode/locid.h" #include "unicode/uniset.h" #include "unicode/umisc.h" @@ -158,7 +158,7 @@ class CollationKey; * @see CollationKey * @see CollationElementIterator * @see Locale -* @see Normalizer +* @see Normalizer2 * @version 2.0 11/15/01 */ @@ -393,8 +393,8 @@ public: * is less than, greater than or equal to another string array. *

Example of use: *

-     * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
-     * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
+     * .       char16_t ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
+     * .       char16_t abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
      * .       UErrorCode status = U_ZERO_ERROR;
      * .       Collator *myCollation =
      * .                         Collator::createInstance(Locale::getUS(), status);
@@ -420,8 +420,8 @@ public:
      *         target
      * @deprecated ICU 2.6 use the overload with UErrorCode &
      */
-    virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
-                                      const UChar* target, int32_t targetLength)
+    virtual EComparisonResult compare(const char16_t* source, int32_t sourceLength,
+                                      const char16_t* target, int32_t targetLength)
                                       const;
 
     /**
@@ -440,8 +440,8 @@ public:
      * than target
      * @stable ICU 2.6
      */
-    virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
-                                      const UChar* target, int32_t targetLength,
+    virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
+                                      const char16_t* target, int32_t targetLength,
                                       UErrorCode &status) const = 0;
 
     /**
@@ -517,7 +517,7 @@ public:
      * @see CollationKey#compare
      * @stable ICU 2.0
      */
-    virtual CollationKey& getCollationKey(const UChar*source,
+    virtual CollationKey& getCollationKey(const char16_t*source,
                                           int32_t sourceLength,
                                           CollationKey& key,
                                           UErrorCode& status) const = 0;
@@ -911,7 +911,7 @@ public:
      * the top of one of the supported reordering groups,
      * and it must not be beyond the last of those groups.
      * See setMaxVariable().
-     * @param varTop one or more (if contraction) UChars to which the variable top should be set
+     * @param varTop one or more (if contraction) char16_ts to which the variable top should be set
      * @param len length of variable top string. If -1 it is considered to be zero terminated.
      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: 
* U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
@@ -920,7 +920,7 @@ public: * @return variable top primary weight * @deprecated ICU 53 Call setMaxVariable() instead. */ - virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0; + virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status) = 0; /** * Sets the variable top to the primary weight of the specified string. @@ -929,7 +929,7 @@ public: * the top of one of the supported reordering groups, * and it must not be beyond the last of those groups. * See setMaxVariable(). - * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set + * @param varTop a UnicodeString size 1 or more (if contraction) of char16_ts to which the variable top should be set * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
* U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
* U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond @@ -1002,7 +1002,7 @@ public: int32_t resultLength) const = 0; /** - * Get the sort key as an array of bytes from a UChar buffer. + * Get the sort key as an array of bytes from a char16_t buffer. * Sort key byte arrays are zero-terminated and can be compared using * strcmp(). * @@ -1020,7 +1020,7 @@ public: * @return Number of bytes needed for storing the sort key * @stable ICU 2.2 */ - virtual int32_t getSortKey(const UChar*source, int32_t sourceLength, + virtual int32_t getSortKey(const char16_t*source, int32_t sourceLength, uint8_t*result, int32_t resultLength) const = 0; /** diff --git a/icu4c/source/i18n/unicode/curramt.h b/icu4c/source/i18n/unicode/curramt.h index 9071f11bd10..e321df861d2 100644 --- a/icu4c/source/i18n/unicode/curramt.h +++ b/icu4c/source/i18n/unicode/curramt.h @@ -46,7 +46,7 @@ class U_I18N_API CurrencyAmount: public Measure { * is invalid, then this will be set to a failing value. * @stable ICU 3.0 */ - CurrencyAmount(const Formattable& amount, const UChar* isoCode, + CurrencyAmount(const Formattable& amount, ConstChar16Ptr isoCode, UErrorCode &ec); /** @@ -59,7 +59,7 @@ class U_I18N_API CurrencyAmount: public Measure { * then this will be set to a failing value. * @stable ICU 3.0 */ - CurrencyAmount(double amount, const UChar* isoCode, + CurrencyAmount(double amount, ConstChar16Ptr isoCode, UErrorCode &ec); /** @@ -115,14 +115,14 @@ class U_I18N_API CurrencyAmount: public Measure { * Return the ISO currency code of this object. * @stable ICU 3.0 */ - inline const UChar* getISOCurrency() const; + inline const char16_t* getISOCurrency() const; }; inline const CurrencyUnit& CurrencyAmount::getCurrency() const { return (const CurrencyUnit&) getUnit(); } -inline const UChar* CurrencyAmount::getISOCurrency() const { +inline const char16_t* CurrencyAmount::getISOCurrency() const { return getCurrency().getISOCurrency(); } diff --git a/icu4c/source/i18n/unicode/currunit.h b/icu4c/source/i18n/unicode/currunit.h index 61f9201ebb2..fd0f9f2bcce 100644 --- a/icu4c/source/i18n/unicode/currunit.h +++ b/icu4c/source/i18n/unicode/currunit.h @@ -28,7 +28,7 @@ U_NAMESPACE_BEGIN /** * A unit of currency, such as USD (U.S. dollars) or JPY (Japanese - * yen). This class is a thin wrapper over a UChar string that + * yen). This class is a thin wrapper over a char16_t string that * subclasses MeasureUnit, for use with Measure and MeasureFormat. * * @author Alan Liu @@ -44,7 +44,7 @@ class U_I18N_API CurrencyUnit: public MeasureUnit { * then this will be set to a failing value. * @stable ICU 3.0 */ - CurrencyUnit(const UChar* isoCode, UErrorCode &ec); + CurrencyUnit(ConstChar16Ptr isoCode, UErrorCode &ec); /** * Copy constructor @@ -93,16 +93,16 @@ class U_I18N_API CurrencyUnit: public MeasureUnit { * Return the ISO currency code of this object. * @stable ICU 3.0 */ - inline const UChar* getISOCurrency() const; + inline const char16_t* getISOCurrency() const; private: /** * The ISO 4217 code of this object. */ - UChar isoCode[4]; + char16_t isoCode[4]; }; -inline const UChar* CurrencyUnit::getISOCurrency() const { +inline const char16_t* CurrencyUnit::getISOCurrency() const { return isoCode; } diff --git a/icu4c/source/i18n/unicode/dcfmtsym.h b/icu4c/source/i18n/unicode/dcfmtsym.h index 6e6615ebd9f..3a502d0ec03 100644 --- a/icu4c/source/i18n/unicode/dcfmtsym.h +++ b/icu4c/source/i18n/unicode/dcfmtsym.h @@ -393,7 +393,7 @@ public: * Returns that pattern stored in currecy info. Internal API for use by NumberFormat API. * @internal */ - inline const UChar* getCurrencyPattern(void) const; + inline const char16_t* getCurrencyPattern(void) const; #endif /* U_HIDE_INTERNAL_API */ private: @@ -424,7 +424,7 @@ private: char actualLocale[ULOC_FULLNAME_CAPACITY]; char validLocale[ULOC_FULLNAME_CAPACITY]; - const UChar* currPattern; + const char16_t* currPattern; UnicodeString currencySpcBeforeSym[UNUM_CURRENCY_SPACING_COUNT]; UnicodeString currencySpcAfterSym[UNUM_CURRENCY_SPACING_COUNT]; @@ -492,7 +492,7 @@ DecimalFormatSymbols::getLocale() const { } #ifndef U_HIDE_INTERNAL_API -inline const UChar* +inline const char16_t* DecimalFormatSymbols::getCurrencyPattern() const { return currPattern; } diff --git a/icu4c/source/i18n/unicode/decimfmt.h b/icu4c/source/i18n/unicode/decimfmt.h index 7ba34dd4a94..1deff5bf921 100644 --- a/icu4c/source/i18n/unicode/decimfmt.h +++ b/icu4c/source/i18n/unicode/decimfmt.h @@ -604,7 +604,7 @@ template class U_I18N_API EnumSet"* #0 o''clock", the format width is 10. * - *
  • The width is counted in 16-bit code units (UChars). + *
  • The width is counted in 16-bit code units (char16_ts). * *
  • Some parameters which usually do not matter have meaning when padding is * used, because the pattern width is significant with padding. In the pattern @@ -1961,14 +1961,14 @@ public: * @param ec input-output error code * @stable ICU 3.0 */ - virtual void setCurrency(const UChar* theCurrency, UErrorCode& ec); + virtual void setCurrency(const char16_t* theCurrency, UErrorCode& ec); /** * Sets the currency used to display currency amounts. See - * setCurrency(const UChar*, UErrorCode&). - * @deprecated ICU 3.0. Use setCurrency(const UChar*, UErrorCode&). + * setCurrency(const char16_t*, UErrorCode&). + * @deprecated ICU 3.0. Use setCurrency(const char16_t*, UErrorCode&). */ - virtual void setCurrency(const UChar* theCurrency); + virtual void setCurrency(const char16_t* theCurrency); /** * Sets the Currency Context object used to display currency. @@ -2108,7 +2108,7 @@ private: void parse(const UnicodeString& text, Formattable& result, ParsePosition& pos, - UChar* currency) const; + char16_t* currency) const; enum { fgStatusInfinite, @@ -2124,7 +2124,7 @@ private: int8_t type, ParsePosition& parsePosition, DigitList& digits, UBool* status, - UChar* currency) const; + char16_t* currency) const; // Mixed style parsing for currency. // It parses against the current currency pattern @@ -2135,7 +2135,7 @@ private: ParsePosition& parsePosition, DigitList& digits, UBool* status, - UChar* currency) const; + char16_t* currency) const; int32_t skipPadding(const UnicodeString& text, int32_t position) const; @@ -2146,7 +2146,7 @@ private: const UnicodeString* affixPat, UBool complexCurrencyParsing, int8_t type, - UChar* currency) const; + char16_t* currency) const; static UnicodeString& trimMarksFromAffix(const UnicodeString& affix, UnicodeString& trimmedAffix); @@ -2169,7 +2169,7 @@ private: const UnicodeString& input, int32_t pos, int8_t type, - UChar* currency) const; + char16_t* currency) const; static int32_t match(const UnicodeString& text, int32_t pos, UChar32 ch); @@ -2195,11 +2195,11 @@ private: void setupCurrencyAffixPatterns(UErrorCode& status); // get the currency rounding with respect to currency usage - double getCurrencyRounding(const UChar* currency, + double getCurrencyRounding(const char16_t* currency, UErrorCode* ec) const; // get the currency fraction with respect to currency usage - int getCurrencyFractionDigits(const UChar* currency, + int getCurrencyFractionDigits(const char16_t* currency, UErrorCode* ec) const; // hashtable operations @@ -2271,7 +2271,7 @@ protected: * have a capacity of at least 4 * @internal */ - virtual void getEffectiveCurrency(UChar* result, UErrorCode& ec) const; + virtual void getEffectiveCurrency(char16_t* result, UErrorCode& ec) const; /** number of integer digits * @stable ICU 2.4 diff --git a/icu4c/source/i18n/unicode/dtfmtsym.h b/icu4c/source/i18n/unicode/dtfmtsym.h index 512ebb8351f..0cf3ce20ac6 100644 --- a/icu4c/source/i18n/unicode/dtfmtsym.h +++ b/icu4c/source/i18n/unicode/dtfmtsym.h @@ -426,13 +426,13 @@ public: * doesn't specify any time separator, and always recognized when parsing. * @internal */ - static const UChar DEFAULT_TIME_SEPARATOR = 0x003a; // ':' + static const char16_t DEFAULT_TIME_SEPARATOR = 0x003a; // ':' /** * This alternate time separator is always recognized when parsing. * @internal */ - static const UChar ALTERNATE_TIME_SEPARATOR = 0x002e; // '.' + static const char16_t ALTERNATE_TIME_SEPARATOR = 0x002e; // '.' /** * Gets the time separator string. For example: ":". @@ -566,7 +566,7 @@ public: * @return the non-localized date-time pattern characters * @stable ICU 2.0 */ - static const UChar * U_EXPORT2 getPatternUChars(void); + static const char16_t * U_EXPORT2 getPatternUChars(void); /** * Gets localized date-time pattern characters. For example: 'u', 't', etc. @@ -977,7 +977,7 @@ private: * Returns the date format field index of the pattern character c, * or UDAT_FIELD_COUNT if c is not a pattern character. */ - static UDateFormatField U_EXPORT2 getPatternCharIndex(UChar c); + static UDateFormatField U_EXPORT2 getPatternCharIndex(char16_t c); /** * Returns TRUE if f (with its pattern character repeated count times) is a numeric field. @@ -987,7 +987,7 @@ private: /** * Returns TRUE if c (repeated count times) is the pattern character for a numeric field. */ - static UBool U_EXPORT2 isNumericPatternChar(UChar c, int32_t count); + static UBool U_EXPORT2 isNumericPatternChar(char16_t c, int32_t count); public: #ifndef U_HIDE_INTERNAL_API /** diff --git a/icu4c/source/i18n/unicode/dtitvfmt.h b/icu4c/source/i18n/unicode/dtitvfmt.h index 3c42e15e675..5eaa559d0ea 100644 --- a/icu4c/source/i18n/unicode/dtitvfmt.h +++ b/icu4c/source/i18n/unicode/dtitvfmt.h @@ -996,7 +996,7 @@ private: // from calendar field to pattern letter - static const UChar fgCalendarFieldToPatternLetter[]; + static const char16_t fgCalendarFieldToPatternLetter[]; /** diff --git a/icu4c/source/i18n/unicode/dtptngen.h b/icu4c/source/i18n/unicode/dtptngen.h index 6748e57ff85..5f991db6838 100644 --- a/icu4c/source/i18n/unicode/dtptngen.h +++ b/icu4c/source/i18n/unicode/dtptngen.h @@ -517,7 +517,7 @@ private: DateTimeMatcher *skipMatcher; Hashtable *fAvailableFormatKeyHash; UnicodeString emptyString; - UChar fDefaultHourFormatChar; + char16_t fDefaultHourFormatChar; int32_t fAllowedHourFormats[7]; // Actually an array of AllowedHourFormat enum type, ending with UNKNOWN. diff --git a/icu4c/source/i18n/unicode/msgfmt.h b/icu4c/source/i18n/unicode/msgfmt.h index 55cbee35247..fef80107747 100644 --- a/icu4c/source/i18n/unicode/msgfmt.h +++ b/icu4c/source/i18n/unicode/msgfmt.h @@ -939,7 +939,7 @@ private: * @return the index of the list which matches the keyword s. */ static int32_t findKeyword( const UnicodeString& s, - const UChar * const *list); + const char16_t * const *list); /** * Thin wrapper around the format(... AppendableWrapper ...) variant. diff --git a/icu4c/source/i18n/unicode/numfmt.h b/icu4c/source/i18n/unicode/numfmt.h index 1907476ff30..b8dec36be64 100644 --- a/icu4c/source/i18n/unicode/numfmt.h +++ b/icu4c/source/i18n/unicode/numfmt.h @@ -931,7 +931,7 @@ public: * @param ec input-output error code * @stable ICU 3.0 */ - virtual void setCurrency(const UChar* theCurrency, UErrorCode& ec); + virtual void setCurrency(const char16_t* theCurrency, UErrorCode& ec); /** * Gets the currency used to display currency @@ -940,7 +940,7 @@ public: * the currency in use, or a pointer to the empty string. * @stable ICU 2.6 */ - const UChar* getCurrency() const; + const char16_t* getCurrency() const; /** * Set a particular UDisplayContext value in the formatter, such as @@ -1018,7 +1018,7 @@ protected: * have a capacity of at least 4 * @internal */ - virtual void getEffectiveCurrency(UChar* result, UErrorCode& ec) const; + virtual void getEffectiveCurrency(char16_t* result, UErrorCode& ec) const; #ifndef U_HIDE_INTERNAL_API /** @@ -1065,7 +1065,7 @@ private: UBool fLenient; // TRUE => lenient parse is enabled // ISO currency code - UChar fCurrency[4]; + char16_t fCurrency[4]; UDisplayContext fCapitalizationContext; diff --git a/icu4c/source/i18n/unicode/regex.h b/icu4c/source/i18n/unicode/regex.h index 4f2562466a0..6829f7ba472 100644 --- a/icu4c/source/i18n/unicode/regex.h +++ b/icu4c/source/i18n/unicode/regex.h @@ -350,17 +350,17 @@ public: private: /** * Cause a compilation error if an application accidentally attempts to - * create a matcher with a (UChar *) string as input rather than + * create a matcher with a (char16_t *) string as input rather than * a UnicodeString. Avoids a dangling reference to a temporary string. *

    - * To efficiently work with UChar *strings, wrap the data in a UnicodeString + * To efficiently work with char16_t *strings, wrap the data in a UnicodeString * using one of the aliasing constructors, such as - * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength); + * UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength); * or in a UText, using - * utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status); + * utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status); * */ - RegexMatcher *matcher(const UChar *input, + RegexMatcher *matcher(const char16_t *input, UErrorCode &status) const; public: @@ -748,17 +748,17 @@ public: private: /** * Cause a compilation error if an application accidentally attempts to - * create a matcher with a (UChar *) string as input rather than + * create a matcher with a (char16_t *) string as input rather than * a UnicodeString. Avoids a dangling reference to a temporary string. *

    - * To efficiently work with UChar *strings, wrap the data in a UnicodeString + * To efficiently work with char16_t *strings, wrap the data in a UnicodeString * using one of the aliasing constructors, such as - * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength); + * UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength); * or in a UText, using - * utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status); + * utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status); * */ - RegexMatcher(const UnicodeString ®exp, const UChar *input, + RegexMatcher(const UnicodeString ®exp, const char16_t *input, uint32_t flags, UErrorCode &status); public: @@ -1156,17 +1156,17 @@ public: private: /** * Cause a compilation error if an application accidentally attempts to - * reset a matcher with a (UChar *) string as input rather than + * reset a matcher with a (char16_t *) string as input rather than * a UnicodeString. Avoids a dangling reference to a temporary string. *

    - * To efficiently work with UChar *strings, wrap the data in a UnicodeString + * To efficiently work with char16_t *strings, wrap the data in a UnicodeString * using one of the aliasing constructors, such as - * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength); + * UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength); * or in a UText, using - * utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status); + * utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status); * */ - RegexMatcher &reset(const UChar *input); + RegexMatcher &reset(const char16_t *input); public: /** diff --git a/icu4c/source/i18n/unicode/smpdtfmt.h b/icu4c/source/i18n/unicode/smpdtfmt.h index e0ab9702ced..4733e759aa7 100644 --- a/icu4c/source/i18n/unicode/smpdtfmt.h +++ b/icu4c/source/i18n/unicode/smpdtfmt.h @@ -1170,7 +1170,7 @@ public: * @param field The UDateFormatField to get * @stable ICU 54 */ - const NumberFormat * getNumberFormatForField(UChar field) const; + const NumberFormat * getNumberFormatForField(char16_t field) const; #ifndef U_HIDE_INTERNAL_API /** @@ -1262,7 +1262,7 @@ private: * succeeds. */ void subFormat(UnicodeString &appendTo, - UChar ch, + char16_t ch, int32_t count, UDisplayContext capitalizationContext, int32_t fieldNum, @@ -1294,7 +1294,7 @@ private: * Return true if the given format character, occuring count * times, represents a numeric field. */ - static UBool isNumeric(UChar formatChar, int32_t count); + static UBool isNumeric(char16_t formatChar, int32_t count); /** * Returns TRUE if the patternOffset is at the start of a numeric field. @@ -1412,7 +1412,7 @@ private: * @return the new start position if matching succeeded; a negative number * indicating matching failure, otherwise. */ - int32_t subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count, + int32_t subParse(const UnicodeString& text, int32_t& start, char16_t ch, int32_t count, UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal, int32_t patLoc, MessageFormat * numericLeapMonthFormatter, UTimeZoneFormatTimeType *tzTimeType, SimpleDateFormatMutableNFs &mutableNFs, int32_t *dayPeriod=NULL) const; @@ -1523,12 +1523,12 @@ private: /** * Map calendar field letter into calendar field level. */ - static int32_t getLevelFromChar(UChar ch); + static int32_t getLevelFromChar(char16_t ch); /** * Tell if a character can be used to define a field in a format string. */ - static UBool isSyntaxChar(UChar ch); + static UBool isSyntaxChar(char16_t ch); /** * The formatting pattern for this formatter. diff --git a/icu4c/source/i18n/unicode/tblcoll.h b/icu4c/source/i18n/unicode/tblcoll.h index 6767ee93fc0..24ba213b41e 100644 --- a/icu4c/source/i18n/unicode/tblcoll.h +++ b/icu4c/source/i18n/unicode/tblcoll.h @@ -308,8 +308,8 @@ public: * than target * @stable ICU 2.6 */ - virtual UCollationResult compare(const UChar* source, int32_t sourceLength, - const UChar* target, int32_t targetLength, + virtual UCollationResult compare(const char16_t* source, int32_t sourceLength, + const char16_t* target, int32_t targetLength, UErrorCode &status) const; /** @@ -377,7 +377,7 @@ public: * @see CollationKey * @stable ICU 2.0 */ - virtual CollationKey& getCollationKey(const UChar *source, + virtual CollationKey& getCollationKey(const char16_t *source, int32_t sourceLength, CollationKey& key, UErrorCode& status) const; @@ -552,7 +552,7 @@ public: * the top of one of the supported reordering groups, * and it must not be beyond the last of those groups. * See setMaxVariable(). - * @param varTop one or more (if contraction) UChars to which the variable top should be set + * @param varTop one or more (if contraction) char16_ts to which the variable top should be set * @param len length of variable top string. If -1 it is considered to be zero terminated. * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
    * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
    @@ -561,7 +561,7 @@ public: * @return variable top primary weight * @deprecated ICU 53 Call setMaxVariable() instead. */ - virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status); + virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status); /** * Sets the variable top to the primary weight of the specified string. @@ -570,7 +570,7 @@ public: * the top of one of the supported reordering groups, * and it must not be beyond the last of those groups. * See setMaxVariable(). - * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set + * @param varTop a UnicodeString size 1 or more (if contraction) of char16_ts to which the variable top should be set * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
    * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
    * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond @@ -631,7 +631,7 @@ public: int32_t resultLength) const; /** - * Get the sort key as an array of bytes from a UChar buffer. + * Get the sort key as an array of bytes from a char16_t buffer. * * Note that sort keys are often less efficient than simply doing comparison. * For more details, see the ICU User Guide. @@ -646,7 +646,7 @@ public: * @return Number of bytes needed for storing the sort key * @stable ICU 2.2 */ - virtual int32_t getSortKey(const UChar *source, int32_t sourceLength, + virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) const; /** @@ -821,17 +821,17 @@ private: void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode); // Both lengths must be <0 or else both must be >=0. - UCollationResult doCompare(const UChar *left, int32_t leftLength, - const UChar *right, int32_t rightLength, + UCollationResult doCompare(const char16_t *left, int32_t leftLength, + const char16_t *right, int32_t rightLength, UErrorCode &errorCode) const; UCollationResult doCompare(const uint8_t *left, int32_t leftLength, const uint8_t *right, int32_t rightLength, UErrorCode &errorCode) const; - void writeSortKey(const UChar *s, int32_t length, + void writeSortKey(const char16_t *s, int32_t length, SortKeyByteSink &sink, UErrorCode &errorCode) const; - void writeIdenticalLevel(const UChar *s, const UChar *limit, + void writeIdenticalLevel(const char16_t *s, const char16_t *limit, SortKeyByteSink &sink, UErrorCode &errorCode) const; const CollationSettings &getDefaultSettings() const; diff --git a/icu4c/source/i18n/unicode/timezone.h b/icu4c/source/i18n/unicode/timezone.h index 3d570d23bde..83dee317784 100644 --- a/icu4c/source/i18n/unicode/timezone.h +++ b/icu4c/source/i18n/unicode/timezone.h @@ -863,7 +863,7 @@ private: * @param id zone id string * @return the pointer of the ID resource, or NULL. */ - static const UChar* findID(const UnicodeString& id); + static const char16_t* findID(const UnicodeString& id); /** * Resolve a link in Olson tzdata. When the given id is known and it's not a link, @@ -873,7 +873,7 @@ private: * @param id zone id string * @return the dereferenced zone or NULL */ - static const UChar* dereferOlsonLink(const UnicodeString& id); + static const char16_t* dereferOlsonLink(const UnicodeString& id); /** * Returns the region code associated with the given zone, @@ -881,7 +881,7 @@ private: * @param id zone id string * @return the region associated with the given zone */ - static const UChar* getRegion(const UnicodeString& id); + static const char16_t* getRegion(const UnicodeString& id); public: #ifndef U_HIDE_INTERNAL_API @@ -893,7 +893,7 @@ private: * @return the region associated with the given zone * @internal */ - static const UChar* getRegion(const UnicodeString& id, UErrorCode& status); + static const char16_t* getRegion(const UnicodeString& id, UErrorCode& status); #endif /* U_HIDE_INTERNAL_API */ private: diff --git a/icu4c/source/i18n/unicode/translit.h b/icu4c/source/i18n/unicode/translit.h index f4ea9ae8014..bccba548024 100644 --- a/icu4c/source/i18n/unicode/translit.h +++ b/icu4c/source/i18n/unicode/translit.h @@ -1319,7 +1319,7 @@ inline int32_t Transliterator::getMaximumContextLength(void) const { inline void Transliterator::setID(const UnicodeString& id) { ID = id; // NUL-terminate the ID string, which is a non-aliased copy. - ID.append((UChar)0); + ID.append((char16_t)0); ID.truncate(ID.length()-1); } diff --git a/icu4c/source/i18n/unicode/tzfmt.h b/icu4c/source/i18n/unicode/tzfmt.h index 6d2de5bcf06..724ff4d85bb 100644 --- a/icu4c/source/i18n/unicode/tzfmt.h +++ b/icu4c/source/i18n/unicode/tzfmt.h @@ -942,7 +942,7 @@ private: * @param parsedLen the parsed length, or 0 on failure. * @return the parsed offset in milliseconds. */ - int32_t parseDefaultOffsetFields(const UnicodeString& text, int32_t start, UChar separator, + int32_t parseDefaultOffsetFields(const UnicodeString& text, int32_t start, char16_t separator, int32_t& parsedLen) const; /** @@ -982,7 +982,7 @@ private: * @param maxFields The maximum fields * @return The offset string */ - static UnicodeString& formatOffsetWithAsciiDigits(int32_t offset, UChar sep, + static UnicodeString& formatOffsetWithAsciiDigits(int32_t offset, char16_t sep, OffsetFields minFields, OffsetFields maxFields, UnicodeString& result); /** @@ -1012,7 +1012,7 @@ private: * @param maxFields The maximum Fields to be parsed * @return Parsed offset, 0 or positive number. */ - static int32_t parseAsciiOffsetFields(const UnicodeString& text, ParsePosition& pos, UChar sep, + static int32_t parseAsciiOffsetFields(const UnicodeString& text, ParsePosition& pos, char16_t sep, OffsetFields minFields, OffsetFields maxFields); /** diff --git a/icu4c/source/i18n/zonemeta.cpp b/icu4c/source/i18n/zonemeta.cpp index b80ac3ea379..84a96578029 100644 --- a/icu4c/source/i18n/zonemeta.cpp +++ b/icu4c/source/i18n/zonemeta.cpp @@ -28,6 +28,7 @@ #include "uresimp.h" #include "uhash.h" #include "olsontz.h" +#include "uinvchar.h" static UMutex gZoneMetaLock = U_MUTEX_INITIALIZER; @@ -255,6 +256,12 @@ ZoneMeta::getCanonicalCLDRID(const UnicodeString &tzid, UErrorCode& status) { tzid.extract(utzid, ZID_KEY_MAX + 1, tmpStatus); U_ASSERT(tmpStatus == U_ZERO_ERROR); // we checked the length of tzid already + if (!uprv_isInvariantUString(utzid, -1)) { + // All of known tz IDs are only containing ASCII invariant characters. + status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + // Check if it was already cached umtx_lock(&gZoneMetaLock); { diff --git a/icu4c/source/i18n/zonemeta.h b/icu4c/source/i18n/zonemeta.h index e0f22b779ed..9dbcc878a22 100644 --- a/icu4c/source/i18n/zonemeta.h +++ b/icu4c/source/i18n/zonemeta.h @@ -41,7 +41,11 @@ public: /** * Return the canonical id for this tzid defined by CLDR, which might be the id itself. * This overload method returns a persistent const UChar*, which is guranteed to persist - * (a pointer to a resource). + * (a pointer to a resource). If the given system tzid is not known, U_ILLEGAL_ARGUMENT_ERROR + * is set in the status. + * @param tzid Zone ID + * @param status Receives the status + * @return The canonical ID for the input time zone ID */ static const UChar* U_EXPORT2 getCanonicalCLDRID(const UnicodeString &tzid, UErrorCode& status); diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in index 416d0a91bf9..c4ba088f4f6 100644 --- a/icu4c/source/test/intltest/Makefile.in +++ b/icu4c/source/test/intltest/Makefile.in @@ -35,7 +35,7 @@ BUILDDIR := $(BUILDDIR:test\\intltest/../../=) BUILDDIR := $(BUILDDIR:TEST\\INTLTEST/../../=) CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/tools/toolutil -I$(top_srcdir)/tools/ctestfw -CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT= -DUNISTR_FROM_STRING_EXPLICIT= +CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT= -DUNISTR_FROM_STRING_EXPLICIT= -DUCHAR_TYPE=char16_t DEFS += -D'U_TOPSRCDIR="$(top_srcdir)/"' -D'U_TOPBUILDDIR="$(BUILDDIR)"' LIBS = $(LIBCTESTFW) $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M) $(LIB_THREAD) diff --git a/icu4c/source/test/intltest/intltest.vcxproj b/icu4c/source/test/intltest/intltest.vcxproj index 3779ffa2e22..315adfd51f4 100644 --- a/icu4c/source/test/intltest/intltest.vcxproj +++ b/icu4c/source/test/intltest/intltest.vcxproj @@ -368,7 +368,7 @@ - + false diff --git a/icu4c/source/test/intltest/intltest.vcxproj.filters b/icu4c/source/test/intltest/intltest.vcxproj.filters index c490d90fdfb..39a3e4ef251 100644 --- a/icu4c/source/test/intltest/intltest.vcxproj.filters +++ b/icu4c/source/test/intltest/intltest.vcxproj.filters @@ -352,7 +352,7 @@ idna - + idna diff --git a/icu4c/source/test/intltest/punyref.c b/icu4c/source/test/intltest/punyref.cpp similarity index 99% rename from icu4c/source/test/intltest/punyref.c rename to icu4c/source/test/intltest/punyref.cpp index 5e0f8422cf4..77c3e27fa6e 100644 --- a/icu4c/source/test/intltest/punyref.c +++ b/icu4c/source/test/intltest/punyref.cpp @@ -7,7 +7,7 @@ * Corporation and others. All Rights Reserved. * ******************************************************************************* - * file name: punyref.h + * file name: punyref.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 diff --git a/icu4c/source/test/intltest/srchtest.cpp b/icu4c/source/test/intltest/srchtest.cpp index b5ad2da92f4..a7534ddc64d 100644 --- a/icu4c/source/test/intltest/srchtest.cpp +++ b/icu4c/source/test/intltest/srchtest.cpp @@ -644,7 +644,7 @@ void StringSearchTest::TestOpenClose() } delete result; - text.append(0, 0x1); + // No-op: text.append(0, 0x1); -- what was intended here? status = U_ZERO_ERROR; result = new StringSearch(pattern, text, NULL, NULL, status); if (U_SUCCESS(status)) { diff --git a/icu4c/source/test/intltest/tzfmttst.cpp b/icu4c/source/test/intltest/tzfmttst.cpp index b0d9dd5212d..e673d0aa4ae 100644 --- a/icu4c/source/test/intltest/tzfmttst.cpp +++ b/icu4c/source/test/intltest/tzfmttst.cpp @@ -82,6 +82,7 @@ TimeZoneFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &name TESTCASE(3, TestISOFormat); TESTCASE(4, TestFormat); TESTCASE(5, TestFormatTZDBNames); + TESTCASE(6, TestFormatCustomZone); default: name = ""; break; } } @@ -1213,5 +1214,38 @@ TimeZoneFormatTest::TestFormatTZDBNames(void) { } } +void +TimeZoneFormatTest::TestFormatCustomZone(void) { + struct { + const char* id; + int32_t offset; + const char* expected; + } TESTDATA[] = { + { "abc", 3600000, "GMT+01:00" }, // unknown ID + { "$abc", -3600000, "GMT-01:00" }, // unknown, with ASCII variant char '$' + { "\\u00c1\\u00df\\u00c7", 5400000, "GMT+01:30"}, // unknown, with non-ASCII chars + { 0, 0, 0 } + }; + + UDate now = Calendar::getNow(); + + for (int32_t i = 0; ; i++) { + const char *id = TESTDATA[i].id; + if (id == 0) { + break; + } + UnicodeString tzid = UnicodeString(id, -1, US_INV).unescape(); + SimpleTimeZone tz(TESTDATA[i].offset, tzid); + + UErrorCode status = U_ZERO_ERROR; + LocalPointer tzfmt(TimeZoneFormat::createInstance(Locale("en"), status)); + UnicodeString tzstr; + UnicodeString expected = UnicodeString(TESTDATA[i].expected, -1, US_INV).unescape(); + + tzfmt->format(UTZFMT_STYLE_SPECIFIC_LONG, tz, now, tzstr, NULL); + assertEquals(UnicodeString("Format result for ") + tzid, expected, tzstr); + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/tzfmttst.h b/icu4c/source/test/intltest/tzfmttst.h index 9e70f117da2..0bf91ae0e8e 100644 --- a/icu4c/source/test/intltest/tzfmttst.h +++ b/icu4c/source/test/intltest/tzfmttst.h @@ -27,6 +27,7 @@ class TimeZoneFormatTest : public IntlTest { void TestISOFormat(void); void TestFormat(void); void TestFormatTZDBNames(void); + void TestFormatCustomZone(void); void RunTimeRoundTripTests(int32_t threadNumber); }; diff --git a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp index e92131c7536..d3734fbf2be 100644 --- a/icu4c/source/test/intltest/ustrtest.cpp +++ b/icu4c/source/test/intltest/ustrtest.cpp @@ -61,6 +61,9 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* & TESTCASE_AUTO(TestSizeofUnicodeString); TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated); TESTCASE_AUTO(TestMoveSwap); + TESTCASE_AUTO(TestUInt16Pointers); + TESTCASE_AUTO(TestWCharPointers); + TESTCASE_AUTO(TestNullPointers); TESTCASE_AUTO_END; } @@ -1554,7 +1557,10 @@ UnicodeStringTest::TestBogus() { // writable alias to another string's buffer: very bad idea, just convenient for this test test3.setToBogus(); - if(!test3.isBogus() || test3.setTo((UChar *)test1.getBuffer(), test1.length(), test1.getCapacity()).isBogus() || test3!=test1) { + if(!test3.isBogus() || + test3.setTo(const_cast(test1.getBuffer()), + test1.length(), test1.getCapacity()).isBogus() || + test3!=test1) { errln("bogus.setTo(writable alias) failed"); } @@ -1609,8 +1615,8 @@ UnicodeStringTest::TestBogus() { // test that NULL primitive input string values are treated like // empty strings, not errors (bogus) test2.setTo((UChar32)0x10005); - if(test2.insert(1, NULL, 1).length()!=2) { - errln("UniStr.insert(...NULL...) should not modify the string but does"); + if(test2.insert(1, nullptr, 1).length()!=2) { + errln("UniStr.insert(...nullptr...) should not modify the string but does"); } UErrorCode errorCode=U_ZERO_ERROR; @@ -2186,3 +2192,64 @@ UnicodeStringTest::TestMoveSwap() { errln("UnicodeString copy after self-move did not work"); } } + +void +UnicodeStringTest::TestUInt16Pointers() { + static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 }; + uint16_t arr[4]; + + UnicodeString expected(u"abc"); + assertEquals("abc from pointer", expected, UnicodeString(carr)); + assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3)); + assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3)); + + UnicodeString alias(arr, 0, 4); + alias.append(u'a').append(u'b').append(u'c'); + assertEquals("abc from writable alias", expected, alias); + assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3)); + + UErrorCode errorCode = U_ZERO_ERROR; + int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode); + TEST_ASSERT_STATUS(errorCode); + assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length)); +} + +void +UnicodeStringTest::TestWCharPointers() { +#if U_SIZEOF_WCHAR_T==2 + static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 }; + wchar_t arr[4]; + + UnicodeString expected(u"abc"); + assertEquals("abc from pointer", expected, UnicodeString(carr)); + assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3)); + assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3)); + + UnicodeString alias(arr, 0, 4); + alias.append(u'a').append(u'b').append(u'c'); + assertEquals("abc from writable alias", expected, alias); + assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3)); + + UErrorCode errorCode = U_ZERO_ERROR; + int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode); + TEST_ASSERT_STATUS(errorCode); + assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length)); +#endif +} + +void +UnicodeStringTest::TestNullPointers() { + assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty()); + assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty()); + assertTrue("empty from read-only-alias nullptr", UnicodeString(TRUE, nullptr, 3).isEmpty()); + + UnicodeString alias(nullptr, 4, 4); // empty, no alias + assertTrue("empty from writable alias", alias.isEmpty()); + alias.append(u'a').append(u'b').append(u'c'); + UnicodeString expected(u"abc"); + assertEquals("abc from writable alias", expected, alias); + + UErrorCode errorCode = U_ZERO_ERROR; + UnicodeString(u"def").extract(nullptr, 0, errorCode); + assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode); +} diff --git a/icu4c/source/test/intltest/ustrtest.h b/icu4c/source/test/intltest/ustrtest.h index 0c449350f01..4ba348c431f 100644 --- a/icu4c/source/test/intltest/ustrtest.h +++ b/icu4c/source/test/intltest/ustrtest.h @@ -92,6 +92,10 @@ public: void TestUnicodeStringImplementsAppendable(); void TestSizeofUnicodeString(); void TestMoveSwap(); + + void TestUInt16Pointers(); + void TestWCharPointers(); + void TestNullPointers(); }; #endif diff --git a/icu4c/source/test/iotest/stream.cpp b/icu4c/source/test/iotest/stream.cpp index 427276efb26..424d4e33af5 100644 --- a/icu4c/source/test/iotest/stream.cpp +++ b/icu4c/source/test/iotest/stream.cpp @@ -106,12 +106,12 @@ static void U_CALLCONV TestStream(void) inTestStream >> inStr >> inStr2; if (inStr.compare(thisMu) != 0) { - u_austrncpy(inStrC, inStr.getBuffer(), inStr.length()); + u_austrncpy(inStrC, toUCharPtr(inStr.getBuffer()), inStr.length()); inStrC[inStr.length()] = 0; log_err("Got: \"%s\", Expected: \"tHis\\u03BC\"\n", inStrC); } if (inStr2.compare(mu) != 0) { - u_austrncpy(inStrC, inStr.getBuffer(), inStr.length()); + u_austrncpy(inStrC, toUCharPtr(inStr.getBuffer()), inStr.length()); inStrC[inStr.length()] = 0; log_err("Got: \"%s\", Expected: \"mu\"\n", inStrC); } diff --git a/icu4c/source/test/testdata/break_rules/line_normal_cj.txt b/icu4c/source/test/testdata/break_rules/line_normal_cj.txt index d84bd877efe..1895e10a8e5 100644 --- a/icu4c/source/test/testdata/break_rules/line_normal_cj.txt +++ b/icu4c/source/test/testdata/break_rules/line_normal_cj.txt @@ -164,7 +164,7 @@ LB20.1b: CB CM* ÷; # not picking up the continuing match after the BA from 21a. # TODO: For CJ tailorings (with BAX) does this rule want to include BAX? If so, # should "HL BAX" not break when followed by a CB? Thats what the current -# rules do, which is why "[^CM CB]?" includes the ?. +# rules do, which is why "[^CM CB]?" includes the ?. LB21a: HL CM* (HY | BA | BAX) CM* [^CM CB]?; # DO allow breaks here before $BAXcm and $NSXcm, so don't include them diff --git a/icu4c/source/tools/Makefile.in b/icu4c/source/tools/Makefile.in index 8fb7876de76..1cbef0005d4 100644 --- a/icu4c/source/tools/Makefile.in +++ b/icu4c/source/tools/Makefile.in @@ -17,7 +17,7 @@ subdir = tools SUBDIRS = toolutil ctestfw makeconv genrb genbrk \ gencnval gensprep icuinfo genccode gencmn icupkg pkgdata \ -gentest gennorm2 gencfu gendict escapesrc +gentest gennorm2 gencfu gendict ## List of phony targets .PHONY : all all-local all-recursive install install-local \ diff --git a/icu4c/source/tools/ctestfw/datamap.cpp b/icu4c/source/tools/ctestfw/datamap.cpp index ded0c7b609f..96241a0657b 100644 --- a/icu4c/source/tools/ctestfw/datamap.cpp +++ b/icu4c/source/tools/ctestfw/datamap.cpp @@ -10,6 +10,7 @@ #include "unicode/datamap.h" #include "unicode/resbund.h" +#include "unicode/unistr.h" #include "hash.h" #include @@ -20,7 +21,7 @@ int32_t DataMap::utoi(const UnicodeString &s) const { char ch[256]; - const UChar *u = s.getBuffer(); + const UChar *u = toUCharPtr(s.getBuffer()); int32_t len = s.length(); u_UCharsToChars(u, ch, len); ch[len] = 0; /* include terminating \0 */ diff --git a/icu4c/source/tools/gennorm2/n2builder.cpp b/icu4c/source/tools/gennorm2/n2builder.cpp index 17aecffd567..66ccd700344 100644 --- a/icu4c/source/tools/gennorm2/n2builder.cpp +++ b/icu4c/source/tools/gennorm2/n2builder.cpp @@ -280,7 +280,7 @@ uint8_t Normalizer2DataBuilder::getCC(UChar32 c) const { static UBool isWellFormed(const UnicodeString &s) { UErrorCode errorCode=U_ZERO_ERROR; - u_strToUTF8(NULL, 0, NULL, s.getBuffer(), s.length(), &errorCode); + u_strToUTF8(NULL, 0, NULL, toUCharPtr(s.getBuffer()), s.length(), &errorCode); return U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR; } @@ -313,7 +313,7 @@ void Normalizer2DataBuilder::setRoundTripMapping(UChar32 c, const UnicodeString (int)phase, (long)c); exit(U_INVALID_FORMAT_ERROR); } - int32_t numCP=u_countChar32(m.getBuffer(), m.length()); + int32_t numCP=u_countChar32(toUCharPtr(m.getBuffer()), m.length()); if(numCP!=2) { fprintf(stderr, "error in gennorm2 phase %d: " @@ -450,7 +450,7 @@ Normalizer2DataBuilder::decompose(UChar32 start, UChar32 end, uint32_t value) { Norm &norm=norms[value]; const UnicodeString &m=*norm.mapping; UnicodeString *decomposed=NULL; - const UChar *s=m.getBuffer(); + const UChar *s=toUCharPtr(m.getBuffer()); int32_t length=m.length(); int32_t prev, i=0; UChar32 c; @@ -605,7 +605,7 @@ Normalizer2DataBuilder::reorder(Norm *p, BuilderReorderingBuffer &buffer) { if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) { return; // writeMapping() will complain about it and print the code point. } - const UChar *s=m.getBuffer(); + const UChar *s=toUCharPtr(m.getBuffer()); int32_t i=0; UChar32 c; while(i(f16BitUnits.getBuffer()), (uint32_t)f16BitUnits.length() * 2, checksum); } else { // Swap to big-endian so we get the same checksum on all platforms @@ -1039,7 +1039,7 @@ void SRBRoot::write(const char *outputDir, const char *outputPkg, UnicodeString s(f16BitUnits); s.append((UChar)1); // Ensure that we own this buffer. assert(!s.isBogus()); - uint16_t *p = (uint16_t *)s.getBuffer(); + uint16_t *p = const_cast(reinterpret_cast(s.getBuffer())); for (int32_t count = f16BitUnits.length(); count > 0; --count) { uint16_t x = *p; *p++ = (uint16_t)((x << 8) | (x >> 8)); diff --git a/icu4c/source/tools/genrb/reslist.h b/icu4c/source/tools/genrb/reslist.h index d5e0945da12..ee1ecf3d05f 100644 --- a/icu4c/source/tools/genrb/reslist.h +++ b/icu4c/source/tools/genrb/reslist.h @@ -304,7 +304,7 @@ public: StringBaseResource(int8_t type, const UChar *value, int32_t len, UErrorCode &errorCode); virtual ~StringBaseResource(); - const UChar *getBuffer() const { return fString.getBuffer(); } + const UChar *getBuffer() const { return icu::toUCharPtr(fString.getBuffer()); } int32_t length() const { return fString.length(); } virtual void handlePreWrite(uint32_t *byteOffset); diff --git a/icu4c/source/tools/genrb/wrtxml.cpp b/icu4c/source/tools/genrb/wrtxml.cpp index ba6ad865362..2bfcfebf9ef 100644 --- a/icu4c/source/tools/genrb/wrtxml.cpp +++ b/icu4c/source/tools/genrb/wrtxml.cpp @@ -73,7 +73,7 @@ static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString) u_strToUTF8(NULL, 0, &len, - outString.getBuffer(), + toUCharPtr(outString.getBuffer()), outString.length(), &status); @@ -85,7 +85,7 @@ static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString) u_strToUTF8(dest, len, &len, - outString.getBuffer(), + toUCharPtr(outString.getBuffer()), outString.length(), &status); diff --git a/icu4c/source/tools/toolutil/dbgutil.cpp b/icu4c/source/tools/toolutil/dbgutil.cpp index b9f6372440e..a5a3f518bb6 100644 --- a/icu4c/source/tools/toolutil/dbgutil.cpp +++ b/icu4c/source/tools/toolutil/dbgutil.cpp @@ -117,7 +117,7 @@ U_CAPI int32_t udbg_stoi(const UnicodeString &s) { char ch[256]; - const UChar *u = s.getBuffer(); + const UChar *u = toUCharPtr(s.getBuffer()); int32_t len = s.length(); u_UCharsToChars(u, ch, len); ch[len] = 0; /* include terminating \0 */ @@ -129,7 +129,7 @@ U_CAPI double udbg_stod(const UnicodeString &s) { char ch[256]; - const UChar *u = s.getBuffer(); + const UChar *u = toUCharPtr(s.getBuffer()); int32_t len = s.length(); u_UCharsToChars(u, ch, len); ch[len] = 0; /* include terminating \0 */ diff --git a/icu4c/source/tools/toolutil/ppucd.cpp b/icu4c/source/tools/toolutil/ppucd.cpp index c57a1ef1767..cccde81c7ab 100644 --- a/icu4c/source/tools/toolutil/ppucd.cpp +++ b/icu4c/source/tools/toolutil/ppucd.cpp @@ -515,12 +515,12 @@ PreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, U void PreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) { - UChar *buffer=uni.getBuffer(-1); + UChar *buffer=toUCharPtr(uni.getBuffer(-1)); int32_t length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode); if(errorCode==U_BUFFER_OVERFLOW_ERROR) { errorCode=U_ZERO_ERROR; uni.releaseBuffer(0); - buffer=uni.getBuffer(length); + buffer=toUCharPtr(uni.getBuffer(length)); length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode); } uni.releaseBuffer(length); diff --git a/icu4c/source/tools/toolutil/xmlparser.cpp b/icu4c/source/tools/toolutil/xmlparser.cpp index 3fbf1a96f3c..1ca111c0756 100644 --- a/icu4c/source/tools/toolutil/xmlparser.cpp +++ b/icu4c/source/tools/toolutil/xmlparser.cpp @@ -209,7 +209,7 @@ UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) { goto exit; } - buffer=src.getBuffer(bytesLength); + buffer=toUCharPtr(src.getBuffer(bytesLength)); if(buffer==NULL) { // unexpected failure to reserve some string capacity errorCode=U_MEMORY_ALLOCATION_ERROR; @@ -278,7 +278,7 @@ UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) { pb=bytes; for(;;) { length=src.length(); - buffer=src.getBuffer(capacity); + buffer=toUCharPtr(src.getBuffer(capacity)); if(buffer==NULL) { // unexpected failure to reserve some string capacity errorCode=U_MEMORY_ALLOCATION_ERROR; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java index f5021533586..f4be9febaeb 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java @@ -2147,57 +2147,7 @@ public class DateFormatSymbols implements Serializable, Cloneable { /** * Returns the {@link DateFormatSymbols} object that should be used to format a * calendar system's dates in the given locale. - *

    - * Subclassing:
    - * When creating a new Calendar subclass, you must create the - * {@link ResourceBundle ResourceBundle} - * containing its {@link DateFormatSymbols DateFormatSymbols} in a specific place. - * The resource bundle name is based on the calendar's fully-specified - * class name, with ".resources" inserted at the end of the package name - * (just before the class name) and "Symbols" appended to the end. - * For example, the bundle corresponding to "com.ibm.icu.util.HebrewCalendar" - * is "com.ibm.icu.impl.data.HebrewCalendarSymbols". - *

    - * Within the ResourceBundle, this method searches for five keys: - *

      - *
    • DayNames - - * An array of strings corresponding to each possible - * value of the DAY_OF_WEEK field. Even though - * DAY_OF_WEEK starts with SUNDAY = 1, - * This array is 0-based; the name for Sunday goes in the - * first position, at index 0. If this key is not found - * in the bundle, the day names are inherited from the - * default DateFormatSymbols for the requested locale. * - *
    • DayAbbreviations - - * An array of abbreviated day names corresponding - * to the values in the "DayNames" array. If this key - * is not found in the resource bundle, the "DayNames" - * values are used instead. If neither key is found, - * the day abbreviations are inherited from the default - * DateFormatSymbols for the locale. - * - *
    • MonthNames - - * An array of strings corresponding to each possible - * value of the MONTH field. If this key is not found - * in the bundle, the month names are inherited from the - * default DateFormatSymbols for the requested locale. - * - *
    • MonthAbbreviations - - * An array of abbreviated day names corresponding - * to the values in the "MonthNames" array. If this key - * is not found in the resource bundle, the "MonthNames" - * values are used instead. If neither key is found, - * the day abbreviations are inherited from the default - * DateFormatSymbols for the locale. - * - *
    • Eras - - * An array of strings corresponding to each possible - * value of the ERA field. If this key is not found - * in the bundle, the era names are inherited from the - * default DateFormatSymbols for the requested locale. - *
    - *

    * @param cal The calendar system whose date format symbols are desired. * @param locale The locale whose symbols are desired. * @@ -2211,57 +2161,6 @@ public class DateFormatSymbols implements Serializable, Cloneable { /** * Returns the {@link DateFormatSymbols} object that should be used to format a * calendar system's dates in the given locale. - *

    - * Subclassing:
    - * When creating a new Calendar subclass, you must create the - * {@link ResourceBundle ResourceBundle} - * containing its {@link DateFormatSymbols DateFormatSymbols} in a specific place. - * The resource bundle name is based on the calendar's fully-specified - * class name, with ".resources" inserted at the end of the package name - * (just before the class name) and "Symbols" appended to the end. - * For example, the bundle corresponding to "com.ibm.icu.util.HebrewCalendar" - * is "com.ibm.icu.impl.data.HebrewCalendarSymbols". - *

    - * Within the ResourceBundle, this method searches for five keys: - *

      - *
    • DayNames - - * An array of strings corresponding to each possible - * value of the DAY_OF_WEEK field. Even though - * DAY_OF_WEEK starts with SUNDAY = 1, - * This array is 0-based; the name for Sunday goes in the - * first position, at index 0. If this key is not found - * in the bundle, the day names are inherited from the - * default DateFormatSymbols for the requested locale. - * - *
    • DayAbbreviations - - * An array of abbreviated day names corresponding - * to the values in the "DayNames" array. If this key - * is not found in the resource bundle, the "DayNames" - * values are used instead. If neither key is found, - * the day abbreviations are inherited from the default - * DateFormatSymbols for the locale. - * - *
    • MonthNames - - * An array of strings corresponding to each possible - * value of the MONTH field. If this key is not found - * in the bundle, the month names are inherited from the - * default DateFormatSymbols for the requested locale. - * - *
    • MonthAbbreviations - - * An array of abbreviated day names corresponding - * to the values in the "MonthNames" array. If this key - * is not found in the resource bundle, the "MonthNames" - * values are used instead. If neither key is found, - * the day abbreviations are inherited from the default - * DateFormatSymbols for the locale. - * - *
    • Eras - - * An array of strings corresponding to each possible - * value of the ERA field. If this key is not found - * in the bundle, the era names are inherited from the - * default DateFormatSymbols for the requested locale. - *
    - *

    * @param cal The calendar system whose date format symbols are desired. * @param locale The ulocale whose symbols are desired. *