ICU-22876 C++ UnicodeSet/USet easy item iteration

See #3120
This commit is contained in:
Markus Scherer 2024-09-10 23:26:45 +00:00
parent 263c735400
commit 37b41495d7
9 changed files with 1087 additions and 62 deletions

View file

@ -313,7 +313,7 @@ private:
char16_t *pat = nullptr;
int32_t patLen = 0;
UVector* strings = nullptr; // maintained in sorted order
UVector* strings_ = nullptr; // maintained in sorted order
UnicodeSetStringSpan *stringSpan = nullptr;
/**
@ -1102,6 +1102,118 @@ public:
*/
UChar32 charAt(int32_t index) const;
#ifndef U_HIDE_DRAFT_API
/**
* Returns a C++ "range" for iterating over the code points of this set.
*
* \code
* UnicodeSet set(u"[abcçカ🚴]", errorCode);
* for (UChar32 c : set.codePoints()) {
* printf("set.codePoint U+%04lx\n", (long)c);
* }
* \endcode
*
* @return a "range" object for iterating over the code points of this set.
* @draft ICU 76
* @see ranges
* @see strings
* @see begin
* @see end
*/
inline U_HEADER_NESTED_NAMESPACE::USetCodePoints codePoints() const {
// Thin wrapper: the range object is built from toUSet() and keeps only that
// pointer plus the range count it reads in its constructor.
return U_HEADER_NESTED_NAMESPACE::USetCodePoints(toUSet());
}
/**
* Returns a C++ "range" for iterating over the code point ranges of this set.
*
* \code
* UnicodeSet set(u"[abcçカ🚴]", errorCode);
* for (auto [start, end] : set.ranges()) {
* printf("set.range U+%04lx..U+%04lx\n", (long)start, (long)end);
* }
* for (auto range : set.ranges()) {
* for (UChar32 c : range) {
* printf("set.range.c U+%04lx\n", (long)c);
* }
* }
* \endcode
*
* @return a "range" object for iterating over the code point ranges of this set.
* @draft ICU 76
* @see codePoints
* @see strings
* @see begin
* @see end
*/
inline U_HEADER_NESTED_NAMESPACE::USetRanges ranges() const {
// Thin wrapper: the range object is built from toUSet() and keeps only that
// pointer plus the range count it reads in its constructor.
return U_HEADER_NESTED_NAMESPACE::USetRanges(toUSet());
}
/**
* Returns a C++ "range" for iterating over the empty and multi-character strings of this set.
* Returns each string as a std::u16string_view without copying its contents.
*
* \code
* UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
* for (auto s : set.strings()) {
* UnicodeString us(s);
* std::string u8;
* printf("set.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str());
* }
* \endcode
*
* @return a "range" object for iterating over the strings of this set.
* @draft ICU 76
* @see codePoints
* @see ranges
* @see begin
* @see end
*/
inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const {
// Thin wrapper: the range object is built from toUSet() and keeps only that
// pointer plus the string count it reads in its constructor.
return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet());
}
/**
* Returns a C++ iterator for iterating over all of the elements of this set.
* Convenient all-in-one iteration, but creates a UnicodeString for each
* code point or string.
* (Similar to how Java UnicodeSet *is an* Iterable<String>.)
*
* Code points are returned first, then empty and multi-character strings.
*
* \code
* UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
* for (auto el : set) {
* std::string u8;
* printf("set.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
* }
* \endcode
*
* @return an all-elements iterator.
* @draft ICU 76
* @see end
* @see codePoints
* @see ranges
* @see strings
*/
inline U_HEADER_NESTED_NAMESPACE::USetElementIterator begin() const {
// The temporary USetElements only supplies the range and string counts.
// The returned iterator stores its own copies of those counts and of the
// USet pointer, so it does not refer back to the temporary.
return U_HEADER_NESTED_NAMESPACE::USetElements(toUSet()).begin();
}
/**
* @return an exclusive-end sentinel for iterating over all of the elements of this set.
* @draft ICU 76
* @see begin
* @see codePoints
* @see ranges
* @see strings
*/
inline U_HEADER_NESTED_NAMESPACE::USetElementIterator end() const {
// The temporary USetElements only supplies the range and string counts.
// The returned sentinel stores its own copies of those counts and of the
// USet pointer, so it does not refer back to the temporary.
return U_HEADER_NESTED_NAMESPACE::USetElements(toUSet()).end();
}
#endif // U_HIDE_DRAFT_API
/**
* Adds the specified range to this set if it is not already
* present. If this set already contains the specified range,

View file

@ -33,7 +33,10 @@
#include "unicode/uchar.h"
#if U_SHOW_CPLUSPLUS_API
#include <string_view>
#include "unicode/char16ptr.h"
#include "unicode/localpointer.h"
#include "unicode/unistr.h"
#endif // U_SHOW_CPLUSPLUS_API
#ifndef USET_DEFINED
@ -955,7 +958,7 @@ uset_charAt(const USet* set, int32_t charIndex);
/**
* Returns the number of characters and strings contained in this set.
* The last (uset_getItemCount() - uset_getRangeCount()) items are strings.
* The last uset_getStringCount() == (uset_getItemCount() - uset_getRangeCount()) items are strings.
*
* This is slower than uset_getRangeCount() and uset_getItemCount() because
* it counts the code points of all ranges.
@ -965,6 +968,8 @@ uset_charAt(const USet* set, int32_t charIndex);
* contained in set
* @stable ICU 2.4
* @see uset_getRangeCount
* @see uset_getStringCount
* @see uset_getItemCount
*/
U_CAPI int32_t U_EXPORT2
uset_size(const USet* set);
@ -975,11 +980,42 @@ uset_size(const USet* set);
* @stable ICU 70
* @see uset_getItemCount
* @see uset_getItem
* @see uset_getStringCount
* @see uset_size
*/
U_CAPI int32_t U_EXPORT2
uset_getRangeCount(const USet *set);
#ifndef U_HIDE_DRAFT_API
/**
* @param set the set
* @return the number of strings in this set.
* @draft ICU 76
* @see uset_getRangeCount
* @see uset_getItemCount
* @see uset_size
*/
U_CAPI int32_t U_EXPORT2
uset_getStringCount(const USet *set);
/**
* Returns the index-th string (empty or multi-character) in the set.
* The string may not be NUL-terminated.
* The output length must be used, and the caller must not read more than that many UChars.
*
* @param set the set
* @param index the string index, 0 .. uset_getStringCount() - 1
* @param pLength the output string length; must not be NULL
* @return the pointer to the string; NULL if the index is out of range or pLength is NULL
* @draft ICU 76
* @see uset_getStringCount
*/
U_CAPI const UChar* U_EXPORT2
uset_getString(const USet *set, int32_t index, int32_t *pLength);
#endif // U_HIDE_DRAFT_API
/**
* Returns the number of items in this set. An item is either a range
* of characters or a single multicharacter string.
@ -987,6 +1023,8 @@ uset_getRangeCount(const USet *set);
* @return a non-negative integer counting the character ranges
* and/or strings contained in set
* @stable ICU 2.4
* @see uset_getRangeCount
* @see uset_getStringCount
*/
U_CAPI int32_t U_EXPORT2
uset_getItemCount(const USet* set);
@ -1001,6 +1039,7 @@ uset_getItemCount(const USet* set);
* If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then
* this function copies the string into <code>str[strCapacity]</code> and
* returns the length of the string (0 for the empty string).
* See uset_getString() for a function that does not copy the string contents.
*
* If <code>itemIndex</code> is out of range, then this function returns -1.
*
@ -1018,6 +1057,7 @@ uset_getItemCount(const USet* set);
* @return the length of the string (0 or >= 2), or 0 if the item is a range,
* or -1 if the itemIndex is out of range
* @stable ICU 2.4
* @see uset_getString
*/
U_CAPI int32_t U_EXPORT2
uset_getItem(const USet* set, int32_t itemIndex,
@ -1285,4 +1325,574 @@ U_CAPI UBool U_EXPORT2
uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
UChar32* pStart, UChar32* pEnd);
#endif
#if U_SHOW_CPLUSPLUS_API
#ifndef U_HIDE_DRAFT_API
namespace U_HEADER_ONLY_NAMESPACE {
// Note: Not U_COMMON_API, and not a subclass of UMemory, because this is a header-only class,
// not intended to be used via export from the ICU DLL.
/**
* Iterator returned by USetCodePoints.
* @draft ICU 76
*/
class USetCodePointIterator {
public:
/** @draft ICU 76 */
USetCodePointIterator(const USetCodePointIterator &other) = default;
/** @draft ICU 76 */
bool operator==(const USetCodePointIterator &other) const {
// No need to compare rangeCount & end given private constructor
// and assuming we don't compare iterators across the set being modified.
// And comparing rangeIndex is redundant with comparing c.
// We might even skip comparing uset.
// Unless we want operator==() to be "correct" for more than iteration.
return uset == other.uset && c == other.c;
}
/** @draft ICU 76 */
bool operator!=(const USetCodePointIterator &other) const { return !operator==(other); }
/** @draft ICU 76 */
UChar32 operator*() const { return c; }
/**
* Pre-increment.
* @draft ICU 76
*/
USetCodePointIterator &operator++() {
if (c < end) {
++c;
} else if (rangeIndex < rangeCount) {
UErrorCode errorCode = U_ZERO_ERROR;
int32_t result = uset_getItem(uset, rangeIndex, &c, &end, nullptr, 0, &errorCode);
if (U_SUCCESS(errorCode) && result == 0) {
++rangeIndex;
} else {
c = end = U_SENTINEL;
}
} else {
c = end = U_SENTINEL;
}
return *this;
}
/**
* Post-increment.
* @draft ICU 76
*/
USetCodePointIterator operator++(int) {
USetCodePointIterator result(*this);
operator++();
return result;
}
private:
friend class USetCodePoints;
USetCodePointIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount)
: uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount),
c(U_SENTINEL), end(U_SENTINEL) {
// Fetch the first range.
operator++();
}
const USet *uset;
int32_t rangeIndex;
int32_t rangeCount;
UChar32 c, end;
};
/**
* C++ "range" for iterating over the code points of a USet.
*
* \code
* using U_HEADER_NESTED_NAMESPACE::USetCodePoints;
* LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, &errorCode));
* for (UChar32 c : USetCodePoints(uset.getAlias())) {
* printf("uset.codePoint U+%04lx\n", (long)c);
* }
* \endcode
*
* C++ UnicodeSet has member functions for iteration, including codePoints().
*
* @draft ICU 76
* @see USetRanges
* @see USetStrings
* @see USetElements
*/
class USetCodePoints {
public:
    /**
     * Constructs a C++ "range" object over the code points of the USet.
     * The number of ranges is read once, here.
     * @draft ICU 76
     */
    USetCodePoints(const USet *uset) : uset{uset}, rangeCount{uset_getRangeCount(uset)} {}

    /** @draft ICU 76 */
    USetCodePoints(const USetCodePoints &other) = default;

    /**
     * @return an iterator positioned on the first code point of the first range
     * @draft ICU 76
     */
    USetCodePointIterator begin() const {
        const int32_t firstRange = 0;
        return USetCodePointIterator(uset, firstRange, rangeCount);
    }

    /**
     * @return the exclusive-end sentinel iterator
     * @draft ICU 76
     */
    USetCodePointIterator end() const {
        const int32_t pastLastRange = rangeCount;
        return USetCodePointIterator(uset, pastLastRange, rangeCount);
    }

private:
    const USet *uset;
    int32_t rangeCount;
};
/**
* A contiguous range of code points in a USet/UnicodeSet.
* Returned by USetRangeIterator which is returned by USetRanges.
* Both the rangeStart and rangeEnd are in the range.
* (end() returns an iterator corresponding to rangeEnd+1.)
* @draft ICU 76
*/
struct CodePointRange {
/** @draft ICU 76 */
struct iterator {
/** @draft ICU 76 */
iterator(UChar32 c) : c(c) {}
/** @draft ICU 76 */
bool operator==(const iterator &other) const { return c == other.c; }
/** @draft ICU 76 */
bool operator!=(const iterator &other) const { return !operator==(other); }
/** @draft ICU 76 */
UChar32 operator*() const { return c; }
/**
* Pre-increment.
* @draft ICU 76
*/
iterator &operator++() {
++c;
return *this;
}
/**
* Post-increment.
* @draft ICU 76
*/
iterator operator++(int) {
return c++;
}
/**
* The current code point in the range.
* @draft ICU 76
*/
UChar32 c;
};
/** @draft ICU 76 */
CodePointRange(UChar32 start, UChar32 end) : rangeStart(start), rangeEnd(end) {}
/** @draft ICU 76 */
CodePointRange(const CodePointRange &other) = default;
/** @draft ICU 76 */
size_t size() const { return (rangeEnd + 1) - rangeStart; }
/** @draft ICU 76 */
iterator begin() const { return rangeStart; }
/** @draft ICU 76 */
iterator end() const { return rangeEnd + 1; }
/**
* Start of a USet/UnicodeSet range of code points.
* @draft ICU 76
*/
UChar32 rangeStart;
/**
* Inclusive end of a USet/UnicodeSet range of code points.
* @draft ICU 76
*/
UChar32 rangeEnd;
};
/**
* Iterator returned by USetRanges.
* @draft ICU 76
*/
class USetRangeIterator {
public:
    /** @draft ICU 76 */
    USetRangeIterator(const USetRangeIterator &other) = default;

    /**
     * Equality as needed for iteration: same set, same range index.
     * @draft ICU 76
     */
    bool operator==(const USetRangeIterator &other) const {
        // rangeCount is not compared: the constructor is private, and iterators
        // are assumed not to be compared across a modification of the set.
        // Comparing uset could be dropped too, unless operator==() should be
        // "correct" for more than iteration.
        if (uset != other.uset) {
            return false;
        }
        return rangeIndex == other.rangeIndex;
    }

    /** @draft ICU 76 */
    bool operator!=(const USetRangeIterator &other) const { return !(*this == other); }

    /**
     * Fetches the current range from the set on each call.
     * @return the current range, or (U_SENTINEL, U_SENTINEL) if the iterator is
     *         past the last range or the item could not be fetched as a range
     * @draft ICU 76
     */
    CodePointRange operator*() const {
        if (rangeIndex >= rangeCount) {
            return CodePointRange(U_SENTINEL, U_SENTINEL);
        }
        UChar32 first, last;
        UErrorCode status = U_ZERO_ERROR;
        int32_t itemLength = uset_getItem(uset, rangeIndex, &first, &last, nullptr, 0, &status);
        if (!U_SUCCESS(status) || itemLength != 0) {
            return CodePointRange(U_SENTINEL, U_SENTINEL);
        }
        return CodePointRange(first, last);
    }

    /**
     * Pre-increment.
     * @draft ICU 76
     */
    USetRangeIterator &operator++() {
        rangeIndex += 1;
        return *this;
    }

    /**
     * Post-increment.
     * @draft ICU 76
     */
    USetRangeIterator operator++(int) {
        USetRangeIterator previous(*this);
        rangeIndex += 1;
        return previous;
    }

private:
    friend class USetRanges;

    USetRangeIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount)
            : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount) {}

    const USet *uset;
    int32_t rangeIndex;
    int32_t rangeCount;
};
/**
* C++ "range" for iterating over the code point ranges of a USet.
*
* \code
* using U_HEADER_NESTED_NAMESPACE::USetRanges;
* LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, &errorCode));
* for (auto [start, end] : USetRanges(uset.getAlias())) {
* printf("uset.range U+%04lx..U+%04lx\n", (long)start, (long)end);
* }
* for (auto range : USetRanges(uset.getAlias())) {
* for (UChar32 c : range) {
* printf("uset.range.c U+%04lx\n", (long)c);
* }
* }
* \endcode
*
* C++ UnicodeSet has member functions for iteration, including ranges().
*
* @draft ICU 76
* @see USetCodePoints
* @see USetStrings
* @see USetElements
*/
class USetRanges {
public:
    /**
     * Constructs a C++ "range" object over the code point ranges of the USet.
     * The number of ranges is read once, here.
     * @draft ICU 76
     */
    USetRanges(const USet *uset) : uset{uset}, rangeCount{uset_getRangeCount(uset)} {}

    /** @draft ICU 76 */
    USetRanges(const USetRanges &other) = default;

    /**
     * @return an iterator positioned on the first range
     * @draft ICU 76
     */
    USetRangeIterator begin() const {
        const int32_t firstRange = 0;
        return USetRangeIterator(uset, firstRange, rangeCount);
    }

    /**
     * @return the exclusive-end sentinel iterator
     * @draft ICU 76
     */
    USetRangeIterator end() const {
        const int32_t pastLastRange = rangeCount;
        return USetRangeIterator(uset, pastLastRange, rangeCount);
    }

private:
    const USet *uset;
    int32_t rangeCount;
};
/**
* Iterator returned by USetStrings.
* @draft ICU 76
*/
class USetStringIterator {
public:
    /** @draft ICU 76 */
    USetStringIterator(const USetStringIterator &other) = default;

    /**
     * Equality as needed for iteration: same set, same string index.
     * @draft ICU 76
     */
    bool operator==(const USetStringIterator &other) const {
        // No need to compare count given private constructor
        // and assuming we don't compare iterators across the set being modified.
        // We might even skip comparing uset.
        // Unless we want operator==() to be "correct" for more than iteration.
        return uset == other.uset && index == other.index;
    }

    /** @draft ICU 76 */
    bool operator!=(const USetStringIterator &other) const { return !operator==(other); }

    /**
     * Returns a view of the current string without copying its contents.
     * @return the index-th string of the set; an empty view if the iterator is
     *         past the last string or the set no longer has a string at this index
     * @draft ICU 76
     */
    std::u16string_view operator*() const {
        if (index < count) {
            // Initialized so that a failed lookup cannot leave an indeterminate length.
            int32_t length = 0;
            const UChar *uchars = uset_getString(uset, index, &length);
            // uset_getString() is documented to return NULL for an out-of-range index,
            // which can happen if the set shrank after this iterator was created.
            // Do not build a view from a null pointer in that case.
            if (uchars != nullptr) {
                return { ConstChar16Ptr(uchars), static_cast<uint32_t>(length) };
            }
        }
        return {};
    }

    /**
     * Pre-increment.
     * @draft ICU 76
     */
    USetStringIterator &operator++() {
        ++index;
        return *this;
    }

    /**
     * Post-increment.
     * @draft ICU 76
     */
    USetStringIterator operator++(int) {
        USetStringIterator result(*this);
        ++index;
        return result;
    }

private:
    friend class USetStrings;

    USetStringIterator(const USet *uset, int32_t index, int32_t count)
            : uset(uset), index(index), count(count) {}

    const USet *uset;
    /** Index of the current string, 0 .. count (count == end). */
    int32_t index;
    /** Number of strings in the set when the range object was created. */
    int32_t count;
};
/**
* C++ "range" for iterating over the empty and multi-character strings of a USet.
*
* \code
* using U_HEADER_NESTED_NAMESPACE::USetStrings;
* LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
* for (auto s : USetStrings(uset.getAlias())) {
* UnicodeString us(s);
* std::string u8;
* printf("uset.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str());
* }
* \endcode
*
* C++ UnicodeSet has member functions for iteration, including strings().
*
* @draft ICU 76
* @see USetCodePoints
* @see USetRanges
* @see USetElements
*/
class USetStrings {
public:
    /**
     * Constructs a C++ "range" object over the strings of the USet.
     * The number of strings is read once, here.
     * @draft ICU 76
     */
    USetStrings(const USet *uset) : uset{uset}, count{uset_getStringCount(uset)} {}

    /** @draft ICU 76 */
    USetStrings(const USetStrings &other) = default;

    /**
     * @return an iterator positioned on the first string
     * @draft ICU 76
     */
    USetStringIterator begin() const {
        const int32_t firstString = 0;
        return USetStringIterator(uset, firstString, count);
    }

    /**
     * @return the exclusive-end sentinel iterator
     * @draft ICU 76
     */
    USetStringIterator end() const {
        const int32_t pastLastString = count;
        return USetStringIterator(uset, pastLastString, count);
    }

private:
    const USet *uset;
    int32_t count;
};
/**
* Iterator returned by USetElements.
* @draft ICU 76
*/
class USetElementIterator {
public:
    /** @draft ICU 76 */
    USetElementIterator(const USetElementIterator &other) = default;

    /**
     * Equality as needed for iteration: same set, same current code point, same index.
     * @draft ICU 76
     */
    bool operator==(const USetElementIterator &other) const {
        // No need to compare rangeCount & end given private constructor
        // and assuming we don't compare iterators across the set being modified.
        // We might even skip comparing uset.
        // Unless we want operator==() to be "correct" for more than iteration.
        return uset == other.uset && c == other.c && index == other.index;
    }

    /** @draft ICU 76 */
    bool operator!=(const USetElementIterator &other) const { return !operator==(other); }

    /**
     * Creates a UnicodeString for the current element.
     * @return the current code point as a string, or a copy of the current set string;
     *         an empty string if the iterator is at the end or the set no longer
     *         has a string at this position
     * @draft ICU 76
     */
    UnicodeString operator*() const {
        if (c >= 0) {
            // Code point phase.
            return UnicodeString(c);
        } else if (index < totalCount) {
            // String phase: index - rangeCount is the string index.
            // Initialized so that a failed lookup cannot leave an indeterminate length.
            int32_t length = 0;
            const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
            // uset_getString() is documented to return NULL for an out-of-range index.
            if (uchars == nullptr) {
                return UnicodeString();
            }
            return UnicodeString(uchars, length);
        } else {
            return UnicodeString();
        }
    }

    /**
     * Pre-increment.
     * Walks the code points of each range first, then the strings.
     * @draft ICU 76
     */
    USetElementIterator &operator++() {
        if (c < end) {
            // Still inside the current range.
            ++c;
        } else if (index < rangeCount) {
            // Load the next range straight into c (its start) and end.
            UErrorCode errorCode = U_ZERO_ERROR;
            int32_t result = uset_getItem(uset, index, &c, &end, nullptr, 0, &errorCode);
            if (U_SUCCESS(errorCode) && result == 0) {
                ++index;
            } else {
                c = end = U_SENTINEL;
            }
        } else if (c >= 0) {
            // assert index == rangeCount;
            // Switch from the last range to the first string.
            // index stays at rangeCount, which selects string 0 in operator*().
            c = end = U_SENTINEL;
        } else {
            // String phase: move to the next string (or to the end).
            ++index;
        }
        return *this;
    }

    /**
     * Post-increment.
     * @draft ICU 76
     */
    USetElementIterator operator++(int) {
        USetElementIterator result(*this);
        operator++();
        return result;
    }

private:
    friend class USetElements;

    USetElementIterator(const USet *uset, int32_t index, int32_t rangeCount, int32_t totalCount)
            : uset(uset), index(index), rangeCount(rangeCount), totalCount(totalCount),
                c(U_SENTINEL), end(U_SENTINEL) {
        if (index < rangeCount) {
            // Fetch the first range.
            operator++();
        }
        // Otherwise don't move beyond the (index - rangeCount)-th string.
    }

    const USet *uset;
    /**
     * While iterating code points: the index of the next range to fetch.
     * While iterating strings: rangeCount plus the index of the current string.
     */
    int32_t index;
    /** Number of UnicodeSet/USet code point ranges. */
    int32_t rangeCount;
    /**
     * Number of code point ranges plus number of strings.
     * index starts from 0, counts ranges while less than rangeCount,
     * then counts strings while at least rangeCount and less than totalCount.
     *
     * Note that totalCount is the same as uset_getItemCount(), but usually
     * smaller than the number of elements returned by this iterator
     * because we return each code point of each range.
     */
    int32_t totalCount;
    /** Current code point (U_SENTINEL while iterating strings) and inclusive end of the current range. */
    UChar32 c, end;
};
/**
* A C++ "range" for iterating over all of the elements of a USet.
* Convenient all-in-one iteration, but creates a UnicodeString for each
* code point or string.
*
* Code points are returned first, then empty and multi-character strings.
*
* \code
* using U_HEADER_NESTED_NAMESPACE::USetElements;
* LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
* for (auto el : USetElements(uset.getAlias())) {
* std::string u8;
* printf("uset.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
* }
* \endcode
*
* C++ UnicodeSet has member functions for iteration, including begin() and end().
*
* begin() returns an all-elements iterator; end() returns its exclusive-end sentinel.
* @draft ICU 76
* @see USetCodePoints
* @see USetRanges
* @see USetStrings
*/
class USetElements {
public:
    /**
     * Constructs a C++ "range" object over all of the elements of the USet.
     * The numbers of ranges and strings are read once, here.
     * @draft ICU 76
     */
    USetElements(const USet *uset)
            : uset{uset}, rangeCount{uset_getRangeCount(uset)},
                stringCount{uset_getStringCount(uset)} {}

    /** @draft ICU 76 */
    USetElements(const USetElements &other) = default;

    /**
     * @return an iterator positioned on the first element
     * @draft ICU 76
     */
    USetElementIterator begin() const {
        const int32_t itemCount = rangeCount + stringCount;
        return USetElementIterator(uset, 0, rangeCount, itemCount);
    }

    /**
     * @return the exclusive-end sentinel iterator
     * @draft ICU 76
     */
    USetElementIterator end() const {
        const int32_t itemCount = rangeCount + stringCount;
        return USetElementIterator(uset, itemCount, rangeCount, itemCount);
    }

private:
    const USet *uset;
    int32_t rangeCount, stringCount;
};
} // namespace U_HEADER_ONLY_NAMESPACE
#endif // U_HIDE_DRAFT_API
#endif // U_SHOW_CPLUSPLUS_API
#endif // __USET_H__

View file

@ -124,6 +124,49 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
# if U_USING_ICU_NAMESPACE
U_NAMESPACE_USE
# endif
#ifndef U_HIDE_DRAFT_API
/**
* \def U_HEADER_NESTED_NAMESPACE
* Nested namespace used inside U_ICU_NAMESPACE for header-only APIs.
* Different when used inside ICU to prevent public use of internal instantiations:
* "header" when compiling calling code; "internal" when compiling ICU library code.
*
* When compiling for Windows, where DLL exports of APIs are explicit,
* this is always "header". Header-only types are not marked for export,
* which on Windows already avoids callers linking with library instantiations.
*
* @draft ICU 76
* @see U_HEADER_ONLY_NAMESPACE
*/
/**
* \def U_HEADER_ONLY_NAMESPACE
* Namespace used for header-only APIs.
* Different when used inside ICU to prevent public use of internal instantiations.
* "U_ICU_NAMESPACE::header" or "U_ICU_NAMESPACE::internal",
* see U_HEADER_NESTED_NAMESPACE for details.
*
* @draft ICU 76
*/
// Selects the nested namespace name for header-only APIs.
// The first test is the same as for defining U_EXPORT for Windows.
#if defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \
UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__))
// Explicit DLL exports: always "header", even while compiling ICU itself.
# define U_HEADER_NESTED_NAMESPACE header
#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
// Compiling one of the ICU libraries: use a separate name for the instantiations.
# define U_HEADER_NESTED_NAMESPACE internal
#else
// Compiling calling code.
# define U_HEADER_ONLY_NAMESPACE_IS_HEADER_COMMENT_ONLY
# define U_HEADER_NESTED_NAMESPACE header
#endif
// Fully qualified form; expands to a C++17 nested namespace name (A::B).
#define U_HEADER_ONLY_NAMESPACE U_ICU_NAMESPACE::U_HEADER_NESTED_NAMESPACE
// Declares the (empty) namespace here.
// NOTE(review): presumably so the name exists even when no header-only API is included — confirm.
namespace U_HEADER_ONLY_NAMESPACE {}
#endif // U_HIDE_DRAFT_API
#endif /* __cplusplus */
/*===========================================================================*/

View file

@ -118,15 +118,15 @@ static int32_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
}
// Returns true when the string list has been allocated and holds at least one string.
UBool UnicodeSet::hasStrings() const {
return strings != nullptr && !strings->isEmpty();
return strings_ != nullptr && !strings_->isEmpty();
}
// Returns the number of strings in the string list, or 0 when the list has not been allocated.
int32_t UnicodeSet::stringsSize() const {
return strings == nullptr ? 0 : strings->size();
return strings_ == nullptr ? 0 : strings_->size();
}
// Returns true when the string list has been allocated and contains s.
// The list's contains() takes a void*, hence the cast of the string's address.
UBool UnicodeSet::stringsContains(const UnicodeString &s) const {
return strings != nullptr && strings->contains((void*) &s);
return strings_ != nullptr && strings_->contains((void*) &s);
}
//----------------------------------------------------------------
@ -171,7 +171,7 @@ UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : UnicodeFilte
if (o.hasStrings()) {
UErrorCode status = U_ZERO_ERROR;
if (!allocateStrings(status) ||
(strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) {
(strings_->assign(*o.strings_, cloneUnicodeString, status), U_FAILURE(status))) {
setToBogus();
return;
}
@ -195,7 +195,7 @@ UnicodeSet::~UnicodeSet() {
if (buffer != stackList) {
uprv_free(buffer);
}
delete strings;
delete strings_;
delete stringSpan;
releasePattern();
}
@ -233,16 +233,16 @@ UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) {
}
if (o.hasStrings()) {
UErrorCode status = U_ZERO_ERROR;
if ((strings == nullptr && !allocateStrings(status)) ||
(strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) {
if ((strings_ == nullptr && !allocateStrings(status)) ||
(strings_->assign(*o.strings_, cloneUnicodeString, status), U_FAILURE(status))) {
setToBogus();
return *this;
}
} else if (hasStrings()) {
strings->removeAllElements();
strings_->removeAllElements();
}
if (o.stringSpan != nullptr && !asThawed) {
stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings);
stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings_);
if (stringSpan == nullptr) { // Check for memory allocation error.
setToBogus();
return *this;
@ -284,7 +284,7 @@ bool UnicodeSet::operator==(const UnicodeSet& o) const {
if (list[i] != o.list[i]) return false;
}
if (hasStrings() != o.hasStrings()) { return false; }
if (hasStrings() && *strings != *o.strings) return false;
if (hasStrings() && *strings_ != *o.strings_) return false;
return true;
}
@ -450,7 +450,7 @@ UBool UnicodeSet::containsAll(const UnicodeSet& c) const {
return false;
}
}
return !c.hasStrings() || (strings != nullptr && strings->containsAll(*c.strings));
return !c.hasStrings() || (strings_ != nullptr && strings_->containsAll(*c.strings_));
}
/**
@ -495,7 +495,7 @@ UBool UnicodeSet::containsNone(const UnicodeSet& c) const {
return false;
}
}
return strings == nullptr || !c.hasStrings() || strings->containsNone(*c.strings);
return strings_ == nullptr || !c.hasStrings() || strings_->containsNone(*c.strings_);
}
/**
@ -536,8 +536,8 @@ UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
}
}
if (hasStrings()) {
for (i=0; i<strings->size(); ++i) {
const UnicodeString& s = *static_cast<const UnicodeString*>(strings->elementAt(i));
for (i=0; i<strings_->size(); ++i) {
const UnicodeString& s = *static_cast<const UnicodeString*>(strings_->elementAt(i));
if (s.isEmpty()) {
continue; // skip the empty string
}
@ -586,8 +586,8 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
// return the longest match.
int32_t highWaterLength = 0;
for (i=0; i<strings->size(); ++i) {
const UnicodeString& trial = *static_cast<const UnicodeString*>(strings->elementAt(i));
for (i=0; i<strings_->size(); ++i) {
const UnicodeString& trial = *static_cast<const UnicodeString*>(strings_->elementAt(i));
if (trial.isEmpty()) {
continue; // skip the empty string
}
@ -962,15 +962,15 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
}
/**
* Adds the given string, in order, to 'strings'. The given string
* must have been checked by the caller to not already be in 'strings'.
* Adds the given string, in order, to 'strings_'. The given string
* must have been checked by the caller to not already be in 'strings_'.
*/
void UnicodeSet::_add(const UnicodeString& s) {
if (isFrozen() || isBogus()) {
return;
}
UErrorCode ec = U_ZERO_ERROR;
if (strings == nullptr && !allocateStrings(ec)) {
if (strings_ == nullptr && !allocateStrings(ec)) {
setToBogus();
return;
}
@ -979,7 +979,7 @@ void UnicodeSet::_add(const UnicodeString& s) {
setToBogus();
return;
}
strings->sortedInsert(t, compareUnicodeString, ec);
strings_->sortedInsert(t, compareUnicodeString, ec);
if (U_FAILURE(ec)) {
setToBogus();
}
@ -1058,7 +1058,7 @@ UnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) {
UnicodeSet& UnicodeSet::removeAllStrings() {
if (!isFrozen() && hasStrings()) {
strings->removeAllElements();
strings_->removeAllElements();
releasePattern();
}
return *this;
@ -1176,7 +1176,7 @@ UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
if (isFrozen() || isBogus()) return *this;
int32_t cp = getSingleCP(s);
if (cp < 0) {
if (strings != nullptr && strings->removeElement((void*) &s)) {
if (strings_ != nullptr && strings_->removeElement((void*) &s)) {
releasePattern();
}
} else {
@ -1248,7 +1248,7 @@ UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
int32_t cp = getSingleCP(s);
if (cp < 0) {
if (stringsContains(s)) {
strings->removeElement((void*) &s);
strings_->removeElement((void*) &s);
} else {
_add(s);
}
@ -1275,9 +1275,9 @@ UnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) {
}
// Add strings in order
if ( c.strings!=nullptr ) {
for (int32_t i=0; i<c.strings->size(); ++i) {
const UnicodeString* s = static_cast<const UnicodeString*>(c.strings->elementAt(i));
if ( c.strings_!=nullptr ) {
for (int32_t i=0; i<c.strings_->size(); ++i) {
const UnicodeString* s = static_cast<const UnicodeString*>(c.strings_->elementAt(i));
if (!stringsContains(*s)) {
_add(*s);
}
@ -1302,9 +1302,9 @@ UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) {
retain(c.list, c.len, 0);
if (hasStrings()) {
if (!c.hasStrings()) {
strings->removeAllElements();
strings_->removeAllElements();
} else {
strings->retainAll(*c.strings);
strings_->retainAll(*c.strings_);
}
}
return *this;
@ -1325,7 +1325,7 @@ UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) {
}
retain(c.list, c.len, 2);
if (hasStrings() && c.hasStrings()) {
strings->removeAll(*c.strings);
strings_->removeAll(*c.strings_);
}
return *this;
}
@ -1344,10 +1344,10 @@ UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
}
exclusiveOr(c.list, c.len, 0);
if (c.strings != nullptr) {
for (int32_t i=0; i<c.strings->size(); ++i) {
void* e = c.strings->elementAt(i);
if (strings == nullptr || !strings->removeElement(e)) {
if (c.strings_ != nullptr) {
for (int32_t i=0; i<c.strings_->size(); ++i) {
void* e = c.strings_->elementAt(i);
if (strings_ == nullptr || !strings_->removeElement(e)) {
_add(*static_cast<const UnicodeString*>(e));
}
}
@ -1366,8 +1366,8 @@ UnicodeSet& UnicodeSet::clear() {
list[0] = UNICODESET_HIGH;
len = 1;
releasePattern();
if (strings != nullptr) {
strings->removeAllElements();
if (strings_ != nullptr) {
strings_->removeAllElements();
}
// Remove bogus
fFlags = 0;
@ -1405,7 +1405,7 @@ UChar32 UnicodeSet::getRangeEnd(int32_t index) const {
}
// Returns the index-th element of the string list, cast to a UnicodeString pointer.
// The string list pointer is dereferenced without a null check here.
// NOTE(review): callers presumably guarantee that the set has strings and that
// index is in range — confirm.
const UnicodeString* UnicodeSet::getString(int32_t index) const {
return static_cast<const UnicodeString*>(strings->elementAt(index));
return static_cast<const UnicodeString*>(strings_->elementAt(index));
}
/**
@ -1439,9 +1439,9 @@ UnicodeSet& UnicodeSet::compact() {
// else what the heck happened?! We allocated less memory!
// Oh well. We'll keep our original array.
}
if (strings != nullptr && strings->isEmpty()) {
delete strings;
strings = nullptr;
if (strings_ != nullptr && strings_->isEmpty()) {
delete strings_;
strings_ = nullptr;
}
return *this;
}
@ -1607,15 +1607,15 @@ UBool UnicodeSet::allocateStrings(UErrorCode &status) {
if (U_FAILURE(status)) {
return false;
}
strings = new UVector(uprv_deleteUObject,
strings_ = new UVector(uprv_deleteUObject,
uhash_compareUnicodeString, 1, status);
if (strings == nullptr) { // Check for memory allocation error.
if (strings_ == nullptr) { // Check for memory allocation error.
status = U_MEMORY_ALLOCATION_ERROR;
return false;
}
if (U_FAILURE(status)) {
delete strings;
strings = nullptr;
delete strings_;
strings_ = nullptr;
return false;
}
return true;
@ -2131,11 +2131,11 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
}
}
if (strings != nullptr) {
for (int32_t i = 0; i<strings->size(); ++i) {
if (strings_ != nullptr) {
for (int32_t i = 0; i<strings_->size(); ++i) {
result.append(u'{');
_appendToPat(result,
*static_cast<const UnicodeString*>(strings->elementAt(i)),
*static_cast<const UnicodeString*>(strings_->elementAt(i)),
escapeUnprintable);
result.append(u'}');
}
@ -2175,7 +2175,7 @@ UnicodeSet *UnicodeSet::freeze() {
// Optimize contains() and span() and similar functions.
if (hasStrings()) {
stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL);
stringSpan = new UnicodeSetStringSpan(*this, *strings_, UnicodeSetStringSpan::ALL);
if (stringSpan == nullptr) {
setToBogus();
return this;
@ -2216,7 +2216,7 @@ int32_t UnicodeSet::span(const char16_t *s, int32_t length, USetSpanCondition sp
uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED :
UnicodeSetStringSpan::FWD_UTF16_CONTAINED;
UnicodeSetStringSpan strSpan(*this, *strings, which);
UnicodeSetStringSpan strSpan(*this, *strings_, which);
if(strSpan.needsStringSpanUTF16()) {
return strSpan.span(s, length, spanCondition);
}
@ -2253,7 +2253,7 @@ int32_t UnicodeSet::spanBack(const char16_t *s, int32_t length, USetSpanConditio
uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED :
UnicodeSetStringSpan::BACK_UTF16_CONTAINED;
UnicodeSetStringSpan strSpan(*this, *strings, which);
UnicodeSetStringSpan strSpan(*this, *strings_, which);
if(strSpan.needsStringSpanUTF16()) {
return strSpan.spanBack(s, length, spanCondition);
}
@ -2291,7 +2291,7 @@ int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition sp
uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED :
UnicodeSetStringSpan::FWD_UTF8_CONTAINED;
UnicodeSetStringSpan strSpan(*this, *strings, which);
UnicodeSetStringSpan strSpan(*this, *strings_, which);
if(strSpan.needsStringSpanUTF8()) {
return strSpan.spanUTF8(reinterpret_cast<const uint8_t*>(s), length, spanCondition);
}
@ -2329,7 +2329,7 @@ int32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanConditio
uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED :
UnicodeSetStringSpan::BACK_UTF8_CONTAINED;
UnicodeSetStringSpan strSpan(*this, *strings, which);
UnicodeSetStringSpan strSpan(*this, *strings_, which);
if(strSpan.needsStringSpanUTF8()) {
return strSpan.spanBackUTF8(reinterpret_cast<const uint8_t*>(s), length, spanCondition);
}

View file

@ -242,7 +242,7 @@ void UnicodeSet::closeOverCaseInsensitive(bool simple) {
// therefore, start with no strings and add only those needed.
// Do this before processing code points, because they may add strings.
if (!simple && foldSet.hasStrings()) {
foldSet.strings->removeAllElements();
foldSet.strings_->removeAllElements();
}
USetAdder sa = {
@ -276,8 +276,8 @@ void UnicodeSet::closeOverCaseInsensitive(bool simple) {
}
if (hasStrings()) {
UnicodeString str;
for (int32_t j=0; j<strings->size(); ++j) {
const UnicodeString* pStr = static_cast<const UnicodeString*>(strings->elementAt(j));
for (int32_t j=0; j<strings_->size(); ++j) {
const UnicodeString* pStr = static_cast<const UnicodeString*>(strings_->elementAt(j));
if (simple) {
if (scfString(*pStr, str)) {
foldSet.remove(*pStr).add(str);
@ -334,8 +334,8 @@ void UnicodeSet::closeOverAddCaseMappings() {
BreakIterator *bi = BreakIterator::createWordInstance(root, status);
if (U_SUCCESS(status)) {
#endif
for (int32_t j=0; j<strings->size(); ++j) {
const UnicodeString* pStr = static_cast<const UnicodeString*>(strings->elementAt(j));
for (int32_t j=0; j<strings_->size(); ++j) {
const UnicodeString* pStr = static_cast<const UnicodeString*>(strings_->elementAt(j));
(str = *pStr).toLower(root);
foldSet.add(str);
#if !UCONFIG_NO_BREAK_ITERATION

View file

@ -21,6 +21,7 @@
*/
#include "unicode/utypes.h"
#include "unicode/char16ptr.h"
#include "unicode/uobject.h"
#include "unicode/uset.h"
#include "unicode/uniset.h"
@ -306,12 +307,32 @@ uset_getRangeCount(const USet *set) {
return ((const UnicodeSet *)set)->UnicodeSet::getRangeCount();
}
// C API: returns the number of strings in the set,
// as reported by USetAccess::getStringCount().
U_CAPI int32_t U_EXPORT2
uset_getStringCount(const USet *uset) {
    const UnicodeSet *unicodeSet = (const UnicodeSet *)uset;
    return USetAccess::getStringCount(*unicodeSet);
}
// C API: returns the total number of items in the set,
// computed as the number of code point ranges plus the number of strings.
U_CAPI int32_t U_EXPORT2
uset_getItemCount(const USet* uset) {
    const UnicodeSet* unicodeSet = (const UnicodeSet*)uset;
    const int32_t rangeCount = unicodeSet->getRangeCount();
    const int32_t stringCount = USetAccess::getStringCount(*unicodeSet);
    return rangeCount + stringCount;
}
// C API: returns a pointer to the contents of the string at the given index
// and stores its length in *pLength.
// Returns nullptr if pLength is nullptr, or (with *pLength set to 0)
// if index is outside 0..stringCount-1.
U_CAPI const UChar* U_EXPORT2
uset_getString(const USet *uset, int32_t index, int32_t *pLength) {
    // Without an output slot for the length, nothing is looked up.
    if (pLength == nullptr) {
        return nullptr;
    }
    const UnicodeSet *unicodeSet = (const UnicodeSet *)uset;
    const int32_t stringCount = USetAccess::getStringCount(*unicodeSet);
    const bool indexInRange = 0 <= index && index < stringCount;
    if (!indexInRange) {
        *pLength = 0;
        return nullptr;
    }
    const UnicodeString *str = USetAccess::getString(*unicodeSet, index);
    *pLength = str->length();
    return toUCharPtr(str->getBuffer());
}
U_CAPI int32_t U_EXPORT2
uset_getItem(const USet* uset, int32_t itemIndex,
UChar32* start, UChar32* end,

View file

@ -61,7 +61,7 @@ UBool UnicodeSetIterator::next() {
if (nextString >= stringCount) return false;
codepoint = static_cast<UChar32>(IS_STRING); // signal that value is actually a string
string = static_cast<const UnicodeString*>(set->strings->elementAt(nextString++));
string = static_cast<const UnicodeString*>(set->strings_->elementAt(nextString++));
return true;
}
@ -94,7 +94,7 @@ UBool UnicodeSetIterator::nextRange() {
if (nextString >= stringCount) return false;
codepoint = static_cast<UChar32>(IS_STRING); // signal that value is actually a string
string = static_cast<const UnicodeString*>(set->strings->elementAt(nextString++));
string = static_cast<const UnicodeString*>(set->strings_->elementAt(nextString++));
return true;
}

View file

@ -12,9 +12,11 @@
*/
#include <stdio.h>
#include <string.h>
#include <string_view>
#include <unordered_map>
#include "unicode/utypes.h"
#include "usettest.h"
#include "unicode/ucnv.h"
@ -104,6 +106,14 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
TESTCASE_AUTO(TestEmptyString);
TESTCASE_AUTO(TestSkipToStrings);
TESTCASE_AUTO(TestPatternCodePointComplement);
TESTCASE_AUTO(TestCodePointIterator);
TESTCASE_AUTO(TestUSetCodePointIterator);
TESTCASE_AUTO(TestRangeIterator);
TESTCASE_AUTO(TestUSetRangeIterator);
TESTCASE_AUTO(TestStringIterator);
TESTCASE_AUTO(TestUSetStringIterator);
TESTCASE_AUTO(TestElementIterator);
TESTCASE_AUTO(TestUSetElementIterator);
TESTCASE_AUTO_END;
}
@ -4259,3 +4269,223 @@ void UnicodeSetTest::TestPatternCodePointComplement() {
assertFalse("[:Basic_Emoji:].complement() --> no bicycle", notBasic.contains(U'🚲'));
}
}
void UnicodeSetTest::TestCodePointIterator() {
IcuTestErrorCode errorCode(*this, "TestCodePointIterator");
UnicodeSet set(u"[abcçカ🚴]", errorCode);
UnicodeString result;
for (UChar32 c : set.codePoints()) {
// printf("set.codePoint U+%04lx\n", (long)c);
result.append(u' ').append(c);
}
assertEquals(WHERE, u" a b c ç カ 🚴", result);
// codePoints() returns USetCodePoints for which explicit APIs are tested via USet.
}
// Exercises USetCodePoints on a C-API USet: range-based for iteration,
// copy construction of the range and of its iterators,
// operator*, pre-/post-increment, and iterator (in)equality.
void UnicodeSetTest::TestUSetCodePointIterator() {
    IcuTestErrorCode errorCode(*this, "TestUSetCodePointIterator");
    using U_HEADER_NESTED_NAMESPACE::USetCodePoints;
    LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, errorCode));
    UnicodeString result;
    for (UChar32 c : USetCodePoints(uset.getAlias())) {
        // printf("uset.codePoint U+%04lx\n", (long)c);
        result.append(u' ').append(c);
    }
    assertEquals(WHERE, u" a b c ç カ 🚴", result);
    USetCodePoints range1(uset.getAlias());
    auto range2(range1);  // copy constructor
    // begin() and end() deliberately come from different copies of the range.
    auto iter = range1.begin();
    auto limit = range2.end();
    // operator* with pre- and post-increment
    assertEquals(WHERE, u'a', *iter);
    ++iter;
    assertEquals(WHERE, u'b', *iter);
    assertEquals(WHERE, u'c', *++iter);
    auto iter2(iter);  // copy constructor
    assertEquals(WHERE, u'c', *iter2++);
    assertEquals(WHERE, u'ç', *iter2++);
    // The code point after ç in this set is the katakana カ.
    assertEquals(WHERE, u'カ', *iter2);
    assertTrue(WHERE, ++iter2 != limit);
    // iter3 receives the pre-increment position (the last code point);
    // iter2 then moves to the end.
    auto iter3(iter2++);
    assertEquals(WHERE, U'🚴', *iter3);
    assertTrue(WHERE, iter2 == limit);
}
void UnicodeSetTest::TestRangeIterator() {
IcuTestErrorCode errorCode(*this, "TestRangeIterator");
UnicodeSet set(u"[abcçカ🚴]", errorCode);
UnicodeString result;
for (auto [start, end] : set.ranges()) {
// printf("set.range U+%04lx..U+%04lx\n", (long)start, (long)end);
result.append(u' ').append(start).append(u'-').append(end);
}
assertEquals(WHERE, u" a-c ç-ç カ-カ 🚴-🚴", result);
result.remove();
for (auto range : set.ranges()) {
for (UChar32 c : range) {
// printf("set.range.c U+%04lx\n", (long)c);
result.append(u' ').append(c);
}
result.append(u" |");
}
assertEquals(WHERE, u" a b c | ç | カ | 🚴 |", result);
// ranges() returns USetRanges for which explicit APIs are tested via USet.
}
// Exercises USetRanges and CodePointRange on a C-API USet:
// range-based for iteration (structured bindings and nested code point loops),
// copy construction, operator*, pre-/post-increment, iterator (in)equality,
// and the public constructors of CodePointRange and CodePointRange::iterator.
void UnicodeSetTest::TestUSetRangeIterator() {
    IcuTestErrorCode errorCode(*this, "TestUSetRangeIterator");
    using U_HEADER_NESTED_NAMESPACE::USetRanges;
    using U_HEADER_NESTED_NAMESPACE::CodePointRange;
    LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴]", -1, errorCode));
    UnicodeString result;
    for (auto [start, end] : USetRanges(uset.getAlias())) {
        // printf("uset.range U+%04lx..U+%04lx\n", (long)start, (long)end);
        result.append(u' ').append(start).append(u'-').append(end);
    }
    assertEquals(WHERE, u" a-c ç-ç カ-カ 🚴-🚴", result);
    result.remove();
    for (auto range : USetRanges(uset.getAlias())) {
        for (UChar32 c : range) {
            // printf("uset.range.c U+%04lx\n", (long)c);
            result.append(u' ').append(c);
        }
        result.append(u" |");
    }
    assertEquals(WHERE, u" a b c | ç | カ | 🚴 |", result);
    USetRanges range1(uset.getAlias());
    auto range2(range1);  // copy constructor
    // begin() and end() deliberately come from different copies of the range.
    auto iter = range1.begin();
    auto limit = range2.end();
    // operator* with pre- and post-increment
    {
        // First range: a..c
        auto cpRange = *iter;
        assertEquals(WHERE, u'a', cpRange.rangeStart);
        assertEquals(WHERE, u'c', cpRange.rangeEnd);
        assertEquals(WHERE, 3, cpRange.size());
        auto cpRange2(cpRange);
        auto cpIter = cpRange.begin();
        auto cpLimit = cpRange2.end();
        assertEquals(WHERE, u'a', *cpIter++);
        assertEquals(WHERE, u'b', *cpIter);
        assertTrue(WHERE, cpIter != cpLimit);
        CodePointRange::iterator cpIter2(u'b');  // public constructor
        assertTrue(WHERE, cpIter == cpIter2);
        assertEquals(WHERE, u'c', *++cpIter);
        assertTrue(WHERE, cpIter != cpIter2);
        assertTrue(WHERE, ++cpIter == cpLimit);
    }
    ++iter;
    auto iter2(iter);  // copy constructor
    assertEquals(WHERE, u'ç', (*iter2).rangeStart);
    assertEquals(WHERE, u'ç', (*iter2).rangeEnd);
    assertEquals(WHERE, 1, (*iter2).size());
    assertEquals(WHERE, u'ç', (*iter2++).rangeStart);
    // The range after ç-ç in this set is the single katakana カ.
    assertEquals(WHERE, u'カ', (*iter2).rangeStart);
    assertTrue(WHERE, ++iter2 != limit);
    // iter3 receives the pre-increment position (the last range);
    // iter2 then moves to the end.
    auto iter3(iter2++);
    assertEquals(WHERE, U'🚴', (*iter3).rangeStart);
    assertTrue(WHERE, iter2 == limit);
    {
        CodePointRange cpRange(u'h', u'k');  // public constructor
        // FYI: currently no operator==
        assertEquals(WHERE, u'h', cpRange.rangeStart);
        assertEquals(WHERE, u'k', cpRange.rangeEnd);
        assertEquals(WHERE, 4, cpRange.size());
        assertEquals(WHERE, u'i', *++(cpRange.begin()));
    }
}
void UnicodeSetTest::TestStringIterator() {
IcuTestErrorCode errorCode(*this, "TestStringIterator");
UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
UnicodeString result;
for (auto s : set.strings()) {
// UnicodeString us(s);
// std::string u8;
// printf("set.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str());
result.append(u" \"").append(s).append(u'"');
}
assertEquals(WHERE, uR"( "" "abc" "de")", result);
// strings() returns USetStrins for which explicit APIs are tested via USet.
}
void UnicodeSetTest::TestUSetStringIterator() {
IcuTestErrorCode errorCode(*this, "TestUSetStringIterator");
using U_HEADER_NESTED_NAMESPACE::USetStrings;
LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, errorCode));
UnicodeString result;
for (auto s : USetStrings(uset.getAlias())) {
// UnicodeString us(s);
// std::string u8;
// printf("uset.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str());
result.append(u" \"").append(s).append(u'"');
}
assertEquals(WHERE, uR"( "" "abc" "de")", result);
USetStrings range1(uset.getAlias());
auto range2(range1); // copy constructor
auto iter = range1.begin();
auto limit = range2.end();
// operator* with pre- and post-increment
assertEquals(WHERE, UnicodeString(), UnicodeString(*iter));
assertEquals(WHERE, u"abc", UnicodeString(*++iter));
auto iter2(iter); // copy constructor
assertEquals(WHERE, u"abc", UnicodeString(*iter2++));
assertTrue(WHERE, iter2 != limit);
auto iter3(iter2++);
assertEquals(WHERE, u"de", UnicodeString(*iter3));
assertTrue(WHERE, iter2 == limit);
}
// Checks that a range-based for loop directly over a UnicodeSet
// visits every element: first each code point, then each string.
void UnicodeSetTest::TestElementIterator() {
    IcuTestErrorCode errorCode(*this, "TestElementIterator");
    UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
    UnicodeString result;
    for (auto el : set) {
        // std::string u8;
        // printf("set.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
        result.append(u" \"").append(el).append(u'"');
    }
    // The fifth element is the katakana カ; the empty string "" is the first of the set's strings.
    assertEquals(WHERE, uR"( "a" "b" "c" "ç" "カ" "🚴" "" "abc" "de")", result);
    // begin() & end() return USetElementIterator for which explicit APIs are tested via USet.
}
// Exercises USetElements on a C-API USet: range-based for iteration,
// copy construction of the range and of its iterators,
// operator*, pre-/post-increment, and iterator (in)equality.
void UnicodeSetTest::TestUSetElementIterator() {
    IcuTestErrorCode errorCode(*this, "TestUSetElementIterator");
    using U_HEADER_NESTED_NAMESPACE::USetElements;
    LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, errorCode));
    UnicodeString result;
    for (auto el : USetElements(uset.getAlias())) {
        // std::string u8;
        // printf("uset.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str());
        result.append(u" \"").append(el).append(u'"');
    }
    // The fifth element is the katakana カ; the empty string "" is the first of the set's strings.
    assertEquals(WHERE, uR"( "a" "b" "c" "ç" "カ" "🚴" "" "abc" "de")", result);
    USetElements range1(uset.getAlias());
    auto range2(range1);  // copy constructor
    // begin() and end() deliberately come from different copies of the range.
    auto iter = range1.begin();
    auto limit = range2.end();
    // operator* with pre- and post-increment
    assertEquals(WHERE, u"a", *iter);
    ++iter;
    assertEquals(WHERE, u"b", *iter);
    assertEquals(WHERE, u"c", *++iter);
    auto iter2(iter);  // copy constructor
    assertEquals(WHERE, u"c", *iter2++);
    // skip çカ🚴
    ++++++iter2;
    assertEquals(WHERE, UnicodeString(), *iter2++);
    assertEquals(WHERE, u"abc", *iter2);
    assertTrue(WHERE, ++iter2 != limit);
    // iter3 receives the pre-increment position (the last element);
    // iter2 then moves to the end.
    auto iter3(iter2++);
    assertEquals(WHERE, u"de", *iter3);
    assertTrue(WHERE, iter2 == limit);
}

View file

@ -105,6 +105,15 @@ private:
void TestSkipToStrings();
void TestPatternCodePointComplement();
void TestCodePointIterator();
void TestUSetCodePointIterator();
void TestRangeIterator();
void TestUSetRangeIterator();
void TestStringIterator();
void TestUSetStringIterator();
void TestElementIterator();
void TestUSetElementIterator();
private:
UBool toPatternAux(UChar32 start, UChar32 end);