ICU-21525 UnicodeSet.hasStrings(), UnicodeSetIterator.skipToStrings() & C API

2025-04-08 06:53:45 +00:00 · 2021-06-29 17:27:09 +00:00 · 2021-06-29 17:27:09 +00:00 · e4e2ae9544
commit e4e2ae9544
parent 84595b49a6
15 changed files with 364 additions and 192 deletions
--- a/icu4c/source/common/unicode/uniset.h
+++ b/icu4c/source/common/unicode/uniset.h
@ -771,8 +771,12 @@ public:
     * Note that the elements of a set may include both individual
     * codepoints and strings.
     *
+     * This is slower than getRangeCount() because
+     * it counts the code points of all ranges.
+     *
     * @return the number of elements in this set (its cardinality).
     * @stable ICU 2.0
+     * @see getRangeCount
     */
    virtual int32_t size(void) const;

@ -784,6 +788,14 @@ public:
     */
    virtual UBool isEmpty(void) const;

+#ifndef U_HIDE_DRAFT_API
+    /**
+     * @return true if this set contains multi-character strings or the empty string.
+     * @draft ICU 70
+     */
+    UBool hasStrings() const;
+#endif  // U_HIDE_DRAFT_API
+
    /**
     * Returns true if this set contains the given character.
     * This function works faster with a frozen set.
@ -1064,8 +1076,14 @@ public:
    /**
     * Returns the character at the given index within this set, where
     * the set is ordered by ascending code point.  If the index is
-     * out of range, return (UChar32)-1.  The inverse of this method is
-     * <code>indexOf()</code>.
+     * out of range for characters, returns (UChar32)-1.
+     * The inverse of this method is <code>indexOf()</code>.
+     *
+     * For iteration, this is slower than UnicodeSetIterator or
+     * getRangeCount()/getRangeStart()/getRangeEnd(),
+     * because for each call it skips linearly over <code>index</code>
+     * characters in the ranges.
+     *
     * @param index an index from 0..size()-1
     * @return the character at the given index, or (UChar32)-1.
     * @stable ICU 2.4
@ -1567,7 +1585,6 @@ private:
    void swapBuffers(void);

    UBool allocateStrings(UErrorCode &status);
-    UBool hasStrings() const;
    int32_t stringsSize() const;
    UBool stringsContains(const UnicodeString &s) const;

--- a/icu4c/source/common/unicode/uset.h
+++ b/icu4c/source/common/unicode/uset.h
@ -851,6 +851,16 @@ uset_removeAllStrings(USet* set);
 U_CAPI UBool U_EXPORT2
 uset_isEmpty(const USet* set);

+#ifndef U_HIDE_DRAFT_API
+/**
+ * @param set the set
+ * @return true if this set contains multi-character strings or the empty string.
+ * @draft ICU 70
+ */
+U_CAPI UBool U_EXPORT2
+uset_hasStrings(const USet *set);
+#endif  // U_HIDE_DRAFT_API
+
 /**
 * Returns true if the given USet contains the given character.
 * This function works faster with a frozen set.
@ -901,8 +911,13 @@ uset_indexOf(const USet* set, UChar32 c);
 /**
 * Returns the character at the given index within this set, where
 * the set is ordered by ascending code point.  If the index is
- * out of range, return (UChar32)-1.  The inverse of this method is
- * <code>indexOf()</code>.
+ * out of range for characters, returns (UChar32)-1.
+ * The inverse of this method is <code>indexOf()</code>.
+ *
+ * For iteration, this is slower than uset_getRangeCount()/uset_getItemCount()
+ * with uset_getItem(), because for each call it skips linearly over <code>index</code>
+ * characters in the ranges.
+ *
 * @param set the set
 * @param charIndex an index from 0..size()-1 to obtain the char for
 * @return the character at the given index, or (UChar32)-1.
@ -912,16 +927,34 @@ U_CAPI UChar32 U_EXPORT2
 uset_charAt(const USet* set, int32_t charIndex);

 /**
- * Returns the number of characters and strings contained in the given
- * USet.
+ * Returns the number of characters and strings contained in this set.
+ * The last (uset_getItemCount() - uset_getRangeCount()) items are strings.
+ *
+ * This is slower than uset_getRangeCount() and uset_getItemCount() because
+ * it counts the code points of all ranges.
+ *
 * @param set the set
 * @return a non-negative integer counting the characters and strings
 * contained in set
 * @stable ICU 2.4
+ * @see uset_getRangeCount
 */
 U_CAPI int32_t U_EXPORT2
 uset_size(const USet* set);

+#ifndef U_HIDE_DRAFT_API
+/**
+ * @param set the set
+ * @return the number of ranges in this set.
+ * @draft ICU 70
+ * @see uset_getItemCount
+ * @see uset_getItem
+ * @see uset_size
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getRangeCount(const USet *set);
+#endif  // U_HIDE_DRAFT_API
+
 /**
 * Returns the number of items in this set.  An item is either a range
 * of characters or a single multicharacter string.
@ -935,20 +968,30 @@ uset_getItemCount(const USet* set);

 /**
 * Returns an item of this set.  An item is either a range of
- * characters or a single multicharacter string.
+ * characters or a single multicharacter string (which can be the empty string).
+ *
+ * If <code>itemIndex</code> is less than uset_getRangeCount(), then this function returns 0,
+ * and the range is <code>*start</code>..<code>*end</code>.
+ *
+ * If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then
+ * this function copies the string into <code>str[strCapacity]</code> and
+ * returns the length of the string (0 for the empty string).
+ *
+ * If <code>itemIndex</code> is out of range, then this function returns -1.
+ *
+ * Note that 0 is returned for each range as well as for the empty string.
+ *
 * @param set the set
- * @param itemIndex a non-negative integer in the range 0..
- * uset_getItemCount(set)-1
- * @param start pointer to variable to receive first character
- * in range, inclusive
- * @param end pointer to variable to receive last character in range,
- * inclusive
+ * @param itemIndex a non-negative integer in the range 0..uset_getItemCount(set)-1
+ * @param start pointer to variable to receive first character in range, inclusive;
+ *              can be NULL for a string item
+ * @param end pointer to variable to receive last character in range, inclusive;
+ *            can be NULL for a string item
 * @param str buffer to receive the string, may be NULL
 * @param strCapacity capacity of str, or 0 if str is NULL
- * @param ec error code
- * @return the length of the string (>= 2), or 0 if the item is a
- * range, in which case it is the range *start..*end, or -1 if
- * itemIndex is out of range
+ * @param ec error code; U_INDEX_OUTOFBOUNDS_ERROR if the itemIndex is out of range
+ * @return the length of the string (0 or >= 2), or 0 if the item is a range,
+ *         or -1 if the itemIndex is out of range
 * @stable ICU 2.4
 */
 U_CAPI int32_t U_EXPORT2
--- a/icu4c/source/common/unicode/usetiter.h
+++ b/icu4c/source/common/unicode/usetiter.h
@ -60,6 +60,9 @@ class UnicodeString;
 *   }
 * }
 * </pre>
+ *
+ * To iterate over only the strings, start with <code>skipToStrings()</code>.
+ *
 * @author M. Davis
 * @stable ICU 2.4
 */
@ -170,6 +173,25 @@ class U_COMMON_API UnicodeSetIterator : public UObject {
     */
    const UnicodeString& getString();

+#ifndef U_HIDE_DRAFT_API
+    /**
+     * Skips over the remaining code points/ranges, if any.
+     * A following call to next() or nextRange() will yield a string, if there is one.
+     * No-op if next() would return false, or if it would yield a string anyway.
+     *
+     * @return *this
+     * @draft ICU 70
+     * @see UnicodeSet#strings()
+     */
+    inline UnicodeSetIterator &skipToStrings() {
+        // Finish code point/range iteration.
+        range = endRange;
+        endElement = -1;
+        nextElement = 0;
+        return *this;
+    }
+#endif  // U_HIDE_DRAFT_API
+
    /**
     * Advances the iteration position to the next element in the set, 
     * which can be either a single code point or a string.  
@ -281,13 +303,16 @@ class U_COMMON_API UnicodeSetIterator : public UObject {
     */
    int32_t stringCount;

+ private:
+
    /**
     *  Points to the string to use when the caller asks for a
     *  string and the current iteration item is a code point, not a string.
-     *  @internal
     */
    UnicodeString *cpString;

+ protected:
+
    /** Copy constructor. Disallowed.
     * @stable ICU 2.4
     */
@ -306,7 +331,7 @@ class U_COMMON_API UnicodeSetIterator : public UObject {
 };

 inline UBool UnicodeSetIterator::isString() const {
-    return codepoint == (UChar32)IS_STRING;
+    return codepoint < 0;
 }

 inline UChar32 UnicodeSetIterator::getCodepoint() const {
--- a/icu4c/source/common/uset.cpp
+++ b/icu4c/source/common/uset.cpp
@ -196,6 +196,11 @@ uset_isEmpty(const USet* set) {
    return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();
 }

+U_CAPI UBool U_EXPORT2
+uset_hasStrings(const USet* set) {
+    return ((const UnicodeSet*) set)->UnicodeSet::hasStrings();
+}
+
 U_CAPI UBool U_EXPORT2
 uset_contains(const USet* set, UChar32 c) {
    return ((const UnicodeSet*) set)->UnicodeSet::contains(c);
@ -296,6 +301,11 @@ private:
 };
 U_NAMESPACE_END

+U_CAPI int32_t U_EXPORT2
+uset_getRangeCount(const USet *set) {
+    return ((const UnicodeSet *)set)->UnicodeSet::getRangeCount();
+}
+
 U_CAPI int32_t U_EXPORT2
 uset_getItemCount(const USet* uset) {
    const UnicodeSet& set = *(const UnicodeSet*)uset;
@ -330,11 +340,6 @@ uset_getItem(const USet* uset, int32_t itemIndex,
    }
 }

-//U_CAPI int32_t U_EXPORT2
-//uset_getRangeCount(const USet* set) {
-//    return ((const UnicodeSet*) set)->getRangeCount();
-//}
-//
 //U_CAPI UBool U_EXPORT2
 //uset_getRange(const USet* set, int32_t rangeIndex,
 //              UChar32* pStart, UChar32* pEnd) {
--- a/icu4c/source/test/cintltst/usettest.c
+++ b/icu4c/source/test/cintltst/usettest.c
@ -6,12 +6,15 @@
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
 #include "unicode/uset.h"
 #include "unicode/ustring.h"
 #include "cintltst.h"
 #include "cmemory.h"
-#include <stdlib.h>
-#include <string.h>

 #define TEST(x) addTest(root, &x, "uset/" # x)

@ -101,6 +104,9 @@ static void TestAPI() {
    /* [ABC] */
    set = uset_open(0x0041, 0x0043);
    expect(set, "ABC", "DEF{ab}", NULL);
+    if(uset_hasStrings(set)) {
+        log_err("uset_hasStrings([ABC]) = true");
+    }
    uset_close(set);

    /* [a-c{ab}] */
@ -113,6 +119,9 @@ static void TestAPI() {
    if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) {
        log_err("uset_resemblesPattern of PAT failed\n");
    }
+    if(!uset_hasStrings(set)) {
+        log_err("uset_hasStrings([a-c{ab}]) = false");
+    }
    expect(set, "abc{ab}", "def{bc}", &ec);

    /* [a-d{ab}] */
@ -167,6 +176,9 @@ static void TestAPI() {
        return;
    }
    expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL);
+    if (uset_size(set) != 22 || uset_getRangeCount(set) != 3 || uset_getItemCount(set) != 3) {
+        log_err("line %d: uset_size()/uset_getRangeCount()/uset_getItemCount() wrong", __LINE__);
+    }

    /* [ab] */
    uset_clear(set);
@ -243,6 +255,9 @@ static void TestAPI() {
        return;
    }
    expect(set, "abcdef{ch}{sch}", "", NULL);
+    if (uset_size(set) != 8 || uset_getRangeCount(set) != 1 || uset_getItemCount(set) != 3) {
+        log_err("line %d: uset_size()/uset_getRangeCount()/uset_getItemCount() wrong", __LINE__);
+    }

    uset_retainString(set, u"sch", 3);
    expect(set, "{sch}", "abcdef{ch}", NULL);
@ -400,10 +415,12 @@ static void expectItems(const USet* set,
    char *pat;
    UErrorCode ec;
    int32_t expectedSize = 0;
+    int32_t rangeCount = uset_getRangeCount(set);
    int32_t itemCount = uset_getItemCount(set);
    int32_t itemIndex = 0;
    UChar32 start = 1, end = 0;
    int32_t itemLen = 0, length;
+    bool isString = false;

    ec = U_ZERO_ERROR;
    length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
@ -435,17 +452,26 @@ static void expectItems(const USet* set,
                return;
            }

-            itemLen = uset_getItem(set, itemIndex, &start, &end,
-                                   itemStr, sizeof(itemStr), &ec);
+            // Pass in NULL pointers where we expect them to be ok.
+            if (itemIndex < rangeCount) {
+                itemLen = uset_getItem(set, itemIndex, &start, &end, NULL, 0, &ec);
+            } else {
+                itemLen = uset_getItem(set, itemIndex, NULL, NULL,
+                                       itemStr, UPRV_LENGTHOF(itemStr), &ec);
+                isString = true;
+            }
            if (U_FAILURE(ec) || itemLen < 0) {
                log_err("FAIL: uset_getItem => %s\n", u_errorName(ec));
                return;
            }

-            if (itemLen == 0) {
+            if (!isString) {
                log_verbose("Ok: %s item %d is %c-%c\n", pat,
                            itemIndex, oneUCharToChar(start),
                            oneUCharToChar(end));
+                if (itemLen != 0) {
+                    log_err("FAIL: uset_getItem(%d) => length %d\n", itemIndex, itemLen);
+                }
            } else {
                itemStr[itemLen] = 0;
                u_UCharsToChars(itemStr, buf, itemLen+1);
@ -469,7 +495,7 @@ static void expectItems(const USet* set,
            u_charsToUChars(stringStart, ustr, stringLength);
            ustr[stringLength] = 0;
            
-            if (itemLen == 0) {
+            if (!isString) {
                log_err("FAIL: for %s expect \"%s\" next, but got a char\n",
                        pat, strCopy);
                return;
@ -488,18 +514,19 @@ static void expectItems(const USet* set,
            u_charsToUChars(p, ustr, 1);
            c = ustr[0];

-            if (itemLen != 0) {
+            if (isString) {
                log_err("FAIL: for %s expect '%c' next, but got a string\n",
                        pat, *p);
                return;
            }

-            if (c != start++) {
+            if (c != start) {
                log_err("FAIL: for %s expect '%c' next\n",
                        pat, *p);
                return;
            }

+            ++start;
            ++p;
        }
    }
--- a/icu4c/source/test/intltest/csdetest.cpp
+++ b/icu4c/source/test/intltest/csdetest.cpp
@ -780,7 +780,7 @@ void CharsetDetectionTest::Ticket6394Test() {
        return;
    }

-    UnicodeSet  setOfCharsetNames;    // UnicodSets can hold strings.
+    UnicodeSet  setOfCharsetNames;    // UnicodeSets can hold strings.
    int32_t i;
    for (i=0; i<matchCount; i++) {
        UnicodeString charSetName(ucsdet_getName(matches[i], &status));
--- a/icu4c/source/test/intltest/transrt.cpp
+++ b/icu4c/source/test/intltest/transrt.cpp
@ -262,67 +262,41 @@ UBool LegalGreek::isRho(UChar c) {
    return FALSE;
 }

-// AbbreviatedUnicodeSetIterator Interface ---------------------------------------------
-//
-//      Iterate over a UnicodeSet, only returning a sampling of the contained code points.
-//        density is the approximate total number of code points to returned for the entire set.
-//
+namespace {

-class AbbreviatedUnicodeSetIterator : public UnicodeSetIterator {
-public :
-
-    AbbreviatedUnicodeSetIterator();
-    virtual ~AbbreviatedUnicodeSetIterator();
-    void reset(UnicodeSet& set, UBool abb = FALSE, int32_t density = 100);
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     */
-    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     */
-    virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
-
-private :
-    UBool abbreviated;
-    int32_t perRange;           // The maximum number of code points to be returned from each range
-    virtual void loadRange(int32_t range);
-
-    /**
-     * The address of this static class variable serves as this class's ID
-     * for ICU "poor man's RTTI".
-     */
-    static const char fgClassID;
-};
-
-// AbbreviatedUnicodeSetIterator Implementation ---------------------------------------
-
-const char AbbreviatedUnicodeSetIterator::fgClassID=0;
-
-AbbreviatedUnicodeSetIterator::AbbreviatedUnicodeSetIterator() :
-    UnicodeSetIterator(), abbreviated(FALSE) {
-}
-
-AbbreviatedUnicodeSetIterator::~AbbreviatedUnicodeSetIterator() {
-}
-        
-void AbbreviatedUnicodeSetIterator::reset(UnicodeSet& newSet, UBool abb, int32_t density) {
-    UnicodeSetIterator::reset(newSet);
-    abbreviated = abb;
-    perRange = newSet.getRangeCount();
+/**
+ * If abbreviated=true, returns a set which contains only a sampling of the original code points.
+ * density is the approximate total number of code points to be returned for the entire set.
+ */
+const UnicodeSet &abbreviateSet(const UnicodeSet &set, bool abbreviated, int density,
+                                UnicodeSet &copy) {
+    if (!abbreviated) {
+        return set;
+    }
+    int32_t rangeCount = set.getRangeCount();
+    int32_t perRange = rangeCount;
    if (perRange != 0) {
        perRange = density / perRange;
    }
+    const UnicodeSet *p = &set;
+    bool unchanged = true;
+    for (int32_t i = 0; i < rangeCount; ++i) {
+        int32_t start = set.getRangeStart(i);
+        int32_t end = set.getRangeEnd(i);
+        int32_t newEnd = start + perRange;
+        if (end > newEnd) {
+            if (unchanged) {
+                copy = set;
+                p = &copy;
+                unchanged = false;
+            }
+            copy.remove(newEnd + 1, end);
+        }
+    }
+    return *p;
 }

-void AbbreviatedUnicodeSetIterator::loadRange(int32_t myRange) {
-    UnicodeSetIterator::loadRange(myRange);
-    if (abbreviated && (endElement > nextElement + perRange)) {
-        endElement = nextElement + perRange;
-    }
-}
+}  // namespace

 //--------------------------------------------------------------------
 // RTTest Interface
@ -587,8 +561,8 @@ void RTTest::test2(UBool quickRt, int32_t density) {
        return;
    }

-    AbbreviatedUnicodeSetIterator usi;
-    AbbreviatedUnicodeSetIterator usi2;
+    UnicodeSetIterator usi;
+    UnicodeSetIterator usi2;

    parent->logln("Checking that at least one irrelevant character is not NFC'ed");
    // string is from NFC_NO in the UCD
@ -702,13 +676,14 @@ void RTTest::test2(UBool quickRt, int32_t density) {

    UnicodeSet sourceRangeMinusFailures(sourceRange);
    sourceRangeMinusFailures.removeAll(failSourceTarg);
-            
-    usi.reset(sourceRangeMinusFailures, quickRt, density);
+
+    UnicodeSet copy, copy2;
+    usi.reset(abbreviateSet(sourceRangeMinusFailures, quickRt, density, copy));
    for (;;) { 
        if (!usi.next() || usi.isString()) break;
        UChar32 c = usi.getCodepoint();
             
-        usi2.reset(sourceRangeMinusFailures, quickRt, density);
+        usi2.reset(abbreviateSet(sourceRangeMinusFailures, quickRt, density, copy2));
        for (;;) {
            if (!usi2.next() || usi2.isString()) break;
            UChar32 d = usi2.getCodepoint();
@ -816,7 +791,7 @@ void RTTest::test2(UBool quickRt, int32_t density) {
    targetRangeMinusFailures.removeAll(failTargSource);
    targetRangeMinusFailures.removeAll(failRound);

-    usi.reset(targetRangeMinusFailures, quickRt, density);
+    usi.reset(abbreviateSet(targetRangeMinusFailures, quickRt, density, copy));
    UnicodeString targ2;
    UnicodeString reverse2;
    UnicodeString targD;
@ -830,7 +805,7 @@ void RTTest::test2(UBool quickRt, int32_t density) {
            return;
        }

-        usi2.reset(targetRangeMinusFailures, quickRt, density);
+        usi2.reset(abbreviateSet(targetRangeMinusFailures, quickRt, density, copy2));
        for (;;) {
            if (!usi2.next() || usi2.isString())
                break;
--- a/icu4c/source/test/intltest/usettest.cpp
+++ b/icu4c/source/test/intltest/usettest.cpp
@ -99,6 +99,7 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
    TESTCASE_AUTO(TestUnusedCcc);
    TESTCASE_AUTO(TestDeepPattern);
    TESTCASE_AUTO(TestEmptyString);
+    TESTCASE_AUTO(TestSkipToStrings);
    TESTCASE_AUTO_END;
 }

@ -882,6 +883,8 @@ void UnicodeSetTest::TestStrings() {
    if (U_FAILURE(ec)) {
        errln("FAIL: couldn't construct test sets");
    }
+    assertFalse("[a-c].hasStrings()", testList[0]->hasStrings());
+    assertTrue("[{ll}{ch}a-z].hasStrings()", testList[2]->hasStrings());

    for (int32_t i = 0; testList[i] != NULL; i+=2) {
        if (U_SUCCESS(ec)) {
@ -896,7 +899,7 @@ void UnicodeSetTest::TestStrings() {
        }
        delete testList[i];
        delete testList[i+1];
-    }        
+    }
 }

 /**
@ -4059,3 +4062,49 @@ void UnicodeSetTest::TestEmptyString() {
    assertTrue("frozen containsNone", set.containsNone(u"def"));
    assertFalse("frozen containsSome", set.containsSome(u"def"));
 }
+
+void UnicodeSetTest::assertNext(UnicodeSetIterator &iter, const UnicodeString &expected) {
+    assertTrue(expected + ".next()", iter.next());
+    assertEquals(expected + ".getString()", expected, iter.getString());
+}
+
+void UnicodeSetTest::TestSkipToStrings() {
+    IcuTestErrorCode errorCode(*this, "TestSkipToStrings");
+    UnicodeSet set(u"[0189{}{ch}]", errorCode);
+    UnicodeSetIterator iter(set);
+    assertNext(iter.skipToStrings(), u"");
+    assertNext(iter, u"ch");
+    assertFalse("no next", iter.next());
+
+    iter.reset();
+    assertNext(iter, u"0");
+    assertNext(iter, u"1");
+    assertNext(iter, u"8");
+    assertNext(iter, u"9");
+    assertNext(iter, u"");
+    assertNext(iter, u"ch");
+    assertFalse("no next", iter.next());
+
+    iter.reset();
+    assertNext(iter, u"0");
+    iter.skipToStrings();
+    assertNext(iter, u"");
+    assertNext(iter, u"ch");
+    assertFalse("no next", iter.next());
+
+    iter.reset();
+    iter.nextRange();
+    assertNext(iter, u"8");
+    iter.skipToStrings();
+    assertNext(iter, u"");
+    assertNext(iter, u"ch");
+    assertFalse("no next", iter.next());
+
+    iter.reset();
+    iter.nextRange();
+    iter.nextRange();
+    iter.nextRange();
+    iter.skipToStrings();
+    assertNext(iter, u"ch");
+    assertFalse("no next", iter.next());
+}
--- a/icu4c/source/test/intltest/usettest.h
+++ b/icu4c/source/test/intltest/usettest.h
@ -19,6 +19,7 @@
 #include "unicode/unistr.h"
 #include "unicode/uniset.h"
 #include "unicode/ucnv_err.h"
+#include "unicode/usetiter.h"
 #include "intltest.h"
 #include "cmemory.h"

@ -96,6 +97,9 @@ private:
    void TestDeepPattern();
    void TestEmptyString();

+    void assertNext(UnicodeSetIterator &iter, const UnicodeString &expected);
+    void TestSkipToStrings();
+
 private:

    UBool toPatternAux(UChar32 start, UChar32 end);
--- a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberFormatter.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberFormatter.java
@ -487,9 +487,8 @@ public final class NumberFormatter {
     * <li>AUTO: 0.90, 1.00, 1.10
     * <li>HIDE_IF_WHOLE: 0.90, 1, 1.10
     * </ul>
-     * 
+     *
     * @draft ICU 69
-     * @provisional This API might change or be removed in a future release.
     */
    public static enum TrailingZeroDisplay {
        /**
@ -498,7 +497,7 @@ public final class NumberFormatter {
         * @draft ICU 69
         */
        AUTO,
-    
+
        /**
         * Same as AUTO, but hide trailing zeros after the decimal separator if they are all zero.
         *
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
@ -829,10 +829,6 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
        }
    }

-    boolean hasStrings() {
-        return !strings.isEmpty();
-    }
-
    /**
     * Returns the number of elements in this set (its cardinality)
     * Note than the elements of a set may include both individual
@ -860,6 +856,14 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
        return len == 1 && !hasStrings();
    }

+    /**
+     * @return true if this set contains multi-character strings or the empty string.
+     * @draft ICU 70
+     */
+    public boolean hasStrings() {
+        return !strings.isEmpty();
+    }
+
    /**
     * Implementation of UnicodeMatcher API.  Returns <tt>true</tt> if
     * this set contains any character whose low byte is the given
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSetIterator.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSetIterator.java
@ -14,7 +14,7 @@ import java.util.Iterator;
 * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
 * iterates over either code points or code point ranges.  After all
 * code points or ranges have been returned, it returns the
- * multicharacter strings of the UnicodSet, if any.
+ * multicharacter strings of the UnicodeSet, if any.
 *
 * <p>To iterate over code points and multicharacter strings,
 * use a loop like this:
@ -34,10 +34,16 @@ import java.util.Iterator;
 *   }
 * }
 * </pre>
+ *
+ * <p>To iterate over only the strings, start with <code>new UnicodeSetIterator(set).skipToStrings()</code>.
+ *
 * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
 * Do not alter the UnicodeSet while iterating.
 * @author M. Davis
 * @stable ICU 2.0
+ * @see UnicodeSet#ranges()
+ * @see UnicodeSet#strings()
+ * @see UnicodeSet#iterator()
 */
 public class UnicodeSetIterator {

@ -94,6 +100,23 @@ public class UnicodeSetIterator {
        reset(new UnicodeSet());
    }

+    /**
+     * Skips over the remaining code points/ranges, if any.
+     * A following call to next() or nextRange() will yield a string, if there is one.
+     * No-op if next() would return false, or if it would yield a string anyway.
+     *
+     * @return this
+     * @draft ICU 70
+     * @see UnicodeSet#strings()
+     */
+    public UnicodeSetIterator skipToStrings() {
+        // Finish code point/range iteration.
+        range = endRange;
+        endElement = -1;
+        nextElement = 0;
+        return this;
+    }
+
    /**
     * Returns the next element in the set, either a single code point
     * or a string.  If there are no more elements in the set, return
@ -234,39 +257,15 @@ public class UnicodeSetIterator {
    private int endRange = 0;
    private int range = 0;

-    /**
-     * @internal
-     * @deprecated This API is ICU internal only.
-     */
-    @Deprecated
-    public UnicodeSet getSet() {
-        return set;
-    }
-
-    /**
-     * @internal
-     * @deprecated This API is ICU internal only.
-     */
-    @Deprecated
-    protected int endElement;
-    /**
-     * @internal
-     * @deprecated This API is ICU internal only.
-     */
-    @Deprecated
-    protected int nextElement;
-    private Iterator<String> stringIterator = null;
+    private int endElement;
+    private int nextElement;

    /**
     * Invariant: stringIterator is null when there are no (more) strings remaining
     */
+    private Iterator<String> stringIterator = null;

-    /**
-     * @internal
-     * @deprecated This API is ICU internal only.
-     */
-    @Deprecated
-    protected void loadRange(int aRange) {
+    private void loadRange(int aRange) {
        nextElement = set.getRangeStart(aRange);
        endElement = set.getRangeEnd(aRange);
    }
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
@ -819,6 +819,8 @@ public class UnicodeSetTest extends TestFmwk {
                            {new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
                                new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
        };
+        assertFalse("[a-c].hasStrings()", testList[0][0].hasStrings());
+        assertTrue("[{ll}{ch}a-z].hasStrings()", testList[1][0].hasStrings());

        for (int i = 0; i < testList.length; ++i) {
            if (!testList[i][0].equals(testList[i][1])) {
@ -2420,17 +2422,6 @@ public class UnicodeSetTest extends TestFmwk {
        return Utility.unescape(s);
    }

-    /* Test the method public UnicodeSet getSet() */
-    @Test
-    public void TestGetSet() {
-        UnicodeSetIterator us = new UnicodeSetIterator();
-        try {
-            us.getSet();
-        } catch (Exception e) {
-            errln("UnicodeSetIterator.getSet() was not suppose to given an " + "an exception.");
-        }
-    }
-
    /* Tests the method public UnicodeSet add(Collection<?> source) */
    @Test
    public void TestAddCollection() {
@ -2840,4 +2831,50 @@ public class UnicodeSetTest extends TestFmwk {
        assertTrue("frozen containsNone", set.containsNone("def"));
        assertFalse("frozen containsSome", set.containsSome("def"));
    }
+
+    private void assertNext(UnicodeSetIterator iter, String expected) {
+        assertTrue(expected + ".next()", iter.next());
+        assertEquals(expected + ".getString()", expected, iter.getString());
+    }
+
+    @Test
+    public void TestSkipToStrings() {
+        UnicodeSet set = new UnicodeSet("[0189{}{ch}]");
+        UnicodeSetIterator iter = new UnicodeSetIterator(set).skipToStrings();
+        assertNext(iter, "");
+        assertNext(iter, "ch");
+        assertFalse("no next", iter.next());
+
+        iter.reset();
+        assertNext(iter, "0");
+        assertNext(iter, "1");
+        assertNext(iter, "8");
+        assertNext(iter, "9");
+        assertNext(iter, "");
+        assertNext(iter, "ch");
+        assertFalse("no next", iter.next());
+
+        iter.reset();
+        assertNext(iter, "0");
+        iter.skipToStrings();
+        assertNext(iter, "");
+        assertNext(iter, "ch");
+        assertFalse("no next", iter.next());
+
+        iter.reset();
+        iter.nextRange();
+        assertNext(iter, "8");
+        iter.skipToStrings();
+        assertNext(iter, "");
+        assertNext(iter, "ch");
+        assertFalse("no next", iter.next());
+
+        iter.reset();
+        iter.nextRange();
+        iter.nextRange();
+        iter.nextRange();
+        iter.skipToStrings();
+        assertNext(iter, "ch");
+        assertFalse("no next", iter.next());
+    }
 }
--- a/icu4j/main/tests/framework/src/com/ibm/icu/dev/util/UnicodeMapIterator.java
+++ b/icu4j/main/tests/framework/src/com/ibm/icu/dev/util/UnicodeMapIterator.java
@ -17,7 +17,7 @@ import com.ibm.icu.text.UTF16;
 * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
 * iterates over either code points or code point ranges.  After all
 * code points or ranges have been returned, it returns the
- * multicharacter strings of the UnicodSet, if any.
+ * multicharacter strings of the UnicodeSet, if any.
 *
 * <p>To iterate over code points, use a loop like this:
 * <pre>
@ -106,7 +106,7 @@ public class UnicodeMapIterator<T> {
     * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
     * string in the <tt>string</tt> field.  Otherwise the value is a
     * single code point in the <tt>codepoint</tt> field.
-     * 
+     *
     * <p>The order of iteration is all code points in sorted order,
     * followed by all strings in sorted order.  <tt>codepointEnd</tt> is
     * undefined after calling this method.  <tt>string</tt> is
@ -135,7 +135,7 @@ public class UnicodeMapIterator<T> {

        if (stringIterator == null) return false;
        codepoint = IS_STRING; // signal that value is actually a string
-        string = (String)stringIterator.next();
+        string = stringIterator.next();
        if (!stringIterator.hasNext()) stringIterator = null;
        return true;
    }
@ -147,7 +147,7 @@ public class UnicodeMapIterator<T> {
     * string in the <tt>string</tt> field.  Otherwise the value is a
     * range of one or more code points from <tt>codepoint</tt> to
     * <tt>codepointEnd</tt> inclusive.
-     * 
+     *
     * <p>The order of iteration is all code point ranges in sorted
     * order, followed by all strings in sorted order.  Ranges are
     * disjoint and non-contiguous.  <tt>string</tt> is undefined
@ -180,7 +180,7 @@ public class UnicodeMapIterator<T> {

        if (stringIterator == null) return false;
        codepoint = IS_STRING; // signal that value is actually a string
-        string = (String)stringIterator.next();
+        string = stringIterator.next();
        if (!stringIterator.hasNext()) stringIterator = null;
        return true;
    }
@ -198,13 +198,13 @@ public class UnicodeMapIterator<T> {

    /**
     * Resets this iterator to the start of the set.
-     * @return 
+     * @return
     */
    public UnicodeMapIterator<T> reset() {
        endRange = map.getRangeCount() - 1;
        // both next*() methods will test: if (nextElement <= endElement)
        // we set them to fail this test, which will cause them to load the first range
-        nextElement = 0; 
+        nextElement = 0;
        endElement = -1;
        range = -1;

--- a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/RoundTripTest.java
+++ b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/RoundTripTest.java
@ -77,45 +77,33 @@ public class RoundTripTest extends TestFmwk {
    static String KATAKANA_ITERATION = "[\u30FD\u30FE]";
    static String HIRAGANA_ITERATION = "[\u309D\u309E]";

-    //------------------------------------------------------------------
-    // AbbreviatedUnicodeSetIterator
-    //------------------------------------------------------------------
-
-    static class AbbreviatedUnicodeSetIterator extends UnicodeSetIterator {
-
-        private boolean abbreviated;
-        private int perRange;
-
-        public AbbreviatedUnicodeSetIterator() {
-            super();
-            abbreviated = false;
+    /**
+     * If abbreviated=true, returns a set which contains only a sampling of the original code points.
+     * density is the approximate total number of code points to be returned for the entire set.
+     */
+    private static UnicodeSet abbreviateSet(UnicodeSet set, boolean abbreviated, int density) {
+        if (!abbreviated) {
+            return set;
        }
-
-        @Override
-        public void reset(UnicodeSet newSet) {
-            reset(newSet, false);
+        int rangeCount = set.getRangeCount();
+        int perRange = rangeCount;
+        if (perRange != 0) {
+            perRange = density / perRange;
        }
-
-        public void reset(UnicodeSet newSet, boolean abb) {
-            reset(newSet, abb, 100);
-        }
-
-        public void reset(UnicodeSet newSet, boolean abb, int density) {
-            super.reset(newSet);
-            abbreviated = abb;
-            perRange = newSet.getRangeCount();
-            if (perRange != 0) {
-                perRange = density / perRange;
-            }
-        }
-
-        @Override
-        protected void loadRange(int myRange) {
-            super.loadRange(myRange);
-            if (abbreviated && (endElement > nextElement + perRange)) {
-                endElement = nextElement + perRange;
+        boolean unchanged = true;
+        for (int i = 0; i < rangeCount; ++i) {
+            int start = set.getRangeStart(i);
+            int end = set.getRangeEnd(i);
+            int newEnd = start + perRange;
+            if (end > newEnd) {
+                if (unchanged) {
+                    set = set.cloneAsThawed();
+                    unchanged = false;
+                }
+                set.remove(newEnd + 1, end);
            }
        }
+        return set;
    }

    //--------------------------------------------------------------------
@ -1295,8 +1283,8 @@ public class RoundTripTest extends TestFmwk {
            return false;
        }

-        AbbreviatedUnicodeSetIterator usi = new AbbreviatedUnicodeSetIterator();
-        AbbreviatedUnicodeSetIterator usi2 = new AbbreviatedUnicodeSetIterator();
+        UnicodeSetIterator usi = new UnicodeSetIterator();
+        UnicodeSetIterator usi2 = new UnicodeSetIterator();

        Transliterator sourceToTarget;
        Transliterator targetToSource;
@ -1454,7 +1442,7 @@ public class RoundTripTest extends TestFmwk {

            boolean quickRt = TestFmwk.getExhaustiveness() < 10;

-            usi.reset(sourceRangeMinusFailures, quickRt, density);
+            usi.reset(abbreviateSet(sourceRangeMinusFailures, quickRt, density));

            while (usi.next()) {
                int c = usi.codepoint;
@ -1466,7 +1454,7 @@ public class RoundTripTest extends TestFmwk {
                    if (failSourceTarg.get(d)) continue;
                 */
                TestFmwk.logln(count + "/" + pairLimit + " Checking starting with " + UTF16.valueOf(c));
-                usi2.reset(sourceRangeMinusFailures, quickRt, density);
+                usi2.reset(abbreviateSet(sourceRangeMinusFailures, quickRt, density));

                while (usi2.next()) {
                    int d = usi2.codepoint;
@ -1561,7 +1549,7 @@ public class RoundTripTest extends TestFmwk {
                    !targetRange.contains(c)) continue;
             */

-            usi.reset(targetRangeMinusFailures, quickRt, density);
+            usi.reset(abbreviateSet(targetRangeMinusFailures, quickRt, density));

            while (usi.next()) {
                int c = usi.codepoint;
@ -1574,7 +1562,7 @@ public class RoundTripTest extends TestFmwk {
                        !targetRange.contains(d)) continue;
                 */
                TestFmwk.logln(count + "/" + pairLimit + " Checking starting with " + UTF16.valueOf(c));
-                usi2.reset(targetRangeMinusFailures, quickRt, density);
+                usi2.reset(abbreviateSet(targetRangeMinusFailures, quickRt, density));

                while (usi2.next()) {