ICU-13702 allow empty string in UnicodeSet

See #1519
2025-04-10 15:42:14 +00:00 · 2020-12-29 00:39:40 +00:00 · 2020-12-29 00:39:40 +00:00 · 9963b4d62a
commit 9963b4d62a
parent fb715eab4c
17 changed files with 231 additions and 150 deletions
--- a/docs/userguide/strings/unicodeset.md
+++ b/docs/userguide/strings/unicodeset.md
@ -25,15 +25,25 @@ Here are a few examples of sets:
 | `[abc123]` | The six characters a,b,c,1,2 and 3 |
 | `[\p{Letter}]` | All characters with the Unicode General Category of Letter. |

-String Values In addition to being a set of characters (of Unicode code points),
+### String Values
+
+In addition to being a set of characters (of Unicode code points),
 a UnicodeSet may also contain string values. Conceptually, the UnicodeSet is
 always a set of strings, not a set of characters, although in many common use
 cases the strings are all of length one, which reduces to being a set of
 characters.

 This concept can be confusing when first encountered, probably because similar
-set constructs from other environments (regular expressions) can only contain
-characters.
+set constructs from other environments
+(e.g., character classes in most regular expression implementations)
+can only contain characters.
+
+Through ICU 68, it was not possible for a UnicodeSet to contain the empty string.
+In Java, an exception was thrown. In C++, the empty string was silently ignored.
+
+Starting with ICU 69 ([ICU-13702](https://unicode-org.atlassian.net/browse/ICU-13702)),
+the empty string is supported as a set element;
+however, it is ignored in matching functions such as `span(string)`.

 ## UnicodeSet Patterns

--- a/icu4c/source/common/unicode/uniset.h
+++ b/icu4c/source/common/unicode/uniset.h
@ -178,8 +178,6 @@ class RuleCharacterIterator;
 * Unicode property
 * </table>
 *
- * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
- *
 * <p><b>Formal syntax</b></p>
 *
 * \htmlonly<blockquote>\endhtmlonly
@ -1104,8 +1102,8 @@ public:
     * present.  If this set already contains the multicharacter,
     * the call leaves this set unchanged.
     * Thus "ch" => {"ch"}
-     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
     * A frozen set will not be modified.
+     *
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.4
@ -1165,7 +1163,7 @@ public:

    /**
     * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
-     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+     *
     * @param s the source string
     * @return a newly created set containing the given string.
     * The caller owns the return object and is responsible for deleting it.
@ -1279,8 +1277,8 @@ public:
     * Complement the specified string in this set.
     * The set will not contain the specified string once the call
     * returns.
-     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
     * A frozen set will not be modified.
+     *
     * @param s the string to complement
     * @return this object, for chaining
     * @stable ICU 2.4
--- a/icu4c/source/common/uniset.cpp
+++ b/icu4c/source/common/uniset.cpp
@ -444,7 +444,6 @@ UBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
 * @return <tt>true</tt> if this set contains the specified string
 */
 UBool UnicodeSet::contains(const UnicodeString& s) const {
-    if (s.length() == 0) return FALSE;
    int32_t cp = getSingleCP(s);
    if (cp < 0) {
        return stringsContains(s);
@ -559,11 +558,9 @@ UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
    if (hasStrings()) {
        for (i=0; i<strings->size(); ++i) {
            const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
-            //if (s.length() == 0) {
-            //    // Empty strings match everything
-            //    return TRUE;
-            //}
-            // assert(s.length() != 0); // We enforce this elsewhere
+            if (s.isEmpty()) {
+                continue;  // skip the empty string
+            }
            UChar32 c = s.char32At(0);
            if ((c & 0xFF) == v) {
                return TRUE;
@ -582,9 +579,6 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
                                 int32_t limit,
                                 UBool incremental) {
    if (offset == limit) {
-        // Strings, if any, have length != 0, so we don't worry
-        // about them here.  If we ever allow zero-length strings
-        // we much check for them here.
        if (contains(U_ETHER)) {
            return incremental ? U_PARTIAL_MATCH : U_MATCH;
        } else {
@ -614,11 +608,9 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,

            for (i=0; i<strings->size(); ++i) {
                const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
-
-                //if (trial.length() == 0) {
-                //    return U_MATCH; // null-string always matches
-                //}
-                // assert(trial.length() != 0); // We ensure this elsewhere
+                if (trial.isEmpty()) {
+                    continue;  // skip the empty string
+                }

                UChar c = trial.charAt(forward ? 0 : trial.length() - 1);

@ -971,12 +963,12 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
 * present.  If this set already contains the multicharacter,
 * the call leaves this set unchanged.
 * Thus "ch" => {"ch"}
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+ *
 * @param s the source string
 * @return the modified set, for chaining
 */
 UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
-    if (s.length() == 0 || isFrozen() || isBogus()) return *this;
+    if (isFrozen() || isBogus()) return *this;
    int32_t cp = getSingleCP(s);
    if (cp < 0) {
        if (!stringsContains(s)) {
@ -991,8 +983,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {

 /**
 * Adds the given string, in order, to 'strings'.  The given string
- * must have been checked by the caller to not be empty and to not
- * already be in 'strings'.
+ * must have been checked by the caller to not already be in 'strings'.
 */
 void UnicodeSet::_add(const UnicodeString& s) {
    if (isFrozen() || isBogus()) {
@ -1021,16 +1012,13 @@ void UnicodeSet::_add(const UnicodeString& s) {
 * @param string to test
 */
 int32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
-    //if (s.length() < 1) {
-    //    throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
-    //}
-    if (s.length() > 2) return -1;
-    if (s.length() == 1) return s.charAt(0);
-
-    // at this point, len = 2
-    UChar32 cp = s.char32At(0);
-    if (cp > 0xFFFF) { // is surrogate pair
-        return cp;
+    int32_t sLength = s.length();
+    if (sLength == 1) return s.charAt(0);
+    if (sLength == 2) {
+        UChar32 cp = s.char32At(0);
+        if (cp > 0xFFFF) { // is surrogate pair
+            return cp;
+        }
    }
    return -1;
 }
@ -1186,7 +1174,7 @@ UnicodeSet& UnicodeSet::remove(UChar32 c) {
 * @return the modified set, for chaining
 */
 UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
-    if (s.length() == 0 || isFrozen() || isBogus()) return *this;
+    if (isFrozen() || isBogus()) return *this;
    int32_t cp = getSingleCP(s);
    if (cp < 0) {
        if (strings != nullptr && strings->removeElement((void*) &s)) {
@ -1252,12 +1240,12 @@ UnicodeSet& UnicodeSet::complement(void) {
 * Complement the specified string in this set.
 * The set will not contain the specified string once the call
 * returns.
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+ *
 * @param s the string to complement
 * @return this object, for chaining
 */
 UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
-    if (s.length() == 0 || isFrozen() || isBogus()) return *this;
+    if (isFrozen() || isBogus()) return *this;
    int32_t cp = getSingleCP(s);
    if (cp < 0) {
        if (stringsContains(s)) {
--- a/icu4c/source/common/uniset_props.cpp
+++ b/icu4c/source/common/uniset_props.cpp
@ -555,7 +555,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
                        }
                        buf.append(c);
                    }
-                    if (buf.length() < 1 || !ok) {
+                    if (!ok) {
                        // syntaxError(chars, "Invalid multicharacter string");
                        ec = U_MALFORMED_SET;
                        return;
--- a/icu4c/source/common/unisetspan.cpp
+++ b/icu4c/source/common/unisetspan.cpp
@ -231,6 +231,9 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
        const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
        const UChar *s16=string.getBuffer();
        int32_t length16=string.length();
+        if (length16==0) {
+            continue;  // skip the empty string
+        }
        UBool thisRelevant;
        spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
        if(spanLength<length16) {  // Relevant string.
@ -312,7 +315,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
        const UChar *s16=string.getBuffer();
        int32_t length16=string.length();
        spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
-        if(spanLength<length16) {  // Relevant string.
+        if(spanLength<length16 && length16>0) {  // Relevant string.
            if(which&UTF16) {
                if(which&CONTAINED) {
                    if(which&FWD) {
@ -362,7 +365,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
                    addToSpanNotSet(c);
                }
            }
-        } else {  // Irrelevant string.
+        } else {  // Irrelevant string. (Also the empty string.)
            if(which&UTF8) {
                if(which&CONTAINED) {  // Only necessary for LONGEST_MATCH.
                    uint8_t *s8=utf8+utf8Count;
@ -653,11 +656,12 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
            for(i=0; i<stringsLength; ++i) {
                int32_t overlap=spanLengths[i];
                if(overlap==ALL_CP_CONTAINED) {
-                    continue;  // Irrelevant string.
+                    continue;  // Irrelevant string. (Also the empty string.)
                }
                const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
                const UChar *s16=string.getBuffer();
                int32_t length16=string.length();
+                U_ASSERT(length16>0);  // the empty string was skipped above as irrelevant

                // Try to match this string at pos-overlap..pos.
                if(overlap>=LONG_SPAN) {
@ -697,6 +701,9 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
                const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
                const UChar *s16=string.getBuffer();
                int32_t length16=string.length();
+                if (length16==0) {
+                    continue;  // skip the empty string
+                }

                // Try to match this string at pos-overlap..pos.
                if(overlap>=LONG_SPAN) {
@ -817,11 +824,12 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
            for(i=0; i<stringsLength; ++i) {
                int32_t overlap=spanBackLengths[i];
                if(overlap==ALL_CP_CONTAINED) {
-                    continue;  // Irrelevant string.
+                    continue;  // Irrelevant string. (Also the empty string.)
                }
                const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
                const UChar *s16=string.getBuffer();
                int32_t length16=string.length();
+                U_ASSERT(length16>0);  // the empty string was skipped above as irrelevant

                // Try to match this string at pos-(length16-overlap)..pos-length16.
                if(overlap>=LONG_SPAN) {
@ -863,6 +871,9 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
                const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
                const UChar *s16=string.getBuffer();
                int32_t length16=string.length();
+                if (length16==0) {
+                    continue;  // skip the empty string
+                }

                // Try to match this string at pos-(length16-overlap)..pos-length16.
                if(overlap>=LONG_SPAN) {
@ -1358,11 +1369,12 @@ int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const {
        // Try to match the strings at pos.
        for(i=0; i<stringsLength; ++i) {
            if(spanLengths[i]==ALL_CP_CONTAINED) {
-                continue;  // Irrelevant string.
+                continue;  // Irrelevant string. (Also the empty string.)
            }
            const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
            const UChar *s16=string.getBuffer();
            int32_t length16=string.length();
+            U_ASSERT(length16>0);  // the empty string was skipped above as irrelevant
            if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) {
                return pos;  // There is a set element at pos.
            }
@ -1401,11 +1413,12 @@ int32_t UnicodeSetStringSpan::spanNotBack(const UChar *s, int32_t length) const
            // it is easier and we only need to know whether the string is irrelevant
            // which is the same in either array.
            if(spanLengths[i]==ALL_CP_CONTAINED) {
-                continue;  // Irrelevant string.
+                continue;  // Irrelevant string. (Also the empty string.)
            }
            const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
            const UChar *s16=string.getBuffer();
            int32_t length16=string.length();
+            U_ASSERT(length16>0);  // the empty string was skipped above as irrelevant
            if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) {
                return pos;  // There is a set element at pos.
            }
--- a/icu4c/source/test/intltest/usettest.cpp
+++ b/icu4c/source/test/intltest/usettest.cpp
@ -98,6 +98,7 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
    TESTCASE_AUTO(TestIntOverflow);
    TESTCASE_AUTO(TestUnusedCcc);
    TESTCASE_AUTO(TestDeepPattern);
+    TESTCASE_AUTO(TestEmptyString);
    TESTCASE_AUTO_END;
 }

@ -3984,3 +3985,46 @@ void UnicodeSetTest::TestDeepPattern() {
    assertTrue("[a[a[a...1000s...]]] -> error", errorCode.isFailure());
    errorCode.reset();
 }
+
+void UnicodeSetTest::TestEmptyString() {
+    IcuTestErrorCode errorCode(*this, "TestEmptyString");
+    // Starting with ICU 69 (ICU-13702), a UnicodeSet may contain the empty string.
+    UnicodeSet set(u"[{}]", errorCode);
+    if (!assertSuccess("set from pattern with {}", errorCode)) { return; }
+    assertTrue("set from pattern with {}", set.contains(u""));
+    assertEquals("set from pattern with {}: size", 1, set.size());
+    assertFalse("set from pattern with {}: isEmpty", set.isEmpty());
+
+    // Remove the empty string, add it back, then toggle it with complement().
+    assertFalse("remove empty string", set.remove(u"").contains(u""));
+    assertEquals("remove empty string: size", 0, set.size());
+    assertTrue("remove empty string: isEmpty", set.isEmpty());
+    assertTrue("add empty string", set.add(u"").contains(u""));
+    // missing API in C++ (the Java test calls retain("")) -- assertTrue("retain empty string", set.retain(u"").contains(u""));
+    assertFalse("complement-remove empty string", set.complement(u"").contains(u""));
+    assertTrue("complement-add empty string", set.complement(u"").contains(u""));
+
+    assertFalse("clear", set.clear().contains(u""));
+    assertTrue("add empty string 2", set.add(u"").contains(u""));
+    assertFalse("removeAllStrings", set.removeAllStrings().contains(u""));
+    assertTrue("add empty string 3", set.add(u"").contains(u""));
+    // Note that this leaves the set containing exactly the empty string.
+
+    // Iteration: the first element must be reported as a string, and that string must be empty.
+    // no C++ equivalent for Java strings() -- assertTrue("strings()", set.strings().contains(u""));
+    UnicodeSetIterator sit(set);
+    assertTrue("set iterator.next()", sit.next());
+    assertTrue("set iterator has empty string", sit.isString() && sit.getString().isEmpty());
+
+    // The empty string is ignored in matching: only 'a' and 'c' decide the results below.
+    set.add(u'a').add(u'c');
+    assertEquals("span", 1, set.span(u"abc", 3, USET_SPAN_SIMPLE));
+    assertEquals("spanBack", 2, set.spanBack(u"abc", 3, USET_SPAN_SIMPLE));
+    assertTrue("containsNone", set.containsNone(u"def"));
+    assertFalse("containsSome", set.containsSome(u"def"));
+    set.freeze();
+    assertEquals("frozen span", 1, set.span(u"abc", 3, USET_SPAN_SIMPLE));
+    assertEquals("frozen spanBack", 2, set.spanBack(u"abc", 3, USET_SPAN_SIMPLE));
+    assertTrue("frozen containsNone", set.containsNone(u"def"));
+    assertFalse("frozen containsSome", set.containsSome(u"def"));
+}
--- a/icu4c/source/test/intltest/usettest.h
+++ b/icu4c/source/test/intltest/usettest.h
@ -94,6 +94,7 @@ private:
    void TestIntOverflow();
    void TestUnusedCcc();
    void TestDeepPattern();
+    void TestEmptyString();

 private:

--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java
@ -2,8 +2,6 @@
 // License & terms of use: http://www.unicode.org/copyright.html
 package com.ibm.icu.impl;

-import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
-
 import java.util.EnumMap;
 import java.util.Map;

@ -95,7 +93,7 @@ public class StaticUnicodeSets {
     * @return key1 if the set contains str, or COUNT if not.
     */
    public static Key chooseFrom(String str, Key key1) {
-        return safeContains(get(key1), str) ? key1 : null;
+        return get(key1).contains(str) ? key1 : null;
    }

    /**
@ -113,7 +111,7 @@ public class StaticUnicodeSets {
     *         contains str.
     */
    public static Key chooseFrom(String str, Key key1, Key key2) {
-        return safeContains(get(key1), str) ? key1 : chooseFrom(str, key2);
+        return get(key1).contains(str) ? key1 : chooseFrom(str, key2);
    }

    /**
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java
@ -110,9 +110,15 @@ public class UnicodeSetStringSpan {
        int i, spanLength;
        int maxLength16 = 0;
        someRelevant = false;
-        for (i = 0; i < stringsLength; ++i) {
+        for (i = 0; i < stringsLength;) {
            String string = strings.get(i);
            int length16 = string.length();
+            if (length16 == 0) {
+                // Remove the empty string.
+                strings.remove(i);
+                --stringsLength;
+                continue;
+            }
            spanLength = spanSet.span(string, SpanCondition.CONTAINED);
            if (spanLength < length16) { // Relevant string.
                someRelevant = true;
@ -120,6 +126,7 @@ public class UnicodeSetStringSpan {
            if (/* (0 != (which & UTF16)) && */ length16 > maxLength16) {
                maxLength16 = length16;
            }
+            ++i;
        }
        this.maxLength16 = maxLength16;
        if (!someRelevant && (which & WITH_COUNT) == 0) {
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java
@ -2,8 +2,6 @@
 // License & terms of use: http://www.unicode.org/copyright.html
 package com.ibm.icu.impl.number.parse;

-import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
-
 import com.ibm.icu.impl.StaticUnicodeSets;
 import com.ibm.icu.impl.StringSegment;
 import com.ibm.icu.text.DecimalFormatSymbols;
@ -18,7 +16,7 @@ public class InfinityMatcher extends SymbolMatcher {

    public static InfinityMatcher getInstance(DecimalFormatSymbols symbols) {
        String symbolString = symbols.getInfinity();
-        if (safeContains(DEFAULT.uniSet, symbolString)) {
+        if (DEFAULT.uniSet.contains(symbolString)) {
            return DEFAULT;
        } else {
            return new InfinityMatcher(symbolString);
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java
@ -2,8 +2,6 @@
 // License & terms of use: http://www.unicode.org/copyright.html
 package com.ibm.icu.impl.number.parse;

-import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
-
 import com.ibm.icu.impl.StaticUnicodeSets;
 import com.ibm.icu.impl.StringSegment;
 import com.ibm.icu.text.DecimalFormatSymbols;
@ -19,7 +17,7 @@ public class MinusSignMatcher extends SymbolMatcher {

    public static MinusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
        String symbolString = symbols.getMinusSignString();
-        if (safeContains(DEFAULT.uniSet, symbolString)) {
+        if (DEFAULT.uniSet.contains(symbolString)) {
            return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
        } else {
            return new MinusSignMatcher(symbolString, allowTrailing);
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java
@ -33,7 +33,9 @@ public class ParsingUtils {
            output.add(range.codepoint, range.codepointEnd);
        }
        for (String str : input.strings()) {
-            output.add(str.codePointAt(0));
+            if (!str.isEmpty()) {
+                output.add(str.codePointAt(0));
+            }
        }
    }

@ -42,10 +44,4 @@ public class ParsingUtils {
            output.add(input.codePointAt(0));
        }
    }
-
-    // TODO: Remove this helper function (and update call sites) when #13805 is fixed
-    public static boolean safeContains(UnicodeSet uniset, CharSequence str) {
-        return str.length() != 0 && uniset.contains(str);
-    }
-
 }
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PlusSignMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PlusSignMatcher.java
@ -2,8 +2,6 @@
 // License & terms of use: http://www.unicode.org/copyright.html
 package com.ibm.icu.impl.number.parse;

-import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
-
 import com.ibm.icu.impl.StaticUnicodeSets;
 import com.ibm.icu.impl.StringSegment;
 import com.ibm.icu.text.DecimalFormatSymbols;
@ -19,7 +17,7 @@ public class PlusSignMatcher extends SymbolMatcher {

    public static PlusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
        String symbolString = symbols.getPlusSignString();
-        if (safeContains(DEFAULT.uniSet, symbolString)) {
+        if (DEFAULT.uniSet.contains(symbolString)) {
            return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
        } else {
            return new PlusSignMatcher(symbolString, allowTrailing);
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
@ -2,8 +2,6 @@
 // License & terms of use: http://www.unicode.org/copyright.html
 package com.ibm.icu.impl.number.parse;

-import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
-
 import com.ibm.icu.impl.StaticUnicodeSets;
 import com.ibm.icu.impl.StringSegment;
 import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
@ -36,9 +34,9 @@ public class ScientificMatcher implements NumberParseMatcher {
        ignorablesMatcher = IgnorablesMatcher.getInstance(ParsingUtils.PARSE_FLAG_STRICT_IGNORABLES);

        String minusSign = symbols.getMinusSignString();
-        customMinusSign = safeContains(minusSignSet(), minusSign) ? null : minusSign;
+        customMinusSign = minusSignSet().contains(minusSign) ? null : minusSign;
        String plusSign = symbols.getPlusSignString();
-        customPlusSign = safeContains(plusSignSet(), plusSign) ? null : plusSign;
+        customPlusSign = plusSignSet().contains(plusSign) ? null : plusSign;
    }

    private static UnicodeSet minusSignSet() {
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
@ -186,8 +186,6 @@ import com.ibm.icu.util.VersionInfo;
 * Unicode property
 * </table>
 *
- * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
- *
 * <p><b>Formal syntax</b></p>
 *
 * <blockquote>
@ -892,11 +890,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
        }
        if (hasStrings()) {
            for (String s : strings) {
-                //if (s.length() == 0) {
-                //    // Empty strings match everything
-                //    return true;
-                //}
-                // assert(s.length() != 0); // We enforce this elsewhere
+                if (s.isEmpty()) {
+                    continue;  // skip the empty string
+                }
                int c = UTF16.charAt(s, 0);
                if ((c & 0xFF) == v) {
                    return true;
@ -918,9 +914,6 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
            boolean incremental) {

        if (offset[0] == limit) {
-            // Strings, if any, have length != 0, so we don't worry
-            // about them here.  If we ever allow zero-length strings
-            // we much check for them here.
            if (contains(UnicodeMatcher.ETHER)) {
                return incremental ? U_PARTIAL_MATCH : U_MATCH;
            } else {
@ -948,10 +941,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                int highWaterLength = 0;

                for (String trial : strings) {
-                    //if (trial.length() == 0) {
-                    //    return U_MATCH; // null-string always matches
-                    //}
-                    // assert(trial.length() != 0); // We ensure this elsewhere
+                    if (trial.isEmpty()) {
+                        continue;  // skip the empty string
+                    }

                    char c = trial.charAt(forward ? 0 : trial.length() - 1);

@ -1363,7 +1355,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * present.  If this set already contains the multicharacter,
     * the call leaves this set unchanged.
     * Thus "ch" =&gt; {"ch"}
-     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+     *
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.0
@ -1392,22 +1384,19 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa

    /**
     * Utility for getting code point from single code point CharSequence.
-     * See the public UTF16.getSingleCodePoint()
+     * See the public UTF16.getSingleCodePoint() (which returns -1 for null rather than throwing NPE).
+     *
     * @return a code point IF the string consists of a single one.
     * otherwise returns -1.
     * @param s to test
     */
    private static int getSingleCP(CharSequence s) {
-        if (s.length() < 1) {
-            throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
-        }
-        if (s.length() > 2) return -1;
        if (s.length() == 1) return s.charAt(0);
-
-        // at this point, len = 2
-        int cp = UTF16.charAt(s, 0);
-        if (cp > 0xFFFF) { // is surrogate pair
-            return cp;
+        if (s.length() == 2) {
+            int cp = Character.codePointAt(s, 0);
+            if (cp > 0xFFFF) { // is surrogate pair
+                return cp;
+            }
        }
        return -1;
    }
@ -1478,7 +1467,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa

    /**
     * Makes a set from a multicharacter string. Thus "ch" =&gt; {"ch"}
-     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+     *
     * @param s the source string
     * @return a newly created set containing the given string
     * @stable ICU 2.0
@ -1686,7 +1675,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * Complement the specified string in this set.
     * The set will not contain the specified string once the call
     * returns.
-     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+     *
     * @param s the string to complement
     * @return this object, for chaining
     * @stable ICU 2.0
@ -2056,7 +2045,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
            return true;
        }
        for (String setStr : strings) {
-            if (s.startsWith(setStr, i) &&  containsAll(s, i+setStr.length())) {
+            if (!setStr.isEmpty() &&  // skip the empty string
+                    s.startsWith(setStr, i) &&  containsAll(s, i+setStr.length())) {
                return true;
            }
        }
@ -2801,7 +2791,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                        }
                        appendCodePoint(buf, c);
                    }
-                    if (buf.length() < 1 || !ok) {
+                    if (!ok) {
                        syntaxError(chars, "Invalid multicharacter string");
                    }
                    // We have new string. Add it to set and continue;
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
@ -1528,12 +1528,12 @@ public class UnicodeSetTest extends TestFmwk {

        //public Iterator<String> iterator() {

-        ArrayList<String> oldList = new ArrayList<String>();
+        ArrayList<String> oldList = new ArrayList<>();
        for (UnicodeSetIterator it = new UnicodeSetIterator(set1); it.next();) {
            oldList.add(it.getString());
        }

-        ArrayList<String> list1 = new ArrayList<String>();
+        ArrayList<String> list1 = new ArrayList<>();
        for (String s : set1) {
            list1.add(s);
        }
@ -1613,11 +1613,11 @@ public class UnicodeSetTest extends TestFmwk {
        List<UnicodeSet> goalLongest = Arrays.asList(set1, set3, set2);
        List<UnicodeSet> goalLex = Arrays.asList(set1, set2, set3);

-        List<UnicodeSet> sorted = new ArrayList(new TreeSet<UnicodeSet>(unsorted));
+        List<UnicodeSet> sorted = new ArrayList(new TreeSet<>(unsorted));
        assertNotEquals("compareTo-shorter-first", unsorted, sorted);
        assertEquals("compareTo-shorter-first", goalShortest, sorted);

-        TreeSet<UnicodeSet> sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
+        TreeSet<UnicodeSet> sorted1 = new TreeSet<>(new Comparator<UnicodeSet>(){
            @Override
            public int compare(UnicodeSet o1, UnicodeSet o2) {
                // TODO Auto-generated method stub
@ -1628,7 +1628,7 @@ public class UnicodeSetTest extends TestFmwk {
        assertNotEquals("compareTo-longer-first", unsorted, sorted);
        assertEquals("compareTo-longer-first", goalLongest, sorted);

-        sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
+        sorted1 = new TreeSet<>(new Comparator<UnicodeSet>(){
            @Override
            public int compare(UnicodeSet o1, UnicodeSet o2) {
                // TODO Auto-generated method stub
@ -1642,7 +1642,7 @@ public class UnicodeSetTest extends TestFmwk {
        //compare(String, int)
        // make a list of interesting combinations
        List<String> sources = Arrays.asList("\u0000", "a", "b", "\uD7FF", "\uD800", "\uDBFF", "\uDC00", "\uDFFF", "\uE000", "\uFFFD", "\uFFFF");
-        TreeSet<String> target = new TreeSet<String>();
+        TreeSet<String> target = new TreeSet<>();
        for (String s : sources) {
            target.add(s);
            for (String t : sources) {
@ -1685,8 +1685,8 @@ public class UnicodeSetTest extends TestFmwk {

        //compare(Iterable<T>, Iterable<T>)
        int max = 10;
-        List<String> test1 = new ArrayList<String>(max);
-        List<String> test2 = new ArrayList<String>(max);
+        List<String> test1 = new ArrayList<>(max);
+        List<String> test2 = new ArrayList<>(max);
        for (int i = 0; i <= max; ++i) {
            test1.add("a" + i);
            test2.add("a" + (max - i)); // add in reverse order
@ -2792,4 +2792,47 @@ public class UnicodeSetTest extends TestFmwk {
        } catch(RuntimeException expected) {
        }
    }
+
+    /**
+     * Checks that the empty string "" can be an element of a UnicodeSet (ICU-13702).
+     *
+     * The test is one ordered sequence of mutations on a single set: each assertion
+     * relies on the state left behind by the previous call, so statements must not be
+     * reordered. It covers, in turn:
+     *   1. parsing the pattern "[{}]" into a set whose only element is "",
+     *   2. remove / add / retain / complement with "" as the argument,
+     *   3. clear() and removeAllStrings() dropping "",
+     *   4. visibility of "" through strings() and UnicodeSetIterator,
+     *   5. span / spanBack / containsNone / containsSome giving the results expected
+     *      from the code points 'a' and 'c' alone, before and after freeze().
+     */
+    @Test
+    public void TestEmptyString() {
+        // Starting with ICU 69, the empty string is allowed in UnicodeSet. ICU-13702
+        UnicodeSet set = new UnicodeSet("[{}]");
+        assertTrue("set from pattern with {}", set.contains(""));
+        // The empty string counts as one element: size is 1 and the set is not empty.
+        assertEquals("set from pattern with {}: size", 1, set.size());
+        assertFalse("set from pattern with {}: isEmpty", set.isEmpty());
+
+        // Remove, add back, ...
+        // The mutators return the set itself, so each call is chained directly into contains("").
+        assertFalse("remove empty string", set.remove("").contains(""));
+        assertEquals("remove empty string: size", 0, set.size());
+        assertTrue("remove empty string: isEmpty", set.isEmpty());
+        assertTrue("add empty string", set.add("").contains(""));
+        assertTrue("retain empty string", set.retain("").contains(""));
+        // complement("") toggles membership: the first call takes "" out, the second puts it back.
+        assertFalse("complement-remove empty string", set.complement("").contains(""));
+        assertTrue("complement-add empty string", set.complement("").contains(""));
+
+        // Both clear() and removeAllStrings() must drop ""; it is re-added after each check.
+        assertFalse("clear", set.clear().contains(""));
+        assertTrue("add empty string 2", set.add("").contains(""));
+        assertFalse("removeAllStrings", set.removeAllStrings().contains(""));
+        assertTrue("add empty string 3", set.add("").contains(""));
+        // Note that this leaves the set containing exactly the empty string.
+
+        // strings() access and iteration
+        assertTrue("strings()", set.strings().contains(""));
+        UnicodeSetIterator sit = new UnicodeSetIterator(set);
+        assertTrue("set iterator.next()", sit.next());
+        // The first (and only) element must be reported as a string element, not a code point.
+        assertTrue("set iterator has empty string",
+                sit.codepoint == UnicodeSetIterator.IS_STRING && sit.getString().isEmpty());
+
+        // The empty string is ignored in matching.
+        // From here on the set holds "", 'a' and 'c'; 'b' is deliberately left out.
+        set.add('a').add('c');
+        // Expected 1: the forward span covers "a" and stops at 'b'.
+        assertEquals("span", 1, set.span("abc", SpanCondition.SIMPLE));
+        // Expected 2: the backward span covers only the trailing "c".
+        assertEquals("spanBack", 2, set.spanBack("abc", SpanCondition.SIMPLE));
+        // "def" shares no code point with the set, and "" must not count as a match either.
+        assertTrue("containsNone", set.containsNone("def"));
+        assertFalse("containsSome", set.containsSome("def"));
+        // Repeat the same four checks on the frozen set.
+        // NOTE(review): presumably freeze() switches to a separate matching implementation,
+        // which is why the checks are duplicated; confirm against UnicodeSet.freeze().
+        set.freeze();
+        assertEquals("frozen span", 1, set.span("abc", SpanCondition.SIMPLE));
+        assertEquals("frozen spanBack", 2, set.spanBack("abc", SpanCondition.SIMPLE));
+        assertTrue("frozen containsNone", set.containsNone("def"));
+        assertFalse("frozen containsSome", set.containsSome("def"));
+    }
 }
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/StringTokenizerTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/StringTokenizerTest.java
@ -562,12 +562,12 @@ public final class StringTokenizerTest extends TestFmwk
            us._generatePattern(sb.append(1.0), true);
            us._generatePattern(sb.reverse(), true);
        } catch(Exception e){
-            errln("UnicodeSet._generatePattern is not suppose to return an exception.");
+            errln("UnicodeSet._generatePattern is not supposed to return an exception.");
        }

        try{
            us._generatePattern(null, true);
-            errln("UnicodeSet._generatePattern is suppose to return an exception.");
+            errln("UnicodeSet._generatePattern is supposed to return an exception.");
        } catch(Exception e){}
    }

@ -585,12 +585,12 @@ public final class StringTokenizerTest extends TestFmwk
        int limit = 0;

        if(us.matches(null, offset, limit, true) != UnicodeSet.U_PARTIAL_MATCH){
-            errln("UnicodeSet.matches is suppose to return " + UnicodeSet.U_PARTIAL_MATCH +
+            errln("UnicodeSet.matches is supposed to return " + UnicodeSet.U_PARTIAL_MATCH +
                    " but got " + us.matches(null, offset, limit, true));
        }

        if(us.matches(null, offset, limit, false) != UnicodeSet.U_MATCH){
-            errln("UnicodeSet.matches is suppose to return " + UnicodeSet.U_MATCH +
+            errln("UnicodeSet.matches is supposed to return " + UnicodeSet.U_MATCH +
                    " but got " + us.matches(null, offset, limit, false));
        }

@ -601,7 +601,7 @@ public final class StringTokenizerTest extends TestFmwk
            offset[0] = 4; // Takes the letter "y"
            us.matches(rs, offset, 1, true);
        } catch(Exception e) {
-            errln("UnicodeSet.matches is not suppose to return an exception");
+            errln("UnicodeSet.matches is not supposed to return an exception");
        }

        // TODO: Tests when "if (forward && length < highWaterLength)" is true
@ -650,7 +650,7 @@ public final class StringTokenizerTest extends TestFmwk
        for(int i=0; i < invalid.length; i++){
            try{
                us.indexOf(invalid[i]);
-                errln("UnicodeSet.indexOf is suppose to return an exception " +
+                errln("UnicodeSet.indexOf is supposed to return an exception " +
                        "for a value of " + invalid[i]);
            } catch(Exception e){}
        }
@ -659,7 +659,7 @@ public final class StringTokenizerTest extends TestFmwk
            try{
                us.indexOf(valid[i]);
            } catch(Exception e){
-                errln("UnicodeSet.indexOf is not suppose to return an exception " +
+                errln("UnicodeSet.indexOf is not supposed to return an exception " +
                        "for a value of " + valid[i]);
            }
        }
@ -676,7 +676,7 @@ public final class StringTokenizerTest extends TestFmwk
        int[] invalid = {-100,-10,-5,-2,-1};
        for(int i=0; i < invalid.length; i++){
            if(us.charAt(invalid[i]) != -1){
-                errln("UnicodeSet.charAt(int index) was suppose to return -1 "
+                errln("UnicodeSet.charAt(int index) was supposed to return -1 "
                        + "for an invalid input of " + invalid[i]);
            }
        }
@ -696,7 +696,7 @@ public final class StringTokenizerTest extends TestFmwk
         for(int i=0; i < invalid.length; i++){
             try{
                 us.add(invalid[i], UnicodeSet.MAX_VALUE);
-                 errln("UnicodeSet.add(int start, int end) was suppose to give "
+                 errln("UnicodeSet.add(int start, int end) was supposed to give "
                         + "an exception for an start invalid input of "
                         + invalid[i]);
             } catch (Exception e){}
@ -706,7 +706,7 @@ public final class StringTokenizerTest extends TestFmwk
         for(int i=0; i < invalid.length; i++){
             try{
                 us.add(UnicodeSet.MIN_VALUE, invalid[i]);
-                 errln("UnicodeSet.add(int start, int end) was suppose to give "
+                 errln("UnicodeSet.add(int start, int end) was supposed to give "
                         + "an exception for an end invalid input of "
                         + invalid[i]);
             } catch (Exception e){}
@ -714,12 +714,12 @@ public final class StringTokenizerTest extends TestFmwk

         // Tests when "else if (start == end)" is false
         if(!(us.add(UnicodeSet.MIN_VALUE+1, UnicodeSet.MIN_VALUE).equals(us)))
-             errln("UnicodeSet.add(int start, int end) was suppose to return "
+             errln("UnicodeSet.add(int start, int end) was supposed to return "
                     + "the same object because start of value " + (UnicodeSet.MIN_VALUE+1)
                     + " is greater than end of value " + UnicodeSet.MIN_VALUE);

         if(!(us.add(UnicodeSet.MAX_VALUE, UnicodeSet.MAX_VALUE-1).equals(us)))
-             errln("UnicodeSet.add(int start, int end) was suppose to return "
+             errln("UnicodeSet.add(int start, int end) was supposed to return "
                     + "the same object because start of value " + UnicodeSet.MAX_VALUE
                     + " is greater than end of value " + (UnicodeSet.MAX_VALUE-1));
     }
@ -738,7 +738,7 @@ public final class StringTokenizerTest extends TestFmwk
         for(int i=0; i < invalid.length; i++){
             try{
                 us.add(invalid[i]);
-                 errln("UnicodeSet.add(int c) was suppose to give "
+                 errln("UnicodeSet.add(int c) was supposed to give "
                         + "an exception for an start invalid input of "
                         + invalid[i]);
             } catch (Exception e){}
@ -758,14 +758,15 @@ public final class StringTokenizerTest extends TestFmwk
         // Tests when "if (s.length() < 1)" is true
         try{
             us.contains("");
-             errln("UnicodeSet.getSingleCP is suppose to give an exception for " +
+         } catch (Exception e) {
+             errln("UnicodeSet.getSingleCP is not supposed to give an exception for " +
                     "an empty string.");
-         } catch (Exception e){}
+         }

         try{
             us.contains((String)null);
-             errln("UnicodeSet.getSingleCP is suppose to give an exception for " +
-             "a null string.");
+             errln("UnicodeSet.getSingleCP is supposed to give an exception for " +
+                     "a null string.");
         } catch (Exception e){}

         // Tests when "if (cp > 0xFFFF)" is true
@ -774,8 +775,8 @@ public final class StringTokenizerTest extends TestFmwk
             try{
                 us.contains(cases[i]);
             } catch (Exception e){
-                 errln("UnicodeSet.getSingleCP is not suppose to give an exception for " +
-                     "a null string.");
+                 errln("UnicodeSet.getSingleCP is not supposed to give an exception for " +
+                     "a surrogate pair.");
             }
         }
     }
@ -790,7 +791,7 @@ public final class StringTokenizerTest extends TestFmwk
         try{
             us.removeAllStrings();
         } catch(Exception e){
-             errln("UnicodeSet.removeAllString() was not suppose to given an " +
+             errln("UnicodeSet.removeAllString() was not supposed to given an " +
                     "exception for a strings size of 0");
         }
     }
@ -808,7 +809,7 @@ public final class StringTokenizerTest extends TestFmwk
          for(int i=0; i < invalid.length; i++){
              try{
                  us.retain(invalid[i], UnicodeSet.MAX_VALUE);
-                  errln("UnicodeSet.retain(int start, int end) was suppose to give "
+                  errln("UnicodeSet.retain(int start, int end) was supposed to give "
                          + "an exception for an start invalid input of "
                          + invalid[i]);
              } catch (Exception e){}
@ -818,7 +819,7 @@ public final class StringTokenizerTest extends TestFmwk
          for(int i=0; i < invalid.length; i++){
              try{
                  us.retain(UnicodeSet.MIN_VALUE, invalid[i]);
-                  errln("UnicodeSet.retain(int start, int end) was suppose to give "
+                  errln("UnicodeSet.retain(int start, int end) was supposed to give "
                          + "an exception for an end invalid input of "
                          + invalid[i]);
              } catch (Exception e){}
@ -828,14 +829,14 @@ public final class StringTokenizerTest extends TestFmwk
          try{
              us.retain(UnicodeSet.MIN_VALUE+1, UnicodeSet.MIN_VALUE);
          } catch(Exception e){
-              errln("UnicodeSet.retain(int start, int end) was not suppose to give "
+              errln("UnicodeSet.retain(int start, int end) was not supposed to give "
                      + "an exception.");
          }

          try{
              us.retain(UnicodeSet.MAX_VALUE, UnicodeSet.MAX_VALUE-1);
          } catch(Exception e){
-              errln("UnicodeSet.retain(int start, int end) was not suppose to give "
+              errln("UnicodeSet.retain(int start, int end) was not supposed to give "
                      + "an exception.");
          }
      }
@ -849,7 +850,7 @@ public final class StringTokenizerTest extends TestFmwk
          UnicodeSet us = new UnicodeSet();
          us.add("dummy");
          if(!(us.retain("dummy").equals(us))){
-              errln("UnicodeSet.retain(String s) was suppose to return the " +
+              errln("UnicodeSet.retain(String s) was supposed to return the " +
                      "same UnicodeSet since the string was found in the original.");
          }
      }
@ -867,7 +868,7 @@ public final class StringTokenizerTest extends TestFmwk
           for(int i=0; i < invalid.length; i++){
               try{
                   us.remove(invalid[i], UnicodeSet.MAX_VALUE);
-                   errln("UnicodeSet.remove(int start, int end) was suppose to give "
+                   errln("UnicodeSet.remove(int start, int end) was supposed to give "
                           + "an exception for an start invalid input of "
                           + invalid[i]);
               } catch (Exception e){}
@ -877,7 +878,7 @@ public final class StringTokenizerTest extends TestFmwk
           for(int i=0; i < invalid.length; i++){
               try{
                   us.remove(UnicodeSet.MIN_VALUE, invalid[i]);
-                   errln("UnicodeSet.remove(int start, int end) was suppose to give "
+                   errln("UnicodeSet.remove(int start, int end) was supposed to give "
                           + "an exception for an end invalid input of "
                           + invalid[i]);
               } catch (Exception e){}
@ -887,14 +888,14 @@ public final class StringTokenizerTest extends TestFmwk
           try{
               us.remove(UnicodeSet.MIN_VALUE+1, UnicodeSet.MIN_VALUE);
           } catch(Exception e){
-               errln("UnicodeSet.remove(int start, int end) was not suppose to give "
+               errln("UnicodeSet.remove(int start, int end) was not supposed to give "
                       + "an exception.");
           }

           try{
               us.remove(UnicodeSet.MAX_VALUE, UnicodeSet.MAX_VALUE-1);
           } catch(Exception e){
-               errln("UnicodeSet.remove(int start, int end) was not suppose to give "
+               errln("UnicodeSet.remove(int start, int end) was not supposed to give "
                       + "an exception.");
           }
       }
@ -912,7 +913,7 @@ public final class StringTokenizerTest extends TestFmwk
            for(int i=0; i < invalid.length; i++){
                try{
                    us.complement(invalid[i], UnicodeSet.MAX_VALUE);
-                    errln("UnicodeSet.complement(int start, int end) was suppose to give "
+                    errln("UnicodeSet.complement(int start, int end) was supposed to give "
                            + "an exception for an start invalid input of "
                            + invalid[i]);
                } catch (Exception e){}
@ -922,7 +923,7 @@ public final class StringTokenizerTest extends TestFmwk
            for(int i=0; i < invalid.length; i++){
                try{
                    us.complement(UnicodeSet.MIN_VALUE, invalid[i]);
-                    errln("UnicodeSet.complement(int start, int end) was suppose to give "
+                    errln("UnicodeSet.complement(int start, int end) was supposed to give "
                            + "an exception for an end invalid input of "
                            + invalid[i]);
                } catch (Exception e){}
@ -932,14 +933,14 @@ public final class StringTokenizerTest extends TestFmwk
            try{
                us.complement(UnicodeSet.MIN_VALUE+1, UnicodeSet.MIN_VALUE);
            } catch(Exception e){
-                errln("UnicodeSet.complement(int start, int end) was not suppose to give "
+                errln("UnicodeSet.complement(int start, int end) was not supposed to give "
                        + "an exception.");
            }

            try{
                us.complement(UnicodeSet.MAX_VALUE, UnicodeSet.MAX_VALUE-1);
            } catch(Exception e){
-                errln("UnicodeSet.complement(int start, int end) was not suppose to give "
+                errln("UnicodeSet.complement(int start, int end) was not supposed to give "
                        + "an exception.");
            }
        }
@ -955,7 +956,7 @@ public final class StringTokenizerTest extends TestFmwk
            try{
                us.complement("dummy");
            } catch (Exception e){
-                errln("UnicodeSet.complement(String s) was not suppose to give "
+                errln("UnicodeSet.complement(String s) was not supposed to give "
                        + "an exception for 'dummy'.");
            }

@ -965,7 +966,7 @@ public final class StringTokenizerTest extends TestFmwk
            try{
                us.complement("\uDC11");
            } catch (Exception e){
-                errln("UnicodeSet.complement(String s) was not suppose to give "
+                errln("UnicodeSet.complement(String s) was not supposed to give "
                        + "an exception for '\uDC11'.");
            }
        }
@ -983,7 +984,7 @@ public final class StringTokenizerTest extends TestFmwk
            for(int i=0; i < invalid.length; i++){
                try{
                    us.contains(invalid[i]);
-                    errln("UnicodeSet.contains(int c) was suppose to give "
+                    errln("UnicodeSet.contains(int c) was supposed to give "
                            + "an exception for an start invalid input of "
                            + invalid[i]);
                } catch (Exception e){}
@ -1003,7 +1004,7 @@ public final class StringTokenizerTest extends TestFmwk
             for(int i=0; i < invalid.length; i++){
                 try{
                     us.contains(invalid[i], UnicodeSet.MAX_VALUE);
-                     errln("UnicodeSet.contains(int start, int end) was suppose to give "
+                     errln("UnicodeSet.contains(int start, int end) was supposed to give "
                             + "an exception for an start invalid input of "
                             + invalid[i]);
                 } catch (Exception e){}
@ -1013,7 +1014,7 @@ public final class StringTokenizerTest extends TestFmwk
             for(int i=0; i < invalid.length; i++){
                 try{
                     us.contains(UnicodeSet.MIN_VALUE, invalid[i]);
-                     errln("UnicodeSet.contains(int start, int end) was suppose to give "
+                     errln("UnicodeSet.contains(int start, int end) was supposed to give "
                             + "an exception for an end invalid input of "
                             + invalid[i]);
                 } catch (Exception e){}
@ -1028,7 +1029,7 @@ public final class StringTokenizerTest extends TestFmwk
             UnicodeSet us = new UnicodeSet();
             String res = us.getRegexEquivalent();
             if(!(res.equals("[]")))
-                 errln("UnicodeSet.getRegexEquivalent is suppose to return '[]' " +
+                 errln("UnicodeSet.getRegexEquivalent is supposed to return '[]' " +
                         "but got " + res);
         }

@ -1045,7 +1046,7 @@ public final class StringTokenizerTest extends TestFmwk
              for(int i=0; i < invalid.length; i++){
                  try{
                      us.containsNone(invalid[i], UnicodeSet.MAX_VALUE);
-                      errln("UnicodeSet.containsNoneint start, int end) was suppose to give "
+                      errln("UnicodeSet.containsNoneint start, int end) was supposed to give "
                              + "an exception for an start invalid input of "
                              + invalid[i]);
                  } catch (Exception e){}
@ -1055,7 +1056,7 @@ public final class StringTokenizerTest extends TestFmwk
              for(int i=0; i < invalid.length; i++){
                  try{
                      us.containsNone(UnicodeSet.MIN_VALUE, invalid[i]);
-                      errln("UnicodeSet.containsNone(int start, int end) was suppose to give "
+                      errln("UnicodeSet.containsNone(int start, int end) was supposed to give "
                              + "an exception for an end invalid input of "
                              + invalid[i]);
                  } catch (Exception e){}
@ -1066,7 +1067,7 @@ public final class StringTokenizerTest extends TestFmwk
                  us.add(0);
                  us.containsNone(1, 2); // 1 > 0
              } catch (Exception e){
-                  errln("UnicodeSet.containsNone(int start, int end) was not suppose to give " +
+                  errln("UnicodeSet.containsNone(int start, int end) was not supposed to give " +
                          "an exception.");
              }
          }