ICU-6065 UnicodeSet::closeOver(simple case folding)

See #2322
2025-04-10 07:39:16 +00:00 · 2023-03-02 00:25:11 +00:00 · 2023-03-02 00:25:11 +00:00 · 79ab90b5f9
commit 79ab90b5f9
parent 2864379937
14 changed files with 1228 additions and 391 deletions
--- a/icu4c/source/common/characterproperties.cpp
+++ b/icu4c/source/common/characterproperties.cpp
@ -377,22 +377,30 @@ UCPMap *makeMap(UProperty property, UErrorCode &errorCode) {

 }  // namespace

-U_NAMESPACE_USE
+U_NAMESPACE_BEGIN

-U_CAPI const USet * U_EXPORT2
-u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
-    if (U_FAILURE(*pErrorCode)) { return nullptr; }
+const UnicodeSet *CharacterProperties::getBinaryPropertySet(UProperty property, UErrorCode &errorCode) {  // C++-internal accessor: returns the cached set for a binary property, building it on first use.
+    if (U_FAILURE(errorCode)) { return nullptr; }  // Do nothing if the caller already carries an error.
    if (property < 0 || UCHAR_BINARY_LIMIT <= property) {  // Only property values in [0, UCHAR_BINARY_LIMIT) are accepted.
-        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex m(&cpMutex);  // Taken before the sets[] cache is read or written (presumably a scoped lock -- confirm Mutex semantics).
    UnicodeSet *set = sets[property];
    if (set == nullptr) {  // Not cached yet: build the set and remember it for later calls.
-        sets[property] = set = makeSet(property, *pErrorCode);
+        sets[property] = set = makeSet(property, errorCode);
    }
-    if (U_FAILURE(*pErrorCode)) { return nullptr; }
-    return set->toUSet();
+    return set;  // No failure check here: callers test errorCode before using the result.
+}
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+U_CAPI const USet * U_EXPORT2
+u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {  // C API wrapper around CharacterProperties::getBinaryPropertySet().
+    const UnicodeSet *set = CharacterProperties::getBinaryPropertySet(property, *pErrorCode);
+    return U_SUCCESS(*pErrorCode) ? set->toUSet() : nullptr;  // Convert to the C type only on success; nullptr on any error.
 }

 U_CAPI const UCPMap * U_EXPORT2
--- a/icu4c/source/common/ucase.cpp
+++ b/icu4c/source/common/ucase.cpp
@ -205,37 +205,7 @@ static const char16_t iDotTilde[3] = { 0x69, 0x307, 0x303 };

 U_CFUNC void U_EXPORT2
 ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
-    uint16_t props;
-
-    /*
-     * Hardcode the case closure of i and its relatives and ignore the
-     * data file data for these characters.
-     * The Turkic dotless i and dotted I with their case mapping conditions
-     * and case folding option make the related characters behave specially.
-     * This code matches their closure behavior to their case folding behavior.
-     */
-
-    switch(c) {
-    case 0x49:
-        /* regular i and I are in one equivalence class */
-        sa->add(sa->set, 0x69);
-        return;
-    case 0x69:
-        sa->add(sa->set, 0x49);
-        return;
-    case 0x130:
-        /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
-        sa->addString(sa->set, iDot, 2);
-        return;
-    case 0x131:
-        /* dotless i is in a class by itself */
-        return;
-    default:
-        /* otherwise use the data file data */
-        break;
-    }
-
-    props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+    uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
    if(!UCASE_HAS_EXCEPTION(props)) {
        if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
            /* add the one simple case mapping, no matter what type it is */
@ -249,19 +219,42 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
         * c has exceptions, so there may be multiple simple and/or
         * full case mappings. Add them all.
         */
-        const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
-        const char16_t *closure;
+        const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
        uint16_t excWord=*pe++;
-        int32_t idx, closureLength, fullLength, length;
+        const uint16_t *pe0=pe;

-        pe0=pe;
+        // Hardcode the case closure of i and its relatives and ignore the
+        // data file data for these characters.
+        // The Turkic dotless i and dotted I with their case mapping conditions
+        // and case folding option make the related characters behave specially.
+        // This code matches their closure behavior to their case folding behavior.
+        if (excWord&UCASE_EXC_CONDITIONAL_FOLD) {
+            // These characters have Turkic case foldings. Hardcode their closure.
+            if (c == 0x49) {
+                // Regular i and I are in one equivalence class.
+                sa->add(sa->set, 0x69);
+                return;
+            } else if (c == 0x130) {
+                // Dotted I is in a class with <0069 0307>
+                // (for canonical equivalence with <0049 0307>).
+                sa->addString(sa->set, iDot, 2);
+                return;
+            }
+        } else if (c == 0x69) {
+            sa->add(sa->set, 0x49);
+            return;
+        } else if (c == 0x131) {
+            // Dotless i is in a class by itself.
+            return;
+        }

        /* add all simple case mappings */
-        for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
+        for(int32_t idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
            if(HAS_SLOT(excWord, idx)) {
                pe=pe0;
-                GET_SLOT_VALUE(excWord, idx, pe, c);
-                sa->add(sa->set, c);
+                UChar32 mapping;
+                GET_SLOT_VALUE(excWord, idx, pe, mapping);
+                sa->add(sa->set, mapping);
            }
        }
        if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
@ -272,6 +265,8 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
        }

        /* get the closure string pointer & length */
+        const char16_t *closure;
+        int32_t closureLength;
        if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
            pe=pe0;
            GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
@ -285,6 +280,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
        /* add the full case folding */
        if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
            pe=pe0;
+            int32_t fullLength;
            GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);

            /* start of full case mapping strings */
@ -297,7 +293,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
            fullLength>>=4;

            /* add the full case folding string */
-            length=fullLength&0xf;
+            int32_t length=fullLength&0xf;
            if(length!=0) {
                sa->addString(sa->set, (const char16_t *)pe, length);
                pe+=length;
@ -313,9 +309,146 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
        }

        /* add each code point in the closure string */
-        for(idx=0; idx<closureLength;) {
-            U16_NEXT_UNSAFE(closure, idx, c);
-            sa->add(sa->set, c);
+        for(int32_t idx=0; idx<closureLength;) {
+            UChar32 mapping;
+            U16_NEXT_UNSAFE(closure, idx, mapping);
+            sa->add(sa->set, mapping);
+        }
+    }
+}
+
+namespace {
+
+/**
+ * Add the simple case closure mapping,
+ * except if there is not actually an scf relationship between the two characters.
+ * TODO: Unicode should probably add the corresponding scf mappings.
+ * See https://crbug.com/v8/13377 and Unicode-internal PAG issue #23.
+ * If & when those scf mappings are added, we should be able to remove all of these exceptions.
+ *
+ * @param c the code point whose closure is being computed
+ * @param t a candidate case-related code point for c
+ * @param sa the set adder; t is added via sa->add() unless (c, t) is one of the excluded pairs
+ */
+void addOneSimpleCaseClosure(UChar32 c, UChar32 t, const USetAdder *sa) {
+    switch (c) {  // Each excluded pair is listed in both directions.
+    case 0x0390:
+        if (t == 0x1FD3) { return; }
+        break;
+    case 0x03B0:
+        if (t == 0x1FE3) { return; }
+        break;
+    case 0x1FD3:
+        if (t == 0x0390) { return; }
+        break;
+    case 0x1FE3:
+        if (t == 0x03B0) { return; }
+        break;
+    case 0xFB05:
+        if (t == 0xFB06) { return; }
+        break;
+    case 0xFB06:
+        if (t == 0xFB05) { return; }
+        break;
+    default:
+        break;
+    }
+    sa->add(sa->set, t);  // Not an excluded pair: add t.
+}
+
+}  // namespace
+
+U_CFUNC void U_EXPORT2
+ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) {  // Adds the case closure of c, restricted to scf=Simple_Case_Folding, to the set behind sa.
+    uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+    if(!UCASE_HAS_EXCEPTION(props)) {
+        if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
+            /* add the one simple case mapping, no matter what type it is */
+            int32_t delta=UCASE_GET_DELTA(props);
+            if(delta!=0) {
+                sa->add(sa->set, c+delta);
+            }
+        }
+    } else {
+        // c has exceptions. Add the mappings relevant for scf=Simple_Case_Folding.
+        const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
+        uint16_t excWord=*pe++;
+        const uint16_t *pe0=pe;  // Remember the first slot; each slot lookup below restarts from here.
+
+        // Hardcode the case closure of i and its relatives and ignore the
+        // data file data for these characters, like in ucase_addCaseClosure().
+        if (excWord&UCASE_EXC_CONDITIONAL_FOLD) {
+            // These characters have Turkic case foldings. Hardcode their closure.
+            if (c == 0x49) {
+                // Regular i and I are in one equivalence class.
+                sa->add(sa->set, 0x69);
+                return;
+            } else if (c == 0x130) {
+                // For scf=Simple_Case_Folding, dotted I is in a class by itself.
+                return;
+            }
+        } else if (c == 0x69) {
+            sa->add(sa->set, 0x49);
+            return;
+        } else if (c == 0x131) {
+            // Dotless i is in a class by itself.
+            return;
+        }
+
+        // Add all simple case mappings.
+        for(int32_t idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
+            if(HAS_SLOT(excWord, idx)) {
+                pe=pe0;
+                UChar32 mapping;
+                GET_SLOT_VALUE(excWord, idx, pe, mapping);
+                addOneSimpleCaseClosure(c, mapping, sa);
+            }
+        }
+        if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
+            pe=pe0;
+            int32_t delta;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
+            UChar32 mapping = (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;  // The slot value is applied as +delta or -delta depending on the flag.
+            addOneSimpleCaseClosure(c, mapping, sa);
+        }
+
+        /* get the closure string pointer & length */
+        const char16_t *closure;
+        int32_t closureLength;
+        if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
+            pe=pe0;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
+            closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
+            closure=(const char16_t *)pe+1; /* behind this slot, unless there are full case mappings */
+        } else {
+            closureLength=0;
+            closure=nullptr;
+        }
+
+        // Skip the full case mappings.
+        if(closureLength > 0 && HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {  // Only needed when there is a closure string to locate.
+            pe=pe0;
+            int32_t fullLength;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
+
+            /* start of full case mapping strings */
+            ++pe;
+
+            fullLength&=0xffff; /* bits 16 and higher are reserved */
+
+            // Skip all 4 full case mappings.
+            pe+=fullLength&UCASE_FULL_LOWER;  // Each 4-bit field of fullLength is used as the length of one mapping string.
+            fullLength>>=4;
+            pe+=fullLength&0xf;
+            fullLength>>=4;
+            pe+=fullLength&0xf;
+            fullLength>>=4;
+            pe+=fullLength;
+
+            closure=(const char16_t *)pe; /* behind full case mappings */
+        }
+
+        // Add each code point in the closure string whose scf maps back to c.
+        for(int32_t idx=0; idx<closureLength;) {
+            UChar32 mapping;
+            U16_NEXT_UNSAFE(closure, idx, mapping);
+            addOneSimpleCaseClosure(c, mapping, sa);
        }
    }
 }
--- a/icu4c/source/common/ucase.h
+++ b/icu4c/source/common/ucase.h
@ -108,6 +108,10 @@ ucase_fold(UChar32 c, uint32_t options);
 U_CFUNC void U_EXPORT2
 ucase_addCaseClosure(UChar32 c, const USetAdder *sa);

+/** Case closure with only scf=Simple_Case_Folding. */
+U_CFUNC void U_EXPORT2
+ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa);
+
 /**
 * Maps the string to single code points and adds the associated case closure
 * mappings.
--- a/icu4c/source/common/unicode/uniset.h
+++ b/icu4c/source/common/unicode/uniset.h
@ -430,7 +430,9 @@ public:
     * description for the syntax of the pattern language.
     * @param pattern a string specifying what characters are in the set
     * @param options bitmask for options to apply to the pattern.
-     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+     * Valid options are USET_IGNORE_SPACE and
+     * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+     * These case options are mutually exclusive.
     * @param symbols a symbol table mapping variable names to values
     * and stand-in characters to UnicodeSets; may be nullptr
     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
@ -450,7 +452,9 @@ public:
     * @param pos on input, the position in pattern at which to start parsing.
     * On output, the position after the last character parsed.
     * @param options bitmask for options to apply to the pattern.
-     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+     * Valid options are USET_IGNORE_SPACE and
+     * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+     * These case options are mutually exclusive.
     * @param symbols a symbol table mapping variable names to values
     * and stand-in characters to UnicodeSets; may be nullptr
     * @param status input-output error code
@ -645,7 +649,9 @@ public:
     * A frozen set will not be modified.
     * @param pattern a string specifying what characters are in the set
     * @param options bitmask for options to apply to the pattern.
-     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+     * Valid options are USET_IGNORE_SPACE and
+     * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+     * These case options are mutually exclusive.
     * @param symbols a symbol table mapping variable names to
     * values and stand-ins to UnicodeSets; may be nullptr
     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
@ -683,7 +689,9 @@ public:
     * pattern.length() if the closing ']' is the last character of
     * the pattern string.
     * @param options bitmask for options to apply to the pattern.
-     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+     * Valid options are USET_IGNORE_SPACE and
+     * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+     * These case options are mutually exclusive.
     * @param symbols a symbol table mapping variable names to
     * values and stand-ins to UnicodeSets; may be nullptr
     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
@ -1390,7 +1398,7 @@ public:

    /**
     * Close this set over the given attribute.  For the attribute
-     * USET_CASE, the result is to modify this set so that:
+     * USET_CASE_INSENSITIVE, the result is to modify this set so that:
     *
     * 1. For each character or string 'a' in this set, all strings or
     * characters 'b' such that foldCase(a) == foldCase(b) are added
@ -1408,8 +1416,10 @@ public:
     * A frozen set will not be modified.
     *
     * @param attribute bitmask for attributes to close over.
-     * Currently only the USET_CASE bit is supported.  Any undefined bits
-     * are ignored.
+     * Valid options:
+     * At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+     * These case options are mutually exclusive.
+     * Unrelated option bits are ignored.
     * @return a reference to this set.
     * @stable ICU 4.2
     */
@ -1579,6 +1589,9 @@ private:
                      int32_t depth,
                      UErrorCode& ec);

+    void closeOverCaseInsensitive(bool simple);
+    void closeOverAddCaseMappings();
+
    //----------------------------------------------------------------
    // Implementation: Utility methods
    //----------------------------------------------------------------
--- a/icu4c/source/common/unicode/uset.h
+++ b/icu4c/source/common/unicode/uset.h
@ -53,6 +53,12 @@ typedef struct USet USet;
 /**
 * Bitmask values to be passed to uset_openPatternOptions() or
 * uset_applyPattern() taking an option parameter.
+ *
+ * Use at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ *
+ * Undefined option bits are ignored and are reserved for future use.
+ *
 * @stable ICU 2.4
 */
 enum {
@ -60,13 +66,13 @@ enum {
     * Ignore white space within patterns unless quoted or escaped.
     * @stable ICU 2.4
     */
-    USET_IGNORE_SPACE = 1,  
+    USET_IGNORE_SPACE = 1,

    /**
     * Enable case insensitive matching.  E.g., "[ab]" with this flag
     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
     * match all except 'a', 'A', 'b', and 'B'. This performs a full
-     * closure over case mappings, e.g. U+017F for s.
+     * closure over case mappings, e.g. 'ſ' (U+017F long s) for 's'.
     *
     * The resulting set is a superset of the input for the code points but
     * not for the strings.
@ -88,17 +94,36 @@ enum {
     *
     * @stable ICU 2.4
     */
-    USET_CASE_INSENSITIVE = 2,  
+    USET_CASE_INSENSITIVE = 2,

    /**
-     * Enable case insensitive matching.  E.g., "[ab]" with this flag
-     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
-     * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
-     * title-, and uppercase mappings as well as the case folding
+     * Adds all case mappings for each element in the set.
+     * This adds the full lower-, title-, and uppercase mappings as well as the full case folding
     * of each existing element in the set.
+     *
+     * Unlike the “case insensitive” options, this does not perform a closure.
+     * For example, it does not add 'ſ' (U+017F long s) for 's',
+     * 'K' (U+212A Kelvin sign) for 'k', or replace set strings by their case-folded versions.
+     *
     * @stable ICU 3.2
     */
-    USET_ADD_CASE_MAPPINGS = 4
+    USET_ADD_CASE_MAPPINGS = 4,
+
+#ifndef U_HIDE_DRAFT_API
+    /**
+     * Enable case insensitive matching.
+     * Same as USET_CASE_INSENSITIVE but using only Simple_Case_Folding (scf) mappings,
+     * which map each code point to one code point,
+     * not full Case_Folding (cf) mappings, which map some code points to multiple code points.
+     *
+     * This is designed for case-insensitive matches, for example in certain
+     * regular expression implementations where only Simple_Case_Folding mappings are used,
+     * such as in ECMAScript (JavaScript) regular expressions.
+     *
+     * @draft ICU 73
+     */
+    USET_SIMPLE_CASE_INSENSITIVE = 6
+#endif  // U_HIDE_DRAFT_API
 };

 /**
@ -299,7 +324,9 @@ uset_openPattern(const UChar* pattern, int32_t patternLength,
 * @param patternLength the length of the pattern, or -1 if null
 * terminated
 * @param options bitmask for options to apply to the pattern.
- * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * Valid options are USET_IGNORE_SPACE and
+ * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
 * @param ec the error code
 * @stable ICU 2.4
 */
@ -414,7 +441,10 @@ uset_set(USet* set,
 *                          The character at pattern[0] must be a '['.
 * @param patternLength     The length of the UChar string. -1 if NUL terminated.
 * @param options           A bitmask for options to apply to the pattern.
- *                          Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ *                          Valid options are USET_IGNORE_SPACE and
+ *                          at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS,
+ *                          USET_SIMPLE_CASE_INSENSITIVE.
+ *                          These case options are mutually exclusive.
 * @param status            Returns an error if the pattern cannot be parsed.
 * @return                  Upon successful parse, the value is either
 *                          the index of the character after the closing ']' 
@ -804,7 +834,7 @@ uset_clear(USet* set);

 /**
 * Close this set over the given attribute.  For the attribute
- * USET_CASE, the result is to modify this set so that:
+ * USET_CASE_INSENSITIVE, the result is to modify this set so that:
 *
 * 1. For each character or string 'a' in this set, all strings or
 * characters 'b' such that foldCase(a) == foldCase(b) are added
@ -824,8 +854,10 @@ uset_clear(USet* set);
 * @param set the set
 *
 * @param attributes bitmask for attributes to close over.
- * Currently only the USET_CASE bit is supported.  Any undefined bits
- * are ignored.
+ * Valid options:
+ * At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ * Unrelated option bits are ignored.
 * @stable ICU 4.2
 */
 U_CAPI void U_EXPORT2
--- a/icu4c/source/common/uniset_closure.cpp
+++ b/icu4c/source/common/uniset_closure.cpp
@ -25,9 +25,11 @@
 #include "unicode/locid.h"
 #include "unicode/parsepos.h"
 #include "unicode/uniset.h"
+#include "unicode/utf16.h"
 #include "cmemory.h"
 #include "ruleiter.h"
 #include "ucase.h"
+#include "uprops.h"
 #include "util.h"
 #include "uvector.h"

@ -149,102 +151,208 @@ addCaseMapping(UnicodeSet &set, int32_t result, const char16_t *full, UnicodeStr
    // see ucase.h
 }

+namespace {
+
+/**
+ * For case closure on a large set, look only at code points with relevant properties.
+ *
+ * @param src the set whose code points are to be closed over
+ * @param subset scratch set that must contain all code points on entry;
+ *               it is overwritten with the intersection when that path is taken
+ * @return src itself if src.size() is below 30 or the Case_Sensitive set
+ *         could not be obtained; otherwise subset, holding the code points of src
+ *         that are also in the Case_Sensitive set (no strings)
+ */
+const UnicodeSet &maybeOnlyCaseSensitive(const UnicodeSet &src, UnicodeSet &subset) {
+    // The subset must have been constructed with all code points,
+    // so that the retainAll() intersection effectively copies all single code points from src.
+    U_ASSERT(subset.contains(0, 0x10ffff));
+    if (src.size() < 30) {  // Small sets are iterated directly, without filtering.
+        return src;
+    }
+    // Return the intersection of the src code points with Case_Sensitive ones.
+    UErrorCode errorCode = U_ZERO_ERROR;
+    const UnicodeSet *sensitive =
+        CharacterProperties::getBinaryPropertySet(UCHAR_CASE_SENSITIVE, errorCode);
+    if (U_FAILURE(errorCode)) {  // Fall back to the unfiltered set rather than failing the closure.
+        return src;
+    }
+    // Start by copying the "smaller" set.
+    // (We "copy" by intersecting all Unicode *code points* with the first set,
+    // which omits any strings.)
+    if (src.getRangeCount() > sensitive->getRangeCount()) {  // Range count is the measure of which set is smaller.
+        subset.retainAll(*sensitive);
+        subset.retainAll(src);
+    } else {
+        subset.retainAll(src);
+        subset.retainAll(*sensitive);
+    }
+    return subset;
+}
+
+// Per-character scf = Simple_Case_Folding of a string.
+// (Normally when we case-fold a string we use full case foldings.)
+bool scfString(const UnicodeString &s, UnicodeString &scf) {  // Returns true and sets scf if folding changes s; returns false and leaves scf untouched otherwise.
+    // Iterate over the raw buffer for best performance.
+    const char16_t *p = s.getBuffer();
+    int32_t length = s.length();
+    // Loop while not needing modification.
+    for (int32_t i = 0; i < length;) {
+        UChar32 c;
+        U16_NEXT(p, i, length, c);  // post-increments i
+        UChar32 scfChar = u_foldCase(c, U_FOLD_CASE_DEFAULT);
+        if (scfChar != c) {  // First code point that folds to something else: switch to building scf.
+            // Copy the characters before c.
+            scf.setTo(p, i - U16_LENGTH(c));  // i is already past c, so back up by the code unit length of c.
+            // Loop over the rest of the string and keep case-folding.
+            for (;;) {
+                scf.append(scfChar);
+                if (i == length) {
+                    return true;
+                }
+                U16_NEXT(p, i, length, c);  // post-increments i
+                scfChar = u_foldCase(c, U_FOLD_CASE_DEFAULT);
+            }
+        }
+    }
+    return false;
+}
+
+}  // namespace
+
 UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {  // Dispatches on the case-option bits of attribute; returns *this in every case.
    if (isFrozen() || isBogus()) {  // Frozen and bogus sets are returned unmodified.
        return *this;
    }
-    if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
-        {
-            UnicodeSet foldSet(*this);
-            UnicodeString str;
-            USetAdder sa = {
-                foldSet.toUSet(),
-                _set_add,
-                _set_addRange,
-                _set_addString,
-                nullptr, // don't need remove()
-                nullptr // don't need removeRange()
-            };
-
-            // start with input set to guarantee inclusion
-            // USET_CASE: remove strings because the strings will actually be reduced (folded);
-            //            therefore, start with no strings and add only those needed
-            if ((attribute & USET_CASE_INSENSITIVE) && foldSet.hasStrings()) {
-                foldSet.strings->removeAllElements();
-            }
-
-            int32_t n = getRangeCount();
-            UChar32 result;
-            const char16_t *full;
-
-            for (int32_t i=0; i<n; ++i) {
-                UChar32 start = getRangeStart(i);
-                UChar32 end   = getRangeEnd(i);
-
-                if (attribute & USET_CASE_INSENSITIVE) {
-                    // full case closure
-                    for (UChar32 cp=start; cp<=end; ++cp) {
-                        ucase_addCaseClosure(cp, &sa);
-                    }
-                } else {
-                    // add case mappings
-                    // (does not add long s for regular s, or Kelvin for k, for example)
-                    for (UChar32 cp=start; cp<=end; ++cp) {
-                        result = ucase_toFullLower(cp, nullptr, nullptr, &full, UCASE_LOC_ROOT);
-                        addCaseMapping(foldSet, result, full, str);
-
-                        result = ucase_toFullTitle(cp, nullptr, nullptr, &full, UCASE_LOC_ROOT);
-                        addCaseMapping(foldSet, result, full, str);
-
-                        result = ucase_toFullUpper(cp, nullptr, nullptr, &full, UCASE_LOC_ROOT);
-                        addCaseMapping(foldSet, result, full, str);
-
-                        result = ucase_toFullFolding(cp, &full, 0);
-                        addCaseMapping(foldSet, result, full, str);
-                    }
-                }
-            }
-            if (hasStrings()) {
-                if (attribute & USET_CASE_INSENSITIVE) {
-                    for (int32_t j=0; j<strings->size(); ++j) {
-                        str = *(const UnicodeString *) strings->elementAt(j);
-                        str.foldCase();
-                        if(!ucase_addStringCaseClosure(str.getBuffer(), str.length(), &sa)) {
-                            foldSet.add(str); // does not map to code points: add the folded string itself
-                        }
-                    }
-                } else {
-                    Locale root("");
-#if !UCONFIG_NO_BREAK_ITERATION
-                    UErrorCode status = U_ZERO_ERROR;
-                    BreakIterator *bi = BreakIterator::createWordInstance(root, status);
-                    if (U_SUCCESS(status)) {
-#endif
-                        const UnicodeString *pStr;
-
-                        for (int32_t j=0; j<strings->size(); ++j) {
-                            pStr = (const UnicodeString *) strings->elementAt(j);
-                            (str = *pStr).toLower(root);
-                            foldSet.add(str);
-#if !UCONFIG_NO_BREAK_ITERATION
-                            (str = *pStr).toTitle(bi, root);
-                            foldSet.add(str);
-#endif
-                            (str = *pStr).toUpper(root);
-                            foldSet.add(str);
-                            (str = *pStr).foldCase();
-                            foldSet.add(str);
-                        }
-#if !UCONFIG_NO_BREAK_ITERATION
-                    }
-                    delete bi;
-#endif
-                }
-            }
-            *this = foldSet;
-        }
+    switch (attribute & USET_CASE_MASK) {  // Isolate the case-option bits; all other bits of attribute are ignored.
+    case 0:
+        break;
+    case USET_CASE_INSENSITIVE:
+        closeOverCaseInsensitive(/* simple= */ false);
+        break;
+    case USET_ADD_CASE_MAPPINGS:
+        closeOverAddCaseMappings();
+        break;
+    case USET_SIMPLE_CASE_INSENSITIVE:
+        closeOverCaseInsensitive(/* simple= */ true);
+        break;
+    default:
+        // bad option (unreachable): the masked value can only be 0, 2, 4, or 6, all handled above
+        break;
    }
    return *this;
 }

+void UnicodeSet::closeOverCaseInsensitive(bool simple) {  // simple=true: use only Simple_Case_Folding closure; simple=false: full case closure.
+    // Start with input set to guarantee inclusion.
+    UnicodeSet foldSet(*this);
+    // Full case mappings closure:
+    // Remove strings because the strings will actually be reduced (folded);
+    // therefore, start with no strings and add only those needed.
+    // Do this before processing code points, because they may add strings.
+    if (!simple && foldSet.hasStrings()) {
+        foldSet.strings->removeAllElements();
+    }
+
+    USetAdder sa = {  // Callbacks that write into foldSet, not into this set.
+        foldSet.toUSet(),
+        _set_add,
+        _set_addRange,
+        _set_addString,
+        nullptr, // don't need remove()
+        nullptr // don't need removeRange()
+    };
+
+    UnicodeSet subset(0, 0x10ffff);  // Scratch set for maybeOnlyCaseSensitive(); it must start out containing all code points.
+    const UnicodeSet &codePoints = maybeOnlyCaseSensitive(*this, subset);
+
+    // Iterate over the ranges of single code points. Nested loop for each code point.
+    int32_t n = codePoints.getRangeCount();
+
+    for (int32_t i=0; i<n; ++i) {
+        UChar32 start = codePoints.getRangeStart(i);
+        UChar32 end   = codePoints.getRangeEnd(i);
+
+        if (simple) {
+            for (UChar32 cp=start; cp<=end; ++cp) {
+                ucase_addSimpleCaseClosure(cp, &sa);
+            }
+        } else {
+            for (UChar32 cp=start; cp<=end; ++cp) {
+                ucase_addCaseClosure(cp, &sa);
+            }
+        }
+    }
+    if (hasStrings()) {  // Walk the strings of this set; those of foldSet may have been cleared above.
+        UnicodeString str;
+        for (int32_t j=0; j<strings->size(); ++j) {
+            const UnicodeString *pStr = (const UnicodeString *) strings->elementAt(j);
+            if (simple) {
+                if (scfString(*pStr, str)) {
+                    foldSet.remove(*pStr).add(str);  // Replace the string by its per-character simple case folding.
+                }
+            } else {
+                str = *pStr;
+                str.foldCase();
+                if(!ucase_addStringCaseClosure(str.getBuffer(), str.length(), &sa)) {
+                    foldSet.add(str); // does not map to code points: add the folded string itself
+                }
+            }
+        }
+    }
+    *this = foldSet;  // Publish the result only after all reads of the original contents are done.
+}
+
+void UnicodeSet::closeOverAddCaseMappings() {  // Adds the lower/title/upper mappings and the case folding of every element; does not compute a closure.
+    // Start with input set to guarantee inclusion.
+    UnicodeSet foldSet(*this);
+
+    UnicodeSet subset(0, 0x10ffff);  // Scratch set for maybeOnlyCaseSensitive(); it must start out containing all code points.
+    const UnicodeSet &codePoints = maybeOnlyCaseSensitive(*this, subset);
+
+    // Iterate over the ranges of single code points. Nested loop for each code point.
+    int32_t n = codePoints.getRangeCount();
+    UChar32 result;
+    const char16_t *full;
+    UnicodeString str;  // Reused as scratch for both the code point and the string mappings below.
+
+    for (int32_t i=0; i<n; ++i) {
+        UChar32 start = codePoints.getRangeStart(i);
+        UChar32 end   = codePoints.getRangeEnd(i);
+
+        // add case mappings
+        // (does not add long s for regular s, or Kelvin for k, for example)
+        for (UChar32 cp=start; cp<=end; ++cp) {
+            result = ucase_toFullLower(cp, nullptr, nullptr, &full, UCASE_LOC_ROOT);
+            addCaseMapping(foldSet, result, full, str);
+
+            result = ucase_toFullTitle(cp, nullptr, nullptr, &full, UCASE_LOC_ROOT);
+            addCaseMapping(foldSet, result, full, str);
+
+            result = ucase_toFullUpper(cp, nullptr, nullptr, &full, UCASE_LOC_ROOT);
+            addCaseMapping(foldSet, result, full, str);
+
+            result = ucase_toFullFolding(cp, &full, 0);
+            addCaseMapping(foldSet, result, full, str);
+        }
+    }
+    if (hasStrings()) {
+        Locale root("");
+#if !UCONFIG_NO_BREAK_ITERATION
+        UErrorCode status = U_ZERO_ERROR;
+        BreakIterator *bi = BreakIterator::createWordInstance(root, status);
+        if (U_SUCCESS(status)) {  // If the word break iterator cannot be created, no string mappings are added at all.
+#endif
+            for (int32_t j=0; j<strings->size(); ++j) {
+                const UnicodeString *pStr = (const UnicodeString *) strings->elementAt(j);
+                (str = *pStr).toLower(root);
+                foldSet.add(str);
+#if !UCONFIG_NO_BREAK_ITERATION
+                (str = *pStr).toTitle(bi, root);
+                foldSet.add(str);
+#endif
+                (str = *pStr).toUpper(root);
+                foldSet.add(str);
+                (str = *pStr).foldCase();
+                foldSet.add(str);
+            }
+#if !UCONFIG_NO_BREAK_ITERATION
+        }
+        delete bi;
+#endif
+    }
+    *this = foldSet;
+}
+
 U_NAMESPACE_END
--- a/icu4c/source/common/uniset_props.cpp
+++ b/icu4c/source/common/uniset_props.cpp
@ -631,11 +631,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
     * to close over case BEFORE COMPLEMENTING.  This makes
     * patterns like /[^abc]/i work.
     */
-    if ((options & USET_CASE_INSENSITIVE) != 0) {
-        (this->*caseClosure)(USET_CASE_INSENSITIVE);
-    }
-    else if ((options & USET_ADD_CASE_MAPPINGS) != 0) {
-        (this->*caseClosure)(USET_ADD_CASE_MAPPINGS);
+    if ((options & USET_CASE_MASK) != 0) {
+        (this->*caseClosure)(options);
    }
    if (invert) {
        complement().removeAllStrings();  // code point complement
--- a/icu4c/source/common/uprops.h
+++ b/icu4c/source/common/uprops.h
@ -441,6 +441,7 @@ class CharacterProperties {
 public:
    CharacterProperties() = delete;
    static const UnicodeSet *getInclusionsForProperty(UProperty prop, UErrorCode &errorCode);
+    static const UnicodeSet *getBinaryPropertySet(UProperty property, UErrorCode &errorCode);
 };

 // implemented in uniset_props.cpp
--- a/icu4c/source/common/uset_imp.h
+++ b/icu4c/source/common/uset_imp.h
@ -58,5 +58,14 @@ typedef struct USetAdder USetAdder;

 U_CDECL_END

-#endif
+#ifdef __cplusplus

+namespace {
+
+constexpr int32_t USET_CASE_MASK = USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS;  // union of the case option bits
+
+}  // namespace
+
+#endif  // __cplusplus
+
+#endif
--- a/icu4c/source/test/intltest/usettest.cpp
+++ b/icu4c/source/test/intltest/usettest.cpp
@ -14,6 +14,7 @@
 #include <stdio.h>

 #include <string.h>
+#include <unordered_map>
 #include "unicode/utypes.h"
 #include "usettest.h"
 #include "unicode/ucnv.h"
@ -85,6 +86,8 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
    TESTCASE_AUTO(TestStrings);
    TESTCASE_AUTO(Testj2268);
    TESTCASE_AUTO(TestCloseOver);
+    TESTCASE_AUTO(TestCloseOverSimpleCaseFolding);
+    TESTCASE_AUTO(TestCloseOverLargeSets);
    TESTCASE_AUTO(TestEscapePattern);
    TESTCASE_AUTO(TestInvalidCodePoint);
    TESTCASE_AUTO(TestSymbolTable);
@ -1243,27 +1246,38 @@ void UnicodeSetTest::TestIndexOf() {
 * Test closure API.
 */
 void UnicodeSetTest::TestCloseOver() {
-    UErrorCode ec = U_ZERO_ERROR;
-
-    char CASE[] = {(char)USET_CASE_INSENSITIVE};
-    char CASE_MAPPINGS[] = {(char)USET_ADD_CASE_MAPPINGS};
-    const char* DATA[] = {
+    static constexpr char CASE[] = {(char)USET_CASE_INSENSITIVE};
+    static constexpr char CASE_MAPPINGS[] = {(char)USET_ADD_CASE_MAPPINGS};
+    static constexpr char SIMPLE_CASE_INSENSITIVE[] = {(char)USET_SIMPLE_CASE_INSENSITIVE};
+    static const char* DATA[] = {
        // selector, input, output
        CASE,
        "[aq\\u00DF{Bc}{bC}{Fi}]",
        "[aAqQ\\u00DF\\u1E9E\\uFB01{ss}{bc}{fi}]",  // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1

+        SIMPLE_CASE_INSENSITIVE,
+        "[aq\\u00DF{Bc}{bC}{Fi}]",
+        "[aAqQ\\u00DF\\u1E9E{bc}{fi}]",
+
        CASE,
        "[\\u01F1]", // 'DZ'
        "[\\u01F1\\u01F2\\u01F3]",

+        SIMPLE_CASE_INSENSITIVE,
+        "[\\u01F1]", // 'DZ'
+        "[\\u01F1\\u01F2\\u01F3]",
+
        CASE,
        "[\\u1FB4]",
        "[\\u1FB4{\\u03AC\\u03B9}]",

+        SIMPLE_CASE_INSENSITIVE,
+        "[\\u1FB4]",
+        "[\\u1FB4]",
+
        CASE,
        "[{F\\uFB01}]",
-        "[\\uFB03{ffi}]",            
+        "[\\uFB03{ffi}]",

        CASE, // make sure binary search finds limits
        "[a\\uFF3A]",
@ -1271,6 +1285,10 @@ void UnicodeSetTest::TestCloseOver() {

        CASE,
        "[a-z]","[A-Za-z\\u017F\\u212A]",
+
+        SIMPLE_CASE_INSENSITIVE,
+        "[a-z]","[A-Za-z\\u017F\\u212A]",
+
        CASE,
        "[abc]","[A-Ca-c]",
        CASE,
@ -1311,7 +1329,7 @@ void UnicodeSetTest::TestCloseOver() {
        CASE_MAPPINGS,
        "[\\u01F1]", // 'DZ'
        "[\\u01F1\\u01F2\\u01F3]",
-        
+
        CASE_MAPPINGS,
        "[a-z]",
        "[A-Za-z]",
@ -1326,6 +1344,8 @@ void UnicodeSetTest::TestCloseOver() {
        int32_t selector = DATA[i][0];
        UnicodeString pat(DATA[i+1], -1, US_INV);
        UnicodeString exp(DATA[i+2], -1, US_INV);
+
+        UErrorCode ec = U_ZERO_ERROR;
        s.applyPattern(pat, ec);
        s.closeOver(selector);
        t.applyPattern(exp, ec);
@ -1341,68 +1361,8 @@ void UnicodeSetTest::TestCloseOver() {
        }
    }

-#if 0
-    /*
-     * Unused test code.
-     * This was used to compare the old implementation (using USET_CASE)
-     * with the new one (using 0x100 temporarily)
-     * while transitioning from hardcoded case closure tables in uniset.cpp
-     * (moved to uniset_props.cpp) to building the data by gencase into ucase.icu.
-     * and using ucase.c functions for closure.
-     * See Jitterbug 3432 RFE: Move uniset.cpp data to a data file
-     *
-     * Note: The old and new implementation never fully matched because
-     * the old implementation turned out to not map U+0130 and U+0131 correctly
-     * (dotted I and dotless i) and because the old implementation's data tables
-     * were outdated compared to Unicode 4.0.1 at the time of the change to the
-     * new implementation. (So sigmas and some other characters were not handled
-     * according to the newer Unicode version.)
-     */
-    UnicodeSet sens("[:case_sensitive:]", ec), sens2, s2;
-    UnicodeSetIterator si(sens);
-    UnicodeString str, buf2;
-    const UnicodeString *pStr;
-    UChar32 c;
-    while(si.next()) {
-        if(!si.isString()) {
-            c=si.getCodepoint();
-            s.clear();
-            s.add(c);
-
-            str.setTo(c);
-            str.foldCase();
-            sens2.add(str);
-
-            t=s;
-            s.closeOver(USET_CASE);
-            t.closeOver(0x100);
-            if(s!=t) {
-                errln("FAIL: closeOver(U+%04x) differs: ", c);
-                errln((UnicodeString)"old "+s.toPattern(buf, true)+" new: "+t.toPattern(buf2, true));
-            }
-        }
-    }
-    // remove all code points
-    // should contain all full case folding mapping strings
-    sens2.remove(0, 0x10ffff);
-    si.reset(sens2);
-    while(si.next()) {
-        if(si.isString()) {
-            pStr=&si.getString();
-            s.clear();
-            s.add(*pStr);
-            t=s2=s;
-            s.closeOver(USET_CASE);
-            t.closeOver(0x100);
-            if(s!=t) {
-                errln((UnicodeString)"FAIL: closeOver("+s2.toPattern(buf, true)+") differs: ");
-                errln((UnicodeString)"old "+s.toPattern(buf, true)+" new: "+t.toPattern(buf2, true));
-            }
-        }
-    }
-#endif
-
    // Test the pattern API
+    UErrorCode ec = U_ZERO_ERROR;
    s.applyPattern("[abc]", USET_CASE_INSENSITIVE, nullptr, ec);
    if (U_FAILURE(ec)) {
        errln("FAIL: applyPattern failed");
@ -1423,6 +1383,123 @@ void UnicodeSetTest::TestCloseOver() {
    }
 }

+namespace {
+
+void addIfAbsent(const std::unordered_multimap<UChar32, UChar32> &closure, UChar32 c, UChar32 t,
+                 std::unordered_multimap<UChar32, UChar32> &additions) {
+    // Records the mapping c->t in additions unless closure already contains it.
+    // equal_range() spans every c->x entry; find() may return any entry of the
+    // equal-key group, so walking forward from find() could miss some of them.
+    auto range = closure.equal_range(c);
+    for (auto it = range.first; it != range.second; ++it) {
+        if (it->second == t) {
+            return;  // present
+        }
+    }
+    additions.insert({c, t});  // absent
+}
+
+}  // namespace
+
+void UnicodeSetTest::TestCloseOverSimpleCaseFolding() {
+    IcuTestErrorCode errorCode(*this, "TestCloseOverSimpleCaseFolding");
+    const UnicodeSet *sensitive =
+        UnicodeSet::fromUSet(u_getBinaryPropertySet(UCHAR_CASE_SENSITIVE, errorCode));
+    if (errorCode.errIfFailureAndReset("u_getBinaryPropertySet(UCHAR_CASE_SENSITIVE) failed")) {
+        return;
+    }
+    // Compute the scf=Simple_Case_Folding closure:
+    // For each scf(c)=t, start with mappings c->t and t->c.
+    std::unordered_multimap<UChar32, UChar32> closure;
+    UnicodeSetIterator iter(*sensitive);
+    while (iter.next()) {
+        UChar32 c = iter.getCodepoint();
+        UChar32 scfChar = u_foldCase(c, U_FOLD_CASE_DEFAULT);
+        if (scfChar != c) {
+            closure.insert({c, scfChar});
+            closure.insert({scfChar, c});
+        }
+    }
+    // Complete the closure: Add mappings of mappings.
+    for (;;) {
+        std::unordered_multimap<UChar32, UChar32> additions;
+        // for each mapping c->t
+        for (const auto &mapping : closure) {
+            UChar32 c = mapping.first;
+            UChar32 t = mapping.second;
+            // enumerate each t->u; equal_range() spans the whole equal-key group
+            for (auto r = closure.equal_range(t); r.first != r.second; ++r.first) {
+                UChar32 u = r.first->second;
+                if (u != c) {
+                    addIfAbsent(closure, c, u, additions);
+                    addIfAbsent(closure, u, c, additions);
+                }
+            }
+        }
+        if (additions.empty()) {
+            break;  // The closure is complete.
+        }
+        closure.insert(additions.begin(), additions.end());
+    }
+    // Compare closeOver(USET_SIMPLE_CASE_INSENSITIVE) with an unoptimized implementation.
+    // Here we focus on single code points as input.
+    // Other examples, including strings, are tested in TestCloseOver().
+    int32_t errors = 0;
+    iter.reset();
+    UnicodeSet set, expected;
+    while (iter.next()) {
+        UChar32 c = iter.getCodepoint();
+        // closeOver()
+        set.clear().add(c);
+        set.closeOver(USET_SIMPLE_CASE_INSENSITIVE);
+        // From-first-principles implementation.
+        expected.clear().add(c);
+        for (auto r = closure.equal_range(c); r.first != r.second; ++r.first) {
+            expected.add(r.first->second);
+        }
+        // compare
+        if (!checkEqual(expected, set, "closeOver() vs. test impl")) {
+            errln("    c=U+%04X", c);
+            if (++errors == 10) {
+                break;
+            }
+        }
+    }
+}
+
+void UnicodeSetTest::TestCloseOverLargeSets() {
+    // Verifies that an optimization for large sets does not change the closeOver() result:
+    // each code point is closed over alone and together with a large range.
+    IcuTestErrorCode errorCode(*this, "TestCloseOverLargeSets");
+
+    // Main Unihan block.
+    constexpr UChar32 LARGE_START = 0x4E00;
+    constexpr UChar32 LARGE_END = 0x9FFF;
+    static constexpr int32_t OPTIONS[] = {
+        USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE
+    };
+    // Most code points except ones that are boring for case mappings.
+    UnicodeSet manyCp(u"[^[:C:][:Ideographic:][:Hang:]]", errorCode);
+
+    UnicodeSet single, closedAlone, closedWithRange;
+    for (int32_t option : OPTIONS) {
+        for (UnicodeSetIterator cpIter(manyCp); cpIter.next();) {
+            const UChar32 c = cpIter.getCodepoint();
+            single.clear().add(c);
+            closedWithRange = single;
+            closedWithRange.add(LARGE_START, LARGE_END);
+            closedWithRange.closeOver(option);
+            closedWithRange.remove(LARGE_START, LARGE_END);
+            closedAlone = single;
+            closedAlone.closeOver(option);
+            if (!checkEqual(closedAlone, closedWithRange, "small != large")) {
+                errln("    option=%d c=U+%04X", option, c);
+                break;
+            }
+        }
+    }
+}
+
 void UnicodeSetTest::TestEscapePattern() {
    const char pattern[] =
        "[\\uFEFF \\u200A-\\u200E \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
--- a/icu4c/source/test/intltest/usettest.h
+++ b/icu4c/source/test/intltest/usettest.h
@ -74,6 +74,8 @@ private:
    void TestExhaustive(void);

    void TestCloseOver(void);
+    void TestCloseOverSimpleCaseFolding();
+    void TestCloseOverLargeSets();

    void TestEscapePattern(void);

--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java
@ -260,34 +260,6 @@ public final class UCaseProps {
     * - for k include the Kelvin sign
     */
    public final void addCaseClosure(int c, UnicodeSet set) {
-        /*
-         * Hardcode the case closure of i and its relatives and ignore the
-         * data file data for these characters.
-         * The Turkic dotless i and dotted I with their case mapping conditions
-         * and case folding option make the related characters behave specially.
-         * This code matches their closure behavior to their case folding behavior.
-         */
-
-        switch(c) {
-        case 0x49:
-            /* regular i and I are in one equivalence class */
-            set.add(0x69);
-            return;
-        case 0x69:
-            set.add(0x49);
-            return;
-        case 0x130:
-            /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
-            set.add(iDot);
-            return;
-        case 0x131:
-            /* dotless i is in a class by itself */
-            return;
-        default:
-            /* otherwise use the data file data */
-            break;
-        }
-
        int props=trie.get(c);
        if(!propsHasException(props)) {
            if(getTypeFromProps(props)!=NONE) {
@ -302,19 +274,41 @@ public final class UCaseProps {
             * c has exceptions, so there may be multiple simple and/or
             * full case mappings. Add them all.
             */
-            int excOffset0, excOffset=getExceptionsOffset(props);
-            int closureOffset;
+            int excOffset=getExceptionsOffset(props);
            int excWord=exceptions.charAt(excOffset++);
-            int index, closureLength, fullLength, length;
+            int excOffset0=excOffset;

-            excOffset0=excOffset;
+            // Hardcode the case closure of i and its relatives and ignore the
+            // data file data for these characters.
+            // The Turkic dotless i and dotted I with their case mapping conditions
+            // and case folding option make the related characters behave specially.
+            // This code matches their closure behavior to their case folding behavior.
+            if ((excWord&EXC_CONDITIONAL_FOLD) != 0) {
+                // These characters have Turkic case foldings. Hardcode their closure.
+                if (c == 0x49) {
+                    // Regular i and I are in one equivalence class.
+                    set.add(0x69);
+                    return;
+                } else if (c == 0x130) {
+                    // Dotted I is in a class with <0069 0307>
+                    // (for canonical equivalence with <0049 0307>).
+                    set.add(iDot);
+                    return;
+                }
+            } else if (c == 0x69) {
+                set.add(0x49);
+                return;
+            } else if (c == 0x131) {
+                // Dotless i is in a class by itself.
+                return;
+            }

            /* add all simple case mappings */
-            for(index=EXC_LOWER; index<=EXC_TITLE; ++index) {
+            for(int index=EXC_LOWER; index<=EXC_TITLE; ++index) {
                if(hasSlot(excWord, index)) {
                    excOffset=excOffset0;
-                    c=getSlotValue(excWord, index, excOffset);
-                    set.add(c);
+                    int mapping=getSlotValue(excWord, index, excOffset);
+                    set.add(mapping);
                }
            }
            if(hasSlot(excWord, EXC_DELTA)) {
@ -324,6 +318,7 @@ public final class UCaseProps {
            }

            /* get the closure string pointer & length */
+            int closureOffset, closureLength;
            if(hasSlot(excWord, EXC_CLOSURE)) {
                excOffset=excOffset0;
                long value=getSlotValueAndOffset(excWord, EXC_CLOSURE, excOffset);
@ -338,7 +333,7 @@ public final class UCaseProps {
            if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
                excOffset=excOffset0;
                long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
-                fullLength=(int)value;
+                int fullLength=(int)value;

                /* start of full case mapping strings */
                excOffset=(int)(value>>32)+1;
@ -350,7 +345,7 @@ public final class UCaseProps {
                fullLength>>=4;

                /* add the full case folding string */
-                length=fullLength&0xf;
+                int length=fullLength&0xf;
                if(length!=0) {
                    set.add(exceptions.substring(excOffset, excOffset+length));
                    excOffset+=length;
@ -367,9 +362,137 @@ public final class UCaseProps {

            /* add each code point in the closure string */
            int limit=closureOffset+closureLength;
-            for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) {
-                c=exceptions.codePointAt(index);
-                set.add(c);
+            for(int index=closureOffset, mapping; index<limit; index+=UTF16.getCharCount(mapping)) {
+                mapping=exceptions.codePointAt(index);  // step by this code point's length, not c's
+                set.add(mapping);
+            }
+        }
+    }
+
+    /**
+     * Adds the simple case closure mapping t for c to the set,
+     * unless the two characters do not actually have an scf relationship.
+     * TODO: Unicode should probably add the corresponding scf mappings.
+     * See https://crbug.com/v8/13377 and Unicode-internal PAG issue #23.
+     * If & when those scf mappings are added, we should be able to remove all of these exceptions.
+     */
+    private static void addOneSimpleCaseClosure(int c, int t, UnicodeSet set) {
+        // Set to true for exactly the hardcoded (c, t) exceptions:
+        // three pairs of characters, each listed in both directions,
+        // which are not actually related via scf
+        // and therefore must not be added to each other's closure.
+        final boolean unrelated;
+        if (c == 0x0390) {
+            unrelated = t == 0x1FD3;
+        } else if (c == 0x03B0) {
+            unrelated = t == 0x1FE3;
+        } else if (c == 0x1FD3) {
+            unrelated = t == 0x0390;
+        } else if (c == 0x1FE3) {
+            unrelated = t == 0x03B0;
+        } else if (c == 0xFB05) {
+            unrelated = t == 0xFB06;
+        } else if (c == 0xFB06) {
+            unrelated = t == 0xFB05;
+        } else {
+            unrelated = false;
+        }
+        if (!unrelated) {
+            set.add(t);
+        }
+    }
+
+    /** Adds to set the code points in c's case closure that matter for scf=Simple_Case_Folding. */
+    public final void addSimpleCaseClosure(int c, UnicodeSet set) {
+        int props=trie.get(c);
+        if(!propsHasException(props)) {
+            if(getTypeFromProps(props)!=NONE) {
+                /* add the one simple case mapping, no matter what type it is */
+                int delta=getDelta(props);
+                if(delta!=0) {
+                    set.add(c+delta);
+                }
+            }
+        } else {
+            // c has exceptions. Add the mappings relevant for scf=Simple_Case_Folding.
+            int excOffset=getExceptionsOffset(props);
+            int excWord=exceptions.charAt(excOffset++);
+            int excOffset0=excOffset;
+
+            // Hardcode the case closure of i and its relatives and ignore the
+            // data file data for these characters, like in ucase_addCaseClosure().
+            if ((excWord&EXC_CONDITIONAL_FOLD) != 0) {
+                // These characters have Turkic case foldings. Hardcode their closure.
+                if (c == 0x49) {
+                    // Regular i and I are in one equivalence class.
+                    set.add(0x69);
+                    return;
+                } else if (c == 0x130) {
+                    // For scf=Simple_Case_Folding, dotted I is in a class by itself.
+                    return;
+                }
+            } else if (c == 0x69) {
+                set.add(0x49);
+                return;
+            } else if (c == 0x131) {
+                // Dotless i is in a class by itself.
+                return;
+            }
+
+            // Add all simple case mappings.
+            for(int index=EXC_LOWER; index<=EXC_TITLE; ++index) {
+                if(hasSlot(excWord, index)) {
+                    excOffset=excOffset0;
+                    int mapping=getSlotValue(excWord, index, excOffset);
+                    addOneSimpleCaseClosure(c, mapping, set);
+                }
+            }
+            if(hasSlot(excWord, EXC_DELTA)) {
+                excOffset=excOffset0;
+                int delta=getSlotValue(excWord, EXC_DELTA, excOffset);
+                int mapping = (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
+                addOneSimpleCaseClosure(c, mapping, set);
+            }
+
+            /* get the closure string pointer & length */
+            int closureOffset, closureLength;
+            if(hasSlot(excWord, EXC_CLOSURE)) {
+                excOffset=excOffset0;
+                long value=getSlotValueAndOffset(excWord, EXC_CLOSURE, excOffset);
+                closureLength=(int)value&CLOSURE_MAX_LENGTH; /* higher bits are reserved */
+                closureOffset=(int)(value>>32)+1; /* behind this slot, unless there are full case mappings */
+            } else {
+                closureLength=0;
+                closureOffset=0;
+            }
+
+            // Skip the full case mappings.
+            if(closureLength > 0 && hasSlot(excWord, EXC_FULL_MAPPINGS)) {
+                excOffset=excOffset0;
+                long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
+                int fullLength=(int)value;
+
+                /* start of full case mapping strings */
+                excOffset=(int)(value>>32)+1;
+                fullLength&=0xffff; /* bits 16 and higher are reserved */
+
+                // Skip all 4 full case mappings.
+                excOffset+=fullLength&FULL_LOWER;
+                fullLength>>=4;
+                excOffset+=fullLength&0xf;
+                fullLength>>=4;
+                excOffset+=fullLength&0xf;
+                fullLength>>=4;
+                excOffset+=fullLength;
+
+                closureOffset=excOffset; /* behind full case mappings */
+            }
+
+            // Add each code point in the closure string whose scf maps back to c.
+            int limit=closureOffset+closureLength;
+            for(int index=closureOffset, mapping; index<limit; index+=UTF16.getCharCount(mapping)) {
+                mapping=exceptions.codePointAt(index);  // step by this code point's length, not c's
+                addOneSimpleCaseClosure(c, mapping, set);
+            }
+        }
+    }
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
@ -459,7 +459,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * for the syntax of the pattern language.
     * @param pattern a string specifying what characters are in the set
     * @param options a bitmask indicating which options to apply.
-     * Valid options are IGNORE_SPACE and CASE.
+     * Valid options are {@link #IGNORE_SPACE} and
+     * at most one of {@link #CASE_INSENSITIVE}, {@link #ADD_CASE_MAPPINGS},
+     * {@link #SIMPLE_CASE_INSENSITIVE}. These case options are mutually exclusive.
     * @exception java.lang.IllegalArgumentException if the pattern contains
     * a syntax error.
     * @stable ICU 3.8
@ -495,7 +497,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @param symbols a symbol table mapping variables to char[] arrays
     * and chars to UnicodeSets
     * @param options a bitmask indicating which options to apply.
-     * Valid options are IGNORE_SPACE and CASE.
+     * Valid options are {@link #IGNORE_SPACE} and
+     * at most one of {@link #CASE_INSENSITIVE}, {@link #ADD_CASE_MAPPINGS},
+     * {@link #SIMPLE_CASE_INSENSITIVE}. These case options are mutually exclusive.
     * @exception java.lang.IllegalArgumentException if the pattern
     * contains a syntax error.
     * @stable ICU 3.2
@ -587,7 +591,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * See the class description for the syntax of the pattern language.
     * @param pattern a string specifying what characters are in the set
     * @param options a bitmask indicating which options to apply.
-     * Valid options are IGNORE_SPACE and CASE.
+     * Valid options are {@link #IGNORE_SPACE} and
+     * at most one of {@link #CASE_INSENSITIVE}, {@link #ADD_CASE_MAPPINGS},
+     * {@link #SIMPLE_CASE_INSENSITIVE}. These case options are mutually exclusive.
     * @exception java.lang.IllegalArgumentException if the pattern
     * contains a syntax error.
     * @stable ICU 3.8
@ -2584,8 +2590,10 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * variables, or null if none.
     * @param rebuiltPat the pattern that was parsed, rebuilt or
     * copied from the input pattern, as appropriate.
-     * @param options a bit mask of zero or more of the following:
-     * IGNORE_SPACE, CASE.
+     * @param options a bit mask.
+     * Valid options are {@link #IGNORE_SPACE} and
+     * at most one of {@link #CASE_INSENSITIVE}, {@link #ADD_CASE_MAPPINGS},
+     * {@link #SIMPLE_CASE_INSENSITIVE}. These case options are mutually exclusive.
     */
    private void applyPattern(RuleCharacterIterator chars, SymbolTable symbols,
            Appendable rebuiltPat, int options, int depth) {
@ -2965,8 +2973,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
         * to close over case BEFORE COMPLEMENTING.  This makes
         * patterns like /[^abc]/i work.
         */
-        if ((options & CASE) != 0) {
-            closeOver(CASE);
+        if ((options & CASE_MASK) != 0) {
+            closeOver(options);
        }
        if (invert) {
            complement().removeAllStrings();  // code point complement
@ -3861,58 +3869,81 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
    public static final int IGNORE_SPACE = 1;

    /**
-     * Bitmask for constructor, applyPattern(), and closeOver()
-     * indicating letter case.  This may be ORed together with other
-     * selectors.
+     * Alias for {@link #CASE_INSENSITIVE}.
     *
-     * Enable case insensitive matching.  E.g., "[ab]" with this flag
-     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
-     * match all except 'a', 'A', 'b', and 'B'. This performs a full
-     * closure over case mappings, e.g. U+017F for s.
-     *
-     * The resulting set is a superset of the input for the code points but
-     * not for the strings.
-     * It performs a case mapping closure of the code points and adds
-     * full case folding strings for the code points, and reduces strings of
-     * the original set to their full case folding equivalents.
-     *
-     * This is designed for case-insensitive matches, for example
-     * in regular expressions. The full code point case closure allows checking of
-     * an input character directly against the closure set.
-     * Strings are matched by comparing the case-folded form from the closure
-     * set with an incremental case folding of the string in question.
-     *
-     * The closure set will also contain single code points if the original
-     * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
-     * This is not necessary (that is, redundant) for the above matching method
-     * but results in the same closure sets regardless of whether the original
-     * set contained the code point or a string.
     * @stable ICU 3.8
     */
    public static final int CASE = 2;

    /**
-     * Alias for UnicodeSet.CASE, for ease of porting from C++ where ICU4C
-     * also has both USET_CASE and USET_CASE_INSENSITIVE (see uset.h).
-     * @see #CASE
+     * Enable case insensitive matching.  E.g., "[ab]" with this flag
+     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
+     * match all except 'a', 'A', 'b', and 'B'. This performs a full
+     * closure over case mappings, e.g. 'ſ' (U+017F long s) for 's'.
+     *
+     * <p>This value is an options bit set value for some
+     * constructors, applyPattern(), and closeOver().
+     * It can be ORed together with other, unrelated options.
+     *
+     * <p>The resulting set is a superset of the input for the code points but
+     * not for the strings.
+     * It performs a case mapping closure of the code points and adds
+     * full case folding strings for the code points, and reduces strings of
+     * the original set to their full case folding equivalents.
+     *
+     * <p>This is designed for case-insensitive matches, for example
+     * in regular expressions. The full code point case closure allows checking of
+     * an input character directly against the closure set.
+     * Strings are matched by comparing the case-folded form from the closure
+     * set with an incremental case folding of the string in question.
+     *
+     * <p>The closure set will also contain single code points if the original
+     * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
+     * This is not necessary (that is, redundant) for the above matching method
+     * but results in the same closure sets regardless of whether the original
+     * set contained the code point or a string.
+     *
     * @stable ICU 3.4
     */
    public static final int CASE_INSENSITIVE = 2;

    /**
-     * Bitmask for constructor, applyPattern(), and closeOver()
-     * indicating letter case.  This may be ORed together with other
-     * selectors.
-     *
-     * Enable case insensitive matching.  E.g., "[ab]" with this flag
-     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
-     * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
-     * title-, and uppercase mappings as well as the case folding
+     * Adds all case mappings for each element in the set.
+     * This adds the full lower-, title-, and uppercase mappings as well as the full case folding
     * of each existing element in the set.
+     *
+     * <p>This value is an options bit set value for some
+     * constructors, applyPattern(), and closeOver().
+     * It can be ORed together with other, unrelated options.
+     *
+     * <p>Unlike the “case insensitive” options, this does not perform a closure.
+     * For example, it does not add 'ſ' (U+017F long s) for 's',
+     * 'K' (U+212A Kelvin sign) for 'k', or replace set strings by their case-folded versions.
+     *
     * @stable ICU 3.4
     */
    public static final int ADD_CASE_MAPPINGS = 4;

+    /**
+     * Enable case insensitive matching.
+     * Same as {@link #CASE_INSENSITIVE} but using only Simple_Case_Folding (scf) mappings,
+     * which map each code point to one code point,
+     * not full Case_Folding (cf) mappings, which map some code points to multiple code points.
+     *
+     * <p>This is designed for case-insensitive matches, for example in certain
+     * regular expression implementations where only Simple_Case_Folding mappings are used,
+     * such as in ECMAScript (JavaScript) regular expressions.
+     *
+     * <p>This value is an options bit set value for some
+     * constructors, applyPattern(), and closeOver().
+     * It can be ORed together with other, unrelated options.
+     *
+     * @draft ICU 73
+     */
+    public static final int SIMPLE_CASE_INSENSITIVE = 6;
+
+    private static final int CASE_MASK = CASE_INSENSITIVE | ADD_CASE_MAPPINGS;
+
    //  add the result of a full case mapping to the set
    //  use str as a temporary string to avoid constructing one
    private static final void addCaseMapping(UnicodeSet set, int result, StringBuilder full) {
@ -3930,99 +3961,193 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
        // see UCaseProps
    }

+    /**
+     * Picks the set of code points that a case closure needs to iterate over.
+     *
+     * <p>When {@code src.size()} is below 30, {@code src} itself is returned.
+     * Otherwise the result is the intersection of {@code src} with the
+     * Case_Sensitive binary property set, computed on a clone of one of the two sets.
+     *
+     * @param src the set whose code points are about to be processed
+     * @return {@code src}, or the intersection described above
+     */
+    UnicodeSet maybeOnlyCaseSensitive(UnicodeSet src) {
+        if (src.size() >= 30) {
+            UnicodeSet caseSensitive =
+                    CharacterProperties.getBinaryPropertySet(UProperty.CASE_SENSITIVE);
+            // Clone the "smaller" operand and intersect it with the other one.
+            // If src has strings, clone the property set so that the strings need not be copied.
+            boolean clonePropertySet =
+                    src.hasStrings() || src.getRangeCount() > caseSensitive.getRangeCount();
+            UnicodeSet copy =
+                    clonePropertySet ? caseSensitive.cloneAsThawed() : (UnicodeSet) src.clone();
+            return copy.retainAll(clonePropertySet ? src : caseSensitive);
+        }
+        return src;
+    }
+
+    /**
+     * Applies scf = Simple_Case_Folding to each code point of a string.
+     * (Case-folding a string normally uses the full case foldings instead.)
+     *
+     * @param s the string to fold
+     * @param scf receives the folded string, but only if folding changes at least one
+     *            code point; otherwise it is left untouched
+     * @return true if the folded string differs from s and was written to scf
+     */
+    private static final boolean scfString(CharSequence s, StringBuilder scf) {
+        final int length = s.length();
+        // Phase 1: skip over the prefix that case folding leaves unchanged.
+        int pos = 0;
+        while (pos < length) {
+            int cp = Character.codePointAt(s, pos);
+            if (UCharacter.foldCase(cp, UCharacter.FOLD_CASE_DEFAULT) != cp) {
+                break;
+            }
+            pos += Character.charCount(cp);
+        }
+        if (pos >= length) {
+            // No code point changes (this includes the empty string).
+            return false;
+        }
+        // Phase 2: keep the unchanged prefix, then fold every remaining code point.
+        scf.setLength(0);
+        scf.append(s, 0, pos);
+        while (pos < length) {
+            int cp = Character.codePointAt(s, pos);
+            scf.appendCodePoint(UCharacter.foldCase(cp, UCharacter.FOLD_CASE_DEFAULT));
+            pos += Character.charCount(cp);
+        }
+        return true;
+    }
+
    /**
     * Close this set over the given attribute.  For the attribute
-     * CASE, the result is to modify this set so that:
+     * {@link #CASE_INSENSITIVE}, the result is to modify this set so that:
     *
-     * 1. For each character or string 'a' in this set, all strings
+     * <ol>
+     * <li>For each character or string 'a' in this set, all strings
     * 'b' such that foldCase(a) == foldCase(b) are added to this set.
     * (For most 'a' that are single characters, 'b' will have
     * b.length() == 1.)
     *
-     * 2. For each string 'e' in the resulting set, if e !=
+     * <li>For each string 'e' in the resulting set, if e !=
     * foldCase(e), 'e' will be removed.
+     * </ol>
     *
-     * Example: [aq\u00DF{Bc}{bC}{Fi}] =&gt; [aAqQ\u00DF\uFB01{ss}{bc}{fi}]
+     * <p>Example: [aq\u00DF{Bc}{bC}{Fi}] =&gt; [aAqQ\u00DF\uFB01{ss}{bc}{fi}]
     *
-     * (Here foldCase(x) refers to the operation
+     * <p>(Here foldCase(x) refers to the operation
     * UCharacter.foldCase(x, true), and a == b actually denotes
     * a.equals(b), not pointer comparison.)
     *
     * @param attribute bitmask for attributes to close over.
-     * Currently only the CASE bit is supported.  Any undefined bits
-     * are ignored.
+     * Valid options:
+     * At most one of {@link #CASE_INSENSITIVE}, {@link #ADD_CASE_MAPPINGS},
+     * {@link #SIMPLE_CASE_INSENSITIVE}. These case options are mutually exclusive.
+     * Unrelated options bits are ignored.
     * @return a reference to this set.
     * @stable ICU 3.8
     */
    public UnicodeSet closeOver(int attribute) {
        checkFrozen();
-        if ((attribute & (CASE | ADD_CASE_MAPPINGS)) != 0) {
-            UCaseProps csp = UCaseProps.INSTANCE;
-            UnicodeSet foldSet = new UnicodeSet(this);
-            ULocale root = ULocale.ROOT;
-
-            // start with input set to guarantee inclusion
-            // CASE: remove strings because the strings will actually be reduced (folded);
-            //       therefore, start with no strings and add only those needed
-            if((attribute & CASE) != 0 && foldSet.hasStrings()) {
-                foldSet.strings.clear();
-            }
-
-            int n = getRangeCount();
-            int result;
-            StringBuilder full = new StringBuilder();
-
-            for (int i=0; i<n; ++i) {
-                int start = getRangeStart(i);
-                int end   = getRangeEnd(i);
-
-                if((attribute & CASE) != 0) {
-                    // full case closure
-                    for (int cp=start; cp<=end; ++cp) {
-                        csp.addCaseClosure(cp, foldSet);
-                    }
-                } else {
-                    // add case mappings
-                    // (does not add long s for regular s, or Kelvin for k, for example)
-                    for (int cp=start; cp<=end; ++cp) {
-                        result = csp.toFullLower(cp, null, full, UCaseProps.LOC_ROOT);
-                        addCaseMapping(foldSet, result, full);
-
-                        result = csp.toFullTitle(cp, null, full, UCaseProps.LOC_ROOT);
-                        addCaseMapping(foldSet, result, full);
-
-                        result = csp.toFullUpper(cp, null, full, UCaseProps.LOC_ROOT);
-                        addCaseMapping(foldSet, result, full);
-
-                        result = csp.toFullFolding(cp, full, 0);
-                        addCaseMapping(foldSet, result, full);
-                    }
-                }
-            }
-            if (hasStrings()) {
-                if ((attribute & CASE) != 0) {
-                    for (String s : strings) {
-                        String str = UCharacter.foldCase(s, 0);
-                        if(!csp.addStringCaseClosure(str, foldSet)) {
-                            foldSet.add(str); // does not map to code points: add the folded string itself
-                        }
-                    }
-                } else {
-                    BreakIterator bi = BreakIterator.getWordInstance(root);
-                    for (String str : strings) {
-                        // TODO: call lower-level functions
-                        foldSet.add(UCharacter.toLowerCase(root, str));
-                        foldSet.add(UCharacter.toTitleCase(root, str, bi));
-                        foldSet.add(UCharacter.toUpperCase(root, str));
-                        foldSet.add(UCharacter.foldCase(str, 0));
-                    }
-                }
-            }
-            set(foldSet);
+        // Dispatch on the case-option bits only; all other bits of the attribute are ignored.
+        // The masked value is compared for equality because the case options share bits.
+        switch (attribute & CASE_MASK) {
+        case 0:
+            // No case option bit is set: the set is left as it is.
+            break;
+        case CASE_INSENSITIVE:
+            closeOverCaseInsensitive(/* simple= */ false);
+            break;
+        case ADD_CASE_MAPPINGS:
+            closeOverAddCaseMappings();
+            break;
+        case SIMPLE_CASE_INSENSITIVE:
+            closeOverCaseInsensitive(/* simple= */ true);
+            break;
+        default:
+            // bad option (unreachable)
+            break;
        }
        return this;
    }

+    /**
+     * Replaces this set with its case-insensitive closure.
+     *
+     * @param simple true to use the simple case closure per code point and per-code point
+     *               simple case folding for strings; false to use the full case closure
+     *               and full case folding
+     */
+    private void closeOverCaseInsensitive(boolean simple) {
+        UCaseProps csp = UCaseProps.INSTANCE;
+        // The result starts as a copy of the input, which guarantees inclusion.
+        UnicodeSet foldSet = new UnicodeSet(this);
+
+        // Full closure only: the strings get reduced (folded), so drop them from the result
+        // and add back only what is needed. This must happen before the code points
+        // are processed, because processing them may add strings.
+        if (!simple && foldSet.hasStrings()) {
+            foldSet.strings.clear();
+        }
+
+        // Visit each code point of each range of the (possibly reduced) input.
+        UnicodeSet codePoints = maybeOnlyCaseSensitive(this);
+        final int rangeCount = codePoints.getRangeCount();
+        for (int r = 0; r < rangeCount; ++r) {
+            int cp = codePoints.getRangeStart(r);
+            final int last = codePoints.getRangeEnd(r);
+            for (; cp <= last; ++cp) {
+                if (simple) {
+                    csp.addSimpleCaseClosure(cp, foldSet);
+                } else {
+                    csp.addCaseClosure(cp, foldSet);
+                }
+            }
+        }
+
+        if (hasStrings()) {
+            if (simple) {
+                // Replace each string that changes under per-code point simple case folding.
+                StringBuilder folded = new StringBuilder();
+                for (String s : strings) {
+                    if (scfString(s, folded)) {
+                        foldSet.remove(s).add(folded);
+                    }
+                }
+            } else {
+                for (String s : strings) {
+                    String folded = UCharacter.foldCase(s, 0);
+                    if (!csp.addStringCaseClosure(folded, foldSet)) {
+                        // does not map to code points: add the folded string itself
+                        foldSet.add(folded);
+                    }
+                }
+            }
+        }
+        set(foldSet);
+    }
+
+    /**
+     * Adds the full lower-, title- and uppercase mappings and the full case folding
+     * of every code point and string of this set to this set.
+     */
+    private void closeOverAddCaseMappings() {
+        UCaseProps csp = UCaseProps.INSTANCE;
+        // The result starts as a copy of the input, which guarantees inclusion.
+        UnicodeSet foldSet = new UnicodeSet(this);
+
+        // Visit each code point of each range of the (possibly reduced) input.
+        UnicodeSet codePoints = maybeOnlyCaseSensitive(this);
+        final int rangeCount = codePoints.getRangeCount();
+        // Scratch buffer that the mapping functions write full mappings into.
+        StringBuilder full = new StringBuilder();
+
+        for (int r = 0; r < rangeCount; ++r) {
+            int cp = codePoints.getRangeStart(r);
+            final int last = codePoints.getRangeEnd(r);
+
+            // add case mappings
+            // (does not add long s for regular s, or Kelvin for k, for example)
+            for (; cp <= last; ++cp) {
+                addCaseMapping(foldSet,
+                        csp.toFullLower(cp, null, full, UCaseProps.LOC_ROOT), full);
+                addCaseMapping(foldSet,
+                        csp.toFullTitle(cp, null, full, UCaseProps.LOC_ROOT), full);
+                addCaseMapping(foldSet,
+                        csp.toFullUpper(cp, null, full, UCaseProps.LOC_ROOT), full);
+                addCaseMapping(foldSet, csp.toFullFolding(cp, full, 0), full);
+            }
+        }
+
+        if (hasStrings()) {
+            ULocale locale = ULocale.ROOT;
+            BreakIterator wordBreaks = BreakIterator.getWordInstance(locale);
+            for (String s : strings) {
+                // TODO: call lower-level functions
+                foldSet.add(UCharacter.toLowerCase(locale, s));
+                foldSet.add(UCharacter.toTitleCase(locale, s, wordBreaks));
+                foldSet.add(UCharacter.toUpperCase(locale, s));
+                foldSet.add(UCharacter.foldCase(s, 0));
+            }
+        }
+        set(foldSet);
+    }
+
    /**
     * Internal class for customizing UnicodeSet parsing of properties.
     * TODO: extend to allow customizing of codepoint ranges
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
@ -20,6 +20,7 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeSet;
@ -32,6 +33,7 @@ import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.dev.util.CollectionUtilities;
 import com.ibm.icu.impl.SortedSetRelation;
 import com.ibm.icu.impl.Utility;
+import com.ibm.icu.lang.CharacterProperties;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
 import com.ibm.icu.lang.UProperty;
@ -1323,38 +1325,98 @@ public class UnicodeSetTest extends TestFmwk {
    @Test
    public void TestCloseOver() {
        String CASE = String.valueOf(UnicodeSet.CASE);
+        String CASE_MAPPINGS = String.valueOf(UnicodeSet.ADD_CASE_MAPPINGS);
+        String SIMPLE_CASE_INSENSITIVE = String.valueOf(UnicodeSet.SIMPLE_CASE_INSENSITIVE);
        String[] DATA = {
                // selector, input, output
                CASE,
                "[aq\u00DF{Bc}{bC}{Fi}]",
                "[aAqQ\u00DF\u1E9E\uFB01{ss}{bc}{fi}]", // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1

+                SIMPLE_CASE_INSENSITIVE,
+                "[aq\u00DF{Bc}{bC}{Fi}]",
+                "[aAqQ\u00DF\u1E9E{bc}{fi}]",
+
                CASE,
                "[\u01F1]", // 'DZ'
                "[\u01F1\u01F2\u01F3]",

+                SIMPLE_CASE_INSENSITIVE,
+                "[\u01F1]", // 'DZ'
+                "[\u01F1\u01F2\u01F3]",
+
                CASE,
                "[\u1FB4]",
                "[\u1FB4{\u03AC\u03B9}]",

+                SIMPLE_CASE_INSENSITIVE,
+                "[\u1FB4]",
+                "[\u1FB4]",
+
                CASE,
                "[{F\uFB01}]",
                "[\uFB03{ffi}]",

+                CASE, // make sure binary search finds limits
+                "[a\uFF3A]",
+                "[aA\uFF3A\uFF5A]",
+
                CASE,
                "[a-z]","[A-Za-z\u017F\u212A]",
+
+                SIMPLE_CASE_INSENSITIVE,
+                "[a-z]","[A-Za-z\u017F\u212A]",
+
                CASE,
                "[abc]","[A-Ca-c]",
                CASE,
                "[ABC]","[A-Ca-c]",
+
+                CASE, "[i]", "[iI]",
+
+                CASE, "[\u0130]",          "[\u0130{i\u0307}]", // dotted I
+                CASE, "[{i\u0307}]",       "[\u0130{i\u0307}]", // i with dot
+
+                CASE, "[\u0131]",          "[\u0131]", // dotless i
+
+                CASE, "[\u0390]",          "[\u0390\u1FD3{\u03B9\u0308\u0301}]",
+
+                CASE, "[\u03c2]",          "[\u03a3\u03c2\u03c3]", // sigmas
+
+                CASE, "[\u03f2]",          "[\u03f2\u03f9]", // lunate sigmas
+
+                CASE, "[\u03f7]",          "[\u03f7\u03f8]",
+
+                CASE, "[\u1fe3]",          "[\u03b0\u1fe3{\u03c5\u0308\u0301}]",
+
+                CASE, "[\ufb05]",          "[\ufb05\ufb06{st}]",
+                CASE, "[{st}]",             "[\ufb05\ufb06{st}]",
+
+                CASE, "[\\U0001044F]",      "[\\U00010427\\U0001044F]",
+
+                CASE, "[{a\u02BE}]",       "[\u1E9A{a\u02BE}]", // first in sorted table
+
+                CASE, "[{\u1f7c\u03b9}]", "[\u1ff2{\u1f7c\u03b9}]", // last in sorted table
+
+                CASE_MAPPINGS,
+                "[aq\u00DF{Bc}{bC}{Fi}]",
+                "[aAqQ\u00DF{ss}{Ss}{SS}{Bc}{BC}{bC}{bc}{FI}{Fi}{fi}]",
+
+                CASE_MAPPINGS,
+                "[\u01F1]", // 'DZ'
+                "[\u01F1\u01F2\u01F3]",
+
+                CASE_MAPPINGS,
+                "[a-z]",
+                "[A-Za-z]",
        };

        UnicodeSet s = new UnicodeSet();
        UnicodeSet t = new UnicodeSet();
        for (int i=0; i<DATA.length; i+=3) {
            int selector = Integer.parseInt(DATA[i]);
-            String pat = DATA[i+1];
-            String exp = DATA[i+2];
+            String pat = Utility.unescape(DATA[i+1]);
+            String exp = Utility.unescape(DATA[i+2]);
            s.applyPattern(pat);
            s.closeOver(selector);
            t.applyPattern(exp);
@ -1371,6 +1433,149 @@ public class UnicodeSetTest extends TestFmwk {
        expectContainment(s, "abcABC", "defDEF");
        s = new UnicodeSet("[^abc]", UnicodeSet.CASE);
        expectContainment(s, "defDEF", "abcABC");
+        s = new UnicodeSet("[abck]", UnicodeSet.ADD_CASE_MAPPINGS);
+        expectContainment(s, "abckABCK", "defDEF\u212A");
+    }
+
+    /** Records the mapping c->t in the multimap, creating the value collection for c on demand. */
+    private void add(Map<Integer, Collection<Integer>> closure, Integer c, Integer t) {
+        Collection<Integer> targets = closure.get(c);
+        if (targets != null) {
+            targets.add(t);
+            return;
+        }
+        targets = new TreeSet<>();
+        targets.add(t);
+        closure.put(c, targets);
+    }
+
+    /**
+     * Records the mapping c->t in additions unless closure already contains it.
+     * The additions map may be the same object as closure.
+     */
+    private void addIfAbsent(Map<Integer, Collection<Integer>> closure, Integer c, Integer t,
+            Map<Integer, Collection<Integer>> additions) {
+        Collection<Integer> known = closure.get(c);
+        if (known != null && known.contains(t)) {
+            return;  // c->t is already part of the closure
+        }
+        // When both maps are the same object, the collection just looked up is the target.
+        Collection<Integer> target = (additions == closure) ? known : additions.get(c);
+        if (target == null) {
+            target = new TreeSet<>();
+            additions.put(c, target);
+        }
+        target.add(t);
+    }
+
+    /**
+     * Checks closeOver(UnicodeSet.SIMPLE_CASE_INSENSITIVE) for every single Case_Sensitive
+     * code point against a closure that is computed here from first principles,
+     * using only UCharacter.foldCase(c, FOLD_CASE_DEFAULT).
+     * Other inputs, including strings, are covered by TestCloseOver().
+     */
+    @Test
+    public void TestCloseOverSimpleCaseFolding() {
+        UnicodeSet caseSensitive =
+                CharacterProperties.getBinaryPropertySet(UProperty.CASE_SENSITIVE);
+
+        // Step 1: seed a poor man's multimap (code point -> code points)
+        // with c->t and t->c for each scf(c)=t where t differs from c.
+        Map<Integer, Collection<Integer>> closure = new HashMap<>();
+        UnicodeSetIterator iter = new UnicodeSetIterator(caseSensitive);
+        while (iter.next()) {
+            int c = iter.codepoint;
+            int folded = UCharacter.foldCase(c, UCharacter.FOLD_CASE_DEFAULT);
+            if (folded == c) {
+                continue;
+            }
+            add(closure, c, folded);
+            add(closure, folded, c);
+        }
+
+        // Step 2: add mappings of mappings until a whole pass finds nothing new.
+        // New mappings are collected separately so that the closure map
+        // is not modified while it is being iterated.
+        Map<Integer, Collection<Integer>> pending = new HashMap<>();
+        do {
+            pending.clear();
+            // for each mapping c->t
+            for (Map.Entry<Integer, Collection<Integer>> entry : closure.entrySet()) {
+                Integer c = entry.getKey();
+                for (Integer t : entry.getValue()) {
+                    // enumerate each t->u
+                    Collection<Integer> viaT = closure.get(t);
+                    if (viaT == null) {
+                        continue;
+                    }
+                    for (Integer u : viaT) {
+                        if (u.equals(c)) {
+                            continue;
+                        }
+                        addIfAbsent(closure, c, u, pending);
+                        addIfAbsent(closure, u, c, pending);
+                    }
+                }
+            }
+            // Merge what this pass found back into the closure.
+            for (Map.Entry<Integer, Collection<Integer>> entry : pending.entrySet()) {
+                Integer c = entry.getKey();
+                Collection<Integer> merged = closure.get(c);
+                if (merged == null) {
+                    merged = new TreeSet<>();
+                    closure.put(c, merged);
+                }
+                merged.addAll(entry.getValue());
+            }
+        } while (!pending.isEmpty());
+
+        // Step 3: compare closeOver() with the reference closure, one code point at a time.
+        int errors = 0;
+        iter.reset();
+        UnicodeSet set = new UnicodeSet();
+        UnicodeSet expected = new UnicodeSet();
+        while (iter.next()) {
+            int c = iter.codepoint;
+            // closeOver()
+            set.clear();
+            set.add(c);
+            set.closeOver(UnicodeSet.SIMPLE_CASE_INSENSITIVE);
+            // From-first-principles implementation.
+            expected.clear();
+            expected.add(c);
+            Collection<Integer> values = closure.get(c);
+            if (values != null) {
+                for (Integer t : values) {
+                    expected.add(t);
+                }
+            }
+            // compare
+            if (checkEqual(expected, set, "closeOver() vs. test impl")) {
+                continue;
+            }
+            errln("    c=U+" + Utility.hex(c));
+            // Stop after reporting ten mismatching code points.
+            if (++errors == 10) {
+                break;
+            }
+        }
+    }
+
+    /**
+     * Checks that closeOver() yields the same result for a single code point whether
+     * the set is small or has been made large by adding the main Unihan block,
+     * that is, that an optimization for large sets does not change the result.
+     */
+    @Test
+    public void TestCloseOverLargeSets() {
+        // Most code points except ones that are boring for case mappings.
+        final UnicodeSet manyCp = new UnicodeSet("[^[:C:][:Ideographic:][:Hang:]]");
+        // Main Unihan block.
+        final int largeStart = 0x4E00;
+        final int largeEnd = 0x9FFF;
+
+        final int[] options = {
+            UnicodeSet.CASE_INSENSITIVE,
+            UnicodeSet.ADD_CASE_MAPPINGS,
+            UnicodeSet.SIMPLE_CASE_INSENSITIVE
+        };
+        UnicodeSet input = new UnicodeSet();
+        for (int option : options) {
+            for (UnicodeSetIterator iter = new UnicodeSetIterator(manyCp); iter.next();) {
+                int c = iter.codepoint;
+                input.clear();
+                input.add(c);
+
+                // Closure of just the one code point.
+                UnicodeSet small = (UnicodeSet) input.clone();
+                small.closeOver(option);
+
+                // Same input plus the large block, which is removed again after the closure.
+                UnicodeSet large = (UnicodeSet) input.clone();
+                large.add(largeStart, largeEnd);
+                large.closeOver(option);
+                large.remove(largeStart, largeEnd);
+
+                if (!checkEqual(small, large, "small != large")) {
+                    errln("    option=" + option + " c=U+" + Utility.hex(c));
+                    // Report only the first mismatch per option.
+                    break;
+                }
+            }
+        }
    }

    @Test
@ -1709,8 +1914,8 @@ public class UnicodeSetTest extends TestFmwk {
            test2.add("a" + (max - i)); // add in reverse order
        }
        assertNotEquals("compare iterable test", test1, test2);
-        TreeSet<CharSequence> sortedTest1 = new TreeSet<CharSequence>(test1);
-        TreeSet<CharSequence> sortedTest2 = new TreeSet<CharSequence>(test2);
+        TreeSet<CharSequence> sortedTest1 = new TreeSet<>(test1);
+        TreeSet<CharSequence> sortedTest2 = new TreeSet<>(test2);
        assertEquals("compare iterable test", sortedTest1, sortedTest2);
    }