ICU-9131 update trunk from branch, after fixes as per core review.

X-SVN-Rev: 36187
2025-04-07 22:44:49 +00:00 · 2014-08-18 12:58:44 +00:00 · 2014-08-18 12:58:44 +00:00 · f7c551d636
commit f7c551d636
parent b31ff49acf
9 changed files with 1408 additions and 453 deletions
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/BMPSet.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/BMPSet.java
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
- *   Copyright (C) 2009-2011, International Business Machines
+ *   Copyright (C) 2009-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@ -10,23 +10,25 @@
 package com.ibm.icu.impl;

 import com.ibm.icu.text.UnicodeSet.SpanCondition;
+import com.ibm.icu.util.OutputInt;

-/*
+/**
 * Helper class for frozen UnicodeSets, implements contains() and span() optimized for BMP code points.
 * 
- * Latin-1: Look up bytes. 2-byte characters: Bits organized vertically. 3-byte characters: Use zero/one/mixed data
- * per 64-block in U+0000..U+FFFF, with mixed for illegal ranges. Supplementary characters: Call contains() on the
- * parent set.
+ * Latin-1: Look up bytes.
+ * 2-byte characters: Bits organized vertically.
+ * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF, with mixed for illegal ranges.
+ * Supplementary characters: Call contains() on the parent set.
 */
 public final class BMPSet {
    public static int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000);

-    /*
+    /**
     * One boolean ('true' or 'false') per Latin-1 character.
     */
    private boolean[] latin1Contains;

-    /*
+    /**
     * One bit per code point from U+0000..U+07FF. The bits are organized vertically; consecutive code points
     * correspond to the same bit positions in consecutive table words. With code point parts lead=c{10..6}
     * trail=c{5..0} it is set.contains(c)==(table7FF[trail] bit lead)
@ -36,7 +38,7 @@ public final class BMPSet {
     */
    private int[] table7FF;

-    /*
+    /**
     * One bit per 64 BMP code points. The bits are organized vertically; consecutive 64-code point blocks
     * correspond to the same bit position in consecutive table words. With code point parts lead=c{15..12}
     * t1=c{11..6} test bits (lead+16) and lead in bmpBlockBits[t1]. If the upper bit is 0, then the lower bit
@ -48,14 +50,14 @@ public final class BMPSet {
     */
    private int[] bmpBlockBits;

-    /*
+    /**
     * Inversion list indexes for restricted binary searches in findCodePoint(), from findCodePoint(U+0800, U+1000,
     * U+2000, .., U+F000, U+10000). U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
     * always looked up in the bit tables. The last pair of indexes is for finding supplementary code points.
     */
    private int[] list4kStarts;

-    /*
+    /**
     * The inversion list of the parent set, for the slower contains() implementation for mixed BMP blocks and for
     * supplementary code points. The list is terminated with list[listLength-1]=0x110000.
     */
@ -120,22 +122,24 @@ public final class BMPSet {
        }
    }

-    /*
+    /**
     * Span the initial substring for which each character c has spanCondition==contains(c). It must be
     * spanCondition==0 or 1.
     * 
     * @param start The start index
-     * @param end   The end   index
-     * @return The length of the span.
+     * @param outCount If not null: Receives the number of code points in the span.
+     * @return the limit (exclusive end) of the span
     *
     * NOTE: to reduce the overhead of function call to contains(c), it is manually inlined here. Check for
     * sufficient length for trail unit for each surrogate pair. Handle single surrogates as surrogate code points
     * as usual in ICU.
     */
-    public final int span(CharSequence s, int start, int end, SpanCondition spanCondition) {
+    public final int span(CharSequence s, int start, SpanCondition spanCondition,
+            OutputInt outCount) {
        char c, c2;
        int i = start;
-        int limit = Math.min(s.length(), end);
+        int limit = s.length();
+        int numSupplementary = 0;
        if (SpanCondition.NOT_CONTAINED != spanCondition) {
            // span
            while (i < limit) {
@ -170,6 +174,7 @@ public final class BMPSet {
                    if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
                        break;
                    }
+                    ++numSupplementary;
                    ++i;
                }
                ++i;
@ -208,15 +213,20 @@ public final class BMPSet {
                    if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
                        break;
                    }
+                    ++numSupplementary;
                    ++i;
                }
                ++i;
            }
        }
-        return i - start;
+        if (outCount != null) {
+            int spanLength = i - start;
+            outCount.value = spanLength - numSupplementary;  // number of code points
+        }
+        return i;
    }

-    /*
+    /**
     * Symmetrical with span().
     * Span the trailing substring for which each character c has spanCondition==contains(c). It must be s.length >=
     * limit and spanCondition==0 or 1.
@ -226,7 +236,6 @@ public final class BMPSet {
    public final int spanBack(CharSequence s, int limit, SpanCondition spanCondition) {
        char c, c2;

-        limit = Math.min(s.length(), limit);
        if (SpanCondition.NOT_CONTAINED != spanCondition) {
            // span
            for (;;) {
@ -311,7 +320,7 @@ public final class BMPSet {
        return limit + 1;
    }

-    /*
+    /**
     * Set bits in a bit rectangle in "vertical" bit organization. start<limit<=0x800
     */
    private static void set32x64Bits(int[] table, int start, int limit) {
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
- *   Copyright (C) 2009-2012, International Business Machines
+ *   Copyright (C) 2009-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@ -13,6 +13,7 @@ import java.util.ArrayList;

 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSet.SpanCondition;
+import com.ibm.icu.util.OutputInt;

 /*
 * Implement span() etc. for a set with strings.
@ -22,54 +23,69 @@ import com.ibm.icu.text.UnicodeSet.SpanCondition;
 public class UnicodeSetStringSpan {

    /*
-     * Which span() variant will be used? The object is either built for one variant and used once, or built for all and
-     * may be used many times.
+     * Which span() variant will be used? The object is either built for one variant and used once,
+     * or built for all and may be used many times.
     */
+    public static final int WITH_COUNT    = 0x40;  // spanAndCount() may be called
    public static final int FWD           = 0x20;
    public static final int BACK          = 0x10;
-    public static final int UTF16         = 8;
+    // public static final int UTF16      = 8;
    public static final int CONTAINED     = 2;
    public static final int NOT_CONTAINED = 1;

-    public static final int ALL = 0x3f;
+    public static final int ALL = 0x7f;

-    public static final int FWD_UTF16_CONTAINED      = FWD  | UTF16 |     CONTAINED;
-    public static final int FWD_UTF16_NOT_CONTAINED  = FWD  | UTF16 | NOT_CONTAINED;
-    public static final int BACK_UTF16_CONTAINED     = BACK | UTF16 |     CONTAINED;
-    public static final int BACK_UTF16_NOT_CONTAINED = BACK | UTF16 | NOT_CONTAINED;
+    public static final int FWD_UTF16_CONTAINED      = FWD  | /* UTF16 | */    CONTAINED;
+    public static final int FWD_UTF16_NOT_CONTAINED  = FWD  | /* UTF16 | */NOT_CONTAINED;
+    public static final int BACK_UTF16_CONTAINED     = BACK | /* UTF16 | */    CONTAINED;
+    public static final int BACK_UTF16_NOT_CONTAINED = BACK | /* UTF16 | */NOT_CONTAINED;

-    // Special spanLength short values. (since Java has not unsigned byte type)
-    // All code points in the string are contained in the parent set.
+    /**
+     * Special spanLength short values (since Java has no unsigned byte type).
+     * All code points in the string are contained in the parent set.
+     */
    static final short ALL_CP_CONTAINED = 0xff;
-    // The spanLength is >=0xfe.
+    /** The spanLength is >=0xfe. */
    static final short LONG_SPAN = ALL_CP_CONTAINED - 1;

-    // Set for span(). Same as parent but without strings.
+    /** Set for span(). Same as parent but without strings. */
    private UnicodeSet spanSet;

-    // Set for span(not contained).
-    // Same as spanSet, plus characters that start or end strings.
+    /**
+     * Set for span(not contained).
+     * Same as spanSet, plus characters that start or end strings.
+     */
    private UnicodeSet spanNotSet;

-    // The strings of the parent set.
+    /** The strings of the parent set. */
    private ArrayList<String> strings;

-    // the lengths of span(), spanBack() etc. for each string.
+    /** The lengths of span(), spanBack() etc. for each string. */
    private short[] spanLengths;

-    // Maximum lengths of relevant strings.
+    /** Maximum lengths of relevant strings. */
    private int maxLength16;

-    // Set up for all variants of span()?
+    /** Are there strings that are not fully contained in the code point set? */
+    private boolean someRelevant;
+
+    /** Set up for all variants of span()? */
    private boolean all;

-    // Span helper
+    /** Span helper */
    private OffsetList offsets;

-    // Construct for all variants of span(), or only for any one variant.
-    // Initialize as little as possible, for single use.
+    /**
+     * Constructs for all variants of span(), or only for any one variant.
+     * Initializes as little as possible, for single use.
+     */
    public UnicodeSetStringSpan(final UnicodeSet set, final ArrayList<String> setStrings, int which) {
        spanSet = new UnicodeSet(0, 0x10ffff);
+        // TODO: With Java 6, just take the parent set's strings as is,
+        // as a NavigableSet<String>, rather than as an ArrayList copy of the set of strings.
+        // Then iterate via the first() and higher() methods.
+        // (We do not want to create multiple Iterator objects in each span().)
+        // See ICU ticket #7454.
        strings = setStrings;
        all = (which == ALL);
        spanSet.retainAll(set);
@ -90,7 +106,7 @@ public class UnicodeSetStringSpan {
        int stringsLength = strings.size();

        int i, spanLength;
-        boolean someRelevant = false;
+        someRelevant = false;
        for (i = 0; i < stringsLength; ++i) {
            String string = strings.get(i);
            int length16 = string.length();
@ -98,12 +114,11 @@ public class UnicodeSetStringSpan {
            if (spanLength < length16) { // Relevant string.
                someRelevant = true;
            }
-            if ((0 != (which & UTF16)) && length16 > maxLength16) {
+            if (/* (0 != (which & UTF16)) && */ length16 > maxLength16) {
                maxLength16 = length16;
            }
        }
-        if (!someRelevant) {
-            maxLength16 = 0;
+        if (!someRelevant && (which & WITH_COUNT) == 0) {
            return;
        }

@ -140,7 +155,7 @@ public class UnicodeSetStringSpan {
            int length16 = string.length();
            spanLength = spanSet.span(string, SpanCondition.CONTAINED);
            if (spanLength < length16) { // Relevant string.
-                if (0 != (which & UTF16)) {
+                if (true /* 0 != (which & UTF16) */) {
                    if (0 != (which & CONTAINED)) {
                        if (0 != (which & FWD)) {
                            spanLengths[i] = makeSpanLengthByte(spanLength);
@ -188,10 +203,12 @@ public class UnicodeSetStringSpan {
     * Constructs a copy of an existing UnicodeSetStringSpan.
     * Assumes which==ALL for a frozen set.
     */
-    public UnicodeSetStringSpan(final UnicodeSetStringSpan otherStringSpan, final ArrayList<String> newParentSetStrings) {
+    public UnicodeSetStringSpan(final UnicodeSetStringSpan otherStringSpan,
+            final ArrayList<String> newParentSetStrings) {
        spanSet = otherStringSpan.spanSet;
        strings = newParentSetStrings;
        maxLength16 = otherStringSpan.maxLength16;
+        someRelevant = otherStringSpan.someRelevant;
        all = true;
        if (otherStringSpan.spanNotSet == otherStringSpan.spanSet) {
            spanNotSet = spanSet;
@ -203,22 +220,25 @@ public class UnicodeSetStringSpan {
        spanLengths = otherStringSpan.spanLengths.clone();
    }

-    /*
+    /**
     * Do the strings need to be checked in span() etc.?
     * 
-     * @return TRUE if strings need to be checked (call span() here), FALSE if not (use a BMPSet for best performance).
+     * @return true if strings need to be checked (call span() here),
+     *         false if not (use a BMPSet for best performance).
     */
    public boolean needsStringSpanUTF16() {
-        return (maxLength16 != 0);
+        return someRelevant;
    }

-    // For fast UnicodeSet::contains(c).
+    /** For fast UnicodeSet::contains(c). */
    public boolean contains(int c) {
        return spanSet.contains(c);
    }

-    // Add a starting or ending string character to the spanNotSet
-    // so that a character span ends before any string.
+    /**
+     * Adds a starting or ending string character to the spanNotSet
+     * so that a character span ends before any string.
+     */
    private void addToSpanNotSet(int c) {
        if (spanNotSet == null || spanNotSet == spanSet) {
            if (spanSet.contains(c)) {
@ -230,12 +250,14 @@ public class UnicodeSetStringSpan {
    }

    /*
-     * Note: In span() when spanLength==0 (after a string match, or at the beginning after an empty code point span) and
-     * in spanNot() and spanNotUTF8(), string matching could use a binary search because all string matches are done
+     * Note: In span() when spanLength==0
+     * (after a string match, or at the beginning after an empty code point span)
+     * and in spanNot() and spanNotUTF8(),
+     * string matching could use a binary search because all string matches are done
     * from the same start index.
-     * 
+     *
     * For UTF-8, this would require a comparison function that returns UTF-16 order.
-     * 
+     *
     * This optimization should not be necessary for normal UnicodeSets because most sets have no strings, and most sets
     * with strings have very few very short strings. For cases with many strings, it might be better to use a different
     * API and implementation with a DFA (state machine).
@ -244,84 +266,119 @@ public class UnicodeSetStringSpan {
    /*
     * Algorithm for span(SpanCondition.CONTAINED)
     * 
-     * Theoretical algorithm: - Iterate through the string, and at each code point boundary: + If the code point there
-     * is in the set, then remember to continue after it. + If a set string matches at the current position, then
-     * remember to continue after it. + Either recursively span for each code point or string match, or recursively span
-     * for all but the shortest one and iteratively continue the span with the shortest local match. + Remember the
-     * longest recursive span (the farthest end point). + If there is no match at the current position, neither for the
-     * code point there nor for any set string, then stop and return the longest recursive span length.
-     * 
+     * Theoretical algorithm:
+     * - Iterate through the string, and at each code point boundary:
+     *   + If the code point there is in the set, then remember to continue after it.
+     *   + If a set string matches at the current position, then remember to continue after it.
+     *   + Either recursively span for each code point or string match, or recursively span
+     *     for all but the shortest one and iteratively continue the span with the shortest local match.
+     *   + Remember the longest recursive span (the farthest end point).
+     *   + If there is no match at the current position,
+     *     neither for the code point there nor for any set string,
+     *     then stop and return the longest recursive span length.
+     *
     * Optimized implementation:
-     * 
-     * (We assume that most sets will have very few very short strings. A span using a string-less set is extremely
-     * fast.)
-     * 
-     * Create and cache a spanSet which contains all of the single code points of the original set but none of its
-     * strings.
-     * 
-     * - Start with spanLength=spanSet.span(SpanCondition.CONTAINED). - Loop: + Try to match each set
-     * string at the end of the spanLength. ~ Set strings that start with set-contained code points must be matched with
-     * a partial overlap because the recursive algorithm would have tried to match them at every position. ~ Set strings
-     * that entirely consist of set-contained code points are irrelevant for span(SpanCondition.CONTAINED)
-     * because the recursive algorithm would continue after them anyway and find the longest recursive match from their
-     * end. ~ Rather than recursing, note each end point of a set string match. + If no set string matched after
-     * spanSet.span(), then return with where the spanSet.span() ended. + If at least one set string matched after
-     * spanSet.span(), then pop the shortest string match end point and continue the loop, trying to match all set
-     * strings from there. + If at least one more set string matched after a previous string match, then test if the
-     * code point after the previous string match is also contained in the set. Continue the loop with the shortest end
-     * point of either this code point or a matching set string. + If no more set string matched after a previous string
-     * match, then try another spanLength=spanSet.span(SpanCondition.CONTAINED). Stop if spanLength==0,
-     * otherwise continue the loop.
-     * 
+     *
+     * (We assume that most sets will have very few very short strings.
+     * A span using a string-less set is extremely fast.)
+     *
+     * Create and cache a spanSet which contains all of the single code points of the original set
+     * but none of its strings.
+     *
+     * - Start with spanLength=spanSet.span(SpanCondition.CONTAINED).
+     * - Loop:
+     *   + Try to match each set string at the end of the spanLength.
+     *     ~ Set strings that start with set-contained code points
+     *       must be matched with a partial overlap
+     *       because the recursive algorithm would have tried to match them at every position.
+     *     ~ Set strings that entirely consist of set-contained code points
+     *       are irrelevant for span(SpanCondition.CONTAINED)
+     *       because the recursive algorithm would continue after them anyway and
+     *       find the longest recursive match from their end.
+     *     ~ Rather than recursing, note each end point of a set string match.
+     *   + If no set string matched after spanSet.span(),
+     *     then return with where the spanSet.span() ended.
+     *   + If at least one set string matched after spanSet.span(),
+     *     then pop the shortest string match end point and continue the loop,
+     *     trying to match all set strings from there.
+     *   + If at least one more set string matched after a previous string match, then test if the
+     *     code point after the previous string match is also contained in the set.
+     *     Continue the loop with the shortest end point of
+     *     either this code point or a matching set string.
+     *   + If no more set string matched after a previous string match,
+     *     then try another spanLength=spanSet.span(SpanCondition.CONTAINED).
+     *     Stop if spanLength==0, otherwise continue the loop.
+     *
     * By noting each end point of a set string match, the function visits each string position at most once and
     * finishes in linear time.
-     * 
-     * The recursive algorithm may visit the same string position many times if multiple paths lead to it and finishes
-     * in exponential time.
+     *
+     * The recursive algorithm may visit the same string position many times
+     * if multiple paths lead to it and finishes in exponential time.
     */

    /*
     * Algorithm for span(SIMPLE)
     * 
-     * Theoretical algorithm: - Iterate through the string, and at each code point boundary: + If the code point there
-     * is in the set, then remember to continue after it. + If a set string matches at the current position, then
-     * remember to continue after it. + Continue from the farthest match position and ignore all others. + If there is
-     * no match at the current position, then stop and return the current position.
-     * 
+     * Theoretical algorithm:
+     * - Iterate through the string, and at each code point boundary:
+     *   + If the code point there is in the set, then remember to continue after it.
+     *   + If a set string matches at the current position, then remember to continue after it.
+     *   + Continue from the farthest match position and ignore all others.
+     *   + If there is no match at the current position, then stop and return the current position.
+     *
     * Optimized implementation:
-     * 
+     *
     * (Same assumption and spanSet as above.)
-     * 
-     * - Start with spanLength=spanSet.span(SpanCondition.CONTAINED). - Loop: + Try to match each set
-     * string at the end of the spanLength. ~ Set strings that start with set-contained code points must be matched with
-     * a partial overlap because the standard algorithm would have tried to match them earlier. ~ Set strings that
-     * entirely consist of set-contained code points must be matched with a full overlap because the longest-match
-     * algorithm would hide set string matches that end earlier. Such set strings need not be matched earlier inside the
-     * code point span because the standard algorithm would then have continued after the set string match anyway. ~
-     * Remember the longest set string match (farthest end point) from the earliest starting point. + If no set string
-     * matched after spanSet.span(), then return with where the spanSet.span() ended. + If at least one set string
-     * matched, then continue the loop after the longest match from the earliest position. + If no more set string
-     * matched after a previous string match, then try another
-     * spanLength=spanSet.span(SpanCondition.CONTAINED). Stop if spanLength==0, otherwise continue the
-     * loop.
+     *
+     * - Start with spanLength=spanSet.span(SpanCondition.CONTAINED).
+     * - Loop:
+     *   + Try to match each set string at the end of the spanLength.
+     *     ~ Set strings that start with set-contained code points
+     *       must be matched with a partial overlap
+     *       because the standard algorithm would have tried to match them earlier.
+     *     ~ Set strings that entirely consist of set-contained code points
+     *       must be matched with a full overlap because the longest-match algorithm
+     *       would hide set string matches that end earlier.
+     *       Such set strings need not be matched earlier inside the code point span
+     *       because the standard algorithm would then have
+     *       continued after the set string match anyway.
+     *     ~ Remember the longest set string match (farthest end point)
+     *       from the earliest starting point.
+     *   + If no set string matched after spanSet.span(),
+     *     then return with where the spanSet.span() ended.
+     *   + If at least one set string matched,
+     *     then continue the loop after the longest match from the earliest position.
+     *   + If no more set string matched after a previous string match,
+     *     then try another spanLength=spanSet.span(SpanCondition.CONTAINED).
+     *     Stop if spanLength==0, otherwise continue the loop.
     */
    /**
-     * Span a string.
+     * Spans a string.
     * 
     * @param s The string to be spanned
     * @param start The start index that the span begins
     * @param spanCondition The span condition
-     * @return the length of the span
+     * @return the limit (exclusive end) of the span
     */
-    public synchronized int span(CharSequence s, int start, int length, SpanCondition spanCondition) {
+    public int span(CharSequence s, int start, SpanCondition spanCondition) {
        if (spanCondition == SpanCondition.NOT_CONTAINED) {
-            return spanNot(s, start, length);
+            return spanNot(s, start, null);
        }
-        int spanLength = spanSet.span(s.subSequence(start, start + length), SpanCondition.CONTAINED);
-        if (spanLength == length) {
-            return length;
+        int spanLimit = spanSet.span(s, start, SpanCondition.CONTAINED);
+        if (spanLimit == s.length()) {
+            return spanLimit;
        }
+        return spanWithStrings(s, start, spanLimit, spanCondition);
+    }

+    /**
+     * Synchronized method for complicated spans using the offsets.
+     * Avoids synchronization for simple cases.
+     *
+     * @param spanLimit = spanSet.span(s, start, CONTAINED)
+     */
+    private synchronized int spanWithStrings(CharSequence s, int start, int spanLimit,
+            SpanCondition spanCondition) {
        // Consider strings; they may overlap with the span.
        int initSize = 0;
        if (spanCondition == SpanCondition.CONTAINED) {
@ -329,7 +386,9 @@ public class UnicodeSetStringSpan {
            initSize = maxLength16;
        }
        offsets.setMaxLength(initSize);
-        int pos = start + spanLength, rest = length - spanLength;
+        int length = s.length();
+        int pos = spanLimit, rest = length - spanLimit;
+        int spanLength = spanLimit - start;
        int i, stringsLength = strings.size();
        for (;;) {
            if (spanCondition == SpanCondition.CONTAINED) {
@ -429,7 +488,7 @@ public class UnicodeSetStringSpan {
                // Otherwise, an unlimited code point span is only tried again when no
                // strings match, and if such a non-initial span fails we stop.
                if (offsets.isEmpty()) {
-                    return pos - start; // No strings matched after a span.
+                    return pos; // No strings matched after a span.
                }
                // Match strings from after the next string match.
            } else {
@ -437,11 +496,12 @@ public class UnicodeSetStringSpan {
                if (offsets.isEmpty()) {
                    // No more strings matched after a previous string match.
                    // Try another code point span from after the last string match.
-                    spanLength = spanSet.span(s.subSequence(pos, pos + rest), SpanCondition.CONTAINED);
+                    spanLimit = spanSet.span(s, pos, SpanCondition.CONTAINED);
+                    spanLength = spanLimit - pos;
                    if (spanLength == rest || // Reached the end of the string, or
                            spanLength == 0 // neither strings nor span progressed.
                    ) {
-                        return pos + spanLength - start;
+                        return spanLimit;
                    }
                    pos += spanLength;
                    rest -= spanLength;
@ -467,13 +527,110 @@ public class UnicodeSetStringSpan {
                    // Match strings from after the next string match.
                }
            }
-            int minOffset = offsets.popMinimum();
+            int minOffset = offsets.popMinimum(null);
            pos += minOffset;
            rest -= minOffset;
            spanLength = 0; // Match strings from after a string match.
        }
    }

+    /**
+     * Spans a string and counts the smallest number of set elements on any path across the span.
+     *
+     * <p>For proper counting, we cannot ignore strings that are fully contained in code point spans.
+     *
+     * <p>If the set does not have any fully-contained strings, then we could optimize this
+     * like span(), but such sets are likely rare, and this is at least still linear.
+     *
+     * @param s The string to be spanned
+     * @param start The start index that the span begins
+     * @param spanCondition The span condition
+     * @param outCount Receives the count of set elements; written unconditionally for CONTAINED and SIMPLE
+     * @return the limit (exclusive end) of the span
+     */
+    public int spanAndCount(CharSequence s, int start, SpanCondition spanCondition,
+            OutputInt outCount) {
+        if (spanCondition == SpanCondition.NOT_CONTAINED) {
+            return spanNot(s, start, outCount);
+        }
+        // Consider strings; they may overlap with the span,
+        // and they may result in a smaller count than with just code points.
+        if (spanCondition == SpanCondition.CONTAINED) {
+            return spanContainedAndCount(s, start, outCount);
+        }
+        // SIMPLE (not synchronized, does not use offsets)
+        int stringsLength = strings.size();
+        int length = s.length();
+        int pos = start;
+        int rest = length - start;
+        int count = 0;
+        while (rest != 0) {
+            // Try to match the next code point.
+            int cpLength = spanOne(spanSet, s, pos, rest);
+            int maxInc = (cpLength > 0) ? cpLength : 0;
+            // Try to match all of the strings.
+            for (int i = 0; i < stringsLength; ++i) {
+                String string = strings.get(i);
+                int length16 = string.length();
+                if (maxInc < length16 && length16 <= rest &&
+                        matches16CPB(s, pos, length, string, length16)) {
+                    maxInc = length16;
+                }
+            }
+            // We are done if there is no match beyond pos.
+            if (maxInc == 0) {
+                outCount.value = count;
+                return pos;
+            }
+            // Continue from the longest match.
+            ++count;
+            pos += maxInc;
+            rest -= maxInc;
+        }
+        outCount.value = count;
+        return pos;
+    }
+
+    private synchronized int spanContainedAndCount(CharSequence s, int start, OutputInt outCount) {
+        // Use offset list to try all possibilities.
+        offsets.setMaxLength(maxLength16);
+        int stringsLength = strings.size();
+        int length = s.length();
+        int pos = start;
+        int rest = length - start;
+        int count = 0;
+        while (rest != 0) {
+            // Try to match the next code point.
+            int cpLength = spanOne(spanSet, s, pos, rest);
+            if (cpLength > 0) {
+                offsets.addOffsetAndCount(cpLength, count + 1);
+            }
+            // Try to match all of the strings.
+            for (int i = 0; i < stringsLength; ++i) {
+                String string = strings.get(i);
+                int length16 = string.length();
+                // Note: If the strings were sorted by length, then we could also
+                // avoid trying to match if there is already a match of the same length.
+                if (length16 <= rest && !offsets.hasCountAtOffset(length16, count + 1) &&
+                        matches16CPB(s, pos, length, string, length16)) {
+                    offsets.addOffsetAndCount(length16, count + 1);
+                }
+            }
+            // We are done if there is no match beyond pos.
+            if (offsets.isEmpty()) {
+                outCount.value = count;
+                return pos;
+            }
+            // Continue from the nearest match.
+            int minOffset = offsets.popMinimum(outCount);
+            count = outCount.value;
+            pos += minOffset;
+            rest -= minOffset;
+        }
+        outCount.value = count;
+        return pos;
+    }
+
    /**
     * Span a string backwards.
     * 
@ -638,59 +795,72 @@ public class UnicodeSetStringSpan {
                    // Match strings from before the next string match.
                }
            }
-            pos -= offsets.popMinimum();
+            pos -= offsets.popMinimum(null);
            spanLength = 0; // Match strings from before a string match.
        }
    }

-    /*
+    /**
     * Algorithm for spanNot()==span(SpanCondition.NOT_CONTAINED)
     * 
-     * Theoretical algorithm: - Iterate through the string, and at each code point boundary: + If the code point there
-     * is in the set, then return with the current position. + If a set string matches at the current position, then
-     * return with the current position.
-     * 
+     * Theoretical algorithm:
+     * - Iterate through the string, and at each code point boundary:
+     *   + If the code point there is in the set, then return with the current position.
+     *   + If a set string matches at the current position, then return with the current position.
+     *
     * Optimized implementation:
-     * 
+     *
     * (Same assumption as for span() above.)
-     * 
-     * Create and cache a spanNotSet which contains all of the single code points of the original set but none of its
-     * strings. For each set string add its initial code point to the spanNotSet. (Also add its final code point for
-     * spanNotBack().)
-     * 
+     *
+     * Create and cache a spanNotSet which contains
+     * all of the single code points of the original set but none of its strings.
+     * For each set string add its initial code point to the spanNotSet.
+     * (Also add its final code point for spanNotBack().)
+     *
     * - Loop:
     *   + Do spanLength=spanNotSet.span(SpanCondition.NOT_CONTAINED).
     *   + If the current code point is in the original set, then return the current position.
     *   + If any set string matches at the current position, then return the current position.
     *   + If there is no match at the current position, neither for the code point
-     * there nor for any set string, then skip this code point and continue the loop. This happens for
-     * set-string-initial code points that were added to spanNotSet when there is not actually a match for such a set
-     * string.
+     *     there nor for any set string, then skip this code point and continue the loop.
+     *     This happens for set-string-initial code points that were added to spanNotSet
+     *     when there is not actually a match for such a set string.
     *
-     * @return the length of the span
+     * @param s The string to be spanned
+     * @param start The start index at which the span begins
+     * @param outCount If not null: Receives the number of code points across the span.
+     * @return the limit (exclusive end) of the span
     */
-    private int spanNot(CharSequence s, int start, int length) {
-        int pos = start, rest = length;
-        int i, stringsLength = strings.size();
+    private int spanNot(CharSequence s, int start, OutputInt outCount) {
+        int length = s.length();
+        int pos = start, rest = length - start;
+        int stringsLength = strings.size();
+        int count = 0;
        do {
            // Span until we find a code point from the set,
            // or a code point that starts or ends some string.
-            i = spanNotSet.span(s.subSequence(pos, pos + rest), SpanCondition.NOT_CONTAINED);
-            if (i == rest) {
+            int spanLimit;
+            if (outCount == null) {
+                spanLimit = spanNotSet.span(s, pos, SpanCondition.NOT_CONTAINED);
+            } else {
+                spanLimit = spanNotSet.spanAndCount(s, pos, SpanCondition.NOT_CONTAINED, outCount);
+                outCount.value = count = count + outCount.value;
+            }
+            if (spanLimit == length) {
                return length; // Reached the end of the string.
            }
-            pos += i;
-            rest -= i;
+            pos = spanLimit;
+            rest = length - spanLimit;

            // Check whether the current code point is in the original set,
            // without the string starts and ends.
            int cpLength = spanOne(spanSet, s, pos, rest);
            if (cpLength > 0) {
-                return pos - start; // There is a set element at pos.
+                return pos; // There is a set element at pos.
            }

            // Try to match the strings at pos.
-            for (i = 0; i < stringsLength; ++i) {
+            for (int i = 0; i < stringsLength; ++i) {
                if (spanLengths[i] == ALL_CP_CONTAINED) {
                    continue; // Irrelevant string.
                }
@ -698,7 +868,7 @@ public class UnicodeSetStringSpan {

                int length16 = string.length();
                if (length16 <= rest && matches16CPB(s, pos, length, string, length16)) {
-                    return pos - start; // There is a set element at pos.
+                    return pos; // There is a set element at pos.
                }
            }

@ -707,7 +877,11 @@ public class UnicodeSetStringSpan {
            // cpLength<0
            pos -= cpLength;
            rest += cpLength;
+            ++count;
        } while (rest != 0);
+        if (outCount != null) {
+            outCount.value = count;
+        }
        return length; // Reached the end of the string.
    }

@ -773,20 +947,24 @@ public class UnicodeSetStringSpan {
     * Compare 16-bit Unicode strings (which may be malformed UTF-16)
     * at code point boundaries.
     * That is, each edge of a match must not be in the middle of a surrogate pair.
+     * @param s       The string to match in.
     * @param start   The start index of s.
-     * @param slength The length of s from start.
+     * @param limit   The limit of the subsequence of s being spanned.
+     * @param t       The substring to be matched in s.
     * @param tlength The length of t.
     */
-    static boolean matches16CPB(CharSequence s, int start, int slength, final String t, int tlength) {
-        return !(0 < start && com.ibm.icu.text.UTF16.isLeadSurrogate (s.charAt(start - 1)) &&
-                              com.ibm.icu.text.UTF16.isTrailSurrogate(s.charAt(start + 0)))
-                && !(tlength < slength && com.ibm.icu.text.UTF16.isLeadSurrogate (s.charAt(start + tlength - 1)) &&
-                                       com.ibm.icu.text.UTF16.isTrailSurrogate(s.charAt(start + tlength)))
-                && matches16(s, start, t, tlength);
+    static boolean matches16CPB(CharSequence s, int start, int limit, final String t, int tlength) {
+        return matches16(s, start, t, tlength)
+                && !(0 < start && Character.isHighSurrogate(s.charAt(start - 1)) &&
+                        Character.isLowSurrogate(s.charAt(start)))
+                && !((start + tlength) < limit && Character.isHighSurrogate(s.charAt(start + tlength - 1)) &&
+                        Character.isLowSurrogate(s.charAt(start + tlength)));
    }

-    // Does the set contain the next code point?
-    // If so, return its length; otherwise return its negative length.
+    /**
+     * Does the set contain the next code point?
+     * If so, return its length; otherwise return its negative length.
+     */
    static int spanOne(final UnicodeSet set, CharSequence s, int start, int length) {
        char c = s.charAt(start);
        if (c >= 0xd800 && c <= 0xdbff && length >= 2) {
@ -811,47 +989,57 @@ public class UnicodeSetStringSpan {
        return set.contains(c) ? 1 : -1;
    }

-
-    /*
+    /**
     * Helper class for UnicodeSetStringSpan.
     *
-     * List of offsets from the current position from where to try matching a code point or a string. Store offsets rather
-     * than indexes to simplify the code and use the same list for both increments (in span()) and decrements (in
-     * spanBack()).
-     * 
-     * Assumption: The maximum offset is limited, and the offsets that are stored at any one time are relatively dense, that
-     * is, there are normally no gaps of hundreds or thousands of offset values.
-     * 
-     * The implementation uses a circular buffer of byte flags, each indicating whether the corresponding offset is in the
-     * list. This avoids inserting into a sorted list of offsets (or absolute indexes) and physically moving part of the
-     * list.
-     * 
-     * Note: In principle, the caller should setMaxLength() to the maximum of the max string length and U16_LENGTH/U8_LENGTH
+     * <p>List of offsets from the current position from where to try matching
+     * a code point or a string.
+     * Stores offsets rather than indexes to simplify the code and use the same list
+     * for both increments (in span()) and decrements (in spanBack()).
+     *
+     * <p>Assumption: The maximum offset is limited, and the offsets that are stored at any one time
+     * are relatively dense, that is,
+     * there are normally no gaps of hundreds or thousands of offset values.
+     *
+     * <p>This class optionally also tracks the minimum non-negative count for each position,
+     * intended to count the smallest number of elements of any path leading to that position.
+     *
+     * <p>The implementation uses a circular buffer of count integers,
+     * each indicating whether the corresponding offset is in the list,
+     * and its path element count.
+     * This avoids inserting into a sorted list of offsets (or absolute indexes)
+     * and physically moving part of the list.
+     *
+     * <p>Note: In principle, the caller should setMaxLength() to
+     * the maximum of the max string length and U16_LENGTH/U8_LENGTH
     * to account for "long" single code points.
-     * 
-     * Note: If maxLength were guaranteed to be no more than 32 or 64, the list could be stored as bit flags in a single
-     * integer. Rather than handling a circular buffer with a start list index, the integer would simply be shifted when
-     * lower offsets are removed. UnicodeSet does not have a limit on the lengths of strings.
+     *
+     * <p>Note: An earlier version did not track counts and stored only byte flags.
+     * With boolean flags, if maxLength were guaranteed to be no more than 32 or 64,
+     * the list could be stored as bit flags in a single integer.
+     * Rather than handling a circular buffer with a start list index,
+     * the integer would simply be shifted when lower offsets are removed.
+     * UnicodeSet does not have a limit on the lengths of strings.
     */
-    static class OffsetList {
-        private boolean[] list;
+    private static final class OffsetList {
+        private int[] list;
        private int length;
        private int start;

        public OffsetList() {
-            list = new boolean[16];  // default size
+            list = new int[16];  // default size
        }

        public void setMaxLength(int maxLength) {
            if (maxLength > list.length) {
-                list = new boolean[maxLength];
+                list = new int[maxLength];
            }
            clear();
        }

        public void clear() {
            for (int i = list.length; i-- > 0;) {
-                list[i] = false;
+                list[i] = 0;
            }
            start = length = 0;
        }
@ -860,55 +1048,97 @@ public class UnicodeSetStringSpan {
            return (length == 0);
        }

-        // Reduce all stored offsets by delta, used when the current position
-        // moves by delta.
-        // There must not be any offsets lower than delta.
-        // If there is an offset equal to delta, it is removed.
-        // delta=[1..maxLength]
+        /**
+         * Reduces all stored offsets by delta, used when the current position moves by delta.
+         * There must not be any offsets lower than delta.
+         * If there is an offset equal to delta, it is removed.
+         *
+         * @param delta [1..maxLength]
+         */
        public void shift(int delta) {
            int i = start + delta;
            if (i >= list.length) {
                i -= list.length;
            }
-            if (list[i]) {
-                list[i] = false;
+            if (list[i] != 0) {
+                list[i] = 0;
                --length;
            }
            start = i;
        }

-        // Add an offset. The list must not contain it yet.
-        // offset=[1..maxLength]
+        /**
+         * Adds an offset. The list must not contain it yet.
+         * @param offset [1..maxLength]
+         */
        public void addOffset(int offset) {
            int i = start + offset;
            if (i >= list.length) {
                i -= list.length;
            }
-            list[i] = true;
+            assert list[i] == 0;
+            list[i] = 1;
            ++length;
        }

-        // offset=[1..maxLength]
+        /**
+         * Adds an offset and updates its count.
+         * The list may already contain the offset.
+         * @param offset [1..maxLength]
+         */
+        public void addOffsetAndCount(int offset, int count) {
+            assert count > 0;
+            int i = start + offset;
+            if (i >= list.length) {
+                i -= list.length;
+            }
+            if (list[i] == 0) {
+                list[i] = count;
+                ++length;
+            } else if (count < list[i]) {
+                list[i] = count;
+            }
+        }
+
+        /**
+         * @param offset [1..maxLength]
+         */
        public boolean containsOffset(int offset) {
            int i = start + offset;
            if (i >= list.length) {
                i -= list.length;
            }
-            return list[i];
+            return list[i] != 0;
        }

-        // Find the lowest stored offset from a non-empty list, remove it,
-        // and reduce all other offsets by this minimum.
-        // Returns [1..maxLength].
-        public int popMinimum() {
+        /**
+         * @param offset [1..maxLength]
+         */
+        public boolean hasCountAtOffset(int offset, int count) {
+            int i = start + offset;
+            if (i >= list.length) {
+                i -= list.length;
+            }
+            int oldCount = list[i];
+            return oldCount != 0 && oldCount <= count;
+        }
+
+        /**
+         * Finds the lowest stored offset from a non-empty list, removes it,
+         * and reduces all other offsets by this minimum.
+         * @return min=[1..maxLength]
+         */
+        public int popMinimum(OutputInt outCount) {
            // Look for the next offset in list[start+1..list.length-1].
            int i = start, result;
            while (++i < list.length) {
-                if (list[i]) {
-                    list[i] = false;
+                int count = list[i];
+                if (count != 0) {
+                    list[i] = 0;
                    --length;
                    result = i - start;
                    start = i;
+                    if (outCount != null) { outCount.value = count; }
                    return result;
                }
            }
@ -918,12 +1148,14 @@ public class UnicodeSetStringSpan {
            // Since the list is not empty, there will be one.
            result = list.length - start;
            i = 0;
-            while (!list[i]) {
+            int count;
+            while ((count = list[i]) == 0) {
                ++i;
            }
-            list[i] = false;
+            list[i] = 0;
            --length;
            start = i;
+            if (outCount != null) { outCount.value = count; }
            return result + i;
        }
    }
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UTF16.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UTF16.java
@ -1,6 +1,6 @@
 /**
 *******************************************************************************
- * Copyright (C) 1996-2012, International Business Machines Corporation and
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
@ -2612,6 +2612,61 @@ public final class UTF16 {
        }
    }

+    /**
+     * Utility for getting a code point from a CharSequence that contains exactly one code point.
+     * A sequence of length 1 is returned as its single char value, even if that char is
+     * an unpaired surrogate. A sequence of length 2 qualifies only if it forms a supplementary code point.
+     * @param s to test
+     * @return a code point IF the string is non-null and consists of a single code point.
+     * otherwise returns -1.
+     */
+    public static int getSingleCodePoint(CharSequence s) {
+        if (s == null || s.length() == 0) {
+            return -1;
+        } else if (s.length() == 1) {
+            return s.charAt(0);
+        } else if (s.length() > 2) {
+            return -1;
+        }
+
+        // at this point, len = 2
+        int cp = UTF16.charAt(s, 0); 
+        if (cp > 0xFFFF) { // is surrogate pair
+            return cp;
+        }
+        // Two chars that do not form a supplementary code point: more than one code point.
+        return -1;
+    }
+
+    /**
+     * Utility for comparing a code point to a string without having to create a new string. Returns the same results
+     * as a code point comparison of UTF16.valueOf(codePoint) and s.toString(). More specifically, if
+     * <pre>
+     * sc = new StringComparator(true,false,0);
+     * fast = UTF16.compareCodePoint(codePoint, charSequence)
+     * slower = sc.compare(UTF16.valueOf(codePoint), charSequence == null ? "" : charSequence.toString())
+     * </pre>
+     * then
+     * <pre>
+     * Integer.signum(fast) == Integer.signum(slower)
+     * </pre>
+     * @param codePoint to test
+     * @param s to test; null is handled the same way as the empty string
+     * @return equivalent of code point comparator comparing two strings:
+     *         1 if s is null or empty; otherwise the difference between codePoint and the first
+     *         code point of s if they differ; otherwise 0 if s has no further content, else -1.
+     */
+    public static int compareCodePoint(int codePoint, CharSequence s) {
+        if (s == null) {
+            return 1;
+        }
+        final int strLen = s.length();
+        if (strLen == 0) {
+            return 1;
+        }
+        int second = Character.codePointAt(s, 0);
+        int diff = codePoint - second;
+        if (diff != 0) {
+            return diff;
+        }
+        // First code points are equal: s compares equal only if it contains nothing else.
+        return strLen == Character.charCount(codePoint) ? 0 : -1;
+    }
+
    // private data members -------------------------------------------------

    /**
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
@ -29,6 +29,7 @@ import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
 import com.ibm.icu.util.Freezable;
+import com.ibm.icu.util.OutputInt;
 import com.ibm.icu.util.ULocale;
 import com.ibm.icu.util.VersionInfo;

@ -265,11 +266,20 @@ import com.ibm.icu.util.VersionInfo;
 *     </tr>
 *   </table>
 * </blockquote>
- * <p>To iterate over contents of UnicodeSet, use UnicodeSetIterator class.
+ * <p>To iterate over contents of UnicodeSet, the following are available:
+ * <ul><li>{@link #ranges()} to iterate through the ranges</li>
+ * <li>{@link #strings()} to iterate through the strings</li>
+ * <li>{@link #iterator()} to iterate through the entire contents in a single loop.
+ * That method is, however, not particularly efficient, since it "boxes" each code point into a String.</li>
+ * </ul>
+ * All of the above can be used in <b>for</b> loops.
+ * The {@link com.ibm.icu.text.UnicodeSetIterator UnicodeSetIterator} can also be used, but not in <b>for</b> loops.
+ * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
 *
 * @author Alan Liu
 * @stable ICU 2.0
 * @see UnicodeSetIterator
+ * @see UnicodeSetSpanner
 */
 public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Comparable<UnicodeSet>, Freezable<UnicodeSet> {

@ -283,7 +293,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @stable ICU 4.8
     */
    public static final UnicodeSet ALL_CODE_POINTS = new UnicodeSet(0, 0x10FFFF).freeze();
-    
+
    private static XSymbolTable XSYMBOL_TABLE = null; // for overriding the the function processing

    private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
@ -338,7 +348,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     */
    private static UnicodeSet INCLUSIONS[] = null;

-    private BMPSet bmpSet; // The set is frozen iff either bmpSet or stringSpan is not null.
+    private BMPSet bmpSet; // The set is frozen if bmpSet or stringSpan is not null.
    private UnicodeSetStringSpan stringSpan;
    //----------------------------------------------------------------
    // Public API
@ -492,6 +502,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @stable ICU 2.0
     */
    public Object clone() {
+        if (isFrozen()) {
+            return this;
+        }
        UnicodeSet result = new UnicodeSet(this);
        result.bmpSet = this.bmpSet;
        result.stringSpan = this.stringSpan;
@ -588,27 +601,30 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
    /**
     * Append the <code>toPattern()</code> representation of a
     * string to the given <code>StringBuffer</code>.
+     * @return the given <code>StringBuffer</code>, for chaining
     */
-    private static void _appendToPat(StringBuffer buf, String s, boolean escapeUnprintable) {
+    private static StringBuffer _appendToPat(StringBuffer buf, String s, boolean escapeUnprintable) {
        int cp;
        for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
            cp = s.codePointAt(i);
            _appendToPat(buf, cp, escapeUnprintable);
        }
+        return buf;
    }

    /**
     * Append the <code>toPattern()</code> representation of a
     * character to the given <code>StringBuffer</code>.
+     * @return the given <code>StringBuffer</code>, for chaining
     */
-    private static void _appendToPat(StringBuffer buf, int c, boolean escapeUnprintable) {
+    private static StringBuffer _appendToPat(StringBuffer buf, int c, boolean escapeUnprintable) {
        // "Utility.isUnprintable(c)" seems redundant since the the call
        //      "Utility.escapeUnprintable(buf, c)" does it again inside the if statement
        if (escapeUnprintable && Utility.isUnprintable(c)) {
            // Use hex escape notation (<backslash>uxxxx or <backslash>Uxxxxxxxx) for anything
            // unprintable
            if (Utility.escapeUnprintable(buf, c)) {
-                return;
+                return buf;
            }
        }
        // Okay to let ':' pass through
@ -633,6 +649,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
            break;
        }
        UTF16.append(buf, c);
+        return buf;
    }

    /**
@ -1279,9 +1296,11 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
    }

    /**
+     * Utility for getting code point from single code point CharSequence.
+     * See the public UTF16.getSingleCodePoint()
     * @return a code point IF the string consists of a single one.
     * otherwise returns -1.
-     * @param string to test
+     * @param s to test
     */
    private static int getSingleCP(CharSequence s) {
        if (s.length() < 1) {
@ -1322,7 +1341,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @return this object, for chaining
     * @stable ICU 2.0
     */
-    public final UnicodeSet retainAll(String s) {
+    public final UnicodeSet retainAll(CharSequence s) {
        return retainAll(fromAll(s));
    }

@ -1333,7 +1352,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @return this object, for chaining
     * @stable ICU 2.0
     */
-    public final UnicodeSet complementAll(String s) {
+    public final UnicodeSet complementAll(CharSequence s) {
        return complementAll(fromAll(s));
    }

@ -1344,7 +1363,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @return this object, for chaining
     * @stable ICU 2.0
     */
-    public final UnicodeSet removeAll(String s) {
+    public final UnicodeSet removeAll(CharSequence s) {
        return removeAll(fromAll(s));
    }

@ -1369,7 +1388,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @return a newly created set containing the given string
     * @stable ICU 2.0
     */
-    public static UnicodeSet from(String s) {
+    public static UnicodeSet from(CharSequence s) {
        return new UnicodeSet().add(s);
    }

@ -1380,7 +1399,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @return a newly created set containing the given characters
     * @stable ICU 2.0
     */
-    public static UnicodeSet fromAll(String s) {
+    public static UnicodeSet fromAll(CharSequence s) {
        return new UnicodeSet().addAll(s);
    }

@ -1428,13 +1447,15 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * Retain the specified string in this set if it is present.
     * Upon return this set will be empty if it did not contain s, or
     * will only contain s if it did contain s.
-     * @param s the string to be retained
+     * @param cs the string to be retained
     * @return this object, for chaining
     * @stable ICU 2.0
     */
-    public final UnicodeSet retain(String s) {
-        int cp = getSingleCP(s); 
+    public final UnicodeSet retain(CharSequence cs) {
+
+        int cp = getSingleCP(cs); 
        if (cp < 0) {
+            String s = cs.toString();
            boolean isIn = strings.contains(s);
            if (isIn && size() == 1) {
                return this;
@ -1494,7 +1515,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @return this object, for chaining
     * @stable ICU 2.0
     */
-    public final UnicodeSet remove(String s) {
+    public final UnicodeSet remove(CharSequence s) {
        int cp = getSingleCP(s);
        if (cp < 0) {
            strings.remove(s);
@ -1571,14 +1592,14 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @return this object, for chaining
     * @stable ICU 2.0
     */
-    public final UnicodeSet complement(String s) {
+    public final UnicodeSet complement(CharSequence s) {
        checkFrozen();
        int cp = getSingleCP(s);
        if (cp < 0) {
            if (strings.contains(s)) {
                strings.remove(s);
            } else {
-                strings.add(s);
+                strings.add(s.toString());
            }
            pat = null;
        } else {
@ -1804,11 +1825,11 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @return <tt>true</tt> if this set contains the specified string
     * @stable ICU 2.0
     */
-    public final boolean contains(String s) {
+    public final boolean contains(CharSequence s) {

        int cp = getSingleCP(s);
        if (cp < 0) {
-            return strings.contains(s);
+            return strings.contains(s.toString());
        } else {
            return contains(cp);
        }
@ -2072,7 +2093,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @return true if the test condition is met
     * @stable ICU 2.0
     */
-    public boolean containsNone(String s) {
+    public boolean containsNone(CharSequence s) {
        return span(s, SpanCondition.NOT_CONTAINED) == s.length();
    }

@ -2106,7 +2127,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @return true if the condition is met
     * @stable ICU 2.0
     */
-    public final boolean containsSome(String s) {
+    public final boolean containsSome(CharSequence s) {
        return !containsNone(s);
    }

@ -2344,7 +2365,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa

        StringBuffer rebuiltPat = new StringBuffer();
        RuleCharacterIterator chars =
-            new RuleCharacterIterator(pattern, symbols, pos);
+                new RuleCharacterIterator(pattern, symbols, pos);
        applyPattern(chars, symbols, rebuiltPat, options);
        if (chars.inVariable()) {
            syntaxError(chars, "Extra chars in variable value");
@ -2388,7 +2409,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
        // Recognized special forms for chars, sets: c-c s-s s&s

        int opts = RuleCharacterIterator.PARSE_VARIABLES |
-        RuleCharacterIterator.PARSE_ESCAPES;
+                RuleCharacterIterator.PARSE_ESCAPES;
        if ((options & IGNORE_SPACE) != 0) {
            opts |= RuleCharacterIterator.SKIP_WHITESPACE;
        }
@ -2740,7 +2761,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
    private static void syntaxError(RuleCharacterIterator chars, String msg) {
        throw new IllegalArgumentException("Error: " + msg + " at \"" +
                Utility.escape(chars.toString()) +
-        '"');
+                '"');
    }

    /**
@ -2771,23 +2792,24 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
    }

    /**
-     * Add the contents of the collection (as strings) into this UnicodeSet.
+     * Add the contents of the collection (as strings) into this UnicodeSet. 
+     * The collection must not contain null.
     * @param source the collection to add
     * @return a reference to this object
     * @stable ICU 4.4
     */
-    public UnicodeSet add(Collection<?> source) {
+    public UnicodeSet add(Iterable<?> source) {
        return addAll(source);
    }

    /**
-     * Add the contents of the UnicodeSet (as strings) into a collection.
+     * Add a collection (as strings) into this UnicodeSet.
     * Uses standard naming convention.
     * @param source collection to add into
     * @return a reference to this object
     * @stable ICU 4.4
     */
-    public UnicodeSet addAll(Collection<?> source) {
+    public UnicodeSet addAll(Iterable<?> source) {
        checkFrozen();
        for (Object o : source) {
            add(o.toString());
@ -3104,7 +3126,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
            // Reference comparison ok; VersionInfo caches and reuses
            // unique objects.
            return v != NO_VERSION &&
-            v.compareTo(version) <= 0;
+                    v.compareTo(version) <= 0;
        }
    }

@ -3297,7 +3319,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
    public UnicodeSet applyPropertyAlias(String propertyAlias, String valueAlias) {
        return applyPropertyAlias(propertyAlias, valueAlias, null);
    }
-    
+
    /**
     * Modifies this set to contain those code points which have the
     * given value for the given property.  Prior contents of this
@ -3321,7 +3343,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                && ((XSymbolTable)symbols).applyPropertyAlias(propertyAlias, valueAlias, this)) {
            return this;
        }
-        
+
        if (XSYMBOL_TABLE != null) {
            if (XSYMBOL_TABLE.applyPropertyAlias(propertyAlias, valueAlias, this)) {
                return this;
@ -3476,8 +3498,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa

        // Look for an opening [:, [:^, \p, or \P
        return pattern.regionMatches(pos, "[:", 0, 2) ||
-        pattern.regionMatches(true, pos, "\\p", 0, 2) ||
-        pattern.regionMatches(pos, "\\N", 0, 2);
+                pattern.regionMatches(true, pos, "\\p", 0, 2) ||
+                pattern.regionMatches(pos, "\\N", 0, 2);
    }

    /**
@ -3879,17 +3901,14 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
            // Optimize contains() and span() and similar functions.
            if (!strings.isEmpty()) {
                stringSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), UnicodeSetStringSpan.ALL);
-                if (!stringSpan.needsStringSpanUTF16()) {
-                    // All strings are irrelevant for span() etc. because
-                    // all of each string's code points are contained in this set.
-                    // Do not check needsStringSpanUTF8() because UTF-8 has at most as
-                    // many relevant strings as UTF-16.
-                    // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().)
-                    stringSpan = null;
-                }
            }
-            if (stringSpan == null) {
-                // No span-relevant strings: Optimize for code point spans.
+            if (stringSpan == null || !stringSpan.needsStringSpanUTF16()) {
+                // Optimize for code point spans.
+                // There are no strings, or
+                // all strings are irrelevant for span() etc. because
+                // all of each string's code points are contained in this set.
+                // However, fully contained strings are relevant for spanAndCount(),
+                // so we create both objects.
                bmpSet = new BMPSet(list, len);
            }
        }
@ -3898,7 +3917,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa

    /**
     * Span a string using this UnicodeSet.
-     * 
+     * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
     * @param s The string to be spanned
     * @param spanCondition The span condition
     * @return the length of the span
@ -3912,7 +3931,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * Span a string using this UnicodeSet.
     *   If the start index is less than 0, span will start from 0.
     *   If the start index is greater than the string length, span returns the string length.
-     * 
+     * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
     * @param s The string to be spanned
     * @param start The start index that the span begins
     * @param spanCondition The span condition
@ -3927,52 +3946,97 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
            return end;
        }
        if (bmpSet != null) {
-            return start + bmpSet.span(s, start, end, spanCondition);
+            // Frozen set without strings, or no string is relevant for span().
+            return bmpSet.span(s, start, spanCondition, null);
        }
-        int len = end - start;
        if (stringSpan != null) {
-            return start + stringSpan.span(s, start, len, spanCondition);
+            return stringSpan.span(s, start, spanCondition);
        } else if (!strings.isEmpty()) {
            int which = spanCondition == SpanCondition.NOT_CONTAINED ? UnicodeSetStringSpan.FWD_UTF16_NOT_CONTAINED
                    : UnicodeSetStringSpan.FWD_UTF16_CONTAINED;
            UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), which);
            if (strSpan.needsStringSpanUTF16()) {
-                return start + strSpan.span(s, start, len, spanCondition);
+                return strSpan.span(s, start, spanCondition);
            }
        }

+        return spanCodePointsAndCount(s, start, spanCondition, null);
+    }
+
+    /**
+     * Same as span() but also counts the smallest number of set elements on any path across the span.
+     * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
+     * @param outCount An output-only object (must not be null) for returning the count.
+     * @return the limit (exclusive end) of the span
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    @Deprecated
+    public int spanAndCount(CharSequence s, int start, SpanCondition spanCondition, OutputInt outCount) {
+        if (outCount == null) {
+            throw new IllegalArgumentException("outCount must not be null");
+        }
+        int end = s.length();
+        if (start < 0) {
+            start = 0;
+        } else if (start >= end) {
+            return end;
+        }
+        if (stringSpan != null) {
+            // We might also have bmpSet != null,
+            // but fully-contained strings are relevant for counting elements.
+            return stringSpan.spanAndCount(s, start, spanCondition, outCount);
+        } else if (bmpSet != null) {
+            return bmpSet.span(s, start, spanCondition, outCount);
+        } else if (!strings.isEmpty()) {
+            int which = spanCondition == SpanCondition.NOT_CONTAINED ? UnicodeSetStringSpan.FWD_UTF16_NOT_CONTAINED
+                    : UnicodeSetStringSpan.FWD_UTF16_CONTAINED;
+            which |= UnicodeSetStringSpan.WITH_COUNT;
+            UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), which);
+            return strSpan.spanAndCount(s, start, spanCondition, outCount);
+        }
+
+        return spanCodePointsAndCount(s, start, spanCondition, outCount);
+    }
+
+    private int spanCodePointsAndCount(CharSequence s, int start,
+            SpanCondition spanCondition, OutputInt outCount) {
        // Pin to 0/1 values.
        boolean spanContained = (spanCondition != SpanCondition.NOT_CONTAINED);

        int c;
        int next = start;
+        int length = s.length();
+        int count = 0;
        do {
            c = Character.codePointAt(s, next);
            if (spanContained != contains(c)) {
                break;
            }
-            next = Character.offsetByCodePoints(s, next, 1);
-        } while (next < end);
+            ++count;
+            next += Character.charCount(c);
+        } while (next < length);
+        if (outCount != null) { outCount.value = count; }
        return next;
    }

    /**
     * Span a string backwards (from the end) using this UnicodeSet.
-     * 
+     * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
     * @param s The string to be spanned
     * @param spanCondition The span condition
     * @return The string index which starts the span (i.e. inclusive).
     * @stable ICU 4.4
     */
    public int spanBack(CharSequence s, SpanCondition spanCondition) {
-      return spanBack(s, s.length(), spanCondition);
+        return spanBack(s, s.length(), spanCondition);
    }

    /**
     * Span a string backwards (from the fromIndex) using this UnicodeSet.
     * If the fromIndex is less than 0, spanBack will return 0.
     * If fromIndex is greater than the string length, spanBack will start from the string length.
-     * 
+     * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
     * @param s The string to be spanned
     * @param fromIndex The index of the char (exclusive) that the string should be spanned backwards
     * @param spanCondition The span condition
@ -3987,6 +4051,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
            fromIndex = s.length();
        }
        if (bmpSet != null) {
+            // Frozen set without strings, or no string is relevant for spanBack().
            return bmpSet.spanBack(s, fromIndex, spanCondition);
        }
        if (stringSpan != null) {
@ -3994,7 +4059,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
        } else if (!strings.isEmpty()) {
            int which = (spanCondition == SpanCondition.NOT_CONTAINED)
                    ? UnicodeSetStringSpan.BACK_UTF16_NOT_CONTAINED
-                    : UnicodeSetStringSpan.BACK_UTF16_CONTAINED;
+                            : UnicodeSetStringSpan.BACK_UTF16_CONTAINED;
            UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), which);
            if (strSpan.needsStringSpanUTF16()) {
                return strSpan.spanBack(s, fromIndex, spanCondition);
@ -4011,20 +4076,19 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
            if (spanContained != contains(c)) {
                break;
            }
-            prev = Character.offsetByCodePoints(s, prev, -1);
+            prev -= Character.charCount(c);
        } while (prev > 0);
        return prev;
    }

    /**
     * Clone a thawed version of this class, according to the Freezable interface.
-     * @return this
+     * @return the clone, not frozen
     * @stable ICU 4.4
     */
    public UnicodeSet cloneAsThawed() {
-        UnicodeSet result = (UnicodeSet) clone();
-        result.bmpSet = null;
-        result.stringSpan = null;
+        UnicodeSet result = new UnicodeSet(this);
+        assert !result.isFrozen();
        return result;
    }

@ -4039,6 +4103,80 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
    // Additional methods for integration with Generics and Collections
    // ************************

+    /**
+     * A struct-like class used for iteration through ranges, for faster iteration than by String.
+     * Read about the restrictions on usage in {@link UnicodeSet#ranges()}.
+     */
+    public static class EntryRange {
+        /**
+         * The starting code point of the range.
+         */
+        public int codepoint;
+        /**
+         * The ending code point of the range
+         */
+        public int codepointEnd;
+        
+        @Override
+        public String toString() {
+            StringBuffer b = new StringBuffer();
+            return ( 
+                    codepoint == codepointEnd ? _appendToPat(b, codepoint, false)
+                            : _appendToPat(_appendToPat(b, codepoint, false).append('-'), codepointEnd, false))
+                            .toString();
+        }
+    }
+
+    /**
+     * Provides for faster iteration than by String. Returns an Iterable over the code point ranges. The UnicodeSet
+     * must not be altered during the iteration. The EntryRange is the same each time; the contents are just reset.
+     * <br><b>Warning: </b>To iterate over the full contents, you have to also iterate over the strings.
+     * 
+     * <pre>
+     * // Sample code
+     * for (EntryRange range : us1.ranges()) {
+     *     // do something with code points between range.codepoint and range.codepointEnd;
+     * }
+     * for (String s : us1.strings()) {
+     *     // do something with each string;
+     * }
+     * </pre>
+     */
+    public Iterable<EntryRange> ranges() {
+        return new EntryRanges();
+    }
+
+    private class EntryRanges implements Iterable<EntryRange>, Iterator<EntryRange> {
+        int pos;
+        EntryRange result = new EntryRange();
+        // Iterator<String> stringIterator = strings == null ? null : strings.iterator();
+
+        public Iterator<EntryRange> iterator() {
+            return this;
+        }
+        public boolean hasNext() {
+            return pos < len-1 
+                    // || (stringIterator != null && stringIterator.hasNext())
+                    ;
+        }
+        public EntryRange next() {
+            if (pos < len-1) {
+                result.codepoint = list[pos++];
+                result.codepointEnd = list[pos++]-1;
+//                result.string = null;
+            } else {
+                throw new ArrayIndexOutOfBoundsException(pos);
+//                result.codepoint = -1;
+//                result.string = stringIterator.next();
+            }
+            return result;
+        }
+        public void remove() {
+            throw new UnsupportedOperationException();
+        }
+    }
+
+
    /**
     * Returns a string iterator. Uses the same order of iteration as {@link UnicodeSetIterator}.
     * @see java.util.Set#iterator()
@ -4129,8 +4267,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @see #containsAll(com.ibm.icu.text.UnicodeSet)
     * @stable ICU 4.4
     */
-    public boolean containsAll(Collection<String> collection) {
-        for (String o : collection) {
+    public <T extends CharSequence> boolean containsAll(Iterable<T> collection) {
+        for (T o : collection) {
            if (!contains(o)) {
                return false;
            }
@ -4142,8 +4280,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @see #containsNone(com.ibm.icu.text.UnicodeSet)
     * @stable ICU 4.4
     */
-    public boolean containsNone(Collection<String> collection) {
-        for (String o : collection) {
+    public <T extends CharSequence> boolean containsNone(Iterable<T> collection) {
+        for (T o : collection) {
            if (contains(o)) {
                return false;
            }
@ -4155,7 +4293,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @see #containsAll(com.ibm.icu.text.UnicodeSet)
     * @stable ICU 4.4
     */
-    public final boolean containsSome(Collection<String> collection) {
+    public final <T extends CharSequence> boolean containsSome(Iterable<T> collection) {
        return !containsNone(collection);
    }

@ -4163,9 +4301,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @see #addAll(com.ibm.icu.text.UnicodeSet)
     * @stable ICU 4.4
     */
-    public UnicodeSet addAll(String... collection) {
+    public <T extends CharSequence> UnicodeSet addAll(T... collection) {
        checkFrozen();
-        for (String str : collection) {
+        for (T str : collection) {
            add(str);
        }
        return this;
@ -4176,9 +4314,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @see #removeAll(com.ibm.icu.text.UnicodeSet)
     * @stable ICU 4.4
     */
-    public UnicodeSet removeAll(Collection<String> collection) {
+    public <T extends CharSequence> UnicodeSet removeAll(Iterable<T> collection) {
        checkFrozen();
-        for (String o : collection) {
+        for (T o : collection) {
            remove(o);
        }
        return this;
@ -4188,7 +4326,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @see #retainAll(com.ibm.icu.text.UnicodeSet)
     * @stable ICU 4.4
     */
-    public UnicodeSet retainAll(Collection<String> collection) {
+    public <T extends CharSequence> UnicodeSet retainAll(Iterable<T> collection) {
        checkFrozen();
        // TODO optimize
        UnicodeSet toRetain = new UnicodeSet();
@ -4277,7 +4415,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @stable ICU 4.4
     */

-    public static int compare(String string, int codePoint) {
+    public static int compare(CharSequence string, int codePoint) {
        return CharSequences.compare(string, codePoint);
    }

@ -4288,7 +4426,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * Note that this (=String) order is UTF-16 order -- *not* code point order.
     * @stable ICU 4.4
     */
-    public static int compare(int codePoint, String string) {
+    public static int compare(int codePoint, CharSequence string) {
        return -CharSequences.compare(string, codePoint);
    }

@ -4304,7 +4442,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
    public static <T extends Comparable<T>> int compare(Iterable<T> collection1, Iterable<T> collection2) {
        return compare(collection1.iterator(), collection2.iterator());
    }
-    
+
    /**
     * Utility to compare two iterators. Warning: the ordering in iterables is important. For Collections that are ordered,
     * like Lists, that is expected. However, Sets in Java violate Leibniz's law when it comes to iteration.
@ -4378,7 +4516,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * </pre>
     * @stable ICU 4.4
     */
-    public Iterable<String> strings() {
+    public Collection<String> strings() {
        return Collections.unmodifiableSortedSet(strings);
    }

@ -4417,7 +4555,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * If findNot is true, then reverse the sense of the match: find the first place where the UnicodeSet doesn't match.
     * If there is no match, length is returned.
     * @internal
-     * @deprecated This API is ICU internal only.
+     * @deprecated This API is ICU internal only. Use span instead.
     */
    @Deprecated
    public int findIn(CharSequence value, int fromIndex, boolean findNot) {
@ -4438,7 +4576,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * If there is no match, -1 is returned.
     * BEFORE index is not in the UnicodeSet.
     * @internal
-     * @deprecated This API is ICU internal only.
+     * @deprecated This API is ICU internal only. Use spanBack instead.
     */
    @Deprecated
    public int findLastIn(CharSequence value, int fromIndex, boolean findNot) {
@ -4460,7 +4598,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     * @param matches A boolean to either strip all that matches or don't match with the current UnicodeSet object.
     * @return The string after it has been stripped.
     * @internal
-     * @deprecated This API is ICU internal only.
+     * @deprecated This API is ICU internal only. Use replaceFrom.
     */
    @Deprecated
    public String stripFrom(CharSequence source, boolean matches) {
@ -4593,6 +4731,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
     */
    @Deprecated
    public static void setDefaultXSymbolTable(XSymbolTable xSymbolTable) {
+        INCLUSIONS = null; // If the properties override inclusions, these have to be regenerated. 
        XSYMBOL_TABLE = xSymbolTable;
    }
 }
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSetSpanner.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSetSpanner.java
@ -0,0 +1,333 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+import com.ibm.icu.text.UnicodeSet.SpanCondition;
+import com.ibm.icu.util.OutputInt;
+
+/**
+ * A helper class used to count, replace, and trim CharSequences based on UnicodeSet matches.
+ * An instance is immutable (and thus thread-safe) iff the source UnicodeSet is frozen.
+ */
+public class UnicodeSetSpanner {
+
+    private final UnicodeSet unicodeSet;
+
+    /**
+     * Create a spanner from a UnicodeSet. For speed and safety, the UnicodeSet should be frozen. However, this class
+     * can be used with a non-frozen version to avoid the cost of freezing.
+     * 
+     * @param source
+     *            the original UnicodeSet
+     */
+    public UnicodeSetSpanner(UnicodeSet source) {
+        unicodeSet = source;
+    }
+
+    /**
+     * Returns the UnicodeSet used for processing. It is frozen iff the original was.
+     * 
+     * @return the construction set.
+     */
+    public UnicodeSet getUnicodeSet() {
+        return unicodeSet;
+    }
+
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see java.lang.Object#equals(java.lang.Object)
+     */
+    @Override
+    public boolean equals(Object other) {
+        return other instanceof UnicodeSetSpanner && unicodeSet.equals(((UnicodeSetSpanner) other).unicodeSet);
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see java.lang.Object#hashCode()
+     */
+    @Override
+    public int hashCode() {
+        return unicodeSet.hashCode();
+    }
+
+    /**
+     * Options for replaceFrom and countIn to control how to treat each matched span. The name is from "quantifier" as used in regex,
+     * since it is similar to whether one is replacing [abc] by x, or [abc]* by x.
+     * 
+     */
+    public enum Quantifier {
+        /**
+         * Collapse spans. That is, modify/count the entire matching span as a single item, instead of separate
+         * code points.
+         * 
+         */
+        SPAN,
+        /**
+         * Use the smallest number of elements in the spanned range for counting and modification. In other words, the "longest matches" are
+         * used where possible. If there are no strings, this will be the same as code points.
+         * <p>For example, in the string "abab":
+         * <ul>
+         * <li>spanning with [ab] will count four MIN_ELEMENTS.</li>
+         * <li>spanning with [{ab}] will count two MIN_ELEMENTS.</li>
+         * <li>spanning with [ab{ab}] will also count two MIN_ELEMENTS.</li>
+         * </ul>
+         */
+        MIN_ELEMENTS,
+        // Note: could in the future have an additional option MAX_ELEMENTS
+    }
+
+    /**
+     * Returns the number of matching characters found in a character sequence, counting by Quantifier.MIN_ELEMENTS using SpanCondition.CONTAINED.
+     * 
+     * @param sequence
+     *            the sequence to count characters in
+     * @return the count. Zero if there are none.
+     */
+    public int countIn(CharSequence sequence) {
+        return countIn(sequence, Quantifier.MIN_ELEMENTS, SpanCondition.CONTAINED);
+    }
+
+    /**
+     * Returns the number of matching characters found in a character sequence, using SpanCondition.CONTAINED
+     * 
+     * @param sequence
+     *            the sequence to count characters in
+     * @return the count. Zero if there are none.
+     */
+    public int countIn(CharSequence sequence, Quantifier quantifier) {
+        return countIn(sequence, quantifier, SpanCondition.CONTAINED);
+    }
+
+    /**
+     * Returns the number of matching characters found in a character sequence.
+     * 
+     * @param sequence
+     *            the sequence to count characters in
+     * @param quantifier
+     *            (optional) whether to treat the entire span as a match, or individual code points
+     * @param countSpan
+     *            (optional) the spanCondition to use. CONTAINED means only count the code points in the CONTAINED span;
+     *            NOT_CONTAINED is the reverse.
+     * @return the count. Zero if there are none.
+     */
+    public int countIn(CharSequence sequence, Quantifier quantifier, SpanCondition countSpan) {
+        int count = 0;
+        int start = 0;
+        SpanCondition skipSpan = countSpan == SpanCondition.CONTAINED ? SpanCondition.NOT_CONTAINED
+                : SpanCondition.CONTAINED;
+        final int length = sequence.length();
+        OutputInt spanCount = new OutputInt();
+        while (start != length) {
+            int endNotContained = unicodeSet.span(sequence, start, skipSpan);
+            if (endNotContained == length) {
+                break;
+            }
+            start = unicodeSet.spanAndCount(sequence, endNotContained, countSpan, spanCount);
+            count += quantifier == Quantifier.SPAN ? 1 : spanCount.value;
+        }
+        return count;
+    }
+
+    /**
+     * Delete all the matching spans in sequence, using SpanCondition.CONTAINED
+     * 
+     * @param sequence
+     *            charsequence to replace matching spans in.
+     * @return modified string.
+     */
+    public String deleteFrom(CharSequence sequence) {
+        return replaceFrom(sequence, "", Quantifier.SPAN, SpanCondition.CONTAINED);
+    }
+
+    /**
+     * Delete all spans in sequence that are selected by modifySpan.
+     * 
+     * @param sequence
+     *            charsequence to replace matching spans in.
+     * @param modifySpan
+     *            specify whether to modify the matching spans (CONTAINED) or the non-matching (NOT_CONTAINED)
+     * @return modified string.
+     */
+    public String deleteFrom(CharSequence sequence, SpanCondition modifySpan) {
+        return replaceFrom(sequence, "", Quantifier.SPAN, modifySpan);
+    }
+
+    /**
+     * Replace all matching spans in sequence by the replacement,
+     * counting by Quantifier.MIN_ELEMENTS using SpanCondition.CONTAINED.
+     * 
+     * @param sequence
+     *            charsequence to replace matching spans in.
+     * @param replacement
+     *            replacement sequence. To delete, use ""
+     * @return modified string.
+     */
+    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
+        return replaceFrom(sequence, replacement, Quantifier.MIN_ELEMENTS, SpanCondition.CONTAINED);
+    }
+
+    /**
+     * Replace all matching spans in sequence by replacement, according to the Quantifier, using SpanCondition.CONTAINED. 
+     * 
+     * @param sequence
+     *            charsequence to replace matching spans in.
+     * @param replacement
+     *            replacement sequence. To delete, use ""
+     * @param quantifier
+     *            whether to treat the entire span as a match, or individual code points
+     * @return modified string.
+     */
+    public String replaceFrom(CharSequence sequence, CharSequence replacement, Quantifier quantifier) {
+        return replaceFrom(sequence, replacement, quantifier, SpanCondition.CONTAINED);
+    }
+
+    /**
+     * Replace all matching spans in sequence by replacement, according to the operations quantifier and modifySpan.
+     * 
+     * @param sequence
+     *            charsequence to replace matching spans in.
+     * @param replacement
+     *            replacement sequence. To delete, use ""
+     * @param modifySpan
+     *            (optional) specify whether to modify the matching spans (CONTAINED) or the non-matching
+     *            (NOT_CONTAINED)
+     * @param quantifier
+     *            (optional) specify whether to collapse or do codepoint by codepoint.
+     * @return modified string.
+     */
+    public String replaceFrom(CharSequence sequence, CharSequence replacement, Quantifier quantifier,
+            SpanCondition modifySpan) {
+        SpanCondition copySpan = modifySpan == SpanCondition.CONTAINED ? SpanCondition.NOT_CONTAINED
+                : SpanCondition.CONTAINED;
+        final boolean remove = replacement.length() == 0;
+        StringBuilder result = new StringBuilder();
+        // TODO, we can optimize this to
+        // avoid this allocation unless needed
+
+        final int length = sequence.length();
+        OutputInt spanCount = new OutputInt();
+        for (int endCopy = 0; endCopy != length;) {
+            int endModify = unicodeSet.spanAndCount(sequence, endCopy, modifySpan, spanCount);
+            if (remove || endModify == 0) {
+                // do nothing
+            } else if (quantifier == Quantifier.SPAN) {
+                result.append(replacement);
+            } else {
+                for (int i = spanCount.value; i > 0; --i) {
+                    result.append(replacement);
+                }
+            }
+            if (endModify == length) {
+                break;
+            }
+            endCopy = unicodeSet.span(sequence, endModify, copySpan);
+            result.append(sequence.subSequence(endModify, endCopy));
+        }
+        return result.toString();
+    }
+
+    /**
+     * Options for the trim() method
+     * 
+     */
+    public enum TrimOption {
+        /**
+         * Trim leading spans (subject to INVERT).
+         * 
+         */
+        LEADING,
+        /**
+         * Trim leading and trailing spans (subject to INVERT).
+         * 
+         */
+        BOTH,
+        /**
+         * Trim trailing spans (subject to INVERT).
+         * 
+         */
+        TRAILING;
+    }
+
+    /**
+     * Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching code points at the start or
+     * end of the string, using TrimOption.BOTH and SpanCondition.CONTAINED. For example:
+     * 
+     * <pre>
+     * {@code
+     * 
+     *   new UnicodeSetSpanner(new UnicodeSet("[ab]")).trim("abacatbab")}
+     * </pre>
+     * 
+     * ... returns {@code "cat"}.
+     * 
+     */
+    public CharSequence trim(CharSequence sequence) {
+        return trim(sequence, TrimOption.BOTH, SpanCondition.CONTAINED);
+    }
+
+    /**
+     * Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching code points at the start or
+     * end of the string, using the trimOption and SpanCondition.CONTAINED. For example:
+     * 
+     * <pre>
+     * {@code
+     * 
+     *   new UnicodeSetSpanner(new UnicodeSet("[ab]")).trim("abacatbab", TrimOption.LEADING)}
+     * </pre>
+     * 
+     * ... returns {@code "catbab"}.
+     * 
+     */
+    public CharSequence trim(CharSequence sequence, TrimOption trimOption) {
+        return trim(sequence, trimOption, SpanCondition.CONTAINED);
+    }
+
+    /**
+     * Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching code points at the start or
+     * end of the string, depending on the trimOption and modifySpan. For example:
+     * 
+     * <pre>
+     * {@code
+     * 
+     *   new UnicodeSetSpanner(new UnicodeSet("[ab]")).trim("abacatbab", TrimOption.LEADING, SpanCondition.CONTAINED)}
+     * </pre>
+     * 
+     * ... returns {@code "catbab"}.
+     * 
+     * @param sequence
+     *            the sequence to trim
+     * @param trimOption
+     *            (optional) LEADING, TRAILING, or BOTH
+     * @param modifySpan
+     *            (optional) CONTAINED or NOT_CONTAINED
+     * @return a subsequence
+     */
+    public CharSequence trim(CharSequence sequence, TrimOption trimOption, SpanCondition modifySpan) {
+        int endLeadContained, startTrailContained;
+        final int length = sequence.length();
+        if (trimOption != TrimOption.TRAILING) {
+            endLeadContained = unicodeSet.span(sequence, modifySpan);
+            if (endLeadContained == length) {
+                return "";
+            }
+        } else {
+            endLeadContained = 0;
+        }
+        if (trimOption != TrimOption.LEADING) {
+            startTrailContained = unicodeSet.spanBack(sequence, modifySpan);
+        } else {
+            startTrailContained = length;
+        }
+        return endLeadContained == 0 && startTrailContained == length ? sequence : sequence.subSequence(
+                endLeadContained, startTrailContained);
+    }
+
+}
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/OutputInt.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/OutputInt.java
@ -0,0 +1,58 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+package com.ibm.icu.util;
+
+/**
+ * Simple struct-like class for int output parameters.
+ * Like <code>Output&lt;Integer&gt;</code> but without auto-boxing.
+ *
+ * @internal but could become public
+ * @deprecated This API is ICU internal only.
+ */
+@Deprecated
+public class OutputInt {
+    /**
+     * The value field.
+     *
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    @Deprecated
+    public int value;
+
+    /**
+     * Constructs an <code>OutputInt</code> with value 0.
+     *
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    @Deprecated
+    public OutputInt() {
+    }
+
+    /**
+     * Constructs an <code>OutputInt</code> with the given value.
+     *
+     * @param value the initial value
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    @Deprecated
+    public OutputInt(int value) {
+        this.value = value;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    @Deprecated
+    public String toString() {
+        return Integer.toString(value);
+    }
+}
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UTF16Test.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UTF16Test.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 1996-2010, International Business Machines Corporation and    *
+* Copyright (C) 1996-2014, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
@ -13,6 +13,7 @@ import com.ibm.icu.impl.Utility;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.ReplaceableString;
 import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UTF16.StringComparator;

 /**
 * Testing class for UTF16
@ -1560,6 +1561,39 @@ public final class UTF16Test extends TestFmwk
        }
    }

+    public void TestUtilities() {
+        String[] tests = {  // single code points, lone and swapped surrogates, empty, multi-code-point strings, and null
+                "a",
+                "\uFFFF",
+                "😀",  // one supplementary code point (a surrogate pair in UTF-16)
+                "\uD800",
+                "\uDC00",
+                "\uDBFF\uDfff",
+                "",
+                "\u0000",
+                "\uDC00\uD800",  // trail surrogate before lead surrogate: not a pair
+                "ab",
+                "😀a",  // supplementary code point plus one more: two code points
+                null,
+        };
+        StringComparator sc = new UTF16.StringComparator(true,false,0);  // NOTE(review): presumably code point order, no case folding - confirm against the constructor docs
+        for (String item1 : tests) {
+            String nonNull1 = item1 == null ? "" : item1;  // null is treated like the empty string for the reference values
+            int count = UTF16.countCodePoint(nonNull1);
+            int expected = count == 0 || count > 1 ? -1 : nonNull1.codePointAt(0);  // -1 unless exactly one code point
+            assertEquals("codepoint test " + Utility.hex(nonNull1), expected, UTF16.getSingleCodePoint(item1));
+            if (expected == -1) {  // no single code point to feed compareCodePoint() below
+                continue;
+            }
+            for (String item2 : tests) {
+                String nonNull2 = item2 == null ? "" : item2;
+                int scValue = Integer.signum(sc.compare(nonNull1, nonNull2));  // reference result; only the sign is compared
+                int fValue = Integer.signum(UTF16.compareCodePoint(expected, item2));  // item2 is passed as-is, so null is exercised too
+                assertEquals("comparison " + Utility.hex(nonNull1) + ", " + Utility.hex(nonNull2), scValue, fValue);
+            }
+        }
+    }
+
    public void TestNewString() {
    final int[] codePoints = {
        UCharacter.toCodePoint(UCharacter.MIN_HIGH_SURROGATE, UCharacter.MAX_LOW_SURROGATE),
@ -1568,6 +1602,7 @@ public final class UTF16Test extends TestFmwk
        'A',
        -1,
    };
+    

    final String cpString = "" +
        UCharacter.MIN_HIGH_SURROGATE +
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetStringSpanTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetStringSpanTest.java
@ -1,17 +1,19 @@
 /*
 *******************************************************************************
- * Copyright (C) 2009-2011, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                                *
+ * Copyright (C) 2009-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
 *******************************************************************************
 */
 package com.ibm.icu.dev.test.lang;

+import java.util.Collection;
+
 import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.impl.Utility;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSet.SpanCondition;
-import com.ibm.icu.text.UnicodeSetIterator;
+import com.ibm.icu.util.OutputInt;

 /**
 * @test
@ -41,7 +43,7 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
        }
        pos = set.span(string, 1, SpanCondition.SIMPLE);
        if (pos != 3) {
-            errln(String.format("FAIL: UnicodeSet(%s).span(%s) returns the wrong value pos %d (!= 3)",
+            errln(String.format("FAIL: UnicodeSet(%s).span(%s, 1) returns the wrong value pos %d (!= 3)",
                    set.toString(), string, pos));
        }
    }
@ -129,33 +131,15 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
    // more complex test. --------------------------------------------------------

    // Make the strings in a UnicodeSet easily accessible.
-    static class UnicodeSetWithStrings {
-
+    private static class UnicodeSetWithStrings {
        private UnicodeSet set;
-
-        private String strings[];
+        private Collection<String> setStrings;
        private int stringsLength;
-        private boolean hasSurrogates;

        public UnicodeSetWithStrings(final UnicodeSet normalSet) {
            set = normalSet;
-            stringsLength = 0;
-            hasSurrogates = false;
-            strings = new String[20];
-            int size = set.size();
-            if (size > 0 && set.charAt(size - 1) < 0) {
-                // If a set's last element is not a code point, then it must contain strings.
-                // Iterate over the set, skip all code point ranges, and cache the strings.
-                UnicodeSetIterator iter = new UnicodeSetIterator(set);
-                while (iter.nextRange() && stringsLength < strings.length) {
-                    if (iter.codepoint == UnicodeSetIterator.IS_STRING) {
-                        // Store the pointer to the set's string element
-                        // which we happen to know is a stable pointer.
-                        strings[stringsLength] = iter.getString();
-                        ++stringsLength;
-                    }
-                }
-            }
+            setStrings = normalSet.strings();
+            stringsLength = setStrings.size();
        }

        public final UnicodeSet getSet() {
@ -166,34 +150,9 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
            return (stringsLength > 0);
        }

-        public boolean hasStringsWithSurrogates() {
-            return hasSurrogates;
+        public Iterable<String> strings() {
+            return setStrings;
        }
-
-    }
-
-    static class UnicodeSetWithStringsIterator {
-
-        private UnicodeSetWithStrings fSet;
-        private int nextStringIndex;
-
-        public UnicodeSetWithStringsIterator(final UnicodeSetWithStrings set) {
-            fSet = set;
-            nextStringIndex = 0;
-        }
-
-        public void reset() {
-            nextStringIndex = 0;
-        }
-
-        public final String nextString() {
-            if (nextStringIndex < fSet.stringsLength) {
-                return fSet.strings[nextStringIndex++];
-            } else {
-                return null;
-            }
-        }
-
    }

    // Compare 16-bit Unicode strings (which may be malformed UTF-16)
@ -231,7 +190,6 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
            }
            return prev;
        } else if (spanCondition == SpanCondition.NOT_CONTAINED) {
-            UnicodeSetWithStringsIterator iter = new UnicodeSetWithStringsIterator(set);
            int c;
            int start, next;
            for (start = next = 0; start < length;) {
@ -240,9 +198,7 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
                if (realSet.contains(c)) {
                    break;
                }
-                String str;
-                iter.reset();
-                while ((str = iter.nextString()) != null) {
+                for (String str : set.strings()) {
                    if (str.length() <= (length - start) && matches16CPB(s, start, length, str)) {
                        // spanNeedsStrings=true;
                        return start;
@ -252,7 +208,6 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
            }
            return start;
        } else /* CONTAINED or SIMPLE */{
-            UnicodeSetWithStringsIterator iter = new UnicodeSetWithStringsIterator(set);
            int c;
            int start, next, maxSpanLimit = 0;
            for (start = next = 0; start < length;) {
@ -261,9 +216,7 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
                if (!realSet.contains(c)) {
                    next = start; // Do not span this single, not-contained code point.
                }
-                String str;
-                iter.reset();
-                while ((str = iter.nextString()) != null) {
+                for (String str : set.strings()) {
                    if (str.length() <= (length - start) && matches16CPB(s, start, length, str)) {
                        // spanNeedsStrings=true;
                        int matchLimit = start + str.length();
@ -336,7 +289,6 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
            } while (prev > 0);
            return prev;
        } else if (spanCondition == SpanCondition.NOT_CONTAINED) {
-            UnicodeSetWithStringsIterator iter = new UnicodeSetWithStringsIterator(set);
            int c;
            int prev = length, length0 = length;
            do {
@ -344,9 +296,7 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
                if (realSet.contains(c)) {
                    break;
                }
-                String str;
-                iter.reset();
-                while ((str = iter.nextString()) != null) {
+                for (String str : set.strings()) {
                    if (str.length() <= prev && matches16CPB(s, prev - str.length(), length0, str)) {
                        // spanNeedsStrings=true;
                        return prev;
@ -356,7 +306,6 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
            } while (prev > 0);
            return prev;
        } else /* SpanCondition.CONTAINED or SIMPLE */{
-            UnicodeSetWithStringsIterator iter = new UnicodeSetWithStringsIterator(set);
            int c;
            int prev = length, minSpanStart = length, length0 = length;
            do {
@ -365,9 +314,7 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
                if (!realSet.contains(c)) {
                    length = prev; // Do not span this single, not-contained code point.
                }
-                String str;
-                iter.reset();
-                while ((str = iter.nextString()) != null) {
+                for (String str : set.strings()) {
                    if (str.length() <= prev && matches16CPB(s, prev - str.length(), length0, str)) {
                        // spanNeedsStrings=true;
                        int matchStart = prev - str.length();
@ -616,7 +563,7 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
     * input expectCount<0).
     */
    void verifySpan(final UnicodeSetWithStrings sets[], final String s, int whichSpans,
-            int expectLimits[], int expectCount, // TODO
+            int expectLimits[], int expectCount,
            final String testName, int index) {
        int[] limits = new int[500];
        int limitsCount;
@ -1129,4 +1076,54 @@ public class UnicodeSetStringSpanTest extends TestFmwk {
        }
    }

+    public void TestSpanAndCount() {
+        // a set with no strings
+        UnicodeSet abc = new UnicodeSet('a', 'c');
+        // a set with an "irrelevant" string (fully contained in the code point set)
+        UnicodeSet crlf = new UnicodeSet().add('\n').add('\r').add("\r\n");
+        // a set with no "irrelevant" string but some interesting overlaps
+        UnicodeSet ab_cd = new UnicodeSet().add('a').add("ab").add("abc").add("cd");
+        String s = "ab\n\r\r\n" + UTF16.valueOf(0x50000) + "abcde";  // UTF-16 indexes: "ab" 0..1, line endings 2..5, U+50000 6..7, "abcde" 8..12
+        OutputInt count = new OutputInt();  // receives the count reported by each spanAndCount() call
+        assertEquals("abc span[8, 11[", 11,
+                abc.spanAndCount(s, 8, SpanCondition.SIMPLE, count));
+        assertEquals("abc count=3", 3, count.value);  // a, b, c
+        assertEquals("no abc span[2, 8[", 8,
+                abc.spanAndCount(s, 2, SpanCondition.NOT_CONTAINED, count));
+        assertEquals("no abc count=5", 5, count.value);  // four line-ending characters plus U+50000 as one code point
+        assertEquals("line endings span[2, 6[", 6,
+                crlf.spanAndCount(s, 2, SpanCondition.CONTAINED, count));
+        assertEquals("line endings count=3", 3, count.value);  // 4 code units but 3 elements: \n, \r, and the string "\r\n"
+        assertEquals("no ab+cd span[2, 8[", 8,
+                ab_cd.spanAndCount(s, 2, SpanCondition.NOT_CONTAINED, count));
+        assertEquals("no ab+cd count=5", 5, count.value);  // same five code points as for the abc set
+        assertEquals("ab+cd span[8, 12[", 12,
+                ab_cd.spanAndCount(s, 8, SpanCondition.CONTAINED, count));
+        assertEquals("ab+cd count=2", 2, count.value);  // the strings "ab" and "cd"
+        assertEquals("1x abc span[8, 11[", 11,
+                ab_cd.spanAndCount(s, 8, SpanCondition.SIMPLE, count));
+        assertEquals("1x abc count=1", 1, count.value);  // one element: the string "abc"
+
+        abc.freeze();  // the same expectations are repeated below on the frozen sets
+        crlf.freeze();
+        ab_cd.freeze();
+        assertEquals("abc span[8, 11[ (frozen)", 11,
+                abc.spanAndCount(s, 8, SpanCondition.SIMPLE, count));
+        assertEquals("abc count=3 (frozen)", 3, count.value);
+        assertEquals("no abc span[2, 8[ (frozen)", 8,
+                abc.spanAndCount(s, 2, SpanCondition.NOT_CONTAINED, count));
+        assertEquals("no abc count=5 (frozen)", 5, count.value);
+        assertEquals("line endings span[2, 6[ (frozen)", 6,
+                crlf.spanAndCount(s, 2, SpanCondition.CONTAINED, count));
+        assertEquals("line endings count=3 (frozen)", 3, count.value);
+        assertEquals("no ab+cd span[2, 8[ (frozen)", 8,
+                ab_cd.spanAndCount(s, 2, SpanCondition.NOT_CONTAINED, count));
+        assertEquals("no ab+cd count=5 (frozen)", 5, count.value);
+        assertEquals("ab+cd span[8, 12[ (frozen)", 12,
+                ab_cd.spanAndCount(s, 8, SpanCondition.CONTAINED, count));
+        assertEquals("ab+cd count=2 (frozen)", 2, count.value);
+        assertEquals("1x abc span[8, 11[ (frozen)", 11,
+                ab_cd.spanAndCount(s, 8, SpanCondition.SIMPLE, count));
+        assertEquals("1x abc count=1 (frozen)", 1, count.value);
+    }
 }
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
@ -11,6 +11,7 @@ import java.text.ParsePosition;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
@ -22,6 +23,7 @@ import java.util.SortedSet;
 import java.util.TreeSet;

 import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.dev.util.CollectionUtilities;
 import com.ibm.icu.impl.SortedSetRelation;
 import com.ibm.icu.impl.Utility;
 import com.ibm.icu.lang.UCharacter;
@ -33,6 +35,11 @@ import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeMatcher;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSet.ComparisonStyle;
+import com.ibm.icu.text.UnicodeSet.EntryRange;
+import com.ibm.icu.text.UnicodeSetSpanner;
+import com.ibm.icu.text.UnicodeSetSpanner.Quantifier;
+import com.ibm.icu.text.UnicodeSet.SpanCondition;
+import com.ibm.icu.text.UnicodeSetSpanner.TrimOption;
 import com.ibm.icu.text.UnicodeSetIterator;

 /**
@ -1256,10 +1263,10 @@ public class UnicodeSetTest extends TestFmwk {
                String pat = "";
                try {
                    String name =
-                        (j==0) ? UScript.getName(i) : UScript.getShortName(i);
-                        pat = "[:" + name + ":]";
-                        UnicodeSet set = new UnicodeSet(pat);
-                        logln("Ok: " + pat + " -> " + set.toPattern(false));
+                            (j==0) ? UScript.getName(i) : UScript.getShortName(i);
+                            pat = "[:" + name + ":]";
+                            UnicodeSet set = new UnicodeSet(pat);
+                            logln("Ok: " + pat + " -> " + set.toPattern(false));
                } catch (IllegalArgumentException e) {
                    if (pat.length() == 0) {
                        errln("FAIL (in UScript): No name for script " + i);
@ -1330,9 +1337,9 @@ public class UnicodeSetTest extends TestFmwk {
        // The following pattern must contain at least one range "c-d"
        // where c or d is a Pattern_White_Space.
        String pattern =
-            "[\\uFEFF \\u200E-\\u20FF \\uFFF9-\\uFFFC \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
+                "[\\uFEFF \\u200E-\\u20FF \\uFFF9-\\uFFFC \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
        String exp =
-            "[\\u200E-\\u20FF\\uFEFF\\uFFF9-\\uFFFC\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
+                "[\\u200E-\\u20FF\\uFEFF\\uFFF9-\\uFFFC\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
        // We test this with two passes; in the second pass we
        // pre-unescape the pattern.  Since U+200E is Pattern_White_Space,
        // this fails -- which is what we expect.
@ -1563,7 +1570,7 @@ public class UnicodeSetTest extends TestFmwk {
        mod2 = new UnicodeSet(set1).retainAll(set2.addAllTo(new LinkedHashSet<String>()));
        assertEquals("remove all", mod1, mod2);
    }
-    
+
    public void TestComparison() {
        UnicodeSet set1 = new UnicodeSet("[a-b d-g {ch} {zh}]").freeze();
        UnicodeSet set2 = new UnicodeSet("[c-e {ch}]").freeze();
@ -1579,7 +1586,7 @@ public class UnicodeSetTest extends TestFmwk {
        List<UnicodeSet> sorted = new ArrayList(new TreeSet<UnicodeSet>(unsorted));
        assertNotEquals("compareTo-shorter-first", unsorted, sorted);
        assertEquals("compareTo-shorter-first", goalShortest, sorted);
-        
+
        TreeSet<UnicodeSet> sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
            public int compare(UnicodeSet o1, UnicodeSet o2) {
                // TODO Auto-generated method stub
@ -1616,34 +1623,34 @@ public class UnicodeSetTest extends TestFmwk {
        // now compare all the combinations. If any of them is a code point, use it.
        int maxErrorCount = 0;
        compare:
-        for (String last : target) {
-            for (String curr : target) {
-                int lastCount = Character.codePointCount(last, 0, last.length());
-                int currCount = Character.codePointCount(curr, 0, curr.length());
-                int comparison;
-                if (lastCount == 1) {
-                    comparison = UnicodeSet.compare(last.codePointAt(0), curr);
-                } else if (currCount == 1) {
-                    comparison = UnicodeSet.compare(last, curr.codePointAt(0));
-                } else {
-                    continue;
-                }
-                if (comparison != last.compareTo(curr)) {
-                    // repeat for debugging
+            for (String last : target) {
+                for (String curr : target) {
+                    int lastCount = Character.codePointCount(last, 0, last.length());
+                    int currCount = Character.codePointCount(curr, 0, curr.length());
+                    int comparison;
                    if (lastCount == 1) {
                        comparison = UnicodeSet.compare(last.codePointAt(0), curr);
                    } else if (currCount == 1) {
                        comparison = UnicodeSet.compare(last, curr.codePointAt(0));
+                    } else {
+                        continue;
                    }
-                    if (maxErrorCount++ > 10) {
-                        errln(maxErrorCount + " Failure in comparing " + last + " & " + curr + "\tOmitting others...");
-                        break compare;
+                    if (comparison != last.compareTo(curr)) {
+                        // repeat for debugging
+                        if (lastCount == 1) {
+                            comparison = UnicodeSet.compare(last.codePointAt(0), curr);
+                        } else if (currCount == 1) {
+                            comparison = UnicodeSet.compare(last, curr.codePointAt(0));
+                        }
+                        if (maxErrorCount++ > 10) {
+                            errln(maxErrorCount + " Failure in comparing " + last + " & " + curr + "\tOmitting others...");
+                            break compare;
+                        }
+                        errln(maxErrorCount + " Failure in comparing " + last + " & " + curr);
                    }
-                    errln(maxErrorCount + " Failure in comparing " + last + " & " + curr);
                }
            }
-        }
-        
+
        //compare(Iterable<T>, Iterable<T>)
        int max = 10;
        List<String> test1 = new ArrayList<String>(max);
@ -1669,7 +1676,7 @@ public class UnicodeSetTest extends TestFmwk {
        // check to make sure right exceptions are thrown
        Class expected = IllegalArgumentException.class;
        Class actual;
-        
+
        try {
            actual = null;
            @SuppressWarnings("unused")
@ -1678,7 +1685,7 @@ public class UnicodeSetTest extends TestFmwk {
            actual = e.getClass();
        }
        assertEquals("exception if odd", expected, actual);
-        
+
        try {
            actual = null;
            @SuppressWarnings("unused")
@ -1687,7 +1694,7 @@ public class UnicodeSetTest extends TestFmwk {
            actual = e.getClass();
        }
        assertEquals("exception for start/end problem", expected, actual);
-        
+
        try {
            actual = null;
            @SuppressWarnings("unused")
@ -1696,7 +1703,7 @@ public class UnicodeSetTest extends TestFmwk {
            actual = e.getClass();
        }
        assertEquals("exception for end/start problem", expected, actual);
-        
+
        CheckRangeSpeed(10000, new UnicodeSet("[:whitespace:]"));
        CheckRangeSpeed(1000, new UnicodeSet("[:letter:]"));
    }
@ -1731,14 +1738,14 @@ public class UnicodeSetTest extends TestFmwk {
        double rangeConstructorTime = (middle - start)/iterations;
        double patternConstructorTime = (end - middle)/iterations;
        String message = "Range constructor:\t" + rangeConstructorTime + ";\tPattern constructor:\t" + patternConstructorTime + "\t\t"
-        + percent.format(rangeConstructorTime/patternConstructorTime-1);
+                + percent.format(rangeConstructorTime/patternConstructorTime-1);
        if (rangeConstructorTime < 2*patternConstructorTime) {
            logln(message);
        } else {
            errln(message);
        }
    }
-    
+
    NumberFormat percent = NumberFormat.getPercentInstance();
    {
        percent.setMaximumFractionDigits(2);
@ -1806,69 +1813,69 @@ public class UnicodeSetTest extends TestFmwk {
            }
    }

-// Following cod block is commented out to eliminate PrettyPrinter depenencies
+    // Following code block is commented out to eliminate PrettyPrinter dependencies

-//    String[] prettyData = {
-//            "[\\uD7DE-\\uD90C \\uDCB5-\\uDD9F]", // special case
-//            "[:any:]",
-//            "[:whitespace:]",
-//            "[:linebreak=AL:]",
-//    };
-//
-//    public void TestPrettyPrinting() {
-//        try{
-//            PrettyPrinter pp = new PrettyPrinter();
-//
-//            int i = 0;
-//            for (; i < prettyData.length; ++i) {
-//                UnicodeSet test = new UnicodeSet(prettyData[i]);
-//                checkPrettySet(pp, i, test);
-//            }
-//            Random random = new Random(0);
-//            UnicodeSet test = new UnicodeSet();
-//
-//            // To keep runtimes under control, make the number of random test cases
-//            //   to try depends on the test framework exhaustive setting.
-//            //  params.inclusions = 5:   default exhaustive value
-//            //  params.inclusions = 10:  max exhaustive value.
-//            int iterations = 50;
-//            if (params.inclusion > 5) {
-//                iterations = (params.inclusion-5) * 200;
-//            }
-//            for (; i < iterations; ++i) {
-//                double start = random.nextGaussian() * 0x10000;
-//                if (start < 0) start = - start;
-//                if (start > 0x10FFFF) {
-//                    start = 0x10FFFF;
-//                }
-//                double end = random.nextGaussian() * 0x100;
-//                if (end < 0) end = -end;
-//                end = start + end;
-//                if (end > 0x10FFFF) {
-//                    end = 0x10FFFF;
-//                }
-//                test.complement((int)start, (int)end);
-//                checkPrettySet(pp, i, test);
-//            }
-//        }catch(RuntimeException ex){
-//            warnln("Could not load Collator");
-//        }
-//    }
-//
-//    private void checkPrettySet(PrettyPrinter pp, int i, UnicodeSet test) {
-//        String pretty = pp.toPattern(test);
-//        UnicodeSet retry = new UnicodeSet(pretty);
-//        if (!test.equals(retry)) {
-//            errln(i + ". Failed test: " + test + " != " + pretty);
-//        } else {
-//            logln(i + ". Worked for " + truncate(test.toString()) + " => " + truncate(pretty));
-//        }
-//    }
-//
-//    private String truncate(String string) {
-//        if (string.length() <= 100) return string;
-//        return string.substring(0,97) + "...";
-//    }
+    //    String[] prettyData = {
+    //            "[\\uD7DE-\\uD90C \\uDCB5-\\uDD9F]", // special case
+    //            "[:any:]",
+    //            "[:whitespace:]",
+    //            "[:linebreak=AL:]",
+    //    };
+    //
+    //    public void TestPrettyPrinting() {
+    //        try{
+    //            PrettyPrinter pp = new PrettyPrinter();
+    //
+    //            int i = 0;
+    //            for (; i < prettyData.length; ++i) {
+    //                UnicodeSet test = new UnicodeSet(prettyData[i]);
+    //                checkPrettySet(pp, i, test);
+    //            }
+    //            Random random = new Random(0);
+    //            UnicodeSet test = new UnicodeSet();
+    //
+    //            // To keep runtimes under control, make the number of random test cases
+    //            //   to try depends on the test framework exhaustive setting.
+    //            //  params.inclusions = 5:   default exhaustive value
+    //            //  params.inclusions = 10:  max exhaustive value.
+    //            int iterations = 50;
+    //            if (params.inclusion > 5) {
+    //                iterations = (params.inclusion-5) * 200;
+    //            }
+    //            for (; i < iterations; ++i) {
+    //                double start = random.nextGaussian() * 0x10000;
+    //                if (start < 0) start = - start;
+    //                if (start > 0x10FFFF) {
+    //                    start = 0x10FFFF;
+    //                }
+    //                double end = random.nextGaussian() * 0x100;
+    //                if (end < 0) end = -end;
+    //                end = start + end;
+    //                if (end > 0x10FFFF) {
+    //                    end = 0x10FFFF;
+    //                }
+    //                test.complement((int)start, (int)end);
+    //                checkPrettySet(pp, i, test);
+    //            }
+    //        }catch(RuntimeException ex){
+    //            warnln("Could not load Collator");
+    //        }
+    //    }
+    //
+    //    private void checkPrettySet(PrettyPrinter pp, int i, UnicodeSet test) {
+    //        String pretty = pp.toPattern(test);
+    //        UnicodeSet retry = new UnicodeSet(pretty);
+    //        if (!test.equals(retry)) {
+    //            errln(i + ". Failed test: " + test + " != " + pretty);
+    //        } else {
+    //            logln(i + ". Worked for " + truncate(test.toString()) + " => " + truncate(pretty));
+    //        }
+    //    }
+    //
+    //    private String truncate(String string) {
+    //        if (string.length() <= 100) return string;
+    //        return string.substring(0,97) + "...";
+    //    }

    public class TokenSymbolTable implements SymbolTable {
        HashMap contents = new HashMap();
@ -1944,7 +1951,7 @@ public class UnicodeSetTest extends TestFmwk {
            UnicodeSet set = new UnicodeSet(DATA[i]);
            expectContainment(set,
                    CharsToUnicodeString("abc\\U00010000"),
-            "\uD800;\uDC00"); // split apart surrogate-pair
+                    "\uD800;\uDC00"); // split apart surrogate-pair
            if (set.size() != 4) {
                errln(Utility.escape("FAIL: " + DATA[i] + ".size() == " + 
                        set.size() + ", expected 4"));
@ -2171,16 +2178,16 @@ public class UnicodeSetTest extends TestFmwk {

        // Now see if the expected relation is true
        int status = (minus12.size() != 0 ? 4 : 0)
-        | (intersection.size() != 0 ? 2 : 0)
-        | (minus21.size() != 0 ? 1 : 0);
+                | (intersection.size() != 0 ? 2 : 0)
+                | (minus21.size() != 0 ? 1 : 0);

        if (status != relation) {
            errln("FAIL relation incorrect" + message
                    + "; desired = " + RELATION_NAME[relation]
-                                                     + "; found = " + RELATION_NAME[status]
-                                                                                    + "; set1 = " + set1.toPattern(true)
-                                                                                    + "; set2 = " + set2.toPattern(true)
-            );
+                            + "; found = " + RELATION_NAME[status]
+                                    + "; set1 = " + set1.toPattern(true)
+                                    + "; set2 = " + set2.toPattern(true)
+                    );
        }
    }

@ -2234,7 +2241,7 @@ public class UnicodeSetTest extends TestFmwk {
            errln("FAIL " + message
                    + "; source = " + s.toPattern(true)
                    + "; result = " + t.toPattern(true)
-            );
+                    );
            return false;
        }
        return true;
@ -2379,7 +2386,7 @@ public class UnicodeSetTest extends TestFmwk {
            errln("UnicodeSetIterator.getSet() was not suppose to given an " + "an exception.");
        }
    }
-    
+
    /* Tests the method public UnicodeSet add(Collection<?> source) */
    public void TestAddCollection() {
        UnicodeSet us = new UnicodeSet();
@ -2390,9 +2397,99 @@ public class UnicodeSetTest extends TestFmwk {
        } catch (Exception e) {
        }
    }
-    
+
    public void TestConstants() {
        assertEquals("Empty", new UnicodeSet(), UnicodeSet.EMPTY);
        assertEquals("All", new UnicodeSet(0,0x10FFFF), UnicodeSet.ALL_CODE_POINTS);
    }
+
+    /* Tests that UnicodeSet.ranges() and strings() visit the same entries as a UnicodeSetIterator. */
+    public void TestIteration() {
+        UnicodeSet us1 = new UnicodeSet("[abcM{xy}]");
+        assertEquals("", "M, a-c", CollectionUtilities.join(us1.ranges(), ", "));
+        // Sample code: walk the code point ranges, then the strings, of a set.
+        for (EntryRange range : us1.ranges()) { 
+            // do something with code points between range.codepoint and range.codepointEnd;
+        }
+        for (String s : us1.strings()) { 
+            // do something with each string;
+        }
+
+        String[] tests = {
+                "[M-Qzab{XY}{ZW}]",
+                "[]",
+                "[a]",
+                "[a-c]",
+                "[{XY}]",
+        };
+        for (String test : tests) {
+            UnicodeSet us = new UnicodeSet(test);
+            UnicodeSetIterator it = new UnicodeSetIterator(us);
+            for (EntryRange range : us.ranges()) {
+                final String title = range.toString();
+                logln(title);
+                it.nextRange();
+                assertEquals(title, it.codepoint, range.codepoint);
+                assertEquals(title, it.codepointEnd, range.codepointEnd);
+                // NOTE(review): a disabled branch here used to compare it.string
+                // with range.string when range.codepoint == -1. String entries
+                // are checked by the us.strings() loop below instead; confirm
+                // that the old branch is obsolete.
+            }
+            for (String s : us.strings()) {
+                it.nextRange(); // the iterator is expected to yield the strings after all ranges
+                assertEquals("strings", it.string, s);
+            }
+            assertFalse("", it.next()); // nothing may be left once ranges and strings are consumed
+        }
+    }
+
+    /* Tests UnicodeSetSpanner deleteFrom(), trim() and replaceFrom() against literal expected strings. */
+    public void TestReplaceAndDelete() {
+        UnicodeSetSpanner m;
+        m = new UnicodeSetSpanner(new UnicodeSet("[._]"));
+        assertEquals("", "abc", m.deleteFrom("_._a_._b_._c_._"));        
+        assertEquals("", "_.__.__.__._", m.deleteFrom("_._a_._b_._c_._", SpanCondition.NOT_CONTAINED));
+        // trim(): both ends are expected to be stripped by default; a TrimOption limits it to one end.
+        assertEquals("", "a_._b_._c", m.trim("_._a_._b_._c_._"));
+        assertEquals("", "a_._b_._c_._", m.trim("_._a_._b_._c_._", TrimOption.LEADING));
+        assertEquals("", "_._a_._b_._c", m.trim("_._a_._b_._c_._", TrimOption.TRAILING));
+        // replaceFrom(): one replacement per matched run with Quantifier.SPAN, one per matched element without it.
+        assertEquals("", "a??b??c", m.replaceFrom("a_._b_._c", "??", Quantifier.SPAN));
+        assertEquals("", "a??b??c", m.replaceFrom(m.trim("_._a_._b_._c_._"), "??", Quantifier.SPAN));
+        assertEquals("", "XYXYXYaXYXYXYbXYXYXYcXYXYXY", m.replaceFrom("_._a_._b_._c_._", "XY"));
+        assertEquals("", "XYaXYbXYcXY", m.replaceFrom("_._a_._b_._c_._", "XY", Quantifier.SPAN));
+        // NOT_CONTAINED: the expected result keeps only the characters that are in the set.
+        m = new UnicodeSetSpanner(new UnicodeSet("\\p{uppercase}"));
+        assertEquals("", "TQBF", m.deleteFrom("The Quick Brown Fox.", SpanCondition.NOT_CONTAINED));
+        // Same input with \p{lowercase} added to the set: letters of both cases are expected to survive.
+        m = new UnicodeSetSpanner(m.getUnicodeSet().addAll(new UnicodeSet("\\p{lowercase}")));
+        assertEquals("", "TheQuickBrownFox", m.deleteFrom("The Quick Brown Fox.", SpanCondition.NOT_CONTAINED));
+        // A set holding only the string "ab": the lone 'a' and 'b' in "acb" are expected to stay untouched.
+        m = new UnicodeSetSpanner(new UnicodeSet("[{ab}]"));
+        assertEquals("", "XXc acb", m.replaceFrom("ababc acb", "X"));
+        assertEquals("", "Xc acb", m.replaceFrom("ababc acb", "X", Quantifier.SPAN));
+    }
+
+    public void TestCodePoints() {
+        // Set elements that span several UTF-16 code units: letter + U+0308, then supplementary code points.
+        checkCodePoints("x\u0308", "z\u0308", Quantifier.MIN_ELEMENTS, null, 1); // null: expect "-" + b
+        checkCodePoints("𣿡", "𣿢", Quantifier.MIN_ELEMENTS, null, 1);
+        checkCodePoints("👦", "👧", Quantifier.MIN_ELEMENTS, null, 1);
+    }
+
+    /* Builds a spanner for the one-string set [{a}] and checks countIn() and replaceFrom() on the input a+b. */
+    private void checkCodePoints(String a, String b, Quantifier quantifier, String expectedReplaced, int expectedCount) {
+        final String ab = a+b;
+        UnicodeSetSpanner m = new UnicodeSetSpanner(new UnicodeSet("[{" + a + "}]"));
+        assertEquals("new UnicodeSetSpanner(\"[{" + a + "}]\").countIn(\"" + ab + "\")", 
+                expectedCount,
+                m.countIn(ab, quantifier));
+        if (expectedReplaced == null) { // default expectation: a becomes "-" and b is left as is
+            expectedReplaced = "-" + b;
+        }
+        assertEquals("new UnicodeSetSpanner(\"[{" + a + "}]\").replaceFrom(\"" + ab + "\", \"-\")", 
+                expectedReplaced, m.replaceFrom(ab, "-", quantifier));
+    }
+
 }