ICU-9131 First batch of fixes for review comments. More to come.

X-SVN-Rev: 36500
2025-04-14 01:11:02 +00:00 · 2014-09-13 12:08:00 +00:00 · 2014-09-13 12:08:00 +00:00 · 1d5dbc0af9
commit 1d5dbc0af9
parent cea11c1786
4 changed files with 68 additions and 45 deletions
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UTF16.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UTF16.java
@ -2617,8 +2617,8 @@ public final class UTF16 {
     * @return the code point IF the string is non-null and consists of a single code point.
     * otherwise returns -1.
     * @param s to test
-     * @internal
-     * @deprecated This API is ICU internal only.
+     * @draft ICU 54
+     * @provisional This API might change or be removed in a future release.
     */
    @Deprecated
    public static int getSingleCodePoint(CharSequence s) {
@ -2631,7 +2631,7 @@ public final class UTF16 {
        }

        // at this point, len = 2
-        int cp = UTF16.charAt(s, 0); 
+        int cp = Character.codePointAt(s, 0); 
        if (cp > 0xFFFF) { // is surrogate pair
            return cp;
        }
@ -2653,8 +2653,8 @@ public final class UTF16 {
     * @param codePoint to test
     * @param s to test
     * @return equivalent of code point comparator comparing two strings.
-     * @internal
-     * @deprecated This API is ICU internal only.
+     * @draft ICU 54
+     * @provisional This API might change or be removed in a future release.
     */
    @Deprecated
    public static int compareCodePoint(int codePoint, CharSequence s) {
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
@ -11,6 +11,7 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Iterator;
+import java.util.NoSuchElementException;
 import java.util.TreeSet;

 import com.ibm.icu.impl.BMPSet;
@ -1516,7 +1517,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
    public final UnicodeSet remove(CharSequence s) {
        int cp = getSingleCP(s);
        if (cp < 0) {
-            strings.remove(s);
+            strings.remove(s.toString());
            pat = null;
        } else {
            remove(cp, cp);
@ -1595,7 +1596,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
        int cp = getSingleCP(s);
        if (cp < 0) {
            if (strings.contains(s)) {
-                strings.remove(s);
+                strings.remove(s.toString());
            } else {
                strings.add(s.toString());
            }
@ -4196,11 +4197,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
            if (pos < len-1) {
                result.codepoint = list[pos++];
                result.codepointEnd = list[pos++]-1;
-//                result.string = null;
            } else {
-                throw new ArrayIndexOutOfBoundsException(pos);
-//                result.codepoint = -1;
-//                result.string = stringIterator.next();
+                throw new NoSuchElementException();
            }
            return result;
        }
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSetSpanner.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSetSpanner.java
@ -95,8 +95,8 @@ public class UnicodeSetSpanner {
    }

    /**
-     * Options for replaceFrom and countIn to control how to treat each matched span. The name is from "qualifier" as used in regex,
-     * since it is similar to whether one is replacing [abc] by x, or [abc]* by x.
+     * Options for replaceFrom and countIn to control how to treat each matched span. 
+     * It is similar to whether one is replacing [abc] by x, or [abc]* by x.
     * 
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
@ -130,7 +130,7 @@ public class UnicodeSetSpanner {

    /**
     * Returns the number of matching characters found in a character sequence, 
-     * counting by Quantifier.MIN_ELEMENTS using SpanCondition.SIMPLE.
+     * counting by CountMethod.MIN_ELEMENTS using SpanCondition.SIMPLE.
     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
     * @param sequence
     *            the sequence to count characters in
@ -148,13 +148,15 @@ public class UnicodeSetSpanner {
     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
     * @param sequence
     *            the sequence to count characters in
+     * @param countMethod
+     *            whether to treat an entire span as a match, or individual elements as matches
     * @return the count. Zero if there are none.
     * 
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
-    public int countIn(CharSequence sequence, CountMethod quantifier) {
-        return countIn(sequence, quantifier, SpanCondition.SIMPLE);
+    public int countIn(CharSequence sequence, CountMethod countMethod) {
+        return countIn(sequence, countMethod, SpanCondition.SIMPLE);
    }

    /**
@ -162,7 +164,8 @@ public class UnicodeSetSpanner {
     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
     * @param sequence
     *            the sequence to count characters in
-     * @param quantifier whether to treat an entire span as a match, or individual code points
+     * @param countMethod
+     *            whether to treat an entire span as a match, or individual elements as matches
     * @param spanCondition
     *            the spanCondition to use. SIMPLE or CONTAINED means only count the code points in the span;
     *            NOT_CONTAINED is the reverse.
@ -172,20 +175,28 @@ public class UnicodeSetSpanner {
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
-    public int countIn(CharSequence sequence, CountMethod quantifier, SpanCondition spanCondition) {
+    public int countIn(CharSequence sequence, CountMethod countMethod, SpanCondition spanCondition) {
        int count = 0;
        int start = 0;
        SpanCondition skipSpan = spanCondition == SpanCondition.NOT_CONTAINED ? SpanCondition.SIMPLE
                : SpanCondition.NOT_CONTAINED;
        final int length = sequence.length();
-        OutputInt spanCount = new OutputInt();
+        OutputInt spanCount = null;
        while (start != length) {
-            int endNotContained = unicodeSet.span(sequence, start, skipSpan);
-            if (endNotContained == length) {
+            int endOfSpan = unicodeSet.span(sequence, start, skipSpan);
+            if (endOfSpan == length) {
                break;
            }
-            start = unicodeSet.spanAndCount(sequence, endNotContained, spanCondition, spanCount);
-            count += quantifier == CountMethod.WHOLE_SPAN ? 1 : spanCount.value;
+            if (countMethod == CountMethod.WHOLE_SPAN) {
+                start = unicodeSet.span(sequence, endOfSpan, spanCondition);
+                count += 1;
+            } else {
+                if (spanCount == null) {
+                    spanCount = new OutputInt();
+                }
+                start = unicodeSet.spanAndCount(sequence, endOfSpan, spanCondition, spanCount);
+                count += spanCount.value;
+            }
        }
        return count;
    }
@ -205,7 +216,7 @@ public class UnicodeSetSpanner {
    }

    /**
-     * Delete all matching spans in sequence, according to the operations.
+     * Delete all matching spans in sequence, according to the spanCondition.
     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
     * @param sequence
     *            charsequence to replace matching spans in.
@ -222,7 +233,7 @@ public class UnicodeSetSpanner {

    /**
     * Replace all matching spans in sequence by the replacement,
-     * counting by Quantifier.MIN_ELEMENTS using SpanCondition.SIMPLE.
+     * counting by CountMethod.MIN_ELEMENTS using SpanCondition.SIMPLE.
     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
     * @param sequence
     *            charsequence to replace matching spans in.
@ -238,41 +249,42 @@ public class UnicodeSetSpanner {
    }

    /**
-     * Replace all matching spans in sequence by replacement, according to the Quantifier, using SpanCondition.SIMPLE. 
+     * Replace all matching spans in sequence by replacement, according to the CountMethod, using SpanCondition.SIMPLE.
     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
+     * 
     * @param sequence
     *            charsequence to replace matching spans in.
     * @param replacement
     *            replacement sequence. To delete, use ""
-     * @param quantifier
-     *            whether to treat an entire span as a match, or individual code points
+     * @param countMethod
+     *            whether to treat an entire span as a match, or individual elements as matches
     * @return modified string.
     * 
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
-    public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod quantifier) {
-        return replaceFrom(sequence, replacement, quantifier, SpanCondition.SIMPLE);
+    public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod countMethod) {
+        return replaceFrom(sequence, replacement, countMethod, SpanCondition.SIMPLE);
    }

    /**
-     * Replace all matching spans in sequence by replacement, according to the operations quantifier and spanCondition.
+     * Replace all matching spans in sequence by replacement, according to the countMethod and spanCondition.
     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
     * @param sequence
     *            charsequence to replace matching spans in.
     * @param replacement
     *            replacement sequence. To delete, use ""
+     * @param countMethod 
+     *            whether to treat an entire span as a match, or individual elements as matches
     * @param spanCondition
     *            specify whether to modify the matching spans (CONTAINED or SIMPLE) or the non-matching
     *            (NOT_CONTAINED)
-     * @param quantifier
-     *            specify whether to collapse or do codepoint by codepoint.
     * @return modified string.
     * 
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
-    public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod quantifier,
+    public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod countMethod,
            SpanCondition spanCondition) {
        SpanCondition copySpan = spanCondition == SpanCondition.NOT_CONTAINED ? SpanCondition.SIMPLE
                : SpanCondition.NOT_CONTAINED;
@ -282,12 +294,20 @@ public class UnicodeSetSpanner {
        // avoid this allocation unless needed

        final int length = sequence.length();
-        OutputInt spanCount = new OutputInt();
+        OutputInt spanCount = null;
        for (int endCopy = 0; endCopy != length;) {
-            int endModify = unicodeSet.spanAndCount(sequence, endCopy, spanCondition, spanCount);
+            int endModify;
+            if (countMethod == CountMethod.WHOLE_SPAN) {
+                endModify = unicodeSet.span(sequence, endCopy, spanCondition);
+            } else {
+                if (spanCount == null) {
+                    spanCount = new OutputInt();
+                }
+                endModify = unicodeSet.spanAndCount(sequence, endCopy, spanCondition, spanCount);
+            }
            if (remove || endModify == 0) {
                // do nothing
-            } else if (quantifier == CountMethod.WHOLE_SPAN) {
+            } else if (countMethod == CountMethod.WHOLE_SPAN) {
                result.append(replacement);
            } else {
                for (int i = spanCount.value; i > 0; --i) {
@ -334,7 +354,7 @@ public class UnicodeSetSpanner {
    }

    /**
-     * Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching code points at the start or
+     * Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching code points at the start and
     * end of the string, using TrimOption.BOTH and SpanCondition.SIMPLE. For example:
     * 
     * <pre>
@ -343,7 +363,10 @@ public class UnicodeSetSpanner {
     *   new UnicodeSet("[ab]").trim("abacatbab")}
     * </pre>
     * 
-     * ... returns {@code "catbab"}.
+     * ... returns {@code "cat"}.
+     * @param sequence
+     *            the sequence to trim
+     * @return a subsequence
     * 
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
@ -359,11 +382,17 @@ public class UnicodeSetSpanner {
     * <pre>
     * {@code
     * 
-     *   new UnicodeSet("[ab]").trim("abacatbab")}
+     *   new UnicodeSet("[ab]").trim("abacatbab", TrimOption.LEADING)}
     * </pre>
     * 
     * ... returns {@code "catbab"}.
     * 
+     * @param sequence
+     *            the sequence to trim
+     * @param trimOption
+     *            LEADING, TRAILING, or BOTH
+     * @return a subsequence
+     * 
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
@ -378,7 +407,7 @@ public class UnicodeSetSpanner {
     * <pre>
     * {@code
     * 
-     *   new UnicodeSet("[ab]").trim("abacatbab")}
+     *   new UnicodeSet("[ab]").trim("abacatbab", TrimOption.LEADING, SpanCondition.SIMPLE)}
     * </pre>
     * 
     * ... returns {@code "catbab"}.
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java
@ -2430,10 +2430,6 @@ public class UnicodeSetTest extends TestFmwk {
                it.nextRange();
                assertEquals(title, it.codepoint, range.codepoint);
                assertEquals(title, it.codepointEnd, range.codepointEnd);
-//                if (range.codepoint != -1) {
-//                } else {
-//                    assertEquals(title, it.string, range.string);
-//                }
            }
            for (String s : us.strings()) {
                it.nextRange();