ICU-9131 First batch of fixes for review comments. More to come.

X-SVN-Rev: 36500
This commit is contained in:
Mark Davis 2014-09-13 12:08:00 +00:00
parent cea11c1786
commit 1d5dbc0af9
4 changed files with 68 additions and 45 deletions

View file

@ -2617,8 +2617,8 @@ public final class UTF16 {
* @return the code point if the string is non-null and consists of a single code point;
* otherwise returns -1.
* @param s to test
* @internal
* @deprecated This API is ICU internal only.
* @draft ICU 54
* @provisional This API might change or be removed in a future release.
*/
@Deprecated
public static int getSingleCodePoint(CharSequence s) {
@ -2631,7 +2631,7 @@ public final class UTF16 {
}
// at this point, len = 2
int cp = UTF16.charAt(s, 0);
int cp = Character.codePointAt(s, 0);
if (cp > 0xFFFF) { // is surrogate pair
return cp;
}
@ -2653,8 +2653,8 @@ public final class UTF16 {
* @param codePoint to test
* @param s to test
* @return equivalent of code point comparator comparing two strings.
* @internal
* @deprecated This API is ICU internal only.
* @draft ICU 54
* @provisional This API might change or be removed in a future release.
*/
@Deprecated
public static int compareCodePoint(int codePoint, CharSequence s) {

View file

@ -11,6 +11,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.TreeSet;
import com.ibm.icu.impl.BMPSet;
@ -1516,7 +1517,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
public final UnicodeSet remove(CharSequence s) {
int cp = getSingleCP(s);
if (cp < 0) {
strings.remove(s);
strings.remove(s.toString());
pat = null;
} else {
remove(cp, cp);
@ -1595,7 +1596,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
int cp = getSingleCP(s);
if (cp < 0) {
if (strings.contains(s)) {
strings.remove(s);
strings.remove(s.toString());
} else {
strings.add(s.toString());
}
@ -4196,11 +4197,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
if (pos < len-1) {
result.codepoint = list[pos++];
result.codepointEnd = list[pos++]-1;
// result.string = null;
} else {
throw new ArrayIndexOutOfBoundsException(pos);
// result.codepoint = -1;
// result.string = stringIterator.next();
throw new NoSuchElementException();
}
return result;
}

View file

@ -95,8 +95,8 @@ public class UnicodeSetSpanner {
}
/**
* Options for replaceFrom and countIn to control how to treat each matched span. The name is from "qualifier" as used in regex,
* since it is similar to whether one is replacing [abc] by x, or [abc]* by x.
* Options for replaceFrom and countIn to control how to treat each matched span.
* It is similar to whether one is replacing [abc] by x, or [abc]* by x.
*
* @draft ICU 54
* @provisional This is a draft API and might change in a future release of ICU.
@ -130,7 +130,7 @@ public class UnicodeSetSpanner {
/**
* Returns the number of matching characters found in a character sequence,
* counting by Quantifier.MIN_ELEMENTS using SpanCondition.SIMPLE.
* counting by CountMethod.MIN_ELEMENTS using SpanCondition.SIMPLE.
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
* @param sequence
* the sequence to count characters in
@ -148,13 +148,15 @@ public class UnicodeSetSpanner {
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
* @param sequence
* the sequence to count characters in
* @param countMethod
* whether to treat an entire span as a match, or individual elements as matches
* @return the count. Zero if there are none.
*
* @draft ICU 54
* @provisional This is a draft API and might change in a future release of ICU.
*/
public int countIn(CharSequence sequence, CountMethod quantifier) {
return countIn(sequence, quantifier, SpanCondition.SIMPLE);
public int countIn(CharSequence sequence, CountMethod countMethod) {
return countIn(sequence, countMethod, SpanCondition.SIMPLE);
}
/**
@ -162,7 +164,8 @@ public class UnicodeSetSpanner {
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
* @param sequence
* the sequence to count characters in
* @param quantifier whether to treat an entire span as a match, or individual code points
* @param countMethod
* whether to treat an entire span as a match, or individual elements as matches
* @param spanCondition
* the spanCondition to use. SIMPLE or CONTAINED means only count the code points in the span;
* NOT_CONTAINED is the reverse.
@ -172,20 +175,28 @@ public class UnicodeSetSpanner {
* @draft ICU 54
* @provisional This is a draft API and might change in a future release of ICU.
*/
public int countIn(CharSequence sequence, CountMethod quantifier, SpanCondition spanCondition) {
public int countIn(CharSequence sequence, CountMethod countMethod, SpanCondition spanCondition) {
int count = 0;
int start = 0;
SpanCondition skipSpan = spanCondition == SpanCondition.NOT_CONTAINED ? SpanCondition.SIMPLE
: SpanCondition.NOT_CONTAINED;
final int length = sequence.length();
OutputInt spanCount = new OutputInt();
OutputInt spanCount = null;
while (start != length) {
int endNotContained = unicodeSet.span(sequence, start, skipSpan);
if (endNotContained == length) {
int endOfSpan = unicodeSet.span(sequence, start, skipSpan);
if (endOfSpan == length) {
break;
}
start = unicodeSet.spanAndCount(sequence, endNotContained, spanCondition, spanCount);
count += quantifier == CountMethod.WHOLE_SPAN ? 1 : spanCount.value;
if (countMethod == CountMethod.WHOLE_SPAN) {
start = unicodeSet.span(sequence, endOfSpan, spanCondition);
count += 1;
} else {
if (spanCount == null) {
spanCount = new OutputInt();
}
start = unicodeSet.spanAndCount(sequence, endOfSpan, spanCondition, spanCount);
count += spanCount.value;
}
}
return count;
}
@ -205,7 +216,7 @@ public class UnicodeSetSpanner {
}
/**
* Delete all matching spans in sequence, according to the operations.
* Delete all matching spans in sequence, according to the spanCondition.
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
* @param sequence
* charsequence to replace matching spans in.
@ -222,7 +233,7 @@ public class UnicodeSetSpanner {
/**
* Replace all matching spans in sequence by the replacement,
* counting by Quantifier.MIN_ELEMENTS using SpanCondition.SIMPLE.
* counting by CountMethod.MIN_ELEMENTS using SpanCondition.SIMPLE.
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
* @param sequence
* charsequence to replace matching spans in.
@ -238,41 +249,42 @@ public class UnicodeSetSpanner {
}
/**
* Replace all matching spans in sequence by replacement, according to the Quantifier, using SpanCondition.SIMPLE.
* Replace all matching spans in sequence by replacement, according to the CountMethod, using SpanCondition.SIMPLE.
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
*
* @param sequence
* charsequence to replace matching spans in.
* @param replacement
* replacement sequence. To delete, use ""
* @param quantifier
* whether to treat an entire span as a match, or individual code points
* @param countMethod
* whether to treat an entire span as a match, or individual elements as matches
* @return modified string.
*
* @draft ICU 54
* @provisional This is a draft API and might change in a future release of ICU.
*/
public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod quantifier) {
return replaceFrom(sequence, replacement, quantifier, SpanCondition.SIMPLE);
public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod countMethod) {
return replaceFrom(sequence, replacement, countMethod, SpanCondition.SIMPLE);
}
/**
* Replace all matching spans in sequence by replacement, according to the operations quantifier and spanCondition.
* Replace all matching spans in sequence by replacement, according to the countMethod and spanCondition.
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
* @param sequence
* charsequence to replace matching spans in.
* @param replacement
* replacement sequence. To delete, use ""
* @param countMethod
* whether to treat an entire span as a match, or individual elements as matches
* @param spanCondition
* specify whether to modify the matching spans (CONTAINED or SIMPLE) or the non-matching
* (NOT_CONTAINED)
* @param quantifier
* specify whether to collapse or do codepoint by codepoint.
* @return modified string.
*
* @draft ICU 54
* @provisional This is a draft API and might change in a future release of ICU.
*/
public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod quantifier,
public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod countMethod,
SpanCondition spanCondition) {
SpanCondition copySpan = spanCondition == SpanCondition.NOT_CONTAINED ? SpanCondition.SIMPLE
: SpanCondition.NOT_CONTAINED;
@ -282,12 +294,20 @@ public class UnicodeSetSpanner {
// avoid this allocation unless needed
final int length = sequence.length();
OutputInt spanCount = new OutputInt();
OutputInt spanCount = null;
for (int endCopy = 0; endCopy != length;) {
int endModify = unicodeSet.spanAndCount(sequence, endCopy, spanCondition, spanCount);
int endModify;
if (countMethod == CountMethod.WHOLE_SPAN) {
endModify = unicodeSet.span(sequence, endCopy, spanCondition);
} else {
if (spanCount == null) {
spanCount = new OutputInt();
}
endModify = unicodeSet.spanAndCount(sequence, endCopy, spanCondition, spanCount);
}
if (remove || endModify == 0) {
// do nothing
} else if (quantifier == CountMethod.WHOLE_SPAN) {
} else if (countMethod == CountMethod.WHOLE_SPAN) {
result.append(replacement);
} else {
for (int i = spanCount.value; i > 0; --i) {
@ -334,7 +354,7 @@ public class UnicodeSetSpanner {
}
/**
* Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching code points at the start or
* Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching code points at the start and
* end of the string, using TrimOption.BOTH and SpanCondition.SIMPLE. For example:
*
* <pre>
@ -343,7 +363,10 @@ public class UnicodeSetSpanner {
* new UnicodeSet("[ab]").trim("abacatbab")}
* </pre>
*
* ... returns {@code "catbab"}.
* ... returns {@code "cat"}.
* @param sequence
* the sequence to trim
* @return a subsequence
*
* @draft ICU 54
* @provisional This is a draft API and might change in a future release of ICU.
@ -359,11 +382,17 @@ public class UnicodeSetSpanner {
* <pre>
* {@code
*
* new UnicodeSet("[ab]").trim("abacatbab")}
* new UnicodeSet("[ab]").trim("abacatbab", TrimOption.LEADING)}
* </pre>
*
* ... returns {@code "catbab"}.
*
* @param sequence
* the sequence to trim
* @param trimOption
* LEADING, TRAILING, or BOTH
* @return a subsequence
*
* @draft ICU 54
* @provisional This is a draft API and might change in a future release of ICU.
*/
@ -378,7 +407,7 @@ public class UnicodeSetSpanner {
* <pre>
* {@code
*
* new UnicodeSet("[ab]").trim("abacatbab")}
* new UnicodeSet("[ab]").trim("abacatbab", TrimOption.LEADING, SpanCondition.SIMPLE)}
* </pre>
*
* ... returns {@code "catbab"}.

View file

@ -2430,10 +2430,6 @@ public class UnicodeSetTest extends TestFmwk {
it.nextRange();
assertEquals(title, it.codepoint, range.codepoint);
assertEquals(title, it.codepointEnd, range.codepointEnd);
// if (range.codepoint != -1) {
// } else {
// assertEquals(title, it.string, range.string);
// }
}
for (String s : us.strings()) {
it.nextRange();