mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 01:11:02 +00:00
ICU-9131 First batch of fixes for review comments. More to come.
X-SVN-Rev: 36500
This commit is contained in:
parent
cea11c1786
commit
1d5dbc0af9
4 changed files with 68 additions and 45 deletions
|
@ -2617,8 +2617,8 @@ public final class UTF16 {
|
|||
* @return the code point IF the string is non-null and consists of a single code point.
|
||||
* otherwise returns -1.
|
||||
* @param s to test
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @draft ICU 54
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
@Deprecated
|
||||
public static int getSingleCodePoint(CharSequence s) {
|
||||
|
@ -2631,7 +2631,7 @@ public final class UTF16 {
|
|||
}
|
||||
|
||||
// at this point, len = 2
|
||||
int cp = UTF16.charAt(s, 0);
|
||||
int cp = Character.codePointAt(s, 0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
}
|
||||
|
@ -2653,8 +2653,8 @@ public final class UTF16 {
|
|||
* @param codePoint to test
|
||||
* @param s to test
|
||||
* @return equivalent of code point comparator comparing two strings.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @draft ICU 54
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
@Deprecated
|
||||
public static int compareCodePoint(int codePoint, CharSequence s) {
|
||||
|
|
|
@ -11,6 +11,7 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.impl.BMPSet;
|
||||
|
@ -1516,7 +1517,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
public final UnicodeSet remove(CharSequence s) {
|
||||
int cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
strings.remove(s);
|
||||
strings.remove(s.toString());
|
||||
pat = null;
|
||||
} else {
|
||||
remove(cp, cp);
|
||||
|
@ -1595,7 +1596,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
int cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (strings.contains(s)) {
|
||||
strings.remove(s);
|
||||
strings.remove(s.toString());
|
||||
} else {
|
||||
strings.add(s.toString());
|
||||
}
|
||||
|
@ -4196,11 +4197,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
if (pos < len-1) {
|
||||
result.codepoint = list[pos++];
|
||||
result.codepointEnd = list[pos++]-1;
|
||||
// result.string = null;
|
||||
} else {
|
||||
throw new ArrayIndexOutOfBoundsException(pos);
|
||||
// result.codepoint = -1;
|
||||
// result.string = stringIterator.next();
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -95,8 +95,8 @@ public class UnicodeSetSpanner {
|
|||
}
|
||||
|
||||
/**
|
||||
* Options for replaceFrom and countIn to control how to treat each matched span. The name is from "qualifier" as used in regex,
|
||||
* since it is similar to whether one is replacing [abc] by x, or [abc]* by x.
|
||||
* Options for replaceFrom and countIn to control how to treat each matched span.
|
||||
* It is similar to whether one is replacing [abc] by x, or [abc]* by x.
|
||||
*
|
||||
* @draft ICU 54
|
||||
* @provisional This is a draft API and might change in a future release of ICU.
|
||||
|
@ -130,7 +130,7 @@ public class UnicodeSetSpanner {
|
|||
|
||||
/**
|
||||
* Returns the number of matching characters found in a character sequence,
|
||||
* counting by Quantifier.MIN_ELEMENTS using SpanCondition.SIMPLE.
|
||||
* counting by CountMethod.MIN_ELEMENTS using SpanCondition.SIMPLE.
|
||||
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
|
||||
* @param sequence
|
||||
* the sequence to count characters in
|
||||
|
@ -148,13 +148,15 @@ public class UnicodeSetSpanner {
|
|||
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
|
||||
* @param sequence
|
||||
* the sequence to count characters in
|
||||
* @param countMethod
|
||||
* whether to treat an entire span as a match, or individual elements as matches
|
||||
* @return the count. Zero if there are none.
|
||||
*
|
||||
* @draft ICU 54
|
||||
* @provisional This is a draft API and might change in a future release of ICU.
|
||||
*/
|
||||
public int countIn(CharSequence sequence, CountMethod quantifier) {
|
||||
return countIn(sequence, quantifier, SpanCondition.SIMPLE);
|
||||
public int countIn(CharSequence sequence, CountMethod countMethod) {
|
||||
return countIn(sequence, countMethod, SpanCondition.SIMPLE);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -162,7 +164,8 @@ public class UnicodeSetSpanner {
|
|||
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
|
||||
* @param sequence
|
||||
* the sequence to count characters in
|
||||
* @param quantifier whether to treat an entire span as a match, or individual code points
|
||||
* @param countMethod
|
||||
* whether to treat an entire span as a match, or individual elements as matches
|
||||
* @param spanCondition
|
||||
* the spanCondition to use. SIMPLE or CONTAINED means only count the code points in the span;
|
||||
* NOT_CONTAINED is the reverse.
|
||||
|
@ -172,20 +175,28 @@ public class UnicodeSetSpanner {
|
|||
* @draft ICU 54
|
||||
* @provisional This is a draft API and might change in a future release of ICU.
|
||||
*/
|
||||
public int countIn(CharSequence sequence, CountMethod quantifier, SpanCondition spanCondition) {
|
||||
public int countIn(CharSequence sequence, CountMethod countMethod, SpanCondition spanCondition) {
|
||||
int count = 0;
|
||||
int start = 0;
|
||||
SpanCondition skipSpan = spanCondition == SpanCondition.NOT_CONTAINED ? SpanCondition.SIMPLE
|
||||
: SpanCondition.NOT_CONTAINED;
|
||||
final int length = sequence.length();
|
||||
OutputInt spanCount = new OutputInt();
|
||||
OutputInt spanCount = null;
|
||||
while (start != length) {
|
||||
int endNotContained = unicodeSet.span(sequence, start, skipSpan);
|
||||
if (endNotContained == length) {
|
||||
int endOfSpan = unicodeSet.span(sequence, start, skipSpan);
|
||||
if (endOfSpan == length) {
|
||||
break;
|
||||
}
|
||||
start = unicodeSet.spanAndCount(sequence, endNotContained, spanCondition, spanCount);
|
||||
count += quantifier == CountMethod.WHOLE_SPAN ? 1 : spanCount.value;
|
||||
if (countMethod == CountMethod.WHOLE_SPAN) {
|
||||
start = unicodeSet.span(sequence, endOfSpan, spanCondition);
|
||||
count += 1;
|
||||
} else {
|
||||
if (spanCount == null) {
|
||||
spanCount = new OutputInt();
|
||||
}
|
||||
start = unicodeSet.spanAndCount(sequence, endOfSpan, spanCondition, spanCount);
|
||||
count += spanCount.value;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
@ -205,7 +216,7 @@ public class UnicodeSetSpanner {
|
|||
}
|
||||
|
||||
/**
|
||||
* Delete all matching spans in sequence, according to the operations.
|
||||
* Delete all matching spans in sequence, according to the spanCondition.
|
||||
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
|
||||
* @param sequence
|
||||
* charsequence to replace matching spans in.
|
||||
|
@ -222,7 +233,7 @@ public class UnicodeSetSpanner {
|
|||
|
||||
/**
|
||||
* Replace all matching spans in sequence by the replacement,
|
||||
* counting by Quantifier.MIN_ELEMENTS using SpanCondition.SIMPLE.
|
||||
* counting by CountMethod.MIN_ELEMENTS using SpanCondition.SIMPLE.
|
||||
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
|
||||
* @param sequence
|
||||
* charsequence to replace matching spans in.
|
||||
|
@ -238,41 +249,42 @@ public class UnicodeSetSpanner {
|
|||
}
|
||||
|
||||
/**
|
||||
* Replace all matching spans in sequence by replacement, according to the Quantifier, using SpanCondition.SIMPLE.
|
||||
* Replace all matching spans in sequence by replacement, according to the CountMethod, using SpanCondition.SIMPLE.
|
||||
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
|
||||
*
|
||||
* @param sequence
|
||||
* charsequence to replace matching spans in.
|
||||
* @param replacement
|
||||
* replacement sequence. To delete, use ""
|
||||
* @param quantifier
|
||||
* whether to treat an entire span as a match, or individual code points
|
||||
* @param countMethod
|
||||
* whether to treat an entire span as a match, or individual elements as matches
|
||||
* @return modified string.
|
||||
*
|
||||
* @draft ICU 54
|
||||
* @provisional This is a draft API and might change in a future release of ICU.
|
||||
*/
|
||||
public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod quantifier) {
|
||||
return replaceFrom(sequence, replacement, quantifier, SpanCondition.SIMPLE);
|
||||
public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod countMethod) {
|
||||
return replaceFrom(sequence, replacement, countMethod, SpanCondition.SIMPLE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace all matching spans in sequence by replacement, according to the operations quantifier and spanCondition.
|
||||
* Replace all matching spans in sequence by replacement, according to the countMethod and spanCondition.
|
||||
* The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
|
||||
* @param sequence
|
||||
* charsequence to replace matching spans in.
|
||||
* @param replacement
|
||||
* replacement sequence. To delete, use ""
|
||||
* @param countMethod
|
||||
* whether to treat an entire span as a match, or individual elements as matches
|
||||
* @param spanCondition
|
||||
* specify whether to modify the matching spans (CONTAINED or SIMPLE) or the non-matching
|
||||
* (NOT_CONTAINED)
|
||||
* @param quantifier
|
||||
* specify whether to collapse or do codepoint by codepoint.
|
||||
* @return modified string.
|
||||
*
|
||||
* @draft ICU 54
|
||||
* @provisional This is a draft API and might change in a future release of ICU.
|
||||
*/
|
||||
public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod quantifier,
|
||||
public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod countMethod,
|
||||
SpanCondition spanCondition) {
|
||||
SpanCondition copySpan = spanCondition == SpanCondition.NOT_CONTAINED ? SpanCondition.SIMPLE
|
||||
: SpanCondition.NOT_CONTAINED;
|
||||
|
@ -282,12 +294,20 @@ public class UnicodeSetSpanner {
|
|||
// avoid this allocation unless needed
|
||||
|
||||
final int length = sequence.length();
|
||||
OutputInt spanCount = new OutputInt();
|
||||
OutputInt spanCount = null;
|
||||
for (int endCopy = 0; endCopy != length;) {
|
||||
int endModify = unicodeSet.spanAndCount(sequence, endCopy, spanCondition, spanCount);
|
||||
int endModify;
|
||||
if (countMethod == CountMethod.WHOLE_SPAN) {
|
||||
endModify = unicodeSet.span(sequence, endCopy, spanCondition);
|
||||
} else {
|
||||
if (spanCount == null) {
|
||||
spanCount = new OutputInt();
|
||||
}
|
||||
endModify = unicodeSet.spanAndCount(sequence, endCopy, spanCondition, spanCount);
|
||||
}
|
||||
if (remove || endModify == 0) {
|
||||
// do nothing
|
||||
} else if (quantifier == CountMethod.WHOLE_SPAN) {
|
||||
} else if (countMethod == CountMethod.WHOLE_SPAN) {
|
||||
result.append(replacement);
|
||||
} else {
|
||||
for (int i = spanCount.value; i > 0; --i) {
|
||||
|
@ -334,7 +354,7 @@ public class UnicodeSetSpanner {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching code points at the start or
|
||||
* Returns a trimmed sequence (using CharSequence.subSequence()), that omits matching code points at the start and
|
||||
* end of the string, using TrimOption.BOTH and SpanCondition.SIMPLE. For example:
|
||||
*
|
||||
* <pre>
|
||||
|
@ -343,7 +363,10 @@ public class UnicodeSetSpanner {
|
|||
* new UnicodeSet("[ab]").trim("abacatbab")}
|
||||
* </pre>
|
||||
*
|
||||
* ... returns {@code "catbab"}.
|
||||
* ... returns {@code "cat"}.
|
||||
* @param sequence
|
||||
* the sequence to trim
|
||||
* @return a subsequence
|
||||
*
|
||||
* @draft ICU 54
|
||||
* @provisional This is a draft API and might change in a future release of ICU.
|
||||
|
@ -359,11 +382,17 @@ public class UnicodeSetSpanner {
|
|||
* <pre>
|
||||
* {@code
|
||||
*
|
||||
* new UnicodeSet("[ab]").trim("abacatbab")}
|
||||
* new UnicodeSet("[ab]").trim("abacatbab", TrimOption.LEADING)}
|
||||
* </pre>
|
||||
*
|
||||
* ... returns {@code "catbab"}.
|
||||
*
|
||||
* @param sequence
|
||||
* the sequence to trim
|
||||
* @param trimOption
|
||||
* LEADING, TRAILING, or BOTH
|
||||
* @return a subsequence
|
||||
*
|
||||
* @draft ICU 54
|
||||
* @provisional This is a draft API and might change in a future release of ICU.
|
||||
*/
|
||||
|
@ -378,7 +407,7 @@ public class UnicodeSetSpanner {
|
|||
* <pre>
|
||||
* {@code
|
||||
*
|
||||
* new UnicodeSet("[ab]").trim("abacatbab")}
|
||||
* new UnicodeSet("[ab]").trim("abacatbab", TrimOption.LEADING, SpanCondition.SIMPLE)}
|
||||
* </pre>
|
||||
*
|
||||
* ... returns {@code "catbab"}.
|
||||
|
|
|
@ -2430,10 +2430,6 @@ public class UnicodeSetTest extends TestFmwk {
|
|||
it.nextRange();
|
||||
assertEquals(title, it.codepoint, range.codepoint);
|
||||
assertEquals(title, it.codepointEnd, range.codepointEnd);
|
||||
// if (range.codepoint != -1) {
|
||||
// } else {
|
||||
// assertEquals(title, it.string, range.string);
|
||||
// }
|
||||
}
|
||||
for (String s : us.strings()) {
|
||||
it.nextRange();
|
||||
|
|
Loading…
Add table
Reference in a new issue