mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 15:42:14 +00:00
parent
fb715eab4c
commit
9963b4d62a
17 changed files with 231 additions and 150 deletions
|
@ -25,15 +25,25 @@ Here are a few examples of sets:
|
|||
| `[abc123]` | The six characters a,b,c,1,2 and 3 |
|
||||
| `[\p{Letter}]` | All characters with the Unicode General Category of Letter. |
|
||||
|
||||
String Values In addition to being a set of characters (of Unicode code points),
|
||||
### String Values
|
||||
|
||||
In addition to being a set of characters (of Unicode code points),
|
||||
a UnicodeSet may also contain string values. Conceptually, the UnicodeSet is
|
||||
always a set of strings, not a set of characters, although in many common use
|
||||
cases the strings are all of length one, which reduces to being a set of
|
||||
characters.
|
||||
|
||||
This concept can be confusing when first encountered, probably because similar
|
||||
set constructs from other environments (regular expressions) can only contain
|
||||
characters.
|
||||
set constructs from other environments
|
||||
(e.g., character classes in most regular expression implementations)
|
||||
can only contain characters.
|
||||
|
||||
Until ICU 68, it was not possible for a UnicodeSet to contain the empty string.
|
||||
In Java, an exception was thrown. In C++, the empty string was silently ignored.
|
||||
|
||||
Starting with ICU 69 ([ICU-13702](https://unicode-org.atlassian.net/browse/ICU-13702)),
|
||||
the empty string is supported as a set element;
|
||||
however, it is ignored in matching functions such as `span(string)`.
|
||||
|
||||
## UnicodeSet Patterns
|
||||
|
||||
|
|
|
@ -178,8 +178,6 @@ class RuleCharacterIterator;
|
|||
* Unicode property
|
||||
* </table>
|
||||
*
|
||||
* <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
|
||||
*
|
||||
* <p><b>Formal syntax</b></p>
|
||||
*
|
||||
* \htmlonly<blockquote>\endhtmlonly
|
||||
|
@ -1104,8 +1102,8 @@ public:
|
|||
* present. If this set already contains the multicharacter,
|
||||
* the call leaves this set unchanged.
|
||||
* Thus "ch" => {"ch"}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.4
|
||||
|
@ -1165,7 +1163,7 @@ public:
|
|||
|
||||
/**
|
||||
* Makes a set from a multicharacter string. Thus "ch" => {"ch"}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
*
|
||||
* @param s the source string
|
||||
* @return a newly created set containing the given string.
|
||||
* The caller owns the return object and is responsible for deleting it.
|
||||
|
@ -1279,8 +1277,8 @@ public:
|
|||
* Complement the specified string in this set.
|
||||
* If the set already contains the specified string, it is removed;
|
||||
* otherwise the string is added.
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param s the string to complement
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.4
|
||||
|
|
|
@ -444,7 +444,6 @@ UBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
|
|||
* @return <tt>true</tt> if this set contains the specified string
|
||||
*/
|
||||
UBool UnicodeSet::contains(const UnicodeString& s) const {
|
||||
if (s.length() == 0) return FALSE;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
return stringsContains(s);
|
||||
|
@ -559,11 +558,9 @@ UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
|
|||
if (hasStrings()) {
|
||||
for (i=0; i<strings->size(); ++i) {
|
||||
const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
|
||||
//if (s.length() == 0) {
|
||||
// // Empty strings match everything
|
||||
// return TRUE;
|
||||
//}
|
||||
// assert(s.length() != 0); // We enforce this elsewhere
|
||||
if (s.isEmpty()) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
UChar32 c = s.char32At(0);
|
||||
if ((c & 0xFF) == v) {
|
||||
return TRUE;
|
||||
|
@ -582,9 +579,6 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
|
|||
int32_t limit,
|
||||
UBool incremental) {
|
||||
if (offset == limit) {
|
||||
// Strings, if any, have length != 0, so we don't worry
|
||||
// about them here. If we ever allow zero-length strings
|
||||
// we much check for them here.
|
||||
if (contains(U_ETHER)) {
|
||||
return incremental ? U_PARTIAL_MATCH : U_MATCH;
|
||||
} else {
|
||||
|
@ -614,11 +608,9 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
|
|||
|
||||
for (i=0; i<strings->size(); ++i) {
|
||||
const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
|
||||
|
||||
//if (trial.length() == 0) {
|
||||
// return U_MATCH; // null-string always matches
|
||||
//}
|
||||
// assert(trial.length() != 0); // We ensure this elsewhere
|
||||
if (trial.isEmpty()) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
|
||||
UChar c = trial.charAt(forward ? 0 : trial.length() - 1);
|
||||
|
||||
|
@ -971,12 +963,12 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
|
|||
* present. If this set already contains the multicharacter,
|
||||
* the call leaves this set unchanged.
|
||||
* Thus "ch" => {"ch"}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
*
|
||||
* @param s the source string
|
||||
* @return the modified set, for chaining
|
||||
*/
|
||||
UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
|
||||
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
|
||||
if (isFrozen() || isBogus()) return *this;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (!stringsContains(s)) {
|
||||
|
@ -991,8 +983,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
|
|||
|
||||
/**
|
||||
* Adds the given string, in order, to 'strings'. The given string
|
||||
* must have been checked by the caller to not be empty and to not
|
||||
* already be in 'strings'.
|
||||
* must have been checked by the caller to not already be in 'strings'.
|
||||
*/
|
||||
void UnicodeSet::_add(const UnicodeString& s) {
|
||||
if (isFrozen() || isBogus()) {
|
||||
|
@ -1021,16 +1012,13 @@ void UnicodeSet::_add(const UnicodeString& s) {
|
|||
* @param s the string to test
|
||||
*/
|
||||
int32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
|
||||
//if (s.length() < 1) {
|
||||
// throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
|
||||
//}
|
||||
if (s.length() > 2) return -1;
|
||||
if (s.length() == 1) return s.charAt(0);
|
||||
|
||||
// at this point, len = 2
|
||||
UChar32 cp = s.char32At(0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
int32_t sLength = s.length();
|
||||
if (sLength == 1) return s.charAt(0);
|
||||
if (sLength == 2) {
|
||||
UChar32 cp = s.char32At(0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
@ -1186,7 +1174,7 @@ UnicodeSet& UnicodeSet::remove(UChar32 c) {
|
|||
* @return the modified set, for chaining
|
||||
*/
|
||||
UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
|
||||
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
|
||||
if (isFrozen() || isBogus()) return *this;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (strings != nullptr && strings->removeElement((void*) &s)) {
|
||||
|
@ -1252,12 +1240,12 @@ UnicodeSet& UnicodeSet::complement(void) {
|
|||
* Complement the specified string in this set.
|
||||
* If the set already contains the specified string, it is removed;
|
||||
* otherwise the string is added.
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
*
|
||||
* @param s the string to complement
|
||||
* @return this object, for chaining
|
||||
*/
|
||||
UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
|
||||
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
|
||||
if (isFrozen() || isBogus()) return *this;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (stringsContains(s)) {
|
||||
|
|
|
@ -555,7 +555,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
}
|
||||
buf.append(c);
|
||||
}
|
||||
if (buf.length() < 1 || !ok) {
|
||||
if (!ok) {
|
||||
// syntaxError(chars, "Invalid multicharacter string");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
|
|
|
@ -231,6 +231,9 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
|
|||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
if (length16==0) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
UBool thisRelevant;
|
||||
spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
|
||||
if(spanLength<length16) { // Relevant string.
|
||||
|
@ -312,7 +315,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
|
|||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
|
||||
if(spanLength<length16) { // Relevant string.
|
||||
if(spanLength<length16 && length16>0) { // Relevant string.
|
||||
if(which&UTF16) {
|
||||
if(which&CONTAINED) {
|
||||
if(which&FWD) {
|
||||
|
@ -362,7 +365,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
|
|||
addToSpanNotSet(c);
|
||||
}
|
||||
}
|
||||
} else { // Irrelevant string.
|
||||
} else { // Irrelevant string. (Also the empty string.)
|
||||
if(which&UTF8) {
|
||||
if(which&CONTAINED) { // Only necessary for LONGEST_MATCH.
|
||||
uint8_t *s8=utf8+utf8Count;
|
||||
|
@ -653,11 +656,12 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
|
|||
for(i=0; i<stringsLength; ++i) {
|
||||
int32_t overlap=spanLengths[i];
|
||||
if(overlap==ALL_CP_CONTAINED) {
|
||||
continue; // Irrelevant string.
|
||||
continue; // Irrelevant string. (Also the empty string.)
|
||||
}
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
U_ASSERT(length>0);
|
||||
|
||||
// Try to match this string at pos-overlap..pos.
|
||||
if(overlap>=LONG_SPAN) {
|
||||
|
@ -697,6 +701,9 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
|
|||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
if (length16==0) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
|
||||
// Try to match this string at pos-overlap..pos.
|
||||
if(overlap>=LONG_SPAN) {
|
||||
|
@ -817,11 +824,12 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
|
|||
for(i=0; i<stringsLength; ++i) {
|
||||
int32_t overlap=spanBackLengths[i];
|
||||
if(overlap==ALL_CP_CONTAINED) {
|
||||
continue; // Irrelevant string.
|
||||
continue; // Irrelevant string. (Also the empty string.)
|
||||
}
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
U_ASSERT(length>0);
|
||||
|
||||
// Try to match this string at pos-(length16-overlap)..pos-length16.
|
||||
if(overlap>=LONG_SPAN) {
|
||||
|
@ -863,6 +871,9 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
|
|||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
if (length16==0) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
|
||||
// Try to match this string at pos-(length16-overlap)..pos-length16.
|
||||
if(overlap>=LONG_SPAN) {
|
||||
|
@ -1358,11 +1369,12 @@ int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const {
|
|||
// Try to match the strings at pos.
|
||||
for(i=0; i<stringsLength; ++i) {
|
||||
if(spanLengths[i]==ALL_CP_CONTAINED) {
|
||||
continue; // Irrelevant string.
|
||||
continue; // Irrelevant string. (Also the empty string.)
|
||||
}
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
U_ASSERT(length>0);
|
||||
if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) {
|
||||
return pos; // There is a set element at pos.
|
||||
}
|
||||
|
@ -1401,11 +1413,12 @@ int32_t UnicodeSetStringSpan::spanNotBack(const UChar *s, int32_t length) const
|
|||
// it is easier and we only need to know whether the string is irrelevant
|
||||
// which is the same in either array.
|
||||
if(spanLengths[i]==ALL_CP_CONTAINED) {
|
||||
continue; // Irrelevant string.
|
||||
continue; // Irrelevant string. (Also the empty string.)
|
||||
}
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
U_ASSERT(length>0);
|
||||
if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) {
|
||||
return pos; // There is a set element at pos.
|
||||
}
|
||||
|
|
|
@ -98,6 +98,7 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
|
|||
TESTCASE_AUTO(TestIntOverflow);
|
||||
TESTCASE_AUTO(TestUnusedCcc);
|
||||
TESTCASE_AUTO(TestDeepPattern);
|
||||
TESTCASE_AUTO(TestEmptyString);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -3984,3 +3985,46 @@ void UnicodeSetTest::TestDeepPattern() {
|
|||
assertTrue("[a[a[a...1000s...]]] -> error", errorCode.isFailure());
|
||||
errorCode.reset();
|
||||
}
|
||||
|
||||
// Checks that the empty string can be an element of a UnicodeSet:
// it is parsed from the pattern "[{}]", counted by size()/isEmpty(),
// handled by remove/add/complement/clear/removeAllStrings, returned by
// UnicodeSetIterator, and skipped by span/spanBack/containsNone/containsSome
// both before and after the set is frozen.
void UnicodeSetTest::TestEmptyString() {
|
||||
IcuTestErrorCode errorCode(*this, "TestEmptyString");
|
||||
// Starting with ICU 69, the empty string is allowed in UnicodeSet. ICU-13702
|
||||
UnicodeSet set(u"[{}]", errorCode);
|
||||
// Nothing else can be checked if the pattern itself was rejected.
if (!assertSuccess("set from pattern with {}", errorCode)) { return; }
|
||||
assertTrue("set from pattern with {}", set.contains(u""));
|
||||
// The empty string counts as one element, so the set is not empty.
assertEquals("set from pattern with {}: size", 1, set.size());
|
||||
assertFalse("set from pattern with {}: isEmpty", set.isEmpty());
|
||||
|
||||
// Remove, add back, ...
|
||||
assertFalse("remove empty string", set.remove(u"").contains(u""));
|
||||
assertEquals("remove empty string: size", 0, set.size());
|
||||
assertTrue("remove empty string: isEmpty", set.isEmpty());
|
||||
assertTrue("add empty string", set.add(u"").contains(u""));
|
||||
// missing API -- assertTrue("retain empty string", set.retain(u"").contains(u""));
|
||||
// complement() toggles membership: the first call removes "", the second adds it back.
assertFalse("complement-remove empty string", set.complement(u"").contains(u""));
|
||||
assertTrue("complement-add empty string", set.complement(u"").contains(u""));
|
||||
|
||||
assertFalse("clear", set.clear().contains(u""));
|
||||
assertTrue("add empty string 2", set.add(u"").contains(u""));
|
||||
assertFalse("removeAllStrings", set.removeAllStrings().contains(u""));
|
||||
assertTrue("add empty string 3", set.add(u"").contains(u""));
|
||||
// Note that this leaves the set containing exactly the empty string.
|
||||
|
||||
// strings() access and iteration
|
||||
// no C++ equivalent for Java strings() -- assertTrue("strings()", set.strings().contains(u""));
|
||||
UnicodeSetIterator sit(set);
|
||||
assertTrue("set iterator.next()", sit.next());
|
||||
assertTrue("set iterator has empty string", sit.isString() && sit.getString().isEmpty());
|
||||
|
||||
// The empty string is ignored in matching.
|
||||
set.add(u'a').add(u'c');
|
||||
// Only 'a' and 'c' take part in matching: the forward span of "abc" stops
// at 'b' (length 1), and the backward span stops just after 'b' (index 2).
assertEquals("span", 1, set.span(u"abc", 3, USET_SPAN_SIMPLE));
|
||||
assertEquals("spanBack", 2, set.spanBack(u"abc", 3, USET_SPAN_SIMPLE));
|
||||
// "def" shares no character with the set; the empty string must not count as a match.
assertTrue("containsNone", set.containsNone(u"def"));
|
||||
assertFalse("containsSome", set.containsSome(u"def"));
|
||||
// Repeat the same matching checks on the frozen set.
set.freeze();
|
||||
assertEquals("frozen span", 1, set.span(u"abc", 3, USET_SPAN_SIMPLE));
|
||||
assertEquals("frozen spanBack", 2, set.spanBack(u"abc", 3, USET_SPAN_SIMPLE));
|
||||
assertTrue("frozen containsNone", set.containsNone(u"def"));
|
||||
assertFalse("frozen containsSome", set.containsSome(u"def"));
|
||||
}
|
||||
|
|
|
@ -94,6 +94,7 @@ private:
|
|||
void TestIntOverflow();
|
||||
void TestUnusedCcc();
|
||||
void TestDeepPattern();
|
||||
void TestEmptyString();
|
||||
|
||||
private:
|
||||
|
||||
|
|
|
@ -2,8 +2,6 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
|
||||
|
||||
import java.util.EnumMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -95,7 +93,7 @@ public class StaticUnicodeSets {
|
|||
* @return key1 if the set contains str, or null if not.
|
||||
*/
|
||||
public static Key chooseFrom(String str, Key key1) {
|
||||
return safeContains(get(key1), str) ? key1 : null;
|
||||
return get(key1).contains(str) ? key1 : null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -113,7 +111,7 @@ public class StaticUnicodeSets {
|
|||
* contains str.
|
||||
*/
|
||||
public static Key chooseFrom(String str, Key key1, Key key2) {
|
||||
return safeContains(get(key1), str) ? key1 : chooseFrom(str, key2);
|
||||
return get(key1).contains(str) ? key1 : chooseFrom(str, key2);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -110,9 +110,15 @@ public class UnicodeSetStringSpan {
|
|||
int i, spanLength;
|
||||
int maxLength16 = 0;
|
||||
someRelevant = false;
|
||||
for (i = 0; i < stringsLength; ++i) {
|
||||
for (i = 0; i < stringsLength;) {
|
||||
String string = strings.get(i);
|
||||
int length16 = string.length();
|
||||
if (length16 == 0) {
|
||||
// Remove the empty string.
|
||||
strings.remove(i);
|
||||
--stringsLength;
|
||||
continue;
|
||||
}
|
||||
spanLength = spanSet.span(string, SpanCondition.CONTAINED);
|
||||
if (spanLength < length16) { // Relevant string.
|
||||
someRelevant = true;
|
||||
|
@ -120,6 +126,7 @@ public class UnicodeSetStringSpan {
|
|||
if (/* (0 != (which & UTF16)) && */ length16 > maxLength16) {
|
||||
maxLength16 = length16;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
this.maxLength16 = maxLength16;
|
||||
if (!someRelevant && (which & WITH_COUNT) == 0) {
|
||||
|
|
|
@ -2,8 +2,6 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
|
||||
|
||||
import com.ibm.icu.impl.StaticUnicodeSets;
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
|
@ -18,7 +16,7 @@ public class InfinityMatcher extends SymbolMatcher {
|
|||
|
||||
public static InfinityMatcher getInstance(DecimalFormatSymbols symbols) {
|
||||
String symbolString = symbols.getInfinity();
|
||||
if (safeContains(DEFAULT.uniSet, symbolString)) {
|
||||
if (DEFAULT.uniSet.contains(symbolString)) {
|
||||
return DEFAULT;
|
||||
} else {
|
||||
return new InfinityMatcher(symbolString);
|
||||
|
|
|
@ -2,8 +2,6 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
|
||||
|
||||
import com.ibm.icu.impl.StaticUnicodeSets;
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
|
@ -19,7 +17,7 @@ public class MinusSignMatcher extends SymbolMatcher {
|
|||
|
||||
public static MinusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
|
||||
String symbolString = symbols.getMinusSignString();
|
||||
if (safeContains(DEFAULT.uniSet, symbolString)) {
|
||||
if (DEFAULT.uniSet.contains(symbolString)) {
|
||||
return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
|
||||
} else {
|
||||
return new MinusSignMatcher(symbolString, allowTrailing);
|
||||
|
|
|
@ -33,7 +33,9 @@ public class ParsingUtils {
|
|||
output.add(range.codepoint, range.codepointEnd);
|
||||
}
|
||||
for (String str : input.strings()) {
|
||||
output.add(str.codePointAt(0));
|
||||
if (!str.isEmpty()) {
|
||||
output.add(str.codePointAt(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -42,10 +44,4 @@ public class ParsingUtils {
|
|||
output.add(input.codePointAt(0));
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Remove this helper function (and update call sites) when #13805 is fixed
|
||||
public static boolean safeContains(UnicodeSet uniset, CharSequence str) {
|
||||
return str.length() != 0 && uniset.contains(str);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,8 +2,6 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
|
||||
|
||||
import com.ibm.icu.impl.StaticUnicodeSets;
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
|
@ -19,7 +17,7 @@ public class PlusSignMatcher extends SymbolMatcher {
|
|||
|
||||
public static PlusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
|
||||
String symbolString = symbols.getPlusSignString();
|
||||
if (safeContains(DEFAULT.uniSet, symbolString)) {
|
||||
if (DEFAULT.uniSet.contains(symbolString)) {
|
||||
return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
|
||||
} else {
|
||||
return new PlusSignMatcher(symbolString, allowTrailing);
|
||||
|
|
|
@ -2,8 +2,6 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
|
||||
|
||||
import com.ibm.icu.impl.StaticUnicodeSets;
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
|
||||
|
@ -36,9 +34,9 @@ public class ScientificMatcher implements NumberParseMatcher {
|
|||
ignorablesMatcher = IgnorablesMatcher.getInstance(ParsingUtils.PARSE_FLAG_STRICT_IGNORABLES);
|
||||
|
||||
String minusSign = symbols.getMinusSignString();
|
||||
customMinusSign = safeContains(minusSignSet(), minusSign) ? null : minusSign;
|
||||
customMinusSign = minusSignSet().contains(minusSign) ? null : minusSign;
|
||||
String plusSign = symbols.getPlusSignString();
|
||||
customPlusSign = safeContains(plusSignSet(), plusSign) ? null : plusSign;
|
||||
customPlusSign = plusSignSet().contains(plusSign) ? null : plusSign;
|
||||
}
|
||||
|
||||
private static UnicodeSet minusSignSet() {
|
||||
|
|
|
@ -186,8 +186,6 @@ import com.ibm.icu.util.VersionInfo;
|
|||
* Unicode property
|
||||
* </table>
|
||||
*
|
||||
* <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
|
||||
*
|
||||
* <p><b>Formal syntax</b></p>
|
||||
*
|
||||
* <blockquote>
|
||||
|
@ -892,11 +890,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
if (hasStrings()) {
|
||||
for (String s : strings) {
|
||||
//if (s.length() == 0) {
|
||||
// // Empty strings match everything
|
||||
// return true;
|
||||
//}
|
||||
// assert(s.length() != 0); // We enforce this elsewhere
|
||||
if (s.isEmpty()) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
int c = UTF16.charAt(s, 0);
|
||||
if ((c & 0xFF) == v) {
|
||||
return true;
|
||||
|
@ -918,9 +914,6 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
boolean incremental) {
|
||||
|
||||
if (offset[0] == limit) {
|
||||
// Strings, if any, have length != 0, so we don't worry
|
||||
// about them here. If we ever allow zero-length strings
|
||||
// we much check for them here.
|
||||
if (contains(UnicodeMatcher.ETHER)) {
|
||||
return incremental ? U_PARTIAL_MATCH : U_MATCH;
|
||||
} else {
|
||||
|
@ -948,10 +941,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
int highWaterLength = 0;
|
||||
|
||||
for (String trial : strings) {
|
||||
//if (trial.length() == 0) {
|
||||
// return U_MATCH; // null-string always matches
|
||||
//}
|
||||
// assert(trial.length() != 0); // We ensure this elsewhere
|
||||
if (trial.isEmpty()) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
|
||||
char c = trial.charAt(forward ? 0 : trial.length() - 1);
|
||||
|
||||
|
@ -1363,7 +1355,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
* present. If this set already contains the multicharacter,
|
||||
* the call leaves this set unchanged.
|
||||
* Thus "ch" => {"ch"}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
*
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.0
|
||||
|
@ -1392,22 +1384,19 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
|
||||
/**
|
||||
* Utility for getting code point from single code point CharSequence.
|
||||
* See the public UTF16.getSingleCodePoint()
|
||||
* See the public UTF16.getSingleCodePoint() (which returns -1 for null rather than throwing NPE).
|
||||
*
|
||||
* @return a code point IF the string consists of a single one.
|
||||
* otherwise returns -1.
|
||||
* @param s to test
|
||||
*/
|
||||
private static int getSingleCP(CharSequence s) {
|
||||
if (s.length() < 1) {
|
||||
throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
|
||||
}
|
||||
if (s.length() > 2) return -1;
|
||||
if (s.length() == 1) return s.charAt(0);
|
||||
|
||||
// at this point, len = 2
|
||||
int cp = UTF16.charAt(s, 0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
if (s.length() == 2) {
|
||||
int cp = Character.codePointAt(s, 0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
@ -1478,7 +1467,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
|
||||
/**
|
||||
* Makes a set from a multicharacter string. Thus "ch" => {"ch"}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
*
|
||||
* @param s the source string
|
||||
* @return a newly created set containing the given string
|
||||
* @stable ICU 2.0
|
||||
|
@ -1686,7 +1675,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
* Complement the specified string in this set.
|
||||
* If the set already contains the specified string, it is removed;
|
||||
* otherwise the string is added.
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
*
|
||||
* @param s the string to complement
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.0
|
||||
|
@ -2056,7 +2045,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
return true;
|
||||
}
|
||||
for (String setStr : strings) {
|
||||
if (s.startsWith(setStr, i) && containsAll(s, i+setStr.length())) {
|
||||
if (!setStr.isEmpty() && // skip the empty string
|
||||
s.startsWith(setStr, i) && containsAll(s, i+setStr.length())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -2801,7 +2791,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
appendCodePoint(buf, c);
|
||||
}
|
||||
if (buf.length() < 1 || !ok) {
|
||||
if (!ok) {
|
||||
syntaxError(chars, "Invalid multicharacter string");
|
||||
}
|
||||
// We have new string. Add it to set and continue;
|
||||
|
|
|
@ -1528,12 +1528,12 @@ public class UnicodeSetTest extends TestFmwk {
|
|||
|
||||
//public Iterator<String> iterator() {
|
||||
|
||||
ArrayList<String> oldList = new ArrayList<String>();
|
||||
ArrayList<String> oldList = new ArrayList<>();
|
||||
for (UnicodeSetIterator it = new UnicodeSetIterator(set1); it.next();) {
|
||||
oldList.add(it.getString());
|
||||
}
|
||||
|
||||
ArrayList<String> list1 = new ArrayList<String>();
|
||||
ArrayList<String> list1 = new ArrayList<>();
|
||||
for (String s : set1) {
|
||||
list1.add(s);
|
||||
}
|
||||
|
@ -1613,11 +1613,11 @@ public class UnicodeSetTest extends TestFmwk {
|
|||
List<UnicodeSet> goalLongest = Arrays.asList(set1, set3, set2);
|
||||
List<UnicodeSet> goalLex = Arrays.asList(set1, set2, set3);
|
||||
|
||||
List<UnicodeSet> sorted = new ArrayList(new TreeSet<UnicodeSet>(unsorted));
|
||||
List<UnicodeSet> sorted = new ArrayList(new TreeSet<>(unsorted));
|
||||
assertNotEquals("compareTo-shorter-first", unsorted, sorted);
|
||||
assertEquals("compareTo-shorter-first", goalShortest, sorted);
|
||||
|
||||
TreeSet<UnicodeSet> sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
|
||||
TreeSet<UnicodeSet> sorted1 = new TreeSet<>(new Comparator<UnicodeSet>(){
|
||||
@Override
|
||||
public int compare(UnicodeSet o1, UnicodeSet o2) {
|
||||
// TODO Auto-generated method stub
|
||||
|
@ -1628,7 +1628,7 @@ public class UnicodeSetTest extends TestFmwk {
|
|||
assertNotEquals("compareTo-longer-first", unsorted, sorted);
|
||||
assertEquals("compareTo-longer-first", goalLongest, sorted);
|
||||
|
||||
sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
|
||||
sorted1 = new TreeSet<>(new Comparator<UnicodeSet>(){
|
||||
@Override
|
||||
public int compare(UnicodeSet o1, UnicodeSet o2) {
|
||||
// TODO Auto-generated method stub
|
||||
|
@ -1642,7 +1642,7 @@ public class UnicodeSetTest extends TestFmwk {
|
|||
//compare(String, int)
|
||||
// make a list of interesting combinations
|
||||
List<String> sources = Arrays.asList("\u0000", "a", "b", "\uD7FF", "\uD800", "\uDBFF", "\uDC00", "\uDFFF", "\uE000", "\uFFFD", "\uFFFF");
|
||||
TreeSet<String> target = new TreeSet<String>();
|
||||
TreeSet<String> target = new TreeSet<>();
|
||||
for (String s : sources) {
|
||||
target.add(s);
|
||||
for (String t : sources) {
|
||||
|
@ -1685,8 +1685,8 @@ public class UnicodeSetTest extends TestFmwk {
|
|||
|
||||
//compare(Iterable<T>, Iterable<T>)
|
||||
int max = 10;
|
||||
List<String> test1 = new ArrayList<String>(max);
|
||||
List<String> test2 = new ArrayList<String>(max);
|
||||
List<String> test1 = new ArrayList<>(max);
|
||||
List<String> test2 = new ArrayList<>(max);
|
||||
for (int i = 0; i <= max; ++i) {
|
||||
test1.add("a" + i);
|
||||
test2.add("a" + (max - i)); // add in reverse order
|
||||
|
@ -2792,4 +2792,47 @@ public class UnicodeSetTest extends TestFmwk {
|
|||
} catch(RuntimeException expected) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks that the empty string can be an element of a UnicodeSet:
 * it is parsed from the pattern "[{}]", counted by size()/isEmpty(),
 * handled by remove/add/retain/complement/clear/removeAllStrings,
 * visible through strings() and UnicodeSetIterator, and skipped by
 * span/spanBack/containsNone/containsSome both before and after freeze().
 */
@Test
|
||||
public void TestEmptyString() {
|
||||
// Starting with ICU 69, the empty string is allowed in UnicodeSet. ICU-13702
|
||||
UnicodeSet set = new UnicodeSet("[{}]");
|
||||
assertTrue("set from pattern with {}", set.contains(""));
|
||||
// The empty string counts as one element, so the set is not empty.
assertEquals("set from pattern with {}: size", 1, set.size());
|
||||
assertFalse("set from pattern with {}: isEmpty", set.isEmpty());
|
||||
|
||||
// Remove, add back, ...
|
||||
assertFalse("remove empty string", set.remove("").contains(""));
|
||||
assertEquals("remove empty string: size", 0, set.size());
|
||||
assertTrue("remove empty string: isEmpty", set.isEmpty());
|
||||
assertTrue("add empty string", set.add("").contains(""));
|
||||
assertTrue("retain empty string", set.retain("").contains(""));
|
||||
// complement() toggles membership: the first call removes "", the second adds it back.
assertFalse("complement-remove empty string", set.complement("").contains(""));
|
||||
assertTrue("complement-add empty string", set.complement("").contains(""));
|
||||
|
||||
assertFalse("clear", set.clear().contains(""));
|
||||
assertTrue("add empty string 2", set.add("").contains(""));
|
||||
assertFalse("removeAllStrings", set.removeAllStrings().contains(""));
|
||||
assertTrue("add empty string 3", set.add("").contains(""));
|
||||
// Note that this leaves the set containing exactly the empty string.
|
||||
|
||||
// strings() access and iteration
|
||||
assertTrue("strings()", set.strings().contains(""));
|
||||
UnicodeSetIterator sit = new UnicodeSetIterator(set);
|
||||
assertTrue("set iterator.next()", sit.next());
|
||||
assertTrue("set iterator has empty string",
|
||||
sit.codepoint == UnicodeSetIterator.IS_STRING && sit.getString().isEmpty());
|
||||
|
||||
// The empty string is ignored in matching.
|
||||
set.add('a').add('c');
|
||||
// Only 'a' and 'c' take part in matching: the forward span of "abc" stops
// at 'b' (length 1), and the backward span stops just after 'b' (index 2).
assertEquals("span", 1, set.span("abc", SpanCondition.SIMPLE));
|
||||
assertEquals("spanBack", 2, set.spanBack("abc", SpanCondition.SIMPLE));
|
||||
// "def" shares no character with the set; the empty string must not count as a match.
assertTrue("containsNone", set.containsNone("def"));
|
||||
assertFalse("containsSome", set.containsSome("def"));
|
||||
// Repeat the same matching checks on the frozen set.
set.freeze();
|
||||
assertEquals("frozen span", 1, set.span("abc", SpanCondition.SIMPLE));
|
||||
assertEquals("frozen spanBack", 2, set.spanBack("abc", SpanCondition.SIMPLE));
|
||||
assertTrue("frozen containsNone", set.containsNone("def"));
|
||||
assertFalse("frozen containsSome", set.containsSome("def"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -562,12 +562,12 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
us._generatePattern(sb.append(1.0), true);
|
||||
us._generatePattern(sb.reverse(), true);
|
||||
} catch(Exception e){
|
||||
errln("UnicodeSet._generatePattern is not suppose to return an exception.");
|
||||
errln("UnicodeSet._generatePattern is not supposed to return an exception.");
|
||||
}
|
||||
|
||||
try{
|
||||
us._generatePattern(null, true);
|
||||
errln("UnicodeSet._generatePattern is suppose to return an exception.");
|
||||
errln("UnicodeSet._generatePattern is supposed to return an exception.");
|
||||
} catch(Exception e){}
|
||||
}
|
||||
|
||||
|
@ -585,12 +585,12 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
int limit = 0;
|
||||
|
||||
if(us.matches(null, offset, limit, true) != UnicodeSet.U_PARTIAL_MATCH){
|
||||
errln("UnicodeSet.matches is suppose to return " + UnicodeSet.U_PARTIAL_MATCH +
|
||||
errln("UnicodeSet.matches is supposed to return " + UnicodeSet.U_PARTIAL_MATCH +
|
||||
" but got " + us.matches(null, offset, limit, true));
|
||||
}
|
||||
|
||||
if(us.matches(null, offset, limit, false) != UnicodeSet.U_MATCH){
|
||||
errln("UnicodeSet.matches is suppose to return " + UnicodeSet.U_MATCH +
|
||||
errln("UnicodeSet.matches is supposed to return " + UnicodeSet.U_MATCH +
|
||||
" but got " + us.matches(null, offset, limit, false));
|
||||
}
|
||||
|
||||
|
@ -601,7 +601,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
offset[0] = 4; // Takes the letter "y"
|
||||
us.matches(rs, offset, 1, true);
|
||||
} catch(Exception e) {
|
||||
errln("UnicodeSet.matches is not suppose to return an exception");
|
||||
errln("UnicodeSet.matches is not supposed to return an exception");
|
||||
}
|
||||
|
||||
// TODO: Tests when "if (forward && length < highWaterLength)" is true
|
||||
|
@ -650,7 +650,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.indexOf(invalid[i]);
|
||||
errln("UnicodeSet.indexOf is suppose to return an exception " +
|
||||
errln("UnicodeSet.indexOf is supposed to return an exception " +
|
||||
"for a value of " + invalid[i]);
|
||||
} catch(Exception e){}
|
||||
}
|
||||
|
@ -659,7 +659,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
try{
|
||||
us.indexOf(valid[i]);
|
||||
} catch(Exception e){
|
||||
errln("UnicodeSet.indexOf is not suppose to return an exception " +
|
||||
errln("UnicodeSet.indexOf is not supposed to return an exception " +
|
||||
"for a value of " + valid[i]);
|
||||
}
|
||||
}
|
||||
|
@ -676,7 +676,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
int[] invalid = {-100,-10,-5,-2,-1};
|
||||
for(int i=0; i < invalid.length; i++){
|
||||
if(us.charAt(invalid[i]) != -1){
|
||||
errln("UnicodeSet.charAt(int index) was suppose to return -1 "
|
||||
errln("UnicodeSet.charAt(int index) was supposed to return -1 "
|
||||
+ "for an invalid input of " + invalid[i]);
|
||||
}
|
||||
}
|
||||
|
@ -696,7 +696,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.add(invalid[i], UnicodeSet.MAX_VALUE);
|
||||
errln("UnicodeSet.add(int start, int end) was suppose to give "
|
||||
errln("UnicodeSet.add(int start, int end) was supposed to give "
|
||||
+ "an exception for an start invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -706,7 +706,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.add(UnicodeSet.MIN_VALUE, invalid[i]);
|
||||
errln("UnicodeSet.add(int start, int end) was suppose to give "
|
||||
errln("UnicodeSet.add(int start, int end) was supposed to give "
|
||||
+ "an exception for an end invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -714,12 +714,12 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
|
||||
// Tests when "else if (start == end)" is false
|
||||
if(!(us.add(UnicodeSet.MIN_VALUE+1, UnicodeSet.MIN_VALUE).equals(us)))
|
||||
errln("UnicodeSet.add(int start, int end) was suppose to return "
|
||||
errln("UnicodeSet.add(int start, int end) was supposed to return "
|
||||
+ "the same object because start of value " + (UnicodeSet.MIN_VALUE+1)
|
||||
+ " is greater than end of value " + UnicodeSet.MIN_VALUE);
|
||||
|
||||
if(!(us.add(UnicodeSet.MAX_VALUE, UnicodeSet.MAX_VALUE-1).equals(us)))
|
||||
errln("UnicodeSet.add(int start, int end) was suppose to return "
|
||||
errln("UnicodeSet.add(int start, int end) was supposed to return "
|
||||
+ "the same object because start of value " + UnicodeSet.MAX_VALUE
|
||||
+ " is greater than end of value " + (UnicodeSet.MAX_VALUE-1));
|
||||
}
|
||||
|
@ -738,7 +738,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.add(invalid[i]);
|
||||
errln("UnicodeSet.add(int c) was suppose to give "
|
||||
errln("UnicodeSet.add(int c) was supposed to give "
|
||||
+ "an exception for an start invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -758,14 +758,15 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
// Tests when "if (s.length() < 1)" is true
|
||||
try{
|
||||
us.contains("");
|
||||
errln("UnicodeSet.getSingleCP is suppose to give an exception for " +
|
||||
} catch (Exception e) {
|
||||
errln("UnicodeSet.getSingleCP is not supposed to give an exception for " +
|
||||
"an empty string.");
|
||||
} catch (Exception e){}
|
||||
}
|
||||
|
||||
try{
|
||||
us.contains((String)null);
|
||||
errln("UnicodeSet.getSingleCP is suppose to give an exception for " +
|
||||
"a null string.");
|
||||
errln("UnicodeSet.getSingleCP is supposed to give an exception for " +
|
||||
"a null string.");
|
||||
} catch (Exception e){}
|
||||
|
||||
// Tests when "if (cp > 0xFFFF)" is true
|
||||
|
@ -774,8 +775,8 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
try{
|
||||
us.contains(cases[i]);
|
||||
} catch (Exception e){
|
||||
errln("UnicodeSet.getSingleCP is not suppose to give an exception for " +
|
||||
"a null string.");
|
||||
errln("UnicodeSet.getSingleCP is not supposed to give an exception for " +
|
||||
"a surrogate pair.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -790,7 +791,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
try{
|
||||
us.removeAllStrings();
|
||||
} catch(Exception e){
|
||||
errln("UnicodeSet.removeAllString() was not suppose to given an " +
|
||||
errln("UnicodeSet.removeAllString() was not supposed to given an " +
|
||||
"exception for a strings size of 0");
|
||||
}
|
||||
}
|
||||
|
@ -808,7 +809,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.retain(invalid[i], UnicodeSet.MAX_VALUE);
|
||||
errln("UnicodeSet.retain(int start, int end) was suppose to give "
|
||||
errln("UnicodeSet.retain(int start, int end) was supposed to give "
|
||||
+ "an exception for an start invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -818,7 +819,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.retain(UnicodeSet.MIN_VALUE, invalid[i]);
|
||||
errln("UnicodeSet.retain(int start, int end) was suppose to give "
|
||||
errln("UnicodeSet.retain(int start, int end) was supposed to give "
|
||||
+ "an exception for an end invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -828,14 +829,14 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
try{
|
||||
us.retain(UnicodeSet.MIN_VALUE+1, UnicodeSet.MIN_VALUE);
|
||||
} catch(Exception e){
|
||||
errln("UnicodeSet.retain(int start, int end) was not suppose to give "
|
||||
errln("UnicodeSet.retain(int start, int end) was not supposed to give "
|
||||
+ "an exception.");
|
||||
}
|
||||
|
||||
try{
|
||||
us.retain(UnicodeSet.MAX_VALUE, UnicodeSet.MAX_VALUE-1);
|
||||
} catch(Exception e){
|
||||
errln("UnicodeSet.retain(int start, int end) was not suppose to give "
|
||||
errln("UnicodeSet.retain(int start, int end) was not supposed to give "
|
||||
+ "an exception.");
|
||||
}
|
||||
}
|
||||
|
@ -849,7 +850,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
UnicodeSet us = new UnicodeSet();
|
||||
us.add("dummy");
|
||||
if(!(us.retain("dummy").equals(us))){
|
||||
errln("UnicodeSet.retain(String s) was suppose to return the " +
|
||||
errln("UnicodeSet.retain(String s) was supposed to return the " +
|
||||
"same UnicodeSet since the string was found in the original.");
|
||||
}
|
||||
}
|
||||
|
@ -867,7 +868,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.remove(invalid[i], UnicodeSet.MAX_VALUE);
|
||||
errln("UnicodeSet.remove(int start, int end) was suppose to give "
|
||||
errln("UnicodeSet.remove(int start, int end) was supposed to give "
|
||||
+ "an exception for an start invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -877,7 +878,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.remove(UnicodeSet.MIN_VALUE, invalid[i]);
|
||||
errln("UnicodeSet.remove(int start, int end) was suppose to give "
|
||||
errln("UnicodeSet.remove(int start, int end) was supposed to give "
|
||||
+ "an exception for an end invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -887,14 +888,14 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
try{
|
||||
us.remove(UnicodeSet.MIN_VALUE+1, UnicodeSet.MIN_VALUE);
|
||||
} catch(Exception e){
|
||||
errln("UnicodeSet.remove(int start, int end) was not suppose to give "
|
||||
errln("UnicodeSet.remove(int start, int end) was not supposed to give "
|
||||
+ "an exception.");
|
||||
}
|
||||
|
||||
try{
|
||||
us.remove(UnicodeSet.MAX_VALUE, UnicodeSet.MAX_VALUE-1);
|
||||
} catch(Exception e){
|
||||
errln("UnicodeSet.remove(int start, int end) was not suppose to give "
|
||||
errln("UnicodeSet.remove(int start, int end) was not supposed to give "
|
||||
+ "an exception.");
|
||||
}
|
||||
}
|
||||
|
@ -912,7 +913,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.complement(invalid[i], UnicodeSet.MAX_VALUE);
|
||||
errln("UnicodeSet.complement(int start, int end) was suppose to give "
|
||||
errln("UnicodeSet.complement(int start, int end) was supposed to give "
|
||||
+ "an exception for an start invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -922,7 +923,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.complement(UnicodeSet.MIN_VALUE, invalid[i]);
|
||||
errln("UnicodeSet.complement(int start, int end) was suppose to give "
|
||||
errln("UnicodeSet.complement(int start, int end) was supposed to give "
|
||||
+ "an exception for an end invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -932,14 +933,14 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
try{
|
||||
us.complement(UnicodeSet.MIN_VALUE+1, UnicodeSet.MIN_VALUE);
|
||||
} catch(Exception e){
|
||||
errln("UnicodeSet.complement(int start, int end) was not suppose to give "
|
||||
errln("UnicodeSet.complement(int start, int end) was not supposed to give "
|
||||
+ "an exception.");
|
||||
}
|
||||
|
||||
try{
|
||||
us.complement(UnicodeSet.MAX_VALUE, UnicodeSet.MAX_VALUE-1);
|
||||
} catch(Exception e){
|
||||
errln("UnicodeSet.complement(int start, int end) was not suppose to give "
|
||||
errln("UnicodeSet.complement(int start, int end) was not supposed to give "
|
||||
+ "an exception.");
|
||||
}
|
||||
}
|
||||
|
@ -955,7 +956,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
try{
|
||||
us.complement("dummy");
|
||||
} catch (Exception e){
|
||||
errln("UnicodeSet.complement(String s) was not suppose to give "
|
||||
errln("UnicodeSet.complement(String s) was not supposed to give "
|
||||
+ "an exception for 'dummy'.");
|
||||
}
|
||||
|
||||
|
@ -965,7 +966,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
try{
|
||||
us.complement("\uDC11");
|
||||
} catch (Exception e){
|
||||
errln("UnicodeSet.complement(String s) was not suppose to give "
|
||||
errln("UnicodeSet.complement(String s) was not supposed to give "
|
||||
+ "an exception for '\uDC11'.");
|
||||
}
|
||||
}
|
||||
|
@ -983,7 +984,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.contains(invalid[i]);
|
||||
errln("UnicodeSet.contains(int c) was suppose to give "
|
||||
errln("UnicodeSet.contains(int c) was supposed to give "
|
||||
+ "an exception for an start invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -1003,7 +1004,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.contains(invalid[i], UnicodeSet.MAX_VALUE);
|
||||
errln("UnicodeSet.contains(int start, int end) was suppose to give "
|
||||
errln("UnicodeSet.contains(int start, int end) was supposed to give "
|
||||
+ "an exception for an start invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -1013,7 +1014,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.contains(UnicodeSet.MIN_VALUE, invalid[i]);
|
||||
errln("UnicodeSet.contains(int start, int end) was suppose to give "
|
||||
errln("UnicodeSet.contains(int start, int end) was supposed to give "
|
||||
+ "an exception for an end invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -1028,7 +1029,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
UnicodeSet us = new UnicodeSet();
|
||||
String res = us.getRegexEquivalent();
|
||||
if(!(res.equals("[]")))
|
||||
errln("UnicodeSet.getRegexEquivalent is suppose to return '[]' " +
|
||||
errln("UnicodeSet.getRegexEquivalent is supposed to return '[]' " +
|
||||
"but got " + res);
|
||||
}
|
||||
|
||||
|
@ -1045,7 +1046,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.containsNone(invalid[i], UnicodeSet.MAX_VALUE);
|
||||
errln("UnicodeSet.containsNoneint start, int end) was suppose to give "
|
||||
errln("UnicodeSet.containsNoneint start, int end) was supposed to give "
|
||||
+ "an exception for an start invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -1055,7 +1056,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
for(int i=0; i < invalid.length; i++){
|
||||
try{
|
||||
us.containsNone(UnicodeSet.MIN_VALUE, invalid[i]);
|
||||
errln("UnicodeSet.containsNone(int start, int end) was suppose to give "
|
||||
errln("UnicodeSet.containsNone(int start, int end) was supposed to give "
|
||||
+ "an exception for an end invalid input of "
|
||||
+ invalid[i]);
|
||||
} catch (Exception e){}
|
||||
|
@ -1066,7 +1067,7 @@ public final class StringTokenizerTest extends TestFmwk
|
|||
us.add(0);
|
||||
us.containsNone(1, 2); // 1 > 0
|
||||
} catch (Exception e){
|
||||
errln("UnicodeSet.containsNone(int start, int end) was not suppose to give " +
|
||||
errln("UnicodeSet.containsNone(int start, int end) was not supposed to give " +
|
||||
"an exception.");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue