diff --git a/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java b/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java index 32565511953..4582f37f434 100755 --- a/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java +++ b/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java @@ -994,13 +994,48 @@ public class UnicodeSetTest extends TestFmwk { "[:Assigned:]", "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD", - "\\u0888\\uFDD3\\uFFFE\\U00050005" + "\\u0888\\uFDD3\\uFFFE\\U00050005", + }; for (int i=0; i + * containsAll is false for each of: "acb", "bcda", "bcx"
* @param s string containing characters to be checked for containment * @return true if the test condition is met * @stable ICU 2.0 @@ -1656,12 +1660,42 @@ public class UnicodeSet extends UnicodeFilter { int cp; for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { cp = UTF16.charAt(s, i); - if (!contains(cp)) return false; + if (!contains(cp)) { + if (strings.size() == 0) return false; // quick exit + // TODO: later, optimize for two common cases + // 1. If all the characters in the strings are individually in the set, then just return false + // in that case, looking at the strings wouldn't help. + // This setting can be cached. + // 2. If none of the strings overlap, then we don't need to go to regex, + // we can use a simpler test. + // We would cache this setting also, plus the maximum string length + + // TODO: later, cache the Matcher + // with all caches, we need to flush them if the set changes, of course! + return Pattern.matches(getRegexEquivalent() + "*", s); + } } return true; } /** + * @internal + * @deprecated + * @return regex pattern equivalent to this UnicodeSet + */ + public String getRegexEquivalent() { + if (strings.size() == 0) return toString(); + StringBuffer result = new StringBuffer("(?:"); + _generatePattern(result, true, false); + Iterator it = strings.iterator(); + while (it.hasNext()) { + result.append('|'); + _appendToPat(result, (String) it.next(), true); + } + return result.append(")").toString(); + } + + /** * Returns true if this set contains none of the characters * of the given range. * @param start first character, inclusive, of the range @@ -1684,8 +1718,10 @@ public class UnicodeSet extends UnicodeFilter { } /** - * Returns true if this set contains none of the characters and strings - * of the given set. + * Returns true if none of the characters or strings in this UnicodeSet appears in the string. + * For example, for the Unicode set [a{bc}{cd}]
+ * containsNone is true for: "xy", "cb"<br>
+ * containsNone is false for: "a", "bc", "bcd"<br>
* @param c set to be checked for containment * @return true if the test condition is met * @stable ICU 2.0 @@ -1717,6 +1753,12 @@ public class UnicodeSet extends UnicodeFilter { cp = UTF16.charAt(s, i); if (contains(cp)) return false; } + if (strings.size() == 0) return true; + // do a last check to make sure no strings are in. + for (Iterator it = strings.iterator(); it.hasNext();) { + String item = (String)it.next(); + if (s.indexOf(item) >= 0) return false; + } return true; } @@ -2356,7 +2398,7 @@ public class UnicodeSet extends UnicodeFilter { if (usePat) { rebuiltPat.append(pat.toString()); } else { - _generatePattern(rebuiltPat, false); + _generatePattern(rebuiltPat, false, true); } }