ICU-13702 add missing API functions

Also fixes a bug in Java UnicodeSet.retain(String), which added the string even if the set did not contain it before,
and makes some drive-by API doc fixes/clarifications.
This commit is contained in:
Markus Scherer 2021-02-16 16:09:18 -08:00
parent 7159e334ff
commit 66460b9fad
8 changed files with 273 additions and 42 deletions

View file

@ -599,7 +599,7 @@ public:
/**
* Make this object represent the range `start - end`.
* If `end > start` then this object is set to an empty range.
* If `start > end` then this object is set to an empty range.
* A frozen set will not be modified.
*
* @param start first character in the set, inclusive
@ -1075,7 +1075,7 @@ public:
/**
* Adds the specified range to this set if it is not already
* present. If this set already contains the specified range,
* the call leaves this set unchanged. If <code>end > start</code>
* the call leaves this set unchanged. If <code>start > end</code>
* then an empty range is added, leaving the set unchanged.
* This is equivalent to a boolean logic OR, or a set UNION.
* A frozen set will not be modified.
@ -1093,6 +1093,9 @@ public:
* present. If this set already contains the specified character,
* the call leaves this set unchanged.
* A frozen set will not be modified.
*
* @param c the character (code point)
* @return this object, for chaining
* @stable ICU 2.0
*/
UnicodeSet& add(UChar32 c);
@ -1122,8 +1125,8 @@ public:
public:
/**
* Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
* If this set already contains any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param s the source string
* @return this object, for chaining
@ -1133,7 +1136,6 @@ public:
/**
* Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param s the source string
* @return this object, for chaining
@ -1143,7 +1145,6 @@ public:
/**
* Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param s the source string
* @return this object, for chaining
@ -1153,7 +1154,6 @@ public:
/**
* Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param s the source string
* @return this object, for chaining
@ -1183,15 +1183,13 @@ public:
/**
* Retain only the elements in this set that are contained in the
* specified range. If <code>end > start</code> then an empty range is
* specified range. If <code>start > end</code> then an empty range is
* retained, leaving the set empty. This is equivalent to
* a boolean logic AND, or a set INTERSECTION.
* A frozen set will not be modified.
*
* @param start first character, inclusive, of range to be retained
* to this set.
* @param end last character, inclusive, of range to be retained
* to this set.
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
* @stable ICU 2.0
*/
virtual UnicodeSet& retain(UChar32 start, UChar32 end);
@ -1200,14 +1198,31 @@ public:
/**
* Retain the specified character from this set if it is present.
* A frozen set will not be modified.
*
* @param c the character (code point)
* @return this object, for chaining
* @stable ICU 2.0
*/
UnicodeSet& retain(UChar32 c);
#ifndef U_HIDE_DRAFT_API
/**
* Retains only the specified string from this set if it is present.
* Upon return this set will be empty if it did not contain s, or
* will only contain s if it did contain s.
* A frozen set will not be modified.
*
* @param s the source string
* @return this object, for chaining
* @draft ICU 69
*/
UnicodeSet& retain(const UnicodeString &s);
#endif // U_HIDE_DRAFT_API
/**
* Removes the specified range from this set if it is present.
* The set will not contain the specified range once the call
* returns. If <code>end > start</code> then an empty range is
* returns. If <code>start > end</code> then an empty range is
* removed, leaving the set unchanged.
* A frozen set will not be modified.
*
@ -1224,6 +1239,9 @@ public:
* The set will not contain the specified range once the call
* returns.
* A frozen set will not be modified.
*
* @param c the character (code point)
* @return this object, for chaining
* @stable ICU 2.0
*/
UnicodeSet& remove(UChar32 c);
@ -1251,15 +1269,13 @@ public:
/**
* Complements the specified range in this set. Any character in
* the range will be removed if it is in this set, or will be
* added if it is not in this set. If <code>end > start</code>
* added if it is not in this set. If <code>start > end</code>
* then an empty range is complemented, leaving the set unchanged.
* This is equivalent to a boolean logic XOR.
* A frozen set will not be modified.
*
* @param start first character, inclusive, of range to be removed
* from this set.
* @param end last character, inclusive, of range to be removed
* from this set.
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
* @stable ICU 2.0
*/
virtual UnicodeSet& complement(UChar32 start, UChar32 end);
@ -1269,14 +1285,16 @@ public:
* will be removed if it is in this set, or will be added if it is
* not in this set.
* A frozen set will not be modified.
*
* @param c the character (code point)
* @return this object, for chaining
* @stable ICU 2.0
*/
UnicodeSet& complement(UChar32 c);
/**
* Complement the specified string in this set.
* The set will not contain the specified string once the call
* returns.
* The string will be removed if it is in this set, or will be added if it is not in this set.
* A frozen set will not be modified.
*
* @param s the string to complement

View file

@ -582,8 +582,8 @@ U_CAPI void U_EXPORT2
uset_addString(USet* set, const UChar* str, int32_t strLen);
/**
* Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
* If this set already contains any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param set the object to which to add the character
* @param str the source string
@ -628,6 +628,20 @@ uset_removeRange(USet* set, UChar32 start, UChar32 end);
U_CAPI void U_EXPORT2
uset_removeString(USet* set, const UChar* str, int32_t strLen);
#ifndef U_HIDE_DRAFT_API
/**
* Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
* A frozen set will not be modified.
*
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
#endif // U_HIDE_DRAFT_API
/**
* Removes from this set all of its elements that are contained in the
* specified set. This operation effectively modifies this
@ -650,15 +664,41 @@ uset_removeAll(USet* set, const USet* removeSet);
* A frozen set will not be modified.
*
* @param set the object for which to retain only the specified range
* @param start first character, inclusive, of range to be retained
* to this set.
* @param end last character, inclusive, of range to be retained
* to this set.
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
* @stable ICU 3.2
*/
U_CAPI void U_EXPORT2
uset_retain(USet* set, UChar32 start, UChar32 end);
#ifndef U_HIDE_DRAFT_API
/**
* Retains only the specified string from this set if it is present.
* Upon return this set will be empty if it did not contain s, or
* will only contain s if it did contain s.
* A frozen set will not be modified.
*
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_retainString(USet *set, const UChar *str, int32_t length);
/**
* Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
* A frozen set will not be modified.
*
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
#endif // U_HIDE_DRAFT_API
/**
* Retains only the elements in this set that are contained in the
* specified set. In other words, removes from this set all of
@ -696,6 +736,49 @@ uset_compact(USet* set);
U_CAPI void U_EXPORT2
uset_complement(USet* set);
#ifndef U_HIDE_DRAFT_API
/**
* Complements the specified range in this set. Any character in
* the range will be removed if it is in this set, or will be
* added if it is not in this set. If <code>start > end</code>
* then an empty range is complemented, leaving the set unchanged.
* This is equivalent to a boolean logic XOR.
* A frozen set will not be modified.
*
* @param set the object to be modified
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_complementRange(USet *set, UChar32 start, UChar32 end);
/**
* Complements the specified string in this set.
* The string will be removed if it is in this set, or will be added if it is not in this set.
* A frozen set will not be modified.
*
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_complementString(USet *set, const UChar *str, int32_t length);
/**
* Complements EACH of the characters in this string. Note: "ch" == {"c", "h"}
* A frozen set will not be modified.
*
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
#endif // U_HIDE_DRAFT_API
/**
* Complements in this set all elements contained in the specified
* set. Any character in the other set will be removed if it is

View file

@ -1120,6 +1120,26 @@ UnicodeSet& UnicodeSet::retain(UChar32 c) {
return retain(c, c);
}
/**
 * Retains only the specified string from this set if it is present.
 * Upon return this set is empty if it did not contain s, or contains
 * only s if it did contain s.
 * A frozen or bogus set is not modified.
 *
 * @param s the source string
 * @return this object, for chaining
 */
UnicodeSet& UnicodeSet::retain(const UnicodeString &s) {
    if (isFrozen() || isBogus()) {
        return *this;
    }

    const UChar32 single = getSingleCP(s);
    if (single >= 0) {
        // Non-negative result: delegate to the code point range overload.
        retain(single, single);
        return *this;
    }

    // Negative result: s is handled as a string element of the set.
    const bool hadString = stringsContains(s);
    // Test getRangeCount() before size(): size() is somewhat expensive
    // when the set holds single code points.
    const bool alreadyOnlyS = hadString && getRangeCount() == 0 && size() == 1;
    if (!alreadyOnlyS) {
        clear();
        if (hadString) {
            // Put the string back only if it was there before.
            _add(s);
        }
    }
    return *this;
}
/**
* Removes the specified range from this set if it is present.
* The set will not contain the specified range once the call

View file

@ -116,6 +116,12 @@ uset_removeString(USet* set, const UChar* str, int32_t strLen) {
((UnicodeSet*) set)->UnicodeSet::remove(s);
}
// C API wrapper: forwards to UnicodeSet::removeAll(const UnicodeString &).
// A length of -1 marks str as NUL-terminated.
U_CAPI void U_EXPORT2
uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length) {
    UnicodeSet *self = (UnicodeSet *)set;
    UnicodeString codePoints(length == -1, str, length);
    self->UnicodeSet::removeAll(codePoints);
}
U_CAPI void U_EXPORT2
uset_removeAll(USet* set, const USet* remove) {
((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);
@ -126,6 +132,18 @@ uset_retain(USet* set, UChar32 start, UChar32 end) {
((UnicodeSet*) set)->UnicodeSet::retain(start, end);
}
// C API wrapper: forwards to UnicodeSet::retain(const UnicodeString &).
// A length of -1 marks str as NUL-terminated.
U_CAPI void U_EXPORT2
uset_retainString(USet *set, const UChar *str, int32_t length) {
    UnicodeSet *self = (UnicodeSet *)set;
    UnicodeString text(length == -1, str, length);
    self->UnicodeSet::retain(text);
}
// C API wrapper: forwards to UnicodeSet::retainAll(const UnicodeString &).
// A length of -1 marks str as NUL-terminated.
U_CAPI void U_EXPORT2
uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length) {
    UnicodeSet *self = (UnicodeSet *)set;
    UnicodeString codePoints(length == -1, str, length);
    self->UnicodeSet::retainAll(codePoints);
}
U_CAPI void U_EXPORT2
uset_retainAll(USet* set, const USet* retain) {
((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);
@ -141,6 +159,23 @@ uset_complement(USet* set) {
((UnicodeSet*) set)->UnicodeSet::complement();
}
// C API wrapper: forwards to UnicodeSet::complement(start, end) through an
// explicitly qualified call.
U_CAPI void U_EXPORT2
uset_complementRange(USet *set, UChar32 start, UChar32 end) {
    UnicodeSet *self = (UnicodeSet *)set;
    self->UnicodeSet::complement(start, end);
}
// C API wrapper: forwards to UnicodeSet::complement(const UnicodeString &).
// A length of -1 marks str as NUL-terminated.
U_CAPI void U_EXPORT2
uset_complementString(USet *set, const UChar *str, int32_t length) {
    UnicodeSet *self = (UnicodeSet *)set;
    UnicodeString text(length == -1, str, length);
    self->UnicodeSet::complement(text);
}
// C API wrapper: forwards to UnicodeSet::complementAll(const UnicodeString &).
// A length of -1 marks str as NUL-terminated.
U_CAPI void U_EXPORT2
uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length) {
    UnicodeSet *self = (UnicodeSet *)set;
    UnicodeString codePoints(length == -1, str, length);
    self->UnicodeSet::complementAll(codePoints);
}
U_CAPI void U_EXPORT2
uset_complementAll(USet* set, const USet* complement) {
((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);

View file

@ -211,6 +211,45 @@ static void TestAPI() {
uset_retainAll(set2, set);
expect(set2, "ghi", "abcdef{bc}", NULL);
// ICU 69 added some missing functions for parity with C++ and Java.
uset_applyPattern(set, u"[abcdef{ch}{sch}]", -1, 0, &ec);
if(U_FAILURE(ec)) {
log_err("uset_openPattern([abcdef{ch}{sch}]) failed - %s\n", u_errorName(ec));
return;
}
expect(set, "abcdef{ch}{sch}", "", NULL);
uset_removeAllCodePoints(set, u"ce", 2);
expect(set, "abdf{ch}{sch}", "ce", NULL);
uset_complementRange(set, u'b', u'f');
expect(set, "ace{ch}{sch}", "bdf", NULL);
uset_complementString(set, u"ch", -1);
expect(set, "ace{sch}", "bdf{ch}", NULL);
uset_complementString(set, u"xy", -1);
expect(set, "ace{sch}{xy}", "bdf{ch}", NULL);
uset_complementAllCodePoints(set, u"abef", 4);
expect(set, "bcf{sch}{xy}", "ade{ch}", NULL);
uset_retainAllCodePoints(set, u"abef", -1);
expect(set, "bf", "acde{ch}{sch}{xy}", NULL);
uset_applyPattern(set, u"[abcdef{ch}{sch}]", -1, 0, &ec);
if(U_FAILURE(ec)) {
log_err("uset_openPattern([abcdef{ch}{sch}]) failed - %s\n", u_errorName(ec));
return;
}
expect(set, "abcdef{ch}{sch}", "", NULL);
uset_retainString(set, u"sch", 3);
expect(set, "{sch}", "abcdef{ch}", NULL);
uset_retainString(set, u"ch", 3);
expect(set, "", "abcdef{ch}{sch}", NULL);
uset_close(set);
uset_close(set2);
}

View file

@ -696,6 +696,37 @@ void UnicodeSetTest::TestAPI() {
if (U_FAILURE(status)) { errln("FAIL"); return; }
if (set != exp) { errln("FAIL: retain('s')"); return; }
// ICU 2.6 coverage tests
// public final UnicodeSet retain(String s);
// public final UnicodeSet remove(int c);
// public final UnicodeSet remove(String s);
// public int hashCode();
set.applyPattern(u"[a-z{ab}{cd}]", status);
if (U_FAILURE(status)) { errln("FAIL"); return; }
set.retain(u"cd");
exp.applyPattern(u"[{cd}]", status);
if (U_FAILURE(status)) { errln("FAIL"); return; }
if (set != exp) { errln("FAIL: (with cd).retain(\"cd\")"); return; }
set.applyPattern(u"[a-z{ab}{yz}]", status);
if (U_FAILURE(status)) { errln("FAIL"); return; }
set.retain(u"cd");
exp.clear();
if (set != exp) { errln("FAIL: (without cd).retain(\"cd\")"); return; }
set.applyPattern(u"[a-z{ab}{cd}]", status);
if (U_FAILURE(status)) { errln("FAIL"); return; }
set.remove(u'c');
exp.applyPattern(u"[abd-z{ab}{cd}]", status);
if (set != exp) { errln("FAIL: remove('c')"); return; }
set.remove(u"cd");
exp.applyPattern(u"[abd-z{ab}]", status);
if (U_FAILURE(status)) { errln("FAIL"); return; }
if (set != exp) { errln("FAIL: remove(\"cd\")"); return; }
set.applyPattern("[s]", status);
if (U_FAILURE(status)) { errln("FAIL"); return; }
uint16_t buf[32];
int32_t slen = set.serialize(buf, UPRV_LENGTHOF(buf), status);
if (U_FAILURE(status)) { errln("FAIL: serialize"); return; }

View file

@ -514,7 +514,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
/**
* Make this object represent the range <code>start - end</code>.
* If <code>end &gt; start</code> then this object is set to an empty range.
* If <code>start &gt; end</code> then this object is set to an empty range.
*
* @param start first character in the set, inclusive
* @param end last character in the set, inclusive
@ -1159,7 +1159,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
/**
* Adds the specified range to this set if it is not already
* present. If this set already contains the specified range,
* the call leaves this set unchanged. If <code>end &gt; start</code>
* the call leaves this set unchanged. If <code>start &gt; end</code>
* then an empty range is added, leaving the set unchanged.
*
* @param start first character, inclusive, of range to be added
@ -1490,13 +1490,11 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
/**
* Retain only the elements in this set that are contained in the
* specified range. If <code>end &gt; start</code> then an empty range is
* specified range. If <code>start &gt; end</code> then an empty range is
* retained, leaving the set empty.
*
* @param start first character, inclusive, of range to be retained
* to this set.
* @param end last character, inclusive, of range to be retained
* to this set.
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
* @stable ICU 2.0
*/
public UnicodeSet retain(int start, int end) {
@ -1541,11 +1539,15 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
checkFrozen();
String s = cs.toString();
boolean isIn = strings.contains(s);
if (isIn && size() == 1) {
// Check for getRangeCount() first to avoid somewhat-expensive size()
// when there are single code points.
if (isIn && getRangeCount() == 0 && size() == 1) {
return this;
}
clear();
addString(s);
if (isIn) {
addString(s);
}
pat = null;
} else {
retain(cp, cp);
@ -1556,7 +1558,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
/**
* Removes the specified range from this set if it is present.
* The set will not contain the specified range once the call
* returns. If <code>end &gt; start</code> then an empty range is
* returns. If <code>start &gt; end</code> then an empty range is
* removed, leaving the set unchanged.
*
* @param start first character, inclusive, of range to be removed
@ -1617,13 +1619,11 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
/**
* Complements the specified range in this set. Any character in
* the range will be removed if it is in this set, or will be
* added if it is not in this set. If <code>end &gt; start</code>
* added if it is not in this set. If <code>start &gt; end</code>
* then an empty range is complemented, leaving the set unchanged.
*
* @param start first character, inclusive, of range to be removed
* from this set.
* @param end last character, inclusive, of range to be removed
* from this set.
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
* @stable ICU 2.0
*/
public UnicodeSet complement(int start, int end) {

View file

@ -727,7 +727,12 @@ public class UnicodeSetTest extends TestFmwk {
set.applyPattern("[a-z{ab}{cd}]");
set.retain("cd");
exp.applyPattern("[{cd}]");
if (!set.equals(exp)) { errln("FAIL: retain(\"cd\")"); return; }
if (!set.equals(exp)) { errln("FAIL: (with cd).retain(\"cd\")"); return; }
set.applyPattern("[a-z{ab}{yz}]");
set.retain("cd");
exp.clear();
if (!set.equals(exp)) { errln("FAIL: (without cd).retain(\"cd\")"); return; }
set.applyPattern("[a-z{ab}{cd}]");
set.remove((char)0x63);