ICU-2758 allow usage of lccc and tccc in unicode sets

X-SVN-Rev: 15659
This commit is contained in:
Vladimir Weinstein 2004-06-01 22:09:56 +00:00
parent 05789e4c8b
commit cce68a9b45
4 changed files with 49 additions and 4 deletions

View file

@@ -2753,7 +2753,9 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
 v = u_getPropertyValueEnum(p, vname);
 if (v == UCHAR_INVALID_CODE) {
 // Handle numeric CCC
-if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
+if (p == UCHAR_CANONICAL_COMBINING_CLASS ||
+p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS ||
+p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) {
 char* end;
 double value = uprv_strtod(vname, &end);
 v = (int32_t) value;

View file

@@ -854,7 +854,28 @@ void UnicodeSetTest::TestPropertySet() {
 "[ab\\uDC00cd]", // JB#2906: isolated trail in middle
 "abcd\\uDC00",
-"ef\\uD800\\U00010000"
+"ef\\uD800\\U00010000",
+"[:^lccc=0:]", // Lead canonical class
+"\\u0300\\u0301",
+"abcd\\u00c0\\u00c5",
+"[:^tccc=0:]", // Trail canonical class
+"\\u0300\\u0301\\u00c0\\u00c5",
+"abcd",
+"[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
+"\\u0300\\u0301\\u00c0\\u00c5",
+"abcd",
+"[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
+"",
+"abcd\\u0300\\u0301\\u00c0\\u00c5",
+"[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
+"\\u0F73\\u0F75\\u0F81",
+"abcd\\u0300\\u0301\\u00c0\\u00c5",
 };
 static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);

View file

@@ -913,7 +913,27 @@ public class UnicodeSetTest extends TestFmwk {
 "[ab\uDC00cd]", // JB#2906: isolated trail in middle
 "abcd\uDC00",
-"ef\uD800\\U00010000"
+"ef\uD800\\U00010000",
+"[:^lccc=0:]", // Lead canonical class
+"\u0300\u0301",
+"abcd\u00c0\u00c5",
+"[:^tccc=0:]", // Trail canonical class
+"\u0300\u0301\u00c0\u00c5",
+"abcd",
+"[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
+"\u0300\u0301\u00c0\u00c5",
+"abcd",
+"[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
+"",
+"abcd\u0300\u0301\u00c0\u00c5",
+"[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
+"\u0F73\u0F75\u0F81",
+"abcd\u0300\u0301\u00c0\u00c5",
 };
 for (int i=0; i<DATA.length; i+=3) {

View file

@@ -2819,7 +2819,9 @@ public class UnicodeSet extends UnicodeFilter {
 v = UCharacter.getPropertyValueEnum(p, valueAlias);
 } catch (IllegalArgumentException e) {
 // Handle numeric CCC
-if (p == UProperty.CANONICAL_COMBINING_CLASS) {
+if (p == UProperty.CANONICAL_COMBINING_CLASS ||
+p == UProperty.LEAD_CANONICAL_COMBINING_CLASS ||
+p == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) {
 v = Integer.parseInt(Utility.deleteRuleWhiteSpace(valueAlias));
 // If the resultant set is empty then the numeric value
 // was invalid.