mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-18 11:14:22 +00:00
ICU-2758 allow usage of lccc and tccc in unicode sets
X-SVN-Rev: 15659
This commit is contained in:
parent
05789e4c8b
commit
cce68a9b45
4 changed files with 49 additions and 4 deletions
|
@ -2753,7 +2753,9 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
|
|||
v = u_getPropertyValueEnum(p, vname);
|
||||
if (v == UCHAR_INVALID_CODE) {
|
||||
// Handle numeric CCC
|
||||
if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
|
||||
if (p == UCHAR_CANONICAL_COMBINING_CLASS ||
|
||||
p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS ||
|
||||
p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) {
|
||||
char* end;
|
||||
double value = uprv_strtod(vname, &end);
|
||||
v = (int32_t) value;
|
||||
|
|
|
@ -854,7 +854,28 @@ void UnicodeSetTest::TestPropertySet() {
|
|||
|
||||
"[ab\\uDC00cd]", // JB#2906: isolated trail in middle
|
||||
"abcd\\uDC00",
|
||||
"ef\\uD800\\U00010000"
|
||||
"ef\\uD800\\U00010000",
|
||||
|
||||
"[:^lccc=0:]", // Lead canonical class
|
||||
"\\u0300\\u0301",
|
||||
"abcd\\u00c0\\u00c5",
|
||||
|
||||
"[:^tccc=0:]", // Trail canonical class
|
||||
"\\u0300\\u0301\\u00c0\\u00c5",
|
||||
"abcd",
|
||||
|
||||
"[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
|
||||
"\\u0300\\u0301\\u00c0\\u00c5",
|
||||
"abcd",
|
||||
|
||||
"[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
|
||||
"",
|
||||
"abcd\\u0300\\u0301\\u00c0\\u00c5",
|
||||
|
||||
"[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
|
||||
"\\u0F73\\u0F75\\u0F81",
|
||||
"abcd\\u0300\\u0301\\u00c0\\u00c5",
|
||||
|
||||
};
|
||||
|
||||
static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);
|
||||
|
|
|
@ -913,7 +913,27 @@ public class UnicodeSetTest extends TestFmwk {
|
|||
|
||||
"[ab\uDC00cd]", // JB#2906: isolated trail in middle
|
||||
"abcd\uDC00",
|
||||
"ef\uD800\\U00010000"
|
||||
"ef\uD800\\U00010000",
|
||||
|
||||
"[:^lccc=0:]", // Lead canonical class
|
||||
"\u0300\u0301",
|
||||
"abcd\u00c0\u00c5",
|
||||
|
||||
"[:^tccc=0:]", // Trail canonical class
|
||||
"\u0300\u0301\u00c0\u00c5",
|
||||
"abcd",
|
||||
|
||||
"[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
|
||||
"\u0300\u0301\u00c0\u00c5",
|
||||
"abcd",
|
||||
|
||||
"[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
|
||||
"",
|
||||
"abcd\u0300\u0301\u00c0\u00c5",
|
||||
|
||||
"[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
|
||||
"\u0F73\u0F75\u0F81",
|
||||
"abcd\u0300\u0301\u00c0\u00c5",
|
||||
};
|
||||
|
||||
for (int i=0; i<DATA.length; i+=3) {
|
||||
|
|
|
@ -2819,7 +2819,9 @@ public class UnicodeSet extends UnicodeFilter {
|
|||
v = UCharacter.getPropertyValueEnum(p, valueAlias);
|
||||
} catch (IllegalArgumentException e) {
|
||||
// Handle numeric CCC
|
||||
if (p == UProperty.CANONICAL_COMBINING_CLASS) {
|
||||
if (p == UProperty.CANONICAL_COMBINING_CLASS ||
|
||||
p == UProperty.LEAD_CANONICAL_COMBINING_CLASS ||
|
||||
p == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) {
|
||||
v = Integer.parseInt(Utility.deleteRuleWhiteSpace(valueAlias));
|
||||
// If the resultant set is empty then the numeric value
|
||||
// was invalid.
|
||||
|
|
Loading…
Add table
Reference in a new issue