ICU-2758 Allow the lccc and tccc properties (lead/trail canonical combining class) to take numeric values in UnicodeSet patterns

X-SVN-Rev: 15659
2025-04-18 11:14:22 +00:00 · 2004-06-01 22:09:56 +00:00 · 2004-06-01 22:09:56 +00:00 · cce68a9b45
commit cce68a9b45
parent 05789e4c8b
4 changed files with 49 additions and 4 deletions
--- a/icu4c/source/common/uniset.cpp
+++ b/icu4c/source/common/uniset.cpp
@ -2753,7 +2753,9 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
            v = u_getPropertyValueEnum(p, vname);
            if (v == UCHAR_INVALID_CODE) {
                // Handle numeric CCC
-                if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
+                if (p == UCHAR_CANONICAL_COMBINING_CLASS ||
+                    p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS ||
+                    p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) {
                    char* end;
                    double value = uprv_strtod(vname, &end);
                    v = (int32_t) value;
--- a/icu4c/source/test/intltest/usettest.cpp
+++ b/icu4c/source/test/intltest/usettest.cpp
@ -854,7 +854,28 @@ void UnicodeSetTest::TestPropertySet() {
        
        "[ab\\uDC00cd]", // JB#2906: isolated trail in middle
        "abcd\\uDC00",
-        "ef\\uD800\\U00010000"
+        "ef\\uD800\\U00010000",
+
+		"[:^lccc=0:]", // Lead canonical class
+		"\\u0300\\u0301",
+		"abcd\\u00c0\\u00c5",
+
+		"[:^tccc=0:]", // Trail canonical class
+		"\\u0300\\u0301\\u00c0\\u00c5",
+		"abcd",
+
+		"[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
+		"\\u0300\\u0301\\u00c0\\u00c5",
+		"abcd",
+
+		"[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
+		"",
+		"abcd\\u0300\\u0301\\u00c0\\u00c5",
+		
+		"[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
+		"\\u0F73\\u0F75\\u0F81",
+		"abcd\\u0300\\u0301\\u00c0\\u00c5",
+
    };

    static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);
--- a/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java
@ -913,7 +913,27 @@ public class UnicodeSetTest extends TestFmwk {

            "[ab\uDC00cd]", // JB#2906: isolated trail in middle
            "abcd\uDC00",
-            "ef\uD800\\U00010000"
+            "ef\uD800\\U00010000",
+			
+			"[:^lccc=0:]", // Lead canonical class
+			"\u0300\u0301",
+			"abcd\u00c0\u00c5",
+
+			"[:^tccc=0:]", // Trail canonical class
+			"\u0300\u0301\u00c0\u00c5",
+			"abcd",
+
+			"[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
+			"\u0300\u0301\u00c0\u00c5",
+			"abcd",
+
+			"[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
+			"",
+			"abcd\u0300\u0301\u00c0\u00c5",
+			
+			"[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
+			"\u0F73\u0F75\u0F81",
+			"abcd\u0300\u0301\u00c0\u00c5",
        };

        for (int i=0; i<DATA.length; i+=3) {  
--- a/icu4j/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/icu/text/UnicodeSet.java
@ -2819,7 +2819,9 @@ public class UnicodeSet extends UnicodeFilter {
                    v = UCharacter.getPropertyValueEnum(p, valueAlias);
                } catch (IllegalArgumentException e) {
                    // Handle numeric CCC
-                    if (p == UProperty.CANONICAL_COMBINING_CLASS) {
+                    if (p == UProperty.CANONICAL_COMBINING_CLASS ||
+                		p == UProperty.LEAD_CANONICAL_COMBINING_CLASS ||
+						p == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) {
                        v = Integer.parseInt(Utility.deleteRuleWhiteSpace(valueAlias));
                        // If the resultant set is empty then the numeric value
                        // was invalid.