diff --git a/icu4c/source/common/uprops.c b/icu4c/source/common/uprops.c index 23de6053322..a50744f1e81 100644 --- a/icu4c/source/common/uprops.c +++ b/icu4c/source/common/uprops.c @@ -258,6 +258,14 @@ u_isUWhiteSpace(UChar32 c) { return u_hasBinaryProperty(c, UCHAR_WHITE_SPACE); } +U_CAPI UBool U_EXPORT2 +uprv_isRuleWhiteSpace(UChar32 c) { + /* "white space" in the sense of ICU rule parsers: true if c has general category Cf (U_FORMAT_CHAR) or the White_Space binary property (Cf+White_Space) */ + return + u_charType(c)==U_FORMAT_CHAR || + u_hasBinaryProperty(c, UCHAR_WHITE_SPACE); +} + U_CAPI int32_t U_EXPORT2 u_getIntPropertyValue(UChar32 c, UProperty which) { UErrorCode errorCode; diff --git a/icu4c/source/common/uprops.h b/icu4c/source/common/uprops.h index 120a7e03104..bb0ae40adb6 100644 --- a/icu4c/source/common/uprops.h +++ b/icu4c/source/common/uprops.h @@ -230,4 +230,11 @@ uprv_comparePropertyNames(const char *name1, const char *name2); #define _Pi FLAG(U_INITIAL_PUNCTUATION) #define _Pf FLAG(U_FINAL_PUNCTUATION) +/** + * Is this character a "white space" in the sense of ICU rule parsers, that is, a format character (general category Cf) or a character with the White_Space binary property?
+ * @internal + */ +U_CAPI UBool U_EXPORT2 +uprv_isRuleWhiteSpace(UChar32 c); + #endif diff --git a/icu4c/source/common/upropset.cpp b/icu4c/source/common/upropset.cpp index 3fba281bb64..81e27bccd32 100644 --- a/icu4c/source/common/upropset.cpp +++ b/icu4c/source/common/upropset.cpp @@ -427,6 +427,20 @@ UnicodeSet* UnicodePropertySet::createBinaryPropertySet(const UnicodeString& nam return set; } +UnicodeSet +UnicodePropertySet::getRuleWhiteSpaceSet() { + UnicodeSet set; + int32_t code; + + /* "white space" in the sense of ICU rule parsers (Cf+White_Space): first initialize the set through the binary-property filter for UCHAR_WHITE_SPACE, then add the U_FORMAT_CHAR (Cf) category set below */ + code = UCHAR_WHITE_SPACE; + initSetFromFilter(set, _binaryPropertyFilter, &code); + + set.addAll(getCategorySet(U_FORMAT_CHAR)); + + return set; /* return by value */ +} + //---------------------------------------------------------------- // Utility methods //---------------------------------------------------------------- diff --git a/icu4c/source/common/upropset.h b/icu4c/source/common/upropset.h index a82fd6fb374..e654d5d3a44 100644 --- a/icu4c/source/common/upropset.h +++ b/icu4c/source/common/upropset.h @@ -97,6 +97,12 @@ class UnicodePropertySet /* not : public UObject because all methods are static typedef UnicodeSet* (*SetFactory)(const UnicodeString& valueName); + /** + * Returns, by value, the set of "white space" characters in the sense of ICU rule parsers: the White_Space binary property plus general category Cf (format characters) + * @internal + */ + static UnicodeSet getRuleWhiteSpaceSet(); + private: //----------------------------------------------------------------