ICU-2766 freeze RuleWhiteSpace=Unicode Pattern_White_Space

X-SVN-Rev: 17646
This commit is contained in:
Markus Scherer 2005-05-22 22:19:37 +00:00
parent 36f008d0bb
commit c552b977a4
5 changed files with 24 additions and 17 deletions

View file

@ -265,6 +265,18 @@ uset_getItem(const USet* uset, int32_t itemIndex,
// return TRUE;
//}
/*
 * Create a new set holding the fixed list of ICU "rule white space"
 * characters: U+0009..U+000D, U+0020, U+0085, U+200E..U+200F and
 * U+2028..U+2029.
 *
 * @param ec in/out error code. If it already indicates a failure on entry,
 *           nothing is allocated and NULL is returned. Set to
 *           U_MEMORY_ALLOCATION_ERROR if the set cannot be allocated.
 * @return the newly allocated set (the caller must close/delete it),
 *         or NULL on failure.
 */
U_CAPI USet* U_EXPORT2
uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
    if(U_FAILURE(*ec)) {
        return NULL;
    }
    // create a set with the Pattern_White_Space characters,
    // without a pattern for fewer code dependencies
    UnicodeSet *set=new UnicodeSet(9, 0xd);
    if(set==NULL) {
        // Never dereference a failed allocation; report it through ec instead.
        *ec=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    set->add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
    return (USet *)set;
}
/*
* Serialize a USet into 16-bit units.
* Store BMP code points as themselves with one 16-bit unit each.

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2004, International Business Machines
* Copyright (C) 2004-2005, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -51,6 +51,9 @@ U_CDECL_END
/**
* Get the set of "white space" characters in the sense of ICU rule
* parsers. Caller must close/delete result.
* Equivalent to the set of characters with the Pattern_White_Space Unicode property.
* Stable set of characters, won't change.
* See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
* @internal
*/
U_CAPI USet* U_EXPORT2

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2004, International Business Machines
* Copyright (C) 2002-2005, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -132,15 +132,3 @@ uset_toPattern(const USet* set,
((const UnicodeSet*) set)->toPattern(pat, escapeUnprintable);
return pat.extract(result, resultCapacity, *ec);
}
/*
 * Open the "rule white space" set by handing the pattern
 * "[[:Cf:][:WSpace:]]" to uset_openPattern().
 * The pattern text is stored as numeric 16-bit code units.
 * Returns whatever uset_openPattern() returns; errors are reported via ec.
 */
U_CAPI USet* U_EXPORT2
uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
    static const UChar ruleWhiteSpacePattern[] = {
        91, 91,                                     /* [[         */
        58, 67, 102, 58, 93,                        /* :Cf:]      */
        91, 58, 87, 83, 112, 97, 99, 101, 58, 93,   /* [:WSpace:] */
        93,                                         /* ]          */
        0                                           /* terminator */
    };
    /* number of code units in the pattern, not counting the terminating 0 */
    const int32_t patternLength=
        (int32_t)(sizeof(ruleWhiteSpacePattern)/sizeof(ruleWhiteSpacePattern[0]))-1;

    return uset_openPattern(ruleWhiteSpacePattern, patternLength, ec);
}

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2001-2004, International Business Machines
* Copyright (c) 2001-2005, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
@ -613,8 +613,9 @@ U_CAPI UBool U_EXPORT2
uprv_isRuleWhiteSpace(UChar32 c) {
/* "white space" in the sense of ICU rule parsers
This is a FIXED LIST that is NOT DEPENDENT ON UNICODE PROPERTIES.
See UTR #31: http://www.unicode.org/reports/tr31/.
See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, and U+2028..U+2029
Equivalent to test for Pattern_White_Space Unicode property.
*/
return (c >= 0x0009 && c <= 0x2029 &&
(c <= 0x000D || c == 0x0020 || c == 0x0085 ||

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2001-2004, International Business Machines
* Copyright (c) 2001-2005, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
@ -236,6 +236,9 @@ U_NAMESPACE_END
/**
* Is this character a "white space" in the sense of ICU rule parsers?
* Equivalent to test for Pattern_White_Space Unicode property.
* Stable set of characters, won't change.
* See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
* @internal
*/
U_CAPI UBool U_EXPORT2