diff --git a/icu4c/source/common/rbbiscan.cpp b/icu4c/source/common/rbbiscan.cpp
index 5c3b15f2c98..f66a019f873 100644
--- a/icu4c/source/common/rbbiscan.cpp
+++ b/icu4c/source/common/rbbiscan.cpp
@@ -2,7 +2,7 @@
 //
 // file: rbbiscan.cpp
 //
-// Copyright (C) 2002-2006, International Business Machines Corporation and others.
+// Copyright (C) 2002-2007, International Business Machines Corporation and others.
 // All Rights Reserved.
 //
 // This file contains the Rule Based Break Iterator Rule Builder functions for
@@ -23,7 +23,7 @@
 #include "unicode/uchriter.h"
 #include "unicode/parsepos.h"
 #include "unicode/parseerr.h"
-#include "uprops.h"
+#include "util.h"
 #include "cmemory.h"
 #include "cstring.h"
 
@@ -127,7 +127,7 @@ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
     // and the time to build these few sets should be small compared to a
     // full break iterator build.
     fRuleSets[kRuleSet_rule_char-128] = new UnicodeSet(gRuleSet_rule_char_pattern, *rb->fStatus);
-    fRuleSets[kRuleSet_white_space-128] = (UnicodeSet*) uprv_openRuleWhiteSpaceSet(rb->fStatus);
+    fRuleSets[kRuleSet_white_space-128] = uprv_openRuleWhiteSpaceSet(rb->fStatus);
     fRuleSets[kRuleSet_name_char-128] = new UnicodeSet(gRuleSet_name_char_pattern, *rb->fStatus);
     fRuleSets[kRuleSet_name_start_char-128] = new UnicodeSet(gRuleSet_name_start_char_pattern, *rb->fStatus);
     fRuleSets[kRuleSet_digit_char-128] = new UnicodeSet(gRuleSet_digit_char_pattern, *rb->fStatus);
diff --git a/icu4c/source/common/uset.cpp b/icu4c/source/common/uset.cpp
index 98fc7076aac..91cd17bb89a 100644
--- a/icu4c/source/common/uset.cpp
+++ b/icu4c/source/common/uset.cpp
@@ -303,18 +303,6 @@ uset_getItem(const USet* uset, int32_t itemIndex,
 // return TRUE;
 //}
 
-U_CAPI USet* U_EXPORT2
-uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
-    if(U_FAILURE(*ec)) {
-        return NULL;
-    }
-    // create a set with the Pattern_White_Space characters,
-    // without a pattern for fewer code dependencies
-    UnicodeSet *set=new UnicodeSet(9, 0xd);
-    set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
-    return (USet *)set;
-}
-
 /*
  * Serialize a USet into 16-bit units.
  * Store BMP code points as themselves with one 16-bit unit each.
diff --git a/icu4c/source/common/uset_imp.h b/icu4c/source/common/uset_imp.h
index c826f49faec..d9916f40351 100644
--- a/icu4c/source/common/uset_imp.h
+++ b/icu4c/source/common/uset_imp.h
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-* Copyright (C) 2004-2005, International Business Machines
+* Copyright (C) 2004-2007, International Business Machines
 * Corporation and others. All Rights Reserved.
 *
 *******************************************************************************
@@ -52,16 +52,5 @@ typedef struct USetAdder USetAdder;
 
 U_CDECL_END
 
-/**
- * Get the set of "white space" characters in the sense of ICU rule
- * parsers. Caller must close/delete result.
- * Equivalent to the set of characters with the Pattern_White_Space Unicode property.
- * Stable set of characters, won't change.
- * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
- * @internal
- */
-U_CAPI USet* U_EXPORT2
-uprv_openRuleWhiteSpaceSet(UErrorCode* ec);
-
 #endif
 
diff --git a/icu4c/source/common/util.cpp b/icu4c/source/common/util.cpp
index 7a588b5ae63..e456047c0e8 100644
--- a/icu4c/source/common/util.cpp
+++ b/icu4c/source/common/util.cpp
@@ -1,6 +1,6 @@
 /*
 **********************************************************************
-* Copyright (c) 2001-2006, International Business Machines
+* Copyright (c) 2001-2007, International Business Machines
 * Corporation and others. All Rights Reserved.
 **********************************************************************
 * Date Name Description
@@ -10,6 +10,7 @@
 
 #include "util.h"
 #include "unicode/unimatch.h"
+#include "unicode/uniset.h"
 
 // Define UChar constants using hex for EBCDIC compatibility
 
@@ -427,4 +428,21 @@ uprv_isRuleWhiteSpace(UChar32 c) {
              c == 0x200E || c == 0x200F || c >= 0x2028));
 }
 
+U_CAPI UnicodeSet* U_EXPORT2
+uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
+    if(U_FAILURE(*ec)) {
+        return NULL;
+    }
+    // create a set with the Pattern_White_Space characters,
+    // without a pattern for fewer code dependencies
+    UnicodeSet *set=new UnicodeSet(9, 0xd);
+    // new can fail: report it through ec instead of dereferencing NULL below
+    if(set == NULL) {
+        *ec = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
+    return set;
+}
+
 //eof
diff --git a/icu4c/source/common/util.h b/icu4c/source/common/util.h
index 6af528b02f8..dd1fb1af181 100644
--- a/icu4c/source/common/util.h
+++ b/icu4c/source/common/util.h
@@ -22,6 +22,7 @@
 U_NAMESPACE_BEGIN
 
 class UnicodeMatcher;
+class UnicodeSet;
 
 class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
  public:
@@ -234,6 +235,20 @@ private:
 
 U_NAMESPACE_END
 
+/**
+ * Get the set of "white space" characters in the sense of ICU rule
+ * parsers. Caller must close/delete result.
+ * Equivalent to the set of characters with the Pattern_White_Space Unicode property.
+ * Stable set of characters, won't change.
+ * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
+ * @param ec Error code; nothing is done if it already indicates a failure,
+ *           and it is set to U_MEMORY_ALLOCATION_ERROR if the set cannot be allocated.
+ * @return The new set, or NULL on failure.
+ * @internal
+ */
+U_CAPI UnicodeSet* U_EXPORT2
+uprv_openRuleWhiteSpaceSet(UErrorCode* ec);
+
 /**
  * Is this character a "white space" in the sense of ICU rule parsers?
  * Equivalent to test for Pattern_White_Space Unicode property.