From 9774441e40d838f91acb573894cd0f9fa00de130 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 26 Apr 2011 06:39:29 +0000 Subject: [PATCH] ICU-8491 remove uprv_isRuleWhiteSpace(c) X-SVN-Rev: 29900 --- icu4c/source/common/rbbiscan.cpp | 4 +-- icu4c/source/common/ruleiter.cpp | 9 +++--- icu4c/source/common/ruleiter.h | 7 ++-- icu4c/source/common/util.cpp | 43 ++++++++----------------- icu4c/source/common/util.h | 25 ++++---------- icu4c/source/common/util_props.cpp | 11 +++---- icu4c/source/i18n/decimfmt.cpp | 40 ++++++++++------------- icu4c/source/i18n/name2uni.cpp | 7 ++-- icu4c/source/i18n/nfrs.cpp | 7 ++-- icu4c/source/i18n/nfrule.cpp | 11 +++---- icu4c/source/i18n/rbnf.cpp | 12 +++---- icu4c/source/i18n/rbt_pars.cpp | 11 ++++--- icu4c/source/i18n/regexcmp.cpp | 6 ++-- icu4c/source/i18n/smpdtfmt.cpp | 36 +++++++++------------ icu4c/source/i18n/ucol_tok.cpp | 22 ++++++------- icu4c/source/i18n/unicode/decimfmt.h | 4 +-- icu4c/source/i18n/unicode/smpdtfmt.h | 6 ++-- icu4c/source/test/intltest/itrbnf.cpp | 4 +-- icu4c/source/test/intltest/textfile.h | 4 +-- icu4c/source/test/intltest/tokiter.cpp | 7 ++-- icu4c/source/test/intltest/tokiter.h | 6 ++-- icu4c/source/test/intltest/transtst.cpp | 6 ++-- icu4c/source/test/intltest/transtst.h | 4 +-- icu4c/source/test/intltest/usettest.cpp | 4 +-- 24 files changed, 127 insertions(+), 169 deletions(-) diff --git a/icu4c/source/common/rbbiscan.cpp b/icu4c/source/common/rbbiscan.cpp index dacbd63ab43..bba16e9ef24 100644 --- a/icu4c/source/common/rbbiscan.cpp +++ b/icu4c/source/common/rbbiscan.cpp @@ -2,7 +2,7 @@ // // file: rbbiscan.cpp // -// Copyright (C) 2002-2010, International Business Machines Corporation and others. +// Copyright (C) 2002-2011, International Business Machines Corporation and others. // All Rights Reserved. // // This file contains the Rule Based Break Iterator Rule Builder functions for @@ -122,7 +122,7 @@ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb) // and the time to build these few sets should be small compared to a // full break iterator build. fRuleSets[kRuleSet_rule_char-128] = UnicodeSet(gRuleSet_rule_char_pattern, *rb->fStatus); - UnicodeSet *whitespaceSet = uprv_openRuleWhiteSpaceSet(rb->fStatus); + UnicodeSet *whitespaceSet = uprv_openPatternWhiteSpaceSet(rb->fStatus); if (U_FAILURE(*rb->fStatus)) { return; } diff --git a/icu4c/source/common/ruleiter.cpp b/icu4c/source/common/ruleiter.cpp index b99a831513d..0c83671ffaa 100644 --- a/icu4c/source/common/ruleiter.cpp +++ b/icu4c/source/common/ruleiter.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2003-2007, International Business Machines +* Copyright (c) 2003-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -12,7 +12,7 @@ #include "unicode/parsepos.h" #include "unicode/unistr.h" #include "unicode/symtable.h" -#include "util.h" +#include "patternprops.h" /* \U87654321 or \ud800\udc00 */ #define MAX_U_NOTATION_LEN 12 @@ -63,8 +63,7 @@ UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCod continue; } - if ((options & SKIP_WHITESPACE) != 0 && - uprv_isRuleWhiteSpace(c)) { + if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { continue; } @@ -102,7 +101,7 @@ void RuleCharacterIterator::skipIgnored(int32_t options) { if ((options & SKIP_WHITESPACE) != 0) { for (;;) { UChar32 a = _current(); - if (!uprv_isRuleWhiteSpace(a)) break; + if (!PatternProps::isWhiteSpace(a)) break; _advance(UTF_CHAR_LENGTH(a)); } } diff --git a/icu4c/source/common/ruleiter.h b/icu4c/source/common/ruleiter.h index cc4e8475fd2..d8fe212cf0e 100644 --- a/icu4c/source/common/ruleiter.h +++ b/icu4c/source/common/ruleiter.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2003-2007, International Business Machines +* Copyright (c) 2003-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -82,9 +82,8 @@ public: /** * Bitmask option to enable skipping of whitespace. If (options & - * SKIP_WHITESPACE) != 0, then whitespace characters will be silently - * skipped, as if they were not present in the input. Whitespace - * characters are defined by UCharacterProperty.isRuleWhiteSpace(). + * SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently + * skipped, as if they were not present in the input. */ enum { SKIP_WHITESPACE = 4 }; diff --git a/icu4c/source/common/util.cpp b/icu4c/source/common/util.cpp index f084a291343..c5395ffa896 100644 --- a/icu4c/source/common/util.cpp +++ b/icu4c/source/common/util.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2001-2008, International Business Machines +* Copyright (c) 2001-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -8,9 +8,10 @@ ********************************************************************** */ -#include "util.h" #include "unicode/unimatch.h" #include "unicode/uniset.h" +#include "patternprops.h" +#include "util.h" // Define UChar constants using hex for EBCDIC compatibility @@ -132,13 +133,8 @@ int32_t ICU_Utility::quotedIndexOf(const UnicodeString& text, int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos, UBool advance) { int32_t p = pos; - while (p < str.length()) { - UChar32 c = str.char32At(p); - if (!uprv_isRuleWhiteSpace(c)) { - break; - } - p += UTF_CHAR_LENGTH(c); - } + const UChar* s = str.getBuffer(); + p = (int32_t)(PatternProps::skipWhiteSpace(s + p, str.length() - p) - s); if (advance) { pos = p; } @@ -146,8 +142,8 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos, } /** - * Skip over whitespace in a Replaceable. Whitespace is defined by - * uprv_isRuleWhiteSpace(). Skipping may be done in the forward or + * Skip over Pattern_White_Space in a Replaceable. + * Skipping may be done in the forward or * reverse direction. In either case, the leftmost index will be * inclusive, and the rightmost index will be exclusive. That is, * given a range defined as [start, limit), the call @@ -173,7 +169,7 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos, //? } //? //? while (pos != stop && -//? uprv_isRuleWhiteSpace(c = text.char32At(pos))) { +//? PatternProps::isWhiteSpace(c = text.char32At(pos))) { //? if (isForward) { //? pos += UTF_CHAR_LENGTH(c); //? } else { @@ -217,7 +213,7 @@ UBool ICU_Utility::parseChar(const UnicodeString& id, int32_t& pos, UChar ch) { * pattern. Characters are matched literally and case-sensitively * except for the following special characters: * - * ~ zero or more uprv_isRuleWhiteSpace chars + * ~ zero or more Pattern_White_Space chars * * If end of pattern is reached with all matches along the way, * pos is advanced to the first unparsed index and returned. @@ -246,7 +242,7 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& pat, // parse \s* if (cpat == 126 /*~*/) { - if (uprv_isRuleWhiteSpace(c)) { + if (PatternProps::isWhiteSpace(c)) { index += UTF_CHAR_LENGTH(c); continue; } else { @@ -371,7 +367,7 @@ void ICU_Utility::appendToRule(UnicodeString& rule, !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) || (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) || (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) || - uprv_isRuleWhiteSpace(c)) { + PatternProps::isWhiteSpace(c)) { quoteBuf.append(c); // Double ' within a quote if (c == APOSTROPHE) { @@ -412,26 +408,13 @@ void ICU_Utility::appendToRule(UnicodeString& rule, U_NAMESPACE_END -U_CAPI UBool U_EXPORT2 -uprv_isRuleWhiteSpace(UChar32 c) { - /* "white space" in the sense of ICU rule parsers - This is a FIXED LIST that is NOT DEPENDENT ON UNICODE PROPERTIES. - See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/ - U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, and U+2028..U+2029 - Equivalent to test for Pattern_White_Space Unicode property. - */ - return (c >= 0x0009 && c <= 0x2029 && - (c <= 0x000D || c == 0x0020 || c == 0x0085 || - c == 0x200E || c == 0x200F || c >= 0x2028)); -} - U_CAPI U_NAMESPACE_QUALIFIER UnicodeSet* U_EXPORT2 -uprv_openRuleWhiteSpaceSet(UErrorCode* ec) { +uprv_openPatternWhiteSpaceSet(UErrorCode* ec) { if(U_FAILURE(*ec)) { return NULL; } // create a set with the Pattern_White_Space characters, - // without a pattern for fewer code dependencies + // without a pattern string for fewer code dependencies U_NAMESPACE_QUALIFIER UnicodeSet *set=new U_NAMESPACE_QUALIFIER UnicodeSet(9, 0xd); // Check for new failure. if (set == NULL) { diff --git a/icu4c/source/common/util.h b/icu4c/source/common/util.h index 37fb56868e4..ac88bd04317 100644 --- a/icu4c/source/common/util.h +++ b/icu4c/source/common/util.h @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (c) 2001-2007, International Business Machines + * Copyright (c) 2001-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -90,8 +90,8 @@ class U_COMMON_API ICU_Utility /* not : public UObject because all methods are s UBool advance = FALSE); /** - * Skip over whitespace in a Replaceable. Whitespace is defined by - * uprv_isRuleWhiteSpace(). Skipping may be done in the forward or + * Skip over Pattern_White_Space in a Replaceable. + * Skipping may be done in the forward or * reverse direction. In either case, the leftmost index will be * inclusive, and the rightmost index will be exclusive. That is, * given a range defined as [start, limit), the call @@ -151,7 +151,7 @@ class U_COMMON_API ICU_Utility /* not : public UObject because all methods are s * pattern. Characters are matched literally and case-sensitively * except for the following special characters: * - * ~ zero or more uprv_isRuleWhiteSpace chars + * ~ zero or more Pattern_White_Space chars * * If end of pattern is reached with all matches along the way, * pos is advanced to the first unparsed index and returned. @@ -237,25 +237,14 @@ private: U_NAMESPACE_END /** - * Get the set of "white space" characters in the sense of ICU rule - * parsers. Caller must close/delete result. - * Equivalent to the set of characters with the Pattern_White_Space Unicode property. + * Returns a new set with the Pattern_White_Space characters. + * The caller must close/delete the result. * Stable set of characters, won't change. * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/ * @internal */ U_CAPI U_NAMESPACE_QUALIFIER UnicodeSet* U_EXPORT2 -uprv_openRuleWhiteSpaceSet(UErrorCode* ec); - -/** - * Is this character a "white space" in the sense of ICU rule parsers? - * Equivalent to test for Pattern_White_Space Unicode property. - * Stable set of characters, won't change. - * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/ - * @internal - */ -U_CAPI UBool U_EXPORT2 -uprv_isRuleWhiteSpace(UChar32 c); +uprv_openPatternWhiteSpaceSet(UErrorCode* ec); #endif //eof diff --git a/icu4c/source/common/util_props.cpp b/icu4c/source/common/util_props.cpp index f91666b4f23..2ee536b9b22 100644 --- a/icu4c/source/common/util_props.cpp +++ b/icu4c/source/common/util_props.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2001-2006, International Business Machines +* Copyright (c) 2001-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -8,8 +8,9 @@ ********************************************************************** */ -#include "util.h" #include "unicode/uchar.h" +#include "patternprops.h" +#include "util.h" U_NAMESPACE_BEGIN @@ -95,7 +96,7 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_ return -1; } c = rule.charAt(pos++); - if (!uprv_isRuleWhiteSpace(c)) { + if (!PatternProps::isWhiteSpace(c)) { return -1; } // FALL THROUGH to skipWhitespace @@ -133,15 +134,13 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_ * @param pos INPUT-OUPUT parameter. On INPUT, pos is the * first character to examine. It must be less than str.length(), * and it must not point to a whitespace character. That is, must - * have pos < str.length() and - * !uprv_isRuleWhiteSpace(str.char32At(pos)). On + * have pos < str.length(). On * OUTPUT, the position after the last parsed character. * @return the Unicode identifier, or an empty string if there is * no valid identifier at pos. */ UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) { // assert(pos < str.length()); - // assert(!uprv_isRuleWhiteSpace(str.char32At(pos))); UnicodeString buf; int p = pos; while (p < str.length()) { diff --git a/icu4c/source/i18n/decimfmt.cpp b/icu4c/source/i18n/decimfmt.cpp index 76487b5e18e..f92196abd74 100644 --- a/icu4c/source/i18n/decimfmt.cpp +++ b/icu4c/source/i18n/decimfmt.cpp @@ -56,7 +56,7 @@ #include "ucurrimp.h" #include "charstr.h" #include "cmemory.h" -#include "util.h" +#include "patternprops.h" #include "digitlst.h" #include "cstring.h" #include "umutex.h" @@ -2173,12 +2173,12 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix, for (int32_t i=0; i= 0; ) { UChar32 ch = str.char32At(i); i += U16_LENGTH(ch); - if (uprv_isRuleWhiteSpace(ch)) { - i = skipRuleWhiteSpace(str, i); + if (PatternProps::isWhiteSpace(ch)) { + i = skipPatternWhiteSpace(str, i); } pos = match(text, pos, ch); } diff --git a/icu4c/source/i18n/name2uni.cpp b/icu4c/source/i18n/name2uni.cpp index 4c2e9d28e23..b66bcfeda1d 100644 --- a/icu4c/source/i18n/name2uni.cpp +++ b/icu4c/source/i18n/name2uni.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2001-2008, International Business Machines +* Copyright (C) 2001-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -15,8 +15,9 @@ #include "unicode/unifilt.h" #include "unicode/uchar.h" #include "unicode/uniset.h" -#include "name2uni.h" #include "cmemory.h" +#include "name2uni.h" +#include "patternprops.h" #include "uprops.h" #include "uinvchar.h" #include "util.h" @@ -162,7 +163,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos // Convert \s+ => SPACE. This assumes there are no // runs of >1 space characters in names. - if (uprv_isRuleWhiteSpace(c)) { + if (PatternProps::isWhiteSpace(c)) { // Ignore leading whitespace if (name.length() > 0 && name.charAt(name.length()-1) != SPACE) { diff --git a/icu4c/source/i18n/nfrs.cpp b/icu4c/source/i18n/nfrs.cpp index ef72ce38ccb..b7b93b24b41 100644 --- a/icu4c/source/i18n/nfrs.cpp +++ b/icu4c/source/i18n/nfrs.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1997-2008, International Business Machines +* Copyright (C) 1997-2011, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: nfrs.cpp @@ -20,13 +20,12 @@ #include "unicode/uchar.h" #include "nfrule.h" #include "nfrlist.h" +#include "patternprops.h" #ifdef RBNF_DEBUG #include "cmemory.h" #endif -#include "util.h" - U_NAMESPACE_BEGIN #if 0 @@ -149,7 +148,7 @@ NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& sta status = U_PARSE_ERROR; } else { name.setTo(description, 0, pos); - while (pos < description.length() && uprv_isRuleWhiteSpace(description.charAt(++pos))) { + while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { } description.remove(0, pos); } diff --git a/icu4c/source/i18n/nfrule.cpp b/icu4c/source/i18n/nfrule.cpp index d73a57062ee..46932273754 100644 --- a/icu4c/source/i18n/nfrule.cpp +++ b/icu4c/source/i18n/nfrule.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1997-2008, International Business Machines +* Copyright (C) 1997-2011, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: nfrule.cpp @@ -24,8 +24,7 @@ #include "nfrs.h" #include "nfrlist.h" #include "nfsubs.h" - -#include "util.h" +#include "patternprops.h" U_NAMESPACE_BEGIN @@ -235,7 +234,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) descriptor.setTo(description, 0, p); ++p; - while (p < description.length() && uprv_isRuleWhiteSpace(description.charAt(p))) { + while (p < description.length() && PatternProps::isWhiteSpace(description.charAt(p))) { ++p; } description.removeBetween(0, p); @@ -278,7 +277,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) else if (c == gSlash || c == gGreaterThan) { break; } - else if (uprv_isRuleWhiteSpace(c) || c == gComma || c == gDot) { + else if (PatternProps::isWhiteSpace(c) || c == gComma || c == gDot) { } else { // throw new IllegalArgumentException("Illegal character in rule descriptor"); @@ -307,7 +306,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) else if (c == gGreaterThan) { break; } - else if (uprv_isRuleWhiteSpace(c) || c == gComma || c == gDot) { + else if (PatternProps::isWhiteSpace(c) || c == gComma || c == gDot) { } else { // throw new IllegalArgumentException("Illegal character is rule descriptor"); diff --git a/icu4c/source/i18n/rbnf.cpp b/icu4c/source/i18n/rbnf.cpp index b8d392a592b..2e20b92bc08 100644 --- a/icu4c/source/i18n/rbnf.cpp +++ b/icu4c/source/i18n/rbnf.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 1997-2010, International Business Machines Corporation +* Copyright (C) 1997-2011, International Business Machines Corporation * and others. All Rights Reserved. ******************************************************************************* */ @@ -25,7 +25,7 @@ #include "cmemory.h" #include "cstring.h" -#include "util.h" +#include "patternprops.h" #include "uresimp.h" // debugging @@ -313,9 +313,9 @@ private: void inc(void) { ++p; ch = 0xffff; } UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; } UBool check(UChar c) { return p < e && (ch == c || *p == c); } - void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();} + void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();} UBool inList(UChar c, const UChar* list) const { - if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE; + if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE; while (*list && *list != c) ++list; return *list == c; } void parseError(const char* msg); @@ -1331,7 +1331,7 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali lpEnd = description.length() - 1; } int lpStart = lp + u_strlen(gLenientParse); - while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) { + while (PatternProps::isWhiteSpace(description.charAt(lpStart))) { ++lpStart; } @@ -1467,7 +1467,7 @@ RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) while (start != -1 && start < description.length()) { // seek to the first non-whitespace character... while (start < description.length() - && uprv_isRuleWhiteSpace(description.charAt(start))) { + && PatternProps::isWhiteSpace(description.charAt(start))) { ++start; } diff --git a/icu4c/source/i18n/rbt_pars.cpp b/icu4c/source/i18n/rbt_pars.cpp index 483c05476bc..d05a0cc925b 100644 --- a/icu4c/source/i18n/rbt_pars.cpp +++ b/icu4c/source/i18n/rbt_pars.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 1999-2008, International Business Machines + * Copyright (C) 1999-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -33,6 +33,7 @@ #include "tridpars.h" #include "uvector.h" #include "hash.h" +#include "patternprops.h" #include "util.h" #include "cmemory.h" #include "uprops.h" @@ -406,7 +407,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l // Since all syntax characters are in the BMP, fetching // 16-bit code units suffices here. UChar c = rule.charAt(pos++); - if (uprv_isRuleWhiteSpace(c)) { + if (PatternProps::isWhiteSpace(c)) { // Ignore whitespace. Note that this is not Unicode // spaces, but Java spaces -- a subset, representing // whitespace likely to be seen in code. @@ -929,7 +930,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, while (pos < limit && U_SUCCESS(status)) { UChar c = rule.charAt(pos++); - if (uprv_isRuleWhiteSpace(c)) { + if (PatternProps::isWhiteSpace(c)) { // Ignore leading whitespace. continue; } @@ -958,7 +959,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, rule.compare(pos, ID_TOKEN_LEN, ID_TOKEN) == 0) { pos += ID_TOKEN_LEN; c = rule.charAt(pos); - while (uprv_isRuleWhiteSpace(c) && pos < limit) { + while (PatternProps::isWhiteSpace(c) && pos < limit) { ++pos; c = rule.charAt(pos); } @@ -1690,7 +1691,7 @@ utrans_stripRules(const UChar *source, int32_t sourceLen, UChar *target, UErrorC *status = U_PARSE_ERROR; return 0; } - if (!uprv_isRuleWhiteSpace(c2) && !u_iscntrl(c2) && !u_ispunct(c2)) { + if (!PatternProps::isWhiteSpace(c2) && !u_iscntrl(c2) && !u_ispunct(c2)) { /* It was escaped for a reason. Write what it was suppose to be. */ source+=5; c = c2; diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp index bef729a9fb3..c77181d9ec1 100644 --- a/icu4c/source/i18n/regexcmp.cpp +++ b/icu4c/source/i18n/regexcmp.cpp @@ -1,7 +1,7 @@ // // file: regexcmp.cpp // -// Copyright (C) 2002-2010 International Business Machines Corporation and others. +// Copyright (C) 2002-2011 International Business Machines Corporation and others. // All Rights Reserved. // // This file contains the ICU regular expression compiler, which is responsible @@ -21,7 +21,7 @@ #include "unicode/parsepos.h" #include "unicode/parseerr.h" #include "unicode/regex.h" -#include "util.h" +#include "patternprops.h" #include "putilimp.h" #include "cmemory.h" #include "cstring.h" @@ -3743,7 +3743,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) { } } // TODO: check what Java & Perl do with non-ASCII white spaces. Ticket 6061. - if (uprv_isRuleWhiteSpace(c.fChar) == FALSE) { + if (PatternProps::isWhiteSpace(c.fChar) == FALSE) { break; } c.fChar = nextCharLL(); diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp index 6f95a349ff8..1c360030001 100644 --- a/icu4c/source/i18n/smpdtfmt.cpp +++ b/icu4c/source/i18n/smpdtfmt.cpp @@ -46,7 +46,7 @@ #include "unicode/rbtz.h" #include "unicode/vtzone.h" #include "olsontz.h" -#include "util.h" +#include "patternprops.h" #include "fphdlimp.h" #include "gregoimp.h" #include "hebrwcal.h" @@ -1912,11 +1912,11 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& UChar ch = fPattern.charAt(i+1); // check for whitespace - if (uprv_isRuleWhiteSpace(ch)) { + if (PatternProps::isWhiteSpace(ch)) { i++; // Advance over run in pattern while ((i+1)= end) { @@ -530,7 +530,7 @@ ucol_tok_getNextArgument(const UChar *start, const UChar *end, foundOption = TRUE; if(end - start > rulesOptions[i].optionLen) { optionArg = start+rulesOptions[i].optionLen+1; /* start of the options, skip space */ - while(u_isWhitespace(*optionArg) || uprv_isRuleWhiteSpace(*optionArg)) { /* eat whitespace */ + while(PatternProps::isWhiteSpace(*optionArg)) { /* eat whitespace */ optionArg++; } } @@ -551,7 +551,7 @@ ucol_tok_getNextArgument(const UChar *start, const UChar *end, *attrib = rulesOptions[i].attr; *value = rulesOptions[i].subopts[j].attrVal; optionArg += rulesOptions[i].subopts[j].subLen; - while(u_isWhitespace(*optionArg) || uprv_isRuleWhiteSpace(*optionArg)) { /* eat whitespace */ + while(PatternProps::isWhiteSpace(*optionArg)) { /* eat whitespace */ optionArg++; } if(*optionArg == 0x005d) { @@ -605,14 +605,14 @@ int32_t ucol_uprv_tok_readOption(const UChar *start, const UChar *end, const UCh int32_t i = 0; ucol_uprv_tok_initData(); - while(u_isWhitespace(*start) || uprv_isRuleWhiteSpace(*start)) { /* eat whitespace */ + while(PatternProps::isWhiteSpace(*start)) { /* eat whitespace */ start++; } while(i < UTOK_OPTION_COUNT) { if(u_strncmpNoCase(start, rulesOptions[i].optionName, rulesOptions[i].optionLen) == 0) { if(end - start > rulesOptions[i].optionLen) { *optionArg = start+rulesOptions[i].optionLen; /* End of option name; start of the options */ - while(u_isWhitespace(**optionArg) || uprv_isRuleWhiteSpace(**optionArg)) { /* eat whitespace */ + while(PatternProps::isWhiteSpace(**optionArg)) { /* eat whitespace */ (*optionArg)++; } } @@ -1041,7 +1041,7 @@ ucol_tok_parseNextTokenInternal(UColTokenParser *src, } } }else { - if(!uprv_isRuleWhiteSpace(ch)) { + if(!PatternProps::isWhiteSpace(ch)) { /* Sets the strength for this entry */ switch (ch) { case 0x003D/*'='*/ : @@ -1267,7 +1267,7 @@ ucol_tok_parseNextTokenInternal(UColTokenParser *src, do { ch = *(++(src->current)); // skip whitespace between '|' and the character - } while (uprv_isRuleWhiteSpace(ch)); + } while (PatternProps::isWhiteSpace(ch)); break; //charsOffset = 0; @@ -1340,7 +1340,7 @@ ucol_tok_parseNextTokenInternal(UColTokenParser *src, if(wasInQuote) { if(ch != 0x27) { - if(inQuote || !uprv_isRuleWhiteSpace(ch)) { + if(inQuote || !PatternProps::isWhiteSpace(ch)) { ucol_tok_addToExtraCurrent(src, &ch, 1, status); } } @@ -2223,7 +2223,7 @@ void ucol_tok_initTokenList( UChar* import_end = u_strchr(setStart, 0x005D); int32_t optionEndOffset = (int32_t)(import_end + 1 - rules); // Ignore trailing whitespace. - while(uprv_isRuleWhiteSpace(*(import_end-1))) { + while(PatternProps::isWhiteSpace(*(import_end-1))) { --import_end; } diff --git a/icu4c/source/i18n/unicode/decimfmt.h b/icu4c/source/i18n/unicode/decimfmt.h index 778bf2292da..229d8f806c5 100644 --- a/icu4c/source/i18n/unicode/decimfmt.h +++ b/icu4c/source/i18n/unicode/decimfmt.h @@ -1,6 +1,6 @@ /* ******************************************************************************** -* Copyright (C) 1997-2010, International Business Machines +* Copyright (C) 1997-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************** * @@ -1980,7 +1980,7 @@ private: const UnicodeString& input, int32_t pos); - static int32_t skipRuleWhiteSpace(const UnicodeString& text, int32_t pos); + static int32_t skipPatternWhiteSpace(const UnicodeString& text, int32_t pos); static int32_t skipUWhiteSpace(const UnicodeString& text, int32_t pos); diff --git a/icu4c/source/i18n/unicode/smpdtfmt.h b/icu4c/source/i18n/unicode/smpdtfmt.h index 14c91d4244b..79745e18231 100644 --- a/icu4c/source/i18n/unicode/smpdtfmt.h +++ b/icu4c/source/i18n/unicode/smpdtfmt.h @@ -1,5 +1,5 @@ /* -* Copyright (C) 1997-2010, International Business Machines Corporation and +* Copyright (C) 1997-2011, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* * @@ -986,10 +986,10 @@ private: int32_t pos) const; /** - * Skip over a run of zero or more isRuleWhiteSpace() characters at + * Skip over a run of zero or more Pattern_White_Space characters at * pos in text. */ - int32_t skipRuleWhiteSpace(const UnicodeString& text, int32_t pos) const; + int32_t skipPatternWhiteSpace(const UnicodeString& text, int32_t pos) const; /** * Skip over a run of zero or more isUWhiteSpace() characters at pos diff --git a/icu4c/source/test/intltest/itrbnf.cpp b/icu4c/source/test/intltest/itrbnf.cpp index d1c3445ecbd..9186089b85b 100644 --- a/icu4c/source/test/intltest/itrbnf.cpp +++ b/icu4c/source/test/intltest/itrbnf.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 1996-2010, International Business Machines Corporation and * + * Copyright (C) 1996-2011, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ @@ -1738,7 +1738,7 @@ IntlTestRBNF::TestLocalizations(void) "<<%main,>,>", // comma before close angle ok "<<%main>,\" '>>", // quotes everything until next quote "<<%main>,<'en', \"it's ok\">>", // double quotes work too - " \n <\n <\n %main\n >\n , \t <\t en\t , \tfoo \t\t > \n\n > \n ", // rule whitespace ok + " \n <\n <\n %main\n >\n , \t <\t en\t , \tfoo \t\t > \n\n > \n ", // Pattern_White_Space ok }; int32_t goodLocsLen = sizeof(goodLocs)/sizeof(goodLocs[0]); diff --git a/icu4c/source/test/intltest/textfile.h b/icu4c/source/test/intltest/textfile.h index 3fcea074abc..e51119d4730 100644 --- a/icu4c/source/test/intltest/textfile.h +++ b/icu4c/source/test/intltest/textfile.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2004-2006, International Business Machines +* Copyright (c) 2004-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -41,7 +41,7 @@ class TextFile { /** * Read a line, ignoring blank lines and lines that start with * '#'. Trim leading white space. - * @param trim if TRUE then remove leading rule white space + * @param trim if TRUE then remove leading Pattern_White_Space * @return TRUE if a line was read, or FALSE if the EOF * was reached or an error occurred */ diff --git a/icu4c/source/test/intltest/tokiter.cpp b/icu4c/source/test/intltest/tokiter.cpp index 202cbd93e3d..809e03602f3 100644 --- a/icu4c/source/test/intltest/tokiter.cpp +++ b/icu4c/source/test/intltest/tokiter.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2004, International Business Machines +* Copyright (c) 2004-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -10,6 +10,7 @@ */ #include "tokiter.h" #include "textfile.h" +#include "patternprops.h" #include "util.h" #include "uprops.h" @@ -52,7 +53,7 @@ int32_t TokenIterator::getLineNumber() const { /** * Read the next token from 'this->line' and append it to 'token'. - * Tokens are separated by rule white space. Tokens may also be + * Tokens are separated by Pattern_White_Space. Tokens may also be * delimited by double or single quotes. The closing quote must match * the opening quote. If a '#' is encountered, the rest of the line * is ignored, unless it is backslash-escaped or within quotes. @@ -89,7 +90,7 @@ UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) { } token.append(c32); } else if ((quote != 0 && c == quote) || - (quote == 0 && uprv_isRuleWhiteSpace(c))) { + (quote == 0 && PatternProps::isWhiteSpace(c))) { ++pos; return TRUE; } else if (quote == 0 && c == '#') { diff --git a/icu4c/source/test/intltest/tokiter.h b/icu4c/source/test/intltest/tokiter.h index 5db5753ae4d..0ea2dcc02c5 100644 --- a/icu4c/source/test/intltest/tokiter.h +++ b/icu4c/source/test/intltest/tokiter.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2004-2006, International Business Machines +* Copyright (c) 2004-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -17,8 +17,8 @@ class TextFile; /** * An iterator class that returns successive string tokens from some - * source. String tokens are, in general, separated by rule white - * space in the source test. Furthermore, they may be delimited by + * source. String tokens are, in general, separated by Pattern_White_Space + * in the source test. Furthermore, they may be delimited by * either single or double quotes (opening and closing quotes must * match). Escapes are processed using standard ICU unescaping. */ diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index e2fd290b7cd..929b4b73b4a 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -182,7 +182,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, TESTCASE(71,TestAnyX); TESTCASE(72,TestSourceTargetSet); TESTCASE(73,TestGurmukhiDevanagari); - TESTCASE(74,TestRuleWhitespace); + TESTCASE(74,TestPatternWhiteSpace); TESTCASE(75,TestAllCodepoints); TESTCASE(76,TestBoilerplate); TESTCASE(77,TestAlternateSyntax); @@ -3964,9 +3964,9 @@ void TransliteratorTest::TestSourceTargetSet() { } /** - * Test handling of rule whitespace, for both RBT and UnicodeSet. + * Test handling of Pattern_White_Space, for both RBT and UnicodeSet. */ -void TransliteratorTest::TestRuleWhitespace() { +void TransliteratorTest::TestPatternWhiteSpace() { // Rules const char* r = "a > \\u200E b;"; diff --git a/icu4c/source/test/intltest/transtst.h b/icu4c/source/test/intltest/transtst.h index 03fba9e3bf3..0b1f9a10342 100644 --- a/icu4c/source/test/intltest/transtst.h +++ b/icu4c/source/test/intltest/transtst.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1999-2009, International Business Machines +* Copyright (C) 1999-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -338,7 +338,7 @@ private: void TestSourceTargetSet(void); - void TestRuleWhitespace(void); + void TestPatternWhiteSpace(void); void TestAllCodepoints(void); diff --git a/icu4c/source/test/intltest/usettest.cpp b/icu4c/source/test/intltest/usettest.cpp index 682e3a03054..c3d98a3c067 100644 --- a/icu4c/source/test/intltest/usettest.cpp +++ b/icu4c/source/test/intltest/usettest.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************** -* Copyright (C) 1999-2010 International Business Machines Corporation and +* Copyright (C) 1999-2011 International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************** * Date Name Description @@ -1388,7 +1388,7 @@ void UnicodeSetTest::TestEscapePattern() { const char exp[] = "[\\u200A-\\u200E\\uFEFF\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]"; // We test this with two passes; in the second pass we - // pre-unescape the pattern. Since U+200E is rule whitespace, + // pre-unescape the pattern. Since U+200E is Pattern_White_Space, // this fails -- which is what we expect. for (int32_t pass=1; pass<=2; ++pass) { UErrorCode ec = U_ZERO_ERROR;