mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-8491 remove uprv_isRuleWhiteSpace(c)
X-SVN-Rev: 29900
This commit is contained in:
parent
6ecb5efc59
commit
9774441e40
24 changed files with 127 additions and 169 deletions
|
@ -2,7 +2,7 @@
|
|||
//
|
||||
// file: rbbiscan.cpp
|
||||
//
|
||||
// Copyright (C) 2002-2010, International Business Machines Corporation and others.
|
||||
// Copyright (C) 2002-2011, International Business Machines Corporation and others.
|
||||
// All Rights Reserved.
|
||||
//
|
||||
// This file contains the Rule Based Break Iterator Rule Builder functions for
|
||||
|
@ -122,7 +122,7 @@ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
|
|||
// and the time to build these few sets should be small compared to a
|
||||
// full break iterator build.
|
||||
fRuleSets[kRuleSet_rule_char-128] = UnicodeSet(gRuleSet_rule_char_pattern, *rb->fStatus);
|
||||
UnicodeSet *whitespaceSet = uprv_openRuleWhiteSpaceSet(rb->fStatus);
|
||||
UnicodeSet *whitespaceSet = uprv_openPatternWhiteSpaceSet(rb->fStatus);
|
||||
if (U_FAILURE(*rb->fStatus)) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2003-2007, International Business Machines
|
||||
* Copyright (c) 2003-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
|
@ -12,7 +12,7 @@
|
|||
#include "unicode/parsepos.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/symtable.h"
|
||||
#include "util.h"
|
||||
#include "patternprops.h"
|
||||
|
||||
/* \U87654321 or \ud800\udc00 */
|
||||
#define MAX_U_NOTATION_LEN 12
|
||||
|
@ -63,8 +63,7 @@ UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCod
|
|||
continue;
|
||||
}
|
||||
|
||||
if ((options & SKIP_WHITESPACE) != 0 &&
|
||||
uprv_isRuleWhiteSpace(c)) {
|
||||
if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -102,7 +101,7 @@ void RuleCharacterIterator::skipIgnored(int32_t options) {
|
|||
if ((options & SKIP_WHITESPACE) != 0) {
|
||||
for (;;) {
|
||||
UChar32 a = _current();
|
||||
if (!uprv_isRuleWhiteSpace(a)) break;
|
||||
if (!PatternProps::isWhiteSpace(a)) break;
|
||||
_advance(UTF_CHAR_LENGTH(a));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2003-2007, International Business Machines
|
||||
* Copyright (c) 2003-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
|
@ -82,9 +82,8 @@ public:
|
|||
|
||||
/**
|
||||
* Bitmask option to enable skipping of whitespace. If (options &
|
||||
* SKIP_WHITESPACE) != 0, then whitespace characters will be silently
|
||||
* skipped, as if they were not present in the input. Whitespace
|
||||
* characters are defined by UCharacterProperty.isRuleWhiteSpace().
|
||||
* SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently
|
||||
* skipped, as if they were not present in the input.
|
||||
*/
|
||||
enum { SKIP_WHITESPACE = 4 };
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2001-2008, International Business Machines
|
||||
* Copyright (c) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
|
@ -8,9 +8,10 @@
|
|||
**********************************************************************
|
||||
*/
|
||||
|
||||
#include "util.h"
|
||||
#include "unicode/unimatch.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "patternprops.h"
|
||||
#include "util.h"
|
||||
|
||||
// Define UChar constants using hex for EBCDIC compatibility
|
||||
|
||||
|
@ -132,13 +133,8 @@ int32_t ICU_Utility::quotedIndexOf(const UnicodeString& text,
|
|||
int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
|
||||
UBool advance) {
|
||||
int32_t p = pos;
|
||||
while (p < str.length()) {
|
||||
UChar32 c = str.char32At(p);
|
||||
if (!uprv_isRuleWhiteSpace(c)) {
|
||||
break;
|
||||
}
|
||||
p += UTF_CHAR_LENGTH(c);
|
||||
}
|
||||
const UChar* s = str.getBuffer();
|
||||
p = (int32_t)(PatternProps::skipWhiteSpace(s + p, str.length() - p) - s);
|
||||
if (advance) {
|
||||
pos = p;
|
||||
}
|
||||
|
@ -146,8 +142,8 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
|
|||
}
|
||||
|
||||
/**
|
||||
* Skip over whitespace in a Replaceable. Whitespace is defined by
|
||||
* uprv_isRuleWhiteSpace(). Skipping may be done in the forward or
|
||||
* Skip over Pattern_White_Space in a Replaceable.
|
||||
* Skipping may be done in the forward or
|
||||
* reverse direction. In either case, the leftmost index will be
|
||||
* inclusive, and the rightmost index will be exclusive. That is,
|
||||
* given a range defined as [start, limit), the call
|
||||
|
@ -173,7 +169,7 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
|
|||
//? }
|
||||
//?
|
||||
//? while (pos != stop &&
|
||||
//? uprv_isRuleWhiteSpace(c = text.char32At(pos))) {
|
||||
//? PatternProps::isWhiteSpace(c = text.char32At(pos))) {
|
||||
//? if (isForward) {
|
||||
//? pos += UTF_CHAR_LENGTH(c);
|
||||
//? } else {
|
||||
|
@ -217,7 +213,7 @@ UBool ICU_Utility::parseChar(const UnicodeString& id, int32_t& pos, UChar ch) {
|
|||
* pattern. Characters are matched literally and case-sensitively
|
||||
* except for the following special characters:
|
||||
*
|
||||
* ~ zero or more uprv_isRuleWhiteSpace chars
|
||||
* ~ zero or more Pattern_White_Space chars
|
||||
*
|
||||
* If end of pattern is reached with all matches along the way,
|
||||
* pos is advanced to the first unparsed index and returned.
|
||||
|
@ -246,7 +242,7 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& pat,
|
|||
|
||||
// parse \s*
|
||||
if (cpat == 126 /*~*/) {
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
if (PatternProps::isWhiteSpace(c)) {
|
||||
index += UTF_CHAR_LENGTH(c);
|
||||
continue;
|
||||
} else {
|
||||
|
@ -371,7 +367,7 @@ void ICU_Utility::appendToRule(UnicodeString& rule,
|
|||
!((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
|
||||
(c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
|
||||
(c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
|
||||
uprv_isRuleWhiteSpace(c)) {
|
||||
PatternProps::isWhiteSpace(c)) {
|
||||
quoteBuf.append(c);
|
||||
// Double ' within a quote
|
||||
if (c == APOSTROPHE) {
|
||||
|
@ -412,26 +408,13 @@ void ICU_Utility::appendToRule(UnicodeString& rule,
|
|||
|
||||
U_NAMESPACE_END
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uprv_isRuleWhiteSpace(UChar32 c) {
|
||||
/* "white space" in the sense of ICU rule parsers
|
||||
This is a FIXED LIST that is NOT DEPENDENT ON UNICODE PROPERTIES.
|
||||
See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
|
||||
U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, and U+2028..U+2029
|
||||
Equivalent to test for Pattern_White_Space Unicode property.
|
||||
*/
|
||||
return (c >= 0x0009 && c <= 0x2029 &&
|
||||
(c <= 0x000D || c == 0x0020 || c == 0x0085 ||
|
||||
c == 0x200E || c == 0x200F || c >= 0x2028));
|
||||
}
|
||||
|
||||
U_CAPI U_NAMESPACE_QUALIFIER UnicodeSet* U_EXPORT2
|
||||
uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
|
||||
uprv_openPatternWhiteSpaceSet(UErrorCode* ec) {
|
||||
if(U_FAILURE(*ec)) {
|
||||
return NULL;
|
||||
}
|
||||
// create a set with the Pattern_White_Space characters,
|
||||
// without a pattern for fewer code dependencies
|
||||
// without a pattern string for fewer code dependencies
|
||||
U_NAMESPACE_QUALIFIER UnicodeSet *set=new U_NAMESPACE_QUALIFIER UnicodeSet(9, 0xd);
|
||||
// Check for new failure.
|
||||
if (set == NULL) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2001-2007, International Business Machines
|
||||
* Copyright (c) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
|
@ -90,8 +90,8 @@ class U_COMMON_API ICU_Utility /* not : public UObject because all methods are s
|
|||
UBool advance = FALSE);
|
||||
|
||||
/**
|
||||
* Skip over whitespace in a Replaceable. Whitespace is defined by
|
||||
* uprv_isRuleWhiteSpace(). Skipping may be done in the forward or
|
||||
* Skip over Pattern_White_Space in a Replaceable.
|
||||
* Skipping may be done in the forward or
|
||||
* reverse direction. In either case, the leftmost index will be
|
||||
* inclusive, and the rightmost index will be exclusive. That is,
|
||||
* given a range defined as [start, limit), the call
|
||||
|
@ -151,7 +151,7 @@ class U_COMMON_API ICU_Utility /* not : public UObject because all methods are s
|
|||
* pattern. Characters are matched literally and case-sensitively
|
||||
* except for the following special characters:
|
||||
*
|
||||
* ~ zero or more uprv_isRuleWhiteSpace chars
|
||||
* ~ zero or more Pattern_White_Space chars
|
||||
*
|
||||
* If end of pattern is reached with all matches along the way,
|
||||
* pos is advanced to the first unparsed index and returned.
|
||||
|
@ -237,25 +237,14 @@ private:
|
|||
U_NAMESPACE_END
|
||||
|
||||
/**
|
||||
* Get the set of "white space" characters in the sense of ICU rule
|
||||
* parsers. Caller must close/delete result.
|
||||
* Equivalent to the set of characters with the Pattern_White_Space Unicode property.
|
||||
* Returns a new set with the Pattern_White_Space characters.
|
||||
* The caller must close/delete the result.
|
||||
* Stable set of characters, won't change.
|
||||
* See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI U_NAMESPACE_QUALIFIER UnicodeSet* U_EXPORT2
|
||||
uprv_openRuleWhiteSpaceSet(UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Is this character a "white space" in the sense of ICU rule parsers?
|
||||
* Equivalent to test for Pattern_White_Space Unicode property.
|
||||
* Stable set of characters, won't change.
|
||||
* See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uprv_isRuleWhiteSpace(UChar32 c);
|
||||
uprv_openPatternWhiteSpaceSet(UErrorCode* ec);
|
||||
|
||||
#endif
|
||||
//eof
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2001-2006, International Business Machines
|
||||
* Copyright (c) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
|
@ -8,8 +8,9 @@
|
|||
**********************************************************************
|
||||
*/
|
||||
|
||||
#include "util.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "patternprops.h"
|
||||
#include "util.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -95,7 +96,7 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_
|
|||
return -1;
|
||||
}
|
||||
c = rule.charAt(pos++);
|
||||
if (!uprv_isRuleWhiteSpace(c)) {
|
||||
if (!PatternProps::isWhiteSpace(c)) {
|
||||
return -1;
|
||||
}
|
||||
// FALL THROUGH to skipWhitespace
|
||||
|
@ -133,15 +134,13 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_
|
|||
* @param pos INPUT-OUTPUT parameter. On INPUT, pos is the
|
||||
* first character to examine. It must be less than str.length(),
|
||||
* and it must not point to a whitespace character. That is, must
|
||||
* have pos < str.length() and
|
||||
* !uprv_isRuleWhiteSpace(str.char32At(pos)). On
|
||||
* have pos < str.length(). On
|
||||
* OUTPUT, the position after the last parsed character.
|
||||
* @return the Unicode identifier, or an empty string if there is
|
||||
* no valid identifier at pos.
|
||||
*/
|
||||
UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) {
|
||||
// assert(pos < str.length());
|
||||
// assert(!uprv_isRuleWhiteSpace(str.char32At(pos)));
|
||||
UnicodeString buf;
|
||||
int p = pos;
|
||||
while (p < str.length()) {
|
||||
|
|
|
@ -56,7 +56,7 @@
|
|||
#include "ucurrimp.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "util.h"
|
||||
#include "patternprops.h"
|
||||
#include "digitlst.h"
|
||||
#include "cstring.h"
|
||||
#include "umutex.h"
|
||||
|
@ -2173,12 +2173,12 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix,
|
|||
for (int32_t i=0; i<affix.length(); ) {
|
||||
UChar32 c = affix.char32At(i);
|
||||
int32_t len = U16_LENGTH(c);
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
if (PatternProps::isWhiteSpace(c)) {
|
||||
// We may have a pattern like: \u200F \u0020
|
||||
// and input text like: \u200F \u0020
|
||||
// Note that U+200F and U+0020 are RuleWhiteSpace but only
|
||||
// Note that U+200F and U+0020 are Pattern_White_Space but only
|
||||
// U+0020 is UWhiteSpace. So we have to first do a direct
|
||||
// match of the run of RULE whitespace in the pattern,
|
||||
// match of the run of Pattern_White_Space in the pattern,
|
||||
// then match any extra characters.
|
||||
UBool literalMatch = FALSE;
|
||||
while (pos < input.length() &&
|
||||
|
@ -2191,13 +2191,13 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix,
|
|||
}
|
||||
c = affix.char32At(i);
|
||||
len = U16_LENGTH(c);
|
||||
if (!uprv_isRuleWhiteSpace(c)) {
|
||||
if (!PatternProps::isWhiteSpace(c)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Advance over run in pattern
|
||||
i = skipRuleWhiteSpace(affix, i);
|
||||
i = skipPatternWhiteSpace(affix, i);
|
||||
|
||||
// Advance over run in input text
|
||||
// Must see at least one white space char in input,
|
||||
|
@ -2226,18 +2226,12 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix,
|
|||
}
|
||||
|
||||
/**
|
||||
* Skip over a run of zero or more isRuleWhiteSpace() characters at
|
||||
* Skip over a run of zero or more Pattern_White_Space characters at
|
||||
* pos in text.
|
||||
*/
|
||||
int32_t DecimalFormat::skipRuleWhiteSpace(const UnicodeString& text, int32_t pos) {
|
||||
while (pos < text.length()) {
|
||||
UChar32 c = text.char32At(pos);
|
||||
if (!uprv_isRuleWhiteSpace(c)) {
|
||||
break;
|
||||
}
|
||||
pos += U16_LENGTH(c);
|
||||
}
|
||||
return pos;
|
||||
int32_t DecimalFormat::skipPatternWhiteSpace(const UnicodeString& text, int32_t pos) {
|
||||
const UChar* s = text.getBuffer();
|
||||
return (int32_t)(PatternProps::skipWhiteSpace(s + pos, text.length() - pos) - s);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2354,8 +2348,8 @@ int32_t DecimalFormat::compareComplexAffix(const UnicodeString& affixPat,
|
|||
}
|
||||
|
||||
pos = match(text, pos, c);
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
i = skipRuleWhiteSpace(affixPat, i);
|
||||
if (PatternProps::isWhiteSpace(c)) {
|
||||
i = skipPatternWhiteSpace(affixPat, i);
|
||||
}
|
||||
}
|
||||
return pos - start;
|
||||
|
@ -2364,14 +2358,14 @@ int32_t DecimalFormat::compareComplexAffix(const UnicodeString& affixPat,
|
|||
/**
|
||||
* Match a single character at text[pos] and return the index of the
|
||||
* next character upon success. Return -1 on failure. If
|
||||
* isRuleWhiteSpace(ch) then match a run of white space in text.
|
||||
* ch is a Pattern_White_Space then match a run of white space in text.
|
||||
*/
|
||||
int32_t DecimalFormat::match(const UnicodeString& text, int32_t pos, UChar32 ch) {
|
||||
if (uprv_isRuleWhiteSpace(ch)) {
|
||||
if (PatternProps::isWhiteSpace(ch)) {
|
||||
// Advance over run of white space in input text
|
||||
// Must see at least one white space char in input
|
||||
int32_t s = pos;
|
||||
pos = skipRuleWhiteSpace(text, pos);
|
||||
pos = skipPatternWhiteSpace(text, pos);
|
||||
if (pos == s) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -2390,8 +2384,8 @@ int32_t DecimalFormat::match(const UnicodeString& text, int32_t pos, const Unico
|
|||
for (int32_t i=0; i<str.length() && pos >= 0; ) {
|
||||
UChar32 ch = str.char32At(i);
|
||||
i += U16_LENGTH(ch);
|
||||
if (uprv_isRuleWhiteSpace(ch)) {
|
||||
i = skipRuleWhiteSpace(str, i);
|
||||
if (PatternProps::isWhiteSpace(ch)) {
|
||||
i = skipPatternWhiteSpace(str, i);
|
||||
}
|
||||
pos = match(text, pos, ch);
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2001-2008, International Business Machines
|
||||
* Copyright (C) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
|
@ -15,8 +15,9 @@
|
|||
#include "unicode/unifilt.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "name2uni.h"
|
||||
#include "cmemory.h"
|
||||
#include "name2uni.h"
|
||||
#include "patternprops.h"
|
||||
#include "uprops.h"
|
||||
#include "uinvchar.h"
|
||||
#include "util.h"
|
||||
|
@ -162,7 +163,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
|
|||
|
||||
// Convert \s+ => SPACE. This assumes there are no
|
||||
// runs of >1 space characters in names.
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
if (PatternProps::isWhiteSpace(c)) {
|
||||
// Ignore leading whitespace
|
||||
if (name.length() > 0 &&
|
||||
name.charAt(name.length()-1) != SPACE) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1997-2008, International Business Machines
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
******************************************************************************
|
||||
* file name: nfrs.cpp
|
||||
|
@ -20,13 +20,12 @@
|
|||
#include "unicode/uchar.h"
|
||||
#include "nfrule.h"
|
||||
#include "nfrlist.h"
|
||||
#include "patternprops.h"
|
||||
|
||||
#ifdef RBNF_DEBUG
|
||||
#include "cmemory.h"
|
||||
#endif
|
||||
|
||||
#include "util.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#if 0
|
||||
|
@ -149,7 +148,7 @@ NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& sta
|
|||
status = U_PARSE_ERROR;
|
||||
} else {
|
||||
name.setTo(description, 0, pos);
|
||||
while (pos < description.length() && uprv_isRuleWhiteSpace(description.charAt(++pos))) {
|
||||
while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
|
||||
}
|
||||
description.remove(0, pos);
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1997-2008, International Business Machines
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
******************************************************************************
|
||||
* file name: nfrule.cpp
|
||||
|
@ -24,8 +24,7 @@
|
|||
#include "nfrs.h"
|
||||
#include "nfrlist.h"
|
||||
#include "nfsubs.h"
|
||||
|
||||
#include "util.h"
|
||||
#include "patternprops.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -235,7 +234,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status)
|
|||
descriptor.setTo(description, 0, p);
|
||||
|
||||
++p;
|
||||
while (p < description.length() && uprv_isRuleWhiteSpace(description.charAt(p))) {
|
||||
while (p < description.length() && PatternProps::isWhiteSpace(description.charAt(p))) {
|
||||
++p;
|
||||
}
|
||||
description.removeBetween(0, p);
|
||||
|
@ -278,7 +277,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status)
|
|||
else if (c == gSlash || c == gGreaterThan) {
|
||||
break;
|
||||
}
|
||||
else if (uprv_isRuleWhiteSpace(c) || c == gComma || c == gDot) {
|
||||
else if (PatternProps::isWhiteSpace(c) || c == gComma || c == gDot) {
|
||||
}
|
||||
else {
|
||||
// throw new IllegalArgumentException("Illegal character in rule descriptor");
|
||||
|
@ -307,7 +306,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status)
|
|||
else if (c == gGreaterThan) {
|
||||
break;
|
||||
}
|
||||
else if (uprv_isRuleWhiteSpace(c) || c == gComma || c == gDot) {
|
||||
else if (PatternProps::isWhiteSpace(c) || c == gComma || c == gDot) {
|
||||
}
|
||||
else {
|
||||
// throw new IllegalArgumentException("Illegal character in rule descriptor");
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2010, International Business Machines Corporation
|
||||
* Copyright (C) 1997-2011, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -25,7 +25,7 @@
|
|||
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "util.h"
|
||||
#include "patternprops.h"
|
||||
#include "uresimp.h"
|
||||
|
||||
// debugging
|
||||
|
@ -313,9 +313,9 @@ private:
|
|||
void inc(void) { ++p; ch = 0xffff; }
|
||||
UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
|
||||
UBool check(UChar c) { return p < e && (ch == c || *p == c); }
|
||||
void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
|
||||
void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
|
||||
UBool inList(UChar c, const UChar* list) const {
|
||||
if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE;
|
||||
if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
|
||||
while (*list && *list != c) ++list; return *list == c;
|
||||
}
|
||||
void parseError(const char* msg);
|
||||
|
@ -1331,7 +1331,7 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali
|
|||
lpEnd = description.length() - 1;
|
||||
}
|
||||
int lpStart = lp + u_strlen(gLenientParse);
|
||||
while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
|
||||
while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
|
||||
++lpStart;
|
||||
}
|
||||
|
||||
|
@ -1467,7 +1467,7 @@ RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
|
|||
while (start != -1 && start < description.length()) {
|
||||
// seek to the first non-whitespace character...
|
||||
while (start < description.length()
|
||||
&& uprv_isRuleWhiteSpace(description.charAt(start))) {
|
||||
&& PatternProps::isWhiteSpace(description.charAt(start))) {
|
||||
++start;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2008, International Business Machines
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
|
@ -33,6 +33,7 @@
|
|||
#include "tridpars.h"
|
||||
#include "uvector.h"
|
||||
#include "hash.h"
|
||||
#include "patternprops.h"
|
||||
#include "util.h"
|
||||
#include "cmemory.h"
|
||||
#include "uprops.h"
|
||||
|
@ -406,7 +407,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
|
|||
// Since all syntax characters are in the BMP, fetching
|
||||
// 16-bit code units suffices here.
|
||||
UChar c = rule.charAt(pos++);
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
if (PatternProps::isWhiteSpace(c)) {
|
||||
// Ignore whitespace. Note that this is not Unicode
|
||||
// spaces, but Java spaces -- a subset, representing
|
||||
// whitespace likely to be seen in code.
|
||||
|
@ -929,7 +930,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
|||
|
||||
while (pos < limit && U_SUCCESS(status)) {
|
||||
UChar c = rule.charAt(pos++);
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
if (PatternProps::isWhiteSpace(c)) {
|
||||
// Ignore leading whitespace.
|
||||
continue;
|
||||
}
|
||||
|
@ -958,7 +959,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
|||
rule.compare(pos, ID_TOKEN_LEN, ID_TOKEN) == 0) {
|
||||
pos += ID_TOKEN_LEN;
|
||||
c = rule.charAt(pos);
|
||||
while (uprv_isRuleWhiteSpace(c) && pos < limit) {
|
||||
while (PatternProps::isWhiteSpace(c) && pos < limit) {
|
||||
++pos;
|
||||
c = rule.charAt(pos);
|
||||
}
|
||||
|
@ -1690,7 +1691,7 @@ utrans_stripRules(const UChar *source, int32_t sourceLen, UChar *target, UErrorC
|
|||
*status = U_PARSE_ERROR;
|
||||
return 0;
|
||||
}
|
||||
if (!uprv_isRuleWhiteSpace(c2) && !u_iscntrl(c2) && !u_ispunct(c2)) {
|
||||
if (!PatternProps::isWhiteSpace(c2) && !u_iscntrl(c2) && !u_ispunct(c2)) {
|
||||
/* It was escaped for a reason. Write what it was supposed to be. */
|
||||
source+=5;
|
||||
c = c2;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
//
|
||||
// file: regexcmp.cpp
|
||||
//
|
||||
// Copyright (C) 2002-2010 International Business Machines Corporation and others.
|
||||
// Copyright (C) 2002-2011 International Business Machines Corporation and others.
|
||||
// All Rights Reserved.
|
||||
//
|
||||
// This file contains the ICU regular expression compiler, which is responsible
|
||||
|
@ -21,7 +21,7 @@
|
|||
#include "unicode/parsepos.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/regex.h"
|
||||
#include "util.h"
|
||||
#include "patternprops.h"
|
||||
#include "putilimp.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
@ -3743,7 +3743,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
|
|||
}
|
||||
}
|
||||
// TODO: check what Java & Perl do with non-ASCII white spaces. Ticket 6061.
|
||||
if (uprv_isRuleWhiteSpace(c.fChar) == FALSE) {
|
||||
if (PatternProps::isWhiteSpace(c.fChar) == FALSE) {
|
||||
break;
|
||||
}
|
||||
c.fChar = nextCharLL();
|
||||
|
|
|
@ -46,7 +46,7 @@
|
|||
#include "unicode/rbtz.h"
|
||||
#include "unicode/vtzone.h"
|
||||
#include "olsontz.h"
|
||||
#include "util.h"
|
||||
#include "patternprops.h"
|
||||
#include "fphdlimp.h"
|
||||
#include "gregoimp.h"
|
||||
#include "hebrwcal.h"
|
||||
|
@ -1912,11 +1912,11 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
|
|||
UChar ch = fPattern.charAt(i+1);
|
||||
|
||||
// check for whitespace
|
||||
if (uprv_isRuleWhiteSpace(ch)) {
|
||||
if (PatternProps::isWhiteSpace(ch)) {
|
||||
i++;
|
||||
// Advance over run in pattern
|
||||
while ((i+1)<fPattern.length() &&
|
||||
uprv_isRuleWhiteSpace(fPattern.charAt(i+1))) {
|
||||
PatternProps::isWhiteSpace(fPattern.charAt(i+1))) {
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
@ -1954,17 +1954,17 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
|
|||
|
||||
// A run of white space in the pattern matches a run
|
||||
// of white space in the input text.
|
||||
if (uprv_isRuleWhiteSpace(ch)) {
|
||||
if (PatternProps::isWhiteSpace(ch)) {
|
||||
// Advance over run in pattern
|
||||
while ((i+1)<fPattern.length() &&
|
||||
uprv_isRuleWhiteSpace(fPattern.charAt(i+1))) {
|
||||
PatternProps::isWhiteSpace(fPattern.charAt(i+1))) {
|
||||
++i;
|
||||
}
|
||||
|
||||
// Advance over run in input text
|
||||
int32_t s = pos;
|
||||
while (pos<text.length() &&
|
||||
( u_isUWhiteSpace(text.charAt(pos)) || uprv_isRuleWhiteSpace(text.charAt(pos)))) {
|
||||
( u_isUWhiteSpace(text.charAt(pos)) || PatternProps::isWhiteSpace(text.charAt(pos)))) {
|
||||
++pos;
|
||||
}
|
||||
|
||||
|
@ -2398,7 +2398,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
|
|||
return -start;
|
||||
}
|
||||
UChar32 c = text.char32At(start);
|
||||
if (!u_isUWhiteSpace(c) || !uprv_isRuleWhiteSpace(c)) {
|
||||
if (!u_isUWhiteSpace(c) || !PatternProps::isWhiteSpace(c)) {
|
||||
break;
|
||||
}
|
||||
start += UTF_CHAR_LENGTH(c);
|
||||
|
@ -3206,12 +3206,12 @@ SimpleDateFormat::compareSimpleAffix(const UnicodeString& affix,
|
|||
for (int32_t i=0; i<affix.length(); ) {
|
||||
UChar32 c = affix.char32At(i);
|
||||
int32_t len = U16_LENGTH(c);
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
if (PatternProps::isWhiteSpace(c)) {
|
||||
// We may have a pattern like: \u200F \u0020
|
||||
// and input text like: \u200F \u0020
|
||||
// Note that U+200F and U+0020 are RuleWhiteSpace but only
|
||||
// Note that U+200F and U+0020 are Pattern_White_Space but only
|
||||
// U+0020 is UWhiteSpace. So we have to first do a direct
|
||||
// match of the run of RULE whitespace in the pattern,
|
||||
// match of the run of Pattern_White_Space in the pattern,
|
||||
// then match any extra characters.
|
||||
UBool literalMatch = FALSE;
|
||||
while (pos < input.length() &&
|
||||
|
@ -3224,13 +3224,13 @@ SimpleDateFormat::compareSimpleAffix(const UnicodeString& affix,
|
|||
}
|
||||
c = affix.char32At(i);
|
||||
len = U16_LENGTH(c);
|
||||
if (!uprv_isRuleWhiteSpace(c)) {
|
||||
if (!PatternProps::isWhiteSpace(c)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Advance over run in pattern
|
||||
i = skipRuleWhiteSpace(affix, i);
|
||||
i = skipPatternWhiteSpace(affix, i);
|
||||
|
||||
// Advance over run in input text
|
||||
// Must see at least one white space char in input,
|
||||
|
@ -3261,15 +3261,9 @@ SimpleDateFormat::compareSimpleAffix(const UnicodeString& affix,
|
|||
//----------------------------------------------------------------------
|
||||
|
||||
int32_t
|
||||
SimpleDateFormat::skipRuleWhiteSpace(const UnicodeString& text, int32_t pos) const {
|
||||
while (pos < text.length()) {
|
||||
UChar32 c = text.char32At(pos);
|
||||
if (!uprv_isRuleWhiteSpace(c)) {
|
||||
break;
|
||||
}
|
||||
pos += U16_LENGTH(c);
|
||||
}
|
||||
return pos;
|
||||
SimpleDateFormat::skipPatternWhiteSpace(const UnicodeString& text, int32_t pos) const {
|
||||
const UChar* s = text.getBuffer();
|
||||
return (int32_t)(PatternProps::skipWhiteSpace(s + pos, text.length() - pos) - s);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2010, International Business Machines
|
||||
* Copyright (C) 2001-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -29,11 +29,11 @@
|
|||
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "patternprops.h"
|
||||
#include "ucol_bld.h"
|
||||
#include "ucol_tok.h"
|
||||
#include "ulocimp.h"
|
||||
#include "uresimp.h"
|
||||
#include "util.h"
|
||||
|
||||
// Define this only for debugging.
|
||||
// #define DEBUG_FOR_COLL_RULES 1
|
||||
|
@ -511,7 +511,7 @@ ucol_tok_getNextArgument(const UChar *start, const UChar *end,
|
|||
|
||||
ucol_uprv_tok_initData();
|
||||
|
||||
while(start < end && (u_isWhitespace(*start) || uprv_isRuleWhiteSpace(*start))) { /* eat whitespace */
|
||||
while(start < end && PatternProps::isWhiteSpace(*start)) { /* eat whitespace */
|
||||
start++;
|
||||
}
|
||||
if(start >= end) {
|
||||
|
@ -530,7 +530,7 @@ ucol_tok_getNextArgument(const UChar *start, const UChar *end,
|
|||
foundOption = TRUE;
|
||||
if(end - start > rulesOptions[i].optionLen) {
|
||||
optionArg = start+rulesOptions[i].optionLen+1; /* start of the options, skip space */
|
||||
while(u_isWhitespace(*optionArg) || uprv_isRuleWhiteSpace(*optionArg)) { /* eat whitespace */
|
||||
while(PatternProps::isWhiteSpace(*optionArg)) { /* eat whitespace */
|
||||
optionArg++;
|
||||
}
|
||||
}
|
||||
|
@ -551,7 +551,7 @@ ucol_tok_getNextArgument(const UChar *start, const UChar *end,
|
|||
*attrib = rulesOptions[i].attr;
|
||||
*value = rulesOptions[i].subopts[j].attrVal;
|
||||
optionArg += rulesOptions[i].subopts[j].subLen;
|
||||
while(u_isWhitespace(*optionArg) || uprv_isRuleWhiteSpace(*optionArg)) { /* eat whitespace */
|
||||
while(PatternProps::isWhiteSpace(*optionArg)) { /* eat whitespace */
|
||||
optionArg++;
|
||||
}
|
||||
if(*optionArg == 0x005d) {
|
||||
|
@ -605,14 +605,14 @@ int32_t ucol_uprv_tok_readOption(const UChar *start, const UChar *end, const UCh
|
|||
int32_t i = 0;
|
||||
ucol_uprv_tok_initData();
|
||||
|
||||
while(u_isWhitespace(*start) || uprv_isRuleWhiteSpace(*start)) { /* eat whitespace */
|
||||
while(PatternProps::isWhiteSpace(*start)) { /* eat whitespace */
|
||||
start++;
|
||||
}
|
||||
while(i < UTOK_OPTION_COUNT) {
|
||||
if(u_strncmpNoCase(start, rulesOptions[i].optionName, rulesOptions[i].optionLen) == 0) {
|
||||
if(end - start > rulesOptions[i].optionLen) {
|
||||
*optionArg = start+rulesOptions[i].optionLen; /* End of option name; start of the options */
|
||||
while(u_isWhitespace(**optionArg) || uprv_isRuleWhiteSpace(**optionArg)) { /* eat whitespace */
|
||||
while(PatternProps::isWhiteSpace(**optionArg)) { /* eat whitespace */
|
||||
(*optionArg)++;
|
||||
}
|
||||
}
|
||||
|
@ -1041,7 +1041,7 @@ ucol_tok_parseNextTokenInternal(UColTokenParser *src,
|
|||
}
|
||||
}
|
||||
}else {
|
||||
if(!uprv_isRuleWhiteSpace(ch)) {
|
||||
if(!PatternProps::isWhiteSpace(ch)) {
|
||||
/* Sets the strength for this entry */
|
||||
switch (ch) {
|
||||
case 0x003D/*'='*/ :
|
||||
|
@ -1267,7 +1267,7 @@ ucol_tok_parseNextTokenInternal(UColTokenParser *src,
|
|||
do {
|
||||
ch = *(++(src->current));
|
||||
// skip whitespace between '|' and the character
|
||||
} while (uprv_isRuleWhiteSpace(ch));
|
||||
} while (PatternProps::isWhiteSpace(ch));
|
||||
break;
|
||||
|
||||
//charsOffset = 0;
|
||||
|
@ -1340,7 +1340,7 @@ ucol_tok_parseNextTokenInternal(UColTokenParser *src,
|
|||
|
||||
if(wasInQuote) {
|
||||
if(ch != 0x27) {
|
||||
if(inQuote || !uprv_isRuleWhiteSpace(ch)) {
|
||||
if(inQuote || !PatternProps::isWhiteSpace(ch)) {
|
||||
ucol_tok_addToExtraCurrent(src, &ch, 1, status);
|
||||
}
|
||||
}
|
||||
|
@ -2223,7 +2223,7 @@ void ucol_tok_initTokenList(
|
|||
UChar* import_end = u_strchr(setStart, 0x005D);
|
||||
int32_t optionEndOffset = (int32_t)(import_end + 1 - rules);
|
||||
// Ignore trailing whitespace.
|
||||
while(uprv_isRuleWhiteSpace(*(import_end-1))) {
|
||||
while(PatternProps::isWhiteSpace(*(import_end-1))) {
|
||||
--import_end;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
********************************************************************************
|
||||
* Copyright (C) 1997-2010, International Business Machines
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
********************************************************************************
|
||||
*
|
||||
|
@ -1980,7 +1980,7 @@ private:
|
|||
const UnicodeString& input,
|
||||
int32_t pos);
|
||||
|
||||
static int32_t skipRuleWhiteSpace(const UnicodeString& text, int32_t pos);
|
||||
static int32_t skipPatternWhiteSpace(const UnicodeString& text, int32_t pos);
|
||||
|
||||
static int32_t skipUWhiteSpace(const UnicodeString& text, int32_t pos);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (C) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
|
@ -986,10 +986,10 @@ private:
|
|||
int32_t pos) const;
|
||||
|
||||
/**
|
||||
* Skip over a run of zero or more isRuleWhiteSpace() characters at
|
||||
* Skip over a run of zero or more Pattern_White_Space characters at
|
||||
* pos in text.
|
||||
*/
|
||||
int32_t skipRuleWhiteSpace(const UnicodeString& text, int32_t pos) const;
|
||||
int32_t skipPatternWhiteSpace(const UnicodeString& text, int32_t pos) const;
|
||||
|
||||
/**
|
||||
* Skip over a run of zero or more isUWhiteSpace() characters at pos
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -1738,7 +1738,7 @@ IntlTestRBNF::TestLocalizations(void)
|
|||
"<<%main,>,<en, Main,>>", // comma before close angle ok
|
||||
"<<%main>,<en, ',<>\" '>>", // quotes everything until next quote
|
||||
"<<%main>,<'en', \"it's ok\">>", // double quotes work too
|
||||
" \n <\n <\n %main\n >\n , \t <\t en\t , \tfoo \t\t > \n\n > \n ", // rule whitespace ok
|
||||
" \n <\n <\n %main\n >\n , \t <\t en\t , \tfoo \t\t > \n\n > \n ", // Pattern_White_Space ok
|
||||
};
|
||||
int32_t goodLocsLen = sizeof(goodLocs)/sizeof(goodLocs[0]);
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2004-2006, International Business Machines
|
||||
* Copyright (c) 2004-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
|
@ -41,7 +41,7 @@ class TextFile {
|
|||
/**
|
||||
* Read a line, ignoring blank lines and lines that start with
|
||||
* '#'. Trim leading white space.
|
||||
* @param trim if TRUE then remove leading rule white space
|
||||
* @param trim if TRUE then remove leading Pattern_White_Space
|
||||
* @return TRUE if a line was read, or FALSE if the EOF
|
||||
* was reached or an error occurred
|
||||
*/
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2004, International Business Machines
|
||||
* Copyright (c) 2004-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
|
@ -10,6 +10,7 @@
|
|||
*/
|
||||
#include "tokiter.h"
|
||||
#include "textfile.h"
|
||||
#include "patternprops.h"
|
||||
#include "util.h"
|
||||
#include "uprops.h"
|
||||
|
||||
|
@ -52,7 +53,7 @@ int32_t TokenIterator::getLineNumber() const {
|
|||
|
||||
/**
|
||||
* Read the next token from 'this->line' and append it to 'token'.
|
||||
* Tokens are separated by rule white space. Tokens may also be
|
||||
* Tokens are separated by Pattern_White_Space. Tokens may also be
|
||||
* delimited by double or single quotes. The closing quote must match
|
||||
* the opening quote. If a '#' is encountered, the rest of the line
|
||||
* is ignored, unless it is backslash-escaped or within quotes.
|
||||
|
@ -89,7 +90,7 @@ UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
|
|||
}
|
||||
token.append(c32);
|
||||
} else if ((quote != 0 && c == quote) ||
|
||||
(quote == 0 && uprv_isRuleWhiteSpace(c))) {
|
||||
(quote == 0 && PatternProps::isWhiteSpace(c))) {
|
||||
++pos;
|
||||
return TRUE;
|
||||
} else if (quote == 0 && c == '#') {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2004-2006, International Business Machines
|
||||
* Copyright (c) 2004-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
|
@ -17,8 +17,8 @@ class TextFile;
|
|||
|
||||
/**
|
||||
* An iterator class that returns successive string tokens from some
|
||||
* source. String tokens are, in general, separated by rule white
|
||||
* space in the source test. Furthermore, they may be delimited by
|
||||
* source. String tokens are, in general, separated by Pattern_White_Space
|
||||
* in the source text. Furthermore, they may be delimited by
|
||||
* either single or double quotes (opening and closing quotes must
|
||||
* match). Escapes are processed using standard ICU unescaping.
|
||||
*/
|
||||
|
|
|
@ -182,7 +182,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
|
|||
TESTCASE(71,TestAnyX);
|
||||
TESTCASE(72,TestSourceTargetSet);
|
||||
TESTCASE(73,TestGurmukhiDevanagari);
|
||||
TESTCASE(74,TestRuleWhitespace);
|
||||
TESTCASE(74,TestPatternWhiteSpace);
|
||||
TESTCASE(75,TestAllCodepoints);
|
||||
TESTCASE(76,TestBoilerplate);
|
||||
TESTCASE(77,TestAlternateSyntax);
|
||||
|
@ -3964,9 +3964,9 @@ void TransliteratorTest::TestSourceTargetSet() {
|
|||
}
|
||||
|
||||
/**
|
||||
* Test handling of rule whitespace, for both RBT and UnicodeSet.
|
||||
* Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
|
||||
*/
|
||||
void TransliteratorTest::TestRuleWhitespace() {
|
||||
void TransliteratorTest::TestPatternWhiteSpace() {
|
||||
// Rules
|
||||
const char* r = "a > \\u200E b;";
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2009, International Business Machines
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
|
@ -338,7 +338,7 @@ private:
|
|||
|
||||
void TestSourceTargetSet(void);
|
||||
|
||||
void TestRuleWhitespace(void);
|
||||
void TestPatternWhiteSpace(void);
|
||||
|
||||
void TestAllCodepoints(void);
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
********************************************************************************
|
||||
* Copyright (C) 1999-2010 International Business Machines Corporation and
|
||||
* Copyright (C) 1999-2011 International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************************
|
||||
* Date Name Description
|
||||
|
@ -1388,7 +1388,7 @@ void UnicodeSetTest::TestEscapePattern() {
|
|||
const char exp[] =
|
||||
"[\\u200A-\\u200E\\uFEFF\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
|
||||
// We test this with two passes; in the second pass we
|
||||
// pre-unescape the pattern. Since U+200E is rule whitespace,
|
||||
// pre-unescape the pattern. Since U+200E is Pattern_White_Space,
|
||||
// this fails -- which is what we expect.
|
||||
for (int32_t pass=1; pass<=2; ++pass) {
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
|
|
Loading…
Add table
Reference in a new issue