ICU-1997 use uprv_isRuleWhiteSpace() in parsers

X-SVN-Rev: 9381
This commit is contained in:
Alan Liu 2002-07-26 22:07:49 +00:00
parent 1d6cb955f1
commit 649057f2f3
5 changed files with 20 additions and 17 deletions

View file

@ -18,6 +18,7 @@
#include "upropset.h"
#include "util.h"
#include "uvector.h"
#include "uprops.h"
// HIGH_VALUE > all valid values. 110000 for codepoints
#define UNICODESET_HIGH 0x0110000
@ -390,7 +391,7 @@ UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
* @param pattern a string specifying what characters are in the set
* @param ignoreSpaces if <code>true</code>, all spaces in the
* pattern are ignored. Spaces are those characters for which
* <code>Character.isSpaceChar()</code> is <code>true</code>.
* <code>uprv_isRuleWhiteSpace()</code> is <code>true</code>.
* Characters preceded by '\\' are escaped, losing any special
* meaning they otherwise have. Spaces may be included by
* escaping them.
@ -410,7 +411,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
// Skip over trailing whitespace
int32_t i = pos.getIndex();
int32_t n = pattern.length();
while (i<n && u_isWhitespace(pattern.charAt(i))) {
while (i<n && uprv_isRuleWhiteSpace(pattern.charAt(i))) {
++i;
}
@ -469,7 +470,7 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape)
break;
default:
// Escape whitespace
if (u_isspace(c)) {
if (uprv_isRuleWhiteSpace(c)) {
buf.append(BACKSLASH);
}
break;
@ -1638,9 +1639,7 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
i += UTF_CHAR_LENGTH(c);
}
// Ignore whitespace. This is not Unicode whitespace, but Java
// whitespace, a subset of Unicode whitespace.
if (u_isspace(c)) {
if (uprv_isRuleWhiteSpace(c)) {
continue;
}

View file

@ -15,6 +15,7 @@
#include "mutex.h"
#include "ucln.h"
#include "charstr.h"
#include "uprops.h"
static UMTX PROPSET_MUTEX = NULL;
@ -547,7 +548,7 @@ UnicodeString UnicodePropertySet::munge(const UnicodeString& str,
for (int32_t i=start; i<limit; ) {
UChar32 c = str.char32At(i);
i += UTF_CHAR_LENGTH(c);
if (c != 95/*_*/ && c != 45/*-*/ && !u_isspace(c)) {
if (c != 95/*_*/ && c != 45/*-*/ && !uprv_isRuleWhiteSpace(c)) {
buf.append(c);
}
}
@ -563,7 +564,7 @@ int32_t UnicodePropertySet::skipWhitespace(const UnicodeString& str,
int32_t pos) {
while (pos < str.length()) {
UChar32 c = str.char32At(pos);
if (!u_isspace(c)) {
if (!uprv_isRuleWhiteSpace(c)) {
break;
}
pos += UTF_CHAR_LENGTH(c);

View file

@ -11,6 +11,7 @@
#include "util.h"
#include "unicode/uchar.h"
#include "unicode/unimatch.h"
#include "uprops.h"
// Define UChar constants using hex for EBCDIC compatibility
@ -132,7 +133,7 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
int32_t p = pos;
while (p < str.length()) {
UChar32 c = str.char32At(p);
if (!u_isWhitespace(c)) {
if (!uprv_isRuleWhiteSpace(c)) {
break;
}
p += UTF_CHAR_LENGTH(c);
@ -200,7 +201,7 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_
return -1;
}
c = rule.charAt(pos++);
if (!u_isWhitespace(c)) {
if (!uprv_isRuleWhiteSpace(c)) {
return -1;
}
// FALL THROUGH to skipWhitespace
@ -287,14 +288,14 @@ int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32
* first character to examine. It must be less than str.length(),
* and it must not point to a whitespace character. That is, must
* have pos < str.length() and
* !UCharacter::isWhitespace(str.char32At(pos)). On
* !uprv_isRuleWhiteSpace(str.char32At(pos)). On
* OUTPUT, the position after the last parsed character.
* @return the Unicode identifier, or an empty string if there is
* no valid identifier at pos.
*/
UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) {
// assert(pos < str.length());
// assert(!UCharacter::isWhitespace(str.char32At(pos)));
// assert(!uprv_isRuleWhiteSpace(str.char32At(pos)));
UnicodeString buf;
int p = pos;
while (p < str.length()) {
@ -456,7 +457,7 @@ void ICU_Utility::appendToRule(UnicodeString& rule,
!((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
(c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
(c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
u_isWhitespace(c)) {
uprv_isRuleWhiteSpace(c)) {
quoteBuf.append(c);
// Double ' within a quote
if (c == APOSTROPHE) {

View file

@ -11,6 +11,7 @@
#include "unicode/unifilt.h"
#include "unicode/uchar.h"
#include "name2uni.h"
#include "uprops.h"
// As of Unicode 3.0.0, the longest name is 83 characters long.
#define LONGEST_NAME 83
@ -116,7 +117,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
// to a single space. If closeDelimiter is found, exit
// the loop. If any other character is found, exit the
// loop. If the limit is found, exit the loop.
if (u_isWhitespace(c)) {
if (uprv_isRuleWhiteSpace(c)) {
// Ignore leading whitespace
if (ibuf != 0 && buf[ibuf-1] != (UChar)0x0020) {
buf[ibuf++] = (UChar)0x0020 /* */;

View file

@ -31,6 +31,7 @@
#include "uvector.h"
#include "util.h"
#include "cmemory.h"
#include "uprops.h"
// Operators
#define VARIABLE_DEF_OP ((UChar)0x003D) /*=*/
@ -417,7 +418,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
// Since all syntax characters are in the BMP, fetching
// 16-bit code units suffices here.
UChar c = rule.charAt(pos++);
if (u_isWhitespace(c)) {
if (uprv_isRuleWhiteSpace(c)) {
// Ignore whitespace. Note that this is not general Unicode
// spaces, but the characters for which uprv_isRuleWhiteSpace()
// is true -- meant to cover whitespace likely to be seen in code.
@ -943,7 +944,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
while (pos < limit && U_SUCCESS(status)) {
UChar c = rule.charAt(pos++);
if (u_isWhitespace(c)) {
if (uprv_isRuleWhiteSpace(c)) {
// Ignore leading whitespace.
continue;
}
@ -964,7 +965,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
rule.compare(pos, ID_TOKEN_LEN, ID_TOKEN) == 0) {
pos += ID_TOKEN_LEN;
c = rule.charAt(pos);
while (u_isWhitespace(c) && pos < limit) {
while (uprv_isRuleWhiteSpace(c) && pos < limit) {
++pos;
c = rule.charAt(pos);
}