ICU-1997 use uprv_isRuleWhiteSpace() in parsers

X-SVN-Rev: 9381
2025-04-07 22:44:49 +00:00 · 2002-07-26 22:07:49 +00:00 · 2002-07-26 22:07:49 +00:00 · 649057f2f3
commit 649057f2f3
parent 1d6cb955f1
5 changed files with 20 additions and 17 deletions
--- a/icu4c/source/common/uniset.cpp
+++ b/icu4c/source/common/uniset.cpp
@@ -18,6 +18,7 @@
 #include "upropset.h"
 #include "util.h"
 #include "uvector.h"
+#include "uprops.h"

 // HIGH_VALUE > all valid values. 110000 for codepoints
 #define UNICODESET_HIGH 0x0110000
@@ -390,7 +391,7 @@ UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
 * @param pattern a string specifying what characters are in the set
 * @param ignoreSpaces if <code>true</code>, all spaces in the
 * pattern are ignored.  Spaces are those characters for which
- * <code>Character.isSpaceChar()</code> is <code>true</code>.
+ * <code>uprv_isRuleWhiteSpace()</code> is <code>true</code>.
 * Characters preceded by '\\' are escaped, losing any special
 * meaning they otherwise have.  Spaces may be included by
 * escaping them.
@@ -410,7 +411,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
    // Skip over trailing whitespace
    int32_t i = pos.getIndex();
    int32_t n = pattern.length();
-    while (i<n && u_isWhitespace(pattern.charAt(i))) {
+    while (i<n && uprv_isRuleWhiteSpace(pattern.charAt(i))) {
        ++i;
    }

@@ -469,7 +470,7 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape)
        break;
    default:
        // Escape whitespace
-        if (u_isspace(c)) {
+        if (uprv_isRuleWhiteSpace(c)) {
            buf.append(BACKSLASH);
        }
        break;
@@ -1638,9 +1639,7 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
            i += UTF_CHAR_LENGTH(c);
        }

-        // Ignore whitespace.  This is not Unicode whitespace, but Java
-        // whitespace, a subset of Unicode whitespace.
-        if (u_isspace(c)) {
+        if (uprv_isRuleWhiteSpace(c)) {
            continue;
        }

--- a/icu4c/source/common/upropset.cpp
+++ b/icu4c/source/common/upropset.cpp
@@ -15,6 +15,7 @@
 #include "mutex.h"
 #include "ucln.h"
 #include "charstr.h"
+#include "uprops.h"


 static UMTX PROPSET_MUTEX = NULL;
@@ -547,7 +548,7 @@ UnicodeString UnicodePropertySet::munge(const UnicodeString& str,
    for (int32_t i=start; i<limit; ) {
        UChar32 c = str.char32At(i);
        i += UTF_CHAR_LENGTH(c);
-        if (c != 95/*_*/ && c != 45/*-*/ && !u_isspace(c)) {
+        if (c != 95/*_*/ && c != 45/*-*/ && !uprv_isRuleWhiteSpace(c)) {
            buf.append(c);
        }
    }
@@ -563,7 +564,7 @@ int32_t UnicodePropertySet::skipWhitespace(const UnicodeString& str,
                                           int32_t pos) {
    while (pos < str.length()) {
        UChar32 c = str.char32At(pos);
-        if (!u_isspace(c)) {
+        if (!uprv_isRuleWhiteSpace(c)) {
            break;
        }
        pos += UTF_CHAR_LENGTH(c);
--- a/icu4c/source/common/util.cpp
+++ b/icu4c/source/common/util.cpp
@@ -11,6 +11,7 @@
 #include "util.h"
 #include "unicode/uchar.h"
 #include "unicode/unimatch.h"
+#include "uprops.h"

 // Define UChar constants using hex for EBCDIC compatibility

@@ -132,7 +133,7 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
    int32_t p = pos;
    while (p < str.length()) {
        UChar32 c = str.char32At(p);
-        if (!u_isWhitespace(c)) {
+        if (!uprv_isRuleWhiteSpace(c)) {
            break;
        }
        p += UTF_CHAR_LENGTH(c);
@@ -200,7 +201,7 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_
                return -1;
            }
            c = rule.charAt(pos++);
-            if (!u_isWhitespace(c)) {
+            if (!uprv_isRuleWhiteSpace(c)) {
                return -1;
            }
            // FALL THROUGH to skipWhitespace
@@ -287,14 +288,14 @@ int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32
 * first character to examine.  It must be less than str.length(),
 * and it must not point to a whitespace character.  That is, must
 * have pos < str.length() and
- * !UCharacter::isWhitespace(str.char32At(pos)).  On
+ * !uprv_isRuleWhiteSpace(str.char32At(pos)).  On
 * OUTPUT, the position after the last parsed character.
 * @return the Unicode identifier, or an empty string if there is
 * no valid identifier at pos.
 */
 UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) {
    // assert(pos < str.length());
-    // assert(!UCharacter::isWhitespace(str.char32At(pos)));
+    // assert(!uprv_isRuleWhiteSpace(str.char32At(pos)));
    UnicodeString buf;
    int p = pos;
    while (p < str.length()) {
@@ -456,7 +457,7 @@ void ICU_Utility::appendToRule(UnicodeString& rule,
              !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
                (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
                (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
-             u_isWhitespace(c)) {
+             uprv_isRuleWhiteSpace(c)) {
        quoteBuf.append(c);
        // Double ' within a quote
        if (c == APOSTROPHE) {
--- a/icu4c/source/i18n/name2uni.cpp
+++ b/icu4c/source/i18n/name2uni.cpp
@@ -11,6 +11,7 @@
 #include "unicode/unifilt.h"
 #include "unicode/uchar.h"
 #include "name2uni.h"
+#include "uprops.h"

 // As of Unicode 3.0.0, the longest name is 83 characters long.
 #define LONGEST_NAME 83
@@ -116,7 +117,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
            // to a single space.  If closeDelimiter is found, exit
            // the loop.  If any other character is found, exit the
            // loop.  If the limit is found, exit the loop.
-            if (u_isWhitespace(c)) {
+            if (uprv_isRuleWhiteSpace(c)) {
                // Ignore leading whitespace
                if (ibuf != 0 && buf[ibuf-1] != (UChar)0x0020) {
                    buf[ibuf++] = (UChar)0x0020 /* */;
--- a/icu4c/source/i18n/rbt_pars.cpp
+++ b/icu4c/source/i18n/rbt_pars.cpp
@@ -31,6 +31,7 @@
 #include "uvector.h"
 #include "util.h"
 #include "cmemory.h"
+#include "uprops.h"

 // Operators
 #define VARIABLE_DEF_OP ((UChar)0x003D) /*=*/
@@ -417,7 +418,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
        // Since all syntax characters are in the BMP, fetching
        // 16-bit code units suffices here.
        UChar c = rule.charAt(pos++);
-        if (u_isWhitespace(c)) {
+        if (uprv_isRuleWhiteSpace(c)) {
            // Ignore whitespace.  Note that this is not Unicode
            // spaces, but Java spaces -- a subset, representing
            // whitespace likely to be seen in code.
@@ -943,7 +944,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,

    while (pos < limit && U_SUCCESS(status)) {
        UChar c = rule.charAt(pos++);
-        if (u_isWhitespace(c)) {
+        if (uprv_isRuleWhiteSpace(c)) {
            // Ignore leading whitespace.
            continue;
        }
@@ -964,7 +965,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
            rule.compare(pos, ID_TOKEN_LEN, ID_TOKEN) == 0) {
            pos += ID_TOKEN_LEN;
            c = rule.charAt(pos);
-            while (u_isWhitespace(c) && pos < limit) {
+            while (uprv_isRuleWhiteSpace(c) && pos < limit) {
                ++pos;
                c = rule.charAt(pos);
            }