mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-1997 use uprv_isRuleWhiteSpace() in parsers
X-SVN-Rev: 9381
This commit is contained in:
parent
1d6cb955f1
commit
649057f2f3
5 changed files with 20 additions and 17 deletions
|
@ -18,6 +18,7 @@
|
|||
#include "upropset.h"
|
||||
#include "util.h"
|
||||
#include "uvector.h"
|
||||
#include "uprops.h"
|
||||
|
||||
// HIGH_VALUE > all valid values. 110000 for codepoints
|
||||
#define UNICODESET_HIGH 0x0110000
|
||||
|
@ -390,7 +391,7 @@ UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
|
|||
* @param pattern a string specifying what characters are in the set
|
||||
* @param ignoreSpaces if <code>true</code>, all spaces in the
|
||||
* pattern are ignored. Spaces are those characters for which
|
||||
* <code>Character.isSpaceChar()</code> is <code>true</code>.
|
||||
* <code>uprv_isRuleWhiteSpace()</code> is <code>true</code>.
|
||||
* Characters preceded by '\\' are escaped, losing any special
|
||||
* meaning they otherwise have. Spaces may be included by
|
||||
* escaping them.
|
||||
|
@ -410,7 +411,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
|
|||
// Skip over trailing whitespace
|
||||
int32_t i = pos.getIndex();
|
||||
int32_t n = pattern.length();
|
||||
while (i<n && u_isWhitespace(pattern.charAt(i))) {
|
||||
while (i<n && uprv_isRuleWhiteSpace(pattern.charAt(i))) {
|
||||
++i;
|
||||
}
|
||||
|
||||
|
@ -469,7 +470,7 @@ void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool useHexEscape)
|
|||
break;
|
||||
default:
|
||||
// Escape whitespace
|
||||
if (u_isspace(c)) {
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
buf.append(BACKSLASH);
|
||||
}
|
||||
break;
|
||||
|
@ -1638,9 +1639,7 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
|
|||
i += UTF_CHAR_LENGTH(c);
|
||||
}
|
||||
|
||||
// Ignore whitespace. This is not Unicode whitespace, but Java
|
||||
// whitespace, a subset of Unicode whitespace.
|
||||
if (u_isspace(c)) {
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "mutex.h"
|
||||
#include "ucln.h"
|
||||
#include "charstr.h"
|
||||
#include "uprops.h"
|
||||
|
||||
|
||||
static UMTX PROPSET_MUTEX = NULL;
|
||||
|
@ -547,7 +548,7 @@ UnicodeString UnicodePropertySet::munge(const UnicodeString& str,
|
|||
for (int32_t i=start; i<limit; ) {
|
||||
UChar32 c = str.char32At(i);
|
||||
i += UTF_CHAR_LENGTH(c);
|
||||
if (c != 95/*_*/ && c != 45/*-*/ && !u_isspace(c)) {
|
||||
if (c != 95/*_*/ && c != 45/*-*/ && !uprv_isRuleWhiteSpace(c)) {
|
||||
buf.append(c);
|
||||
}
|
||||
}
|
||||
|
@ -563,7 +564,7 @@ int32_t UnicodePropertySet::skipWhitespace(const UnicodeString& str,
|
|||
int32_t pos) {
|
||||
while (pos < str.length()) {
|
||||
UChar32 c = str.char32At(pos);
|
||||
if (!u_isspace(c)) {
|
||||
if (!uprv_isRuleWhiteSpace(c)) {
|
||||
break;
|
||||
}
|
||||
pos += UTF_CHAR_LENGTH(c);
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "util.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/unimatch.h"
|
||||
#include "uprops.h"
|
||||
|
||||
// Define UChar constants using hex for EBCDIC compatibility
|
||||
|
||||
|
@ -132,7 +133,7 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
|
|||
int32_t p = pos;
|
||||
while (p < str.length()) {
|
||||
UChar32 c = str.char32At(p);
|
||||
if (!u_isWhitespace(c)) {
|
||||
if (!uprv_isRuleWhiteSpace(c)) {
|
||||
break;
|
||||
}
|
||||
p += UTF_CHAR_LENGTH(c);
|
||||
|
@ -200,7 +201,7 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_
|
|||
return -1;
|
||||
}
|
||||
c = rule.charAt(pos++);
|
||||
if (!u_isWhitespace(c)) {
|
||||
if (!uprv_isRuleWhiteSpace(c)) {
|
||||
return -1;
|
||||
}
|
||||
// FALL THROUGH to skipWhitespace
|
||||
|
@ -287,14 +288,14 @@ int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32
|
|||
* first character to examine. It must be less than str.length(),
|
||||
* and it must not point to a whitespace character. That is, must
|
||||
* have pos < str.length() and
|
||||
* !UCharacter::isWhitespace(str.char32At(pos)). On
|
||||
* !uprv_isRuleWhiteSpace(str.char32At(pos)). On
|
||||
* OUTPUT, the position after the last parsed character.
|
||||
* @return the Unicode identifier, or an empty string if there is
|
||||
* no valid identifier at pos.
|
||||
*/
|
||||
UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) {
|
||||
// assert(pos < str.length());
|
||||
// assert(!UCharacter::isWhitespace(str.char32At(pos)));
|
||||
// assert(!uprv_isRuleWhiteSpace(str.char32At(pos)));
|
||||
UnicodeString buf;
|
||||
int p = pos;
|
||||
while (p < str.length()) {
|
||||
|
@ -456,7 +457,7 @@ void ICU_Utility::appendToRule(UnicodeString& rule,
|
|||
!((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
|
||||
(c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
|
||||
(c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
|
||||
u_isWhitespace(c)) {
|
||||
uprv_isRuleWhiteSpace(c)) {
|
||||
quoteBuf.append(c);
|
||||
// Double ' within a quote
|
||||
if (c == APOSTROPHE) {
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "unicode/unifilt.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "name2uni.h"
|
||||
#include "uprops.h"
|
||||
|
||||
// As of Unicode 3.0.0, the longest name is 83 characters long.
|
||||
#define LONGEST_NAME 83
|
||||
|
@ -116,7 +117,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
|
|||
// to a single space. If closeDelimiter is found, exit
|
||||
// the loop. If any other character is found, exit the
|
||||
// loop. If the limit is found, exit the loop.
|
||||
if (u_isWhitespace(c)) {
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
// Ignore leading whitespace
|
||||
if (ibuf != 0 && buf[ibuf-1] != (UChar)0x0020) {
|
||||
buf[ibuf++] = (UChar)0x0020 /* */;
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "uvector.h"
|
||||
#include "util.h"
|
||||
#include "cmemory.h"
|
||||
#include "uprops.h"
|
||||
|
||||
// Operators
|
||||
#define VARIABLE_DEF_OP ((UChar)0x003D) /*=*/
|
||||
|
@ -417,7 +418,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l
|
|||
// Since all syntax characters are in the BMP, fetching
|
||||
// 16-bit code units suffices here.
|
||||
UChar c = rule.charAt(pos++);
|
||||
if (u_isWhitespace(c)) {
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
// Ignore whitespace.  Note that "whitespace" here means
|
||||
// rule whitespace as tested by uprv_isRuleWhiteSpace(),
|
||||
// covering whitespace likely to be seen in code.
|
||||
|
@ -943,7 +944,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
|||
|
||||
while (pos < limit && U_SUCCESS(status)) {
|
||||
UChar c = rule.charAt(pos++);
|
||||
if (u_isWhitespace(c)) {
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
// Ignore leading whitespace.
|
||||
continue;
|
||||
}
|
||||
|
@ -964,7 +965,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
|||
rule.compare(pos, ID_TOKEN_LEN, ID_TOKEN) == 0) {
|
||||
pos += ID_TOKEN_LEN;
|
||||
c = rule.charAt(pos);
|
||||
while (u_isWhitespace(c) && pos < limit) {
|
||||
while (uprv_isRuleWhiteSpace(c) && pos < limit) {
|
||||
++pos;
|
||||
c = rule.charAt(pos);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue