diff --git a/base/string_utils.cpp b/base/string_utils.cpp
index f2546161fb..f90c24e7f8 100644
--- a/base/string_utils.cpp
+++ b/base/string_utils.cpp
@@ -236,6 +236,12 @@ bool IsASCIIString(std::string const & str)
 }
 
 bool IsASCIIDigit(UniChar c) { return c >= '0' && c <= '9'; }
+
+bool IsASCIISpace(UniChar c)
+{
+  return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
+}
+
 bool IsASCIILatin(UniChar c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }
 
 bool StartsWith(UniString const & s, UniString const & p)
diff --git a/base/string_utils.hpp b/base/string_utils.hpp
index 1ebec6dc13..3e353fd3ac 100644
--- a/base/string_utils.hpp
+++ b/base/string_utils.hpp
@@ -88,6 +88,7 @@ UniString MakeUniString(std::string const & utf8s);
 std::string ToUtf8(UniString const & s);
 bool IsASCIIString(std::string const & str);
 bool IsASCIIDigit(UniChar c);
+bool IsASCIISpace(UniChar c);
 bool IsASCIILatin(UniChar c);
 
 inline std::string DebugPrint(UniString const & s) { return ToUtf8(s); }
diff --git a/indexer/search_string_utils.cpp b/indexer/search_string_utils.cpp
index 88027211d6..9a651056ff 100644
--- a/indexer/search_string_utils.cpp
+++ b/indexer/search_string_utils.cpp
@@ -5,8 +5,6 @@
 #include "base/macros.hpp"
 #include "base/mem_trie.hpp"
 
-#include <cctype>
-
 using namespace std;
 using namespace strings;
 
@@ -29,10 +27,10 @@ void RemoveNumeroSigns(UniString & s)
     }
 
     size_t j = i + 1;
-    while (j < n && isspace(s[j]))
+    while (j < n && IsASCIISpace(s[j]))
       ++j;
 
-    if (j == n || isdigit(s[j]))
+    if (j == n || IsASCIIDigit(s[j]))
       s[i] = ' ';
 
     i = j;
@@ -68,9 +66,9 @@ UniString NormalizeAndSimplifyString(string const & s)
       break;
     // Some Danish-specific hacks.
     case 0x00d8:  // Ø
-    case 0x00f8:
+    case 0x00f8:  // ø
       c = 'o';
-      break;  // ø
+      break;
     case 0x0152:  // Œ
     case 0x0153:  // œ
       c = 'o';