/// Try to guess if the given text is HTML or not. No guarantee.
///
/// The text is reported as HTML when it contains at least one '<' and at
/// least one '>' anywhere in the string, in any order. Nothing is parsed or
/// validated, so "a > b < c" is also reported as HTML.
///
/// Both markers are ASCII, and every byte of a multi-byte UTF-8 sequence has
/// its high bit set, so searching the raw bytes gives the same answer as
/// decoding code points for well-formed input. Searching bytes also keeps the
/// scan strictly inside the string when the input ends with a truncated
/// multi-byte sequence.
///
/// @param utf8 Text to inspect; expected to be UTF-8 encoded.
/// @return true if both '<' and '>' occur in @p utf8, false otherwise
///         (including for an empty string).
bool IsHTML(std::string const & utf8)
{
  // Presence is all that matters, so stop at the first hit of each marker
  // instead of counting every occurrence.
  bool const hasLess = utf8.find('<') != std::string::npos;
  if (!hasLess)
    return false;

  return utf8.find('>') != std::string::npos;
}