diff --git a/base/base_tests/string_utils_test.cpp b/base/base_tests/string_utils_test.cpp
index 9640253548..97d9b65362 100644
--- a/base/base_tests/string_utils_test.cpp
+++ b/base/base_tests/string_utils_test.cpp
@@ -434,3 +434,70 @@ UNIT_TEST(IsUtf8Test)
   TEST(strings::IsASCIIString("YES"), ());
   TEST(strings::IsASCIIString("Nice places in Zhodino.kml"), ());
 }
+
+UNIT_TEST(CountNormLowerSymbols)
+{
+  // Source strings: neither lower-cased nor normalized.
+  char const * strs[] = {
+    "æüßs",
+    "üßü",
+    "İʼnẖtestὒ",
+    "İʼnẖ",
+    "İʼnẖtestὒ",
+    "HelloWorld",
+    "üßü",
+    "",
+    "",
+    "Тест на не корректную русскую строку",
+    "В ответе пустая строка",
+    "Überstraße"
+  };
+
+  // Lower-cased, normalized candidate prefixes, paired by index with strs.
+  char const * low_strs[] = {
+    "æusss",
+    "ussu",
+    "i\u0307\u02bcnh\u0331testυ\u0313\u0300",
+    "i\u0307\u02bcnh\u0331testυ\u0313\u0300",
+    "i\u0307\u02bcnh\u0331",
+    "helloworld",
+    "usu",
+    "",
+    "empty",
+    "Тест на не корректную строку",
+    "",
+    "uberstras"
+  };
+
+  // Expected CountNormLowerSymbols(strs[i], low_strs[i]); 0 is expected both
+  // for a mismatch and for an empty low_strs[i].
+  size_t const results[] = {
+    4,
+    3,
+    8,
+    0,
+    3,
+    10,
+    0,
+    0,
+    0,
+    0,
+    0,
+    9
+  };
+
+  size_t const test_count = ARRAY_SIZE(strs);
+
+  // The three tables are indexed in parallel, so they must have equal length.
+  TEST_EQUAL(ARRAY_SIZE(low_strs), test_count, ());
+  TEST_EQUAL(ARRAY_SIZE(results), test_count, ());
+
+  for (size_t i = 0; i < test_count; ++i)
+  {
+    strings::UniString source = strings::MakeUniString(strs[i]);
+    strings::UniString result = strings::MakeUniString(low_strs[i]);
+
+    size_t res = strings::CountNormLowerSymbols(source, result);
+    TEST_EQUAL(res, results[i], ());
+  }
+}
diff --git a/base/lower_case.cpp b/base/lower_case.cpp
index 5c65c5727b..7479a478cf 100644
--- a/base/lower_case.cpp
+++ b/base/lower_case.cpp
@@ -194,4 +194,37 @@ void MakeLowerCase(UniString & s)
   s.swap(r);
 }
 
+size_t CountNormLowerSymbols(UniString const & s, UniString const & lowStr)
+{
+  size_t const size = s.size();
+  size_t const lowSize = lowStr.size();
+  size_t lowIdx = 0, sIdx = 0;
+
+  while (lowIdx < lowSize)
+  {
+    if (sIdx == size)
+      return 0; // s is exhausted, but lowStr is not fully matched yet
+
+    // Lower-case and normalize one source symbol at a time; the result is
+    // compared symbol by symbol below, whatever its length turns out to be.
+    UniString strCharNorm;
+    strCharNorm.push_back(s[sIdx++]);
+    MakeLowerCase(strCharNorm);
+    Normalize(strCharNorm);
+
+    for (size_t i = 0; i < strCharNorm.size(); ++i)
+    {
+      // lowStr ended inside the converted form of the current symbol:
+      // that symbol is still counted as matched.
+      if (lowIdx >= lowSize)
+        return sIdx;
+
+      if (lowStr[lowIdx++] != strCharNorm[i])
+        return 0;
+    }
+  }
+
+  return sIdx;
+}
+
 } // namespace strings
diff --git a/base/string_utils.hpp b/base/string_utils.hpp
index a9a4523040..870f30c992 100644
--- a/base/string_utils.hpp
+++ b/base/string_utils.hpp
@@ -36,6 +36,13 @@ UniString Normalize(UniString const & s);
 /// For implementation @see base/normilize_unicode.cpp
 void Normalize(UniString & s);
 
+/// Counts the number of leading symbols of s (which is neither lower-cased nor normalized)
+/// that match the lower-cased and normalized string lowStr.
+/// @return 0 if s does not start with lowStr; otherwise the number of leading symbols
+/// of s that are equivalent to lowStr.
+/// For implementation @see base/lower_case.cpp
+size_t CountNormLowerSymbols(UniString const & s, UniString const & lowStr);
+
 void AsciiToLower(string & s);
 
 void Trim(string & s);