diff --git a/base/base_tests/string_utils_test.cpp b/base/base_tests/string_utils_test.cpp
index 05c269d29c..4eccdbd0f1 100644
--- a/base/base_tests/string_utils_test.cpp
+++ b/base/base_tests/string_utils_test.cpp
@@ -625,3 +625,20 @@ UNIT_TEST(NormalizeDigits)
   TEST_EQUAL(nd("a0192 "), "a0192 ", ());
   TEST_EQUAL(nd("3456789"), "3456789", ());
 }
+
+UNIT_TEST(NormalizeDigits_UniString)
+{
+  auto const nd = [](string const & utf8) -> string
+  {
+    strings::UniString us = strings::MakeUniString(utf8);
+    strings::NormalizeDigits(us);
+    return strings::ToUtf8(us);
+  };
+  TEST_EQUAL(nd(""), "", ());
+  // U+FF0F (fullwidth solidus) sits just below the fullwidth digit range and must stay untouched.
+  TEST_EQUAL(nd("z12345\xEF\xBC\x8F\xEF\xBC\x8F"), "z12345\xEF\xBC\x8F\xEF\xBC\x8F", ());
+  // U+FF10, U+FF11 and U+FF12 mixed with an ascii '9'.
+  TEST_EQUAL(nd("a\xEF\xBC\x90\xEF\xBC\x91" "9\xEF\xBC\x92 "), "a0192 ", ());
+  // U+FF13 .. U+FF19.
+  TEST_EQUAL(nd("\xEF\xBC\x93\xEF\xBC\x94\xEF\xBC\x95\xEF\xBC\x96\xEF\xBC\x97\xEF\xBC\x98\xEF\xBC\x99"), "3456789", ());
+}
diff --git a/base/string_utils.cpp b/base/string_utils.cpp
index 857c190c9b..af21d91290 100644
--- a/base/string_utils.cpp
+++ b/base/string_utils.cpp
@@ -113,7 +113,8 @@ UniString Normalize(UniString const & s)
   return result;
 }
 
-void NormalizeDigits(string &utf8) {
+void NormalizeDigits(string & utf8)
+{
   size_t const n = utf8.size();
   size_t const m = n >= 2 ? n - 2 : 0;
 
@@ -151,6 +152,18 @@ void NormalizeDigits(string &utf8) {
   utf8.resize(j);
 }
 
+void NormalizeDigits(UniString & us)
+{
+  size_t const size = us.size();
+  for (size_t i = 0; i < size; ++i)
+  {
+    UniChar const c = us[i];
+    // Fullwidth digits U+FF10..U+FF19 are contiguous, so a fixed offset maps them onto '0'..'9'.
+    if (c >= 0xFF10 /* fullwidth '0' */ && c <= 0xFF19 /* fullwidth '9' */)
+      us[i] = c - 0xFF10 + '0';
+  }
+}
+
 namespace
 {
 char ascii_to_lower(char in)
diff --git a/base/string_utils.hpp b/base/string_utils.hpp
index a7a6a4290d..1c4634646a 100644
--- a/base/string_utils.hpp
+++ b/base/string_utils.hpp
@@ -45,6 +45,7 @@ UniString Normalize(UniString const & s);
 
 /// Replaces "full width" unicode digits with ascii ones.
 void NormalizeDigits(string & utf8);
+void NormalizeDigits(UniString & us);
 
 /// Counts number of start symbols in string s (that is not lower and not normalized) that maches
 /// to lower and normalized string low_s. If s doen't starts with low_s then returns 0; otherwise