strings::NormalizeDigits for UniString.

This commit is contained in:
Alex Zolotarev 2016-04-06 20:47:30 +03:00
parent d747352761
commit 7802303f12
3 changed files with 28 additions and 1 deletions

View file

@ -625,3 +625,17 @@ UNIT_TEST(NormalizeDigits)
TEST_EQUAL(nd("a9 "), "a0192 ", ());
TEST_EQUAL(nd(""), "3456789", ());
}
// Checks the UniString overload of strings::NormalizeDigits: "full width" digits
// (U+FF10..U+FF19) must be replaced with ASCII digits, everything else must stay untouched.
UNIT_TEST(NormalizeDigits_UniString)
{
  // Round-trips a UTF-8 string through UniString so that the overload under test is exercised.
  auto const nd = [](string const & utf8) -> string
  {
    strings::UniString us = strings::MakeUniString(utf8);
    strings::NormalizeDigits(us);
    return strings::ToUtf8(us);
  };
  TEST_EQUAL(nd(""), "", ());
  TEST_EQUAL(nd("z12345"), "z12345", ());
  // Full-width '０', '１', '２' mixed with an ASCII '9': only the full-width ones are rewritten.
  TEST_EQUAL(nd("a０１9２ "), "a0192 ", ());
  TEST_EQUAL(nd("３４５６７８９"), "3456789", ());
  // Neighbours of the digit range (U+FF0F '／' and U+FF1A '：') are not digits and must survive.
  TEST_EQUAL(nd("／：") , "／：", ());
}

View file

@ -113,7 +113,8 @@ UniString Normalize(UniString const & s)
return result;
}
void NormalizeDigits(string &utf8) {
void NormalizeDigits(string & utf8)
{
size_t const n = utf8.size();
size_t const m = n >= 2 ? n - 2 : 0;
@ -151,6 +152,17 @@ void NormalizeDigits(string &utf8) {
utf8.resize(j);
}
/// Replaces "full width" Unicode digits in |us| with their ASCII counterparts, in place.
/// Every other symbol is left unchanged.
void NormalizeDigits(UniString & us)
{
  // Full-width digits form the contiguous range U+FF10 ('０') .. U+FF19 ('９').
  UniChar const kFullWidthZero = 0xFF10;
  UniChar const kFullWidthNine = 0xFF19;

  for (size_t pos = 0, count = us.size(); pos != count; ++pos)
  {
    UniChar const symbol = us[pos];
    if (symbol < kFullWidthZero || symbol > kFullWidthNine)
      continue;
    // Take the digit's offset inside the full-width range and rebase it onto ASCII '0'.
    us[pos] = symbol - kFullWidthZero + '0';
  }
}
namespace
{
char ascii_to_lower(char in)

View file

@ -45,6 +45,7 @@ UniString Normalize(UniString const & s);
/// Replaces "full width" Unicode digits (U+FF10..U+FF19) with their ASCII counterparts
/// ('0'..'9'). The argument is modified in place.
/// This overload works on a string that is expected to hold UTF-8 encoded text.
void NormalizeDigits(string & utf8);
/// Same replacement, performed in place on an already decoded UniString.
void NormalizeDigits(UniString & us);
/// Counts the number of leading symbols in string s (which is neither lowercased nor normalized)
/// that match the lowercased and normalized string low_s. If s doesn't start with low_s then returns 0; otherwise