Speed improvement from Yury Gorshenin.

This commit is contained in:
Alex Zolotarev 2016-04-04 17:15:45 +03:00
parent 32a50c6a59
commit baf4a0c0cd

View file

@@ -113,19 +113,42 @@ UniString Normalize(UniString const & s)
return result;
}
// Replaces every fullwidth digit U+FF10..U+FF19 (UTF-8 bytes EF BC 90..99) in
// |utf8| with the matching ASCII digit '0'..'9'. All other bytes are kept as is.
//
// The string is compacted in place in a single pass: each replacement shrinks
// the text by two bytes, so the write position never overtakes the read
// position and no per-digit erase() (quadratic in the worst case) is needed.
void NormalizeDigits(string & utf8)
{
  // Nothing before the first 0xEF byte can change, so that prefix is skipped
  // without copying. No 0xEF at all means there is nothing to normalize.
  size_t read = utf8.find('\xEF');
  if (read == string::npos)
    return;

  size_t const size = utf8.size();
  // A complete 3-byte sequence can only start at positions [0, lastStart).
  size_t const lastStart = size >= 2 ? size - 2 : 0;

  size_t write = read;
  // Invariant before/between/after the iterations below:
  // * utf8[0, read) is the already checked part of the input;
  // * utf8[0, write) is the normalized version of utf8[0, read).
  while (read < lastStart)
  {
    if (utf8[read] == '\xEF' && utf8[read + 1] == '\xBC')
    {
      // Compare as unsigned so the range test does not depend on whether
      // plain char is signed on the target platform.
      unsigned char const third = static_cast<unsigned char>(utf8[read + 2]);
      if (third >= 0x90 && third <= 0x99)
      {
        utf8[write++] = static_cast<char>('0' + (third - 0x90));
        read += 3;
      }
      else
      {
        // EF BC followed by something else: keep the two lead bytes; the
        // third byte is examined again on the next iteration.
        utf8[write++] = utf8[read++];
        utf8[write++] = utf8[read++];
      }
    }
    else
    {
      utf8[write++] = utf8[read++];
    }
  }

  // Fewer than three bytes remain, so no further sequence can match.
  while (read < size)
    utf8[write++] = utf8[read++];
  utf8.resize(write);
}
namespace