forked from organicmaps/organicmaps
Add function to calculate equal range for UTF-16 strings.
This commit is contained in:
parent
23f76dc513
commit
93917f9c7e
3 changed files with 95 additions and 0 deletions
|
@ -434,3 +434,63 @@ UNIT_TEST(IsUtf8Test)
|
|||
TEST(strings::IsASCIIString("YES"), ());
|
||||
TEST(strings::IsASCIIString("Nice places in Zhodino.kml"), ());
|
||||
}
|
||||
|
||||
UNIT_TEST(CountNormLowerSymbols)
{
  // Source strings handed to CountNormLowerSymbols() as its first argument.
  char const * sources[] = {
    "æüßs",
    "üßü",
    "İʼnẖtestὒ",
    "İʼnẖ",
    "İʼnẖtestὒ",
    "HelloWorld",
    "üßü",
    "",
    "",
    "Тест на не корректную русскую строку",
    "В ответе пустая строка",
    "Überstraße"
  };

  // Candidate prefixes (second argument), paired with sources by index.
  char const * prefixes[] = {
    "æusss",
    "ussu",
    "i\u0307\u02bcnh\u0331testυ\u0313\u0300",
    "i\u0307\u02bcnh\u0331testυ\u0313\u0300",
    "i\u0307\u02bcnh\u0331",
    "helloworld",
    "usu",
    "",
    "empty",
    "Тест на не корректную строку",
    "",
    "uberstras"
  };

  // Expected return value for every (source, prefix) pair above.
  size_t const results [] = {
    4,
    3,
    8,
    0,
    3,
    10,
    0,
    0,
    0,
    0,
    0,
    9
  };

  for (size_t i = 0; i < ARRAY_SIZE(sources); ++i)
  {
    size_t res = strings::CountNormLowerSymbols(strings::MakeUniString(sources[i]),
                                                strings::MakeUniString(prefixes[i]));
    TEST_EQUAL(res, results[i], ());
  }
}
|
||||
|
|
|
@ -194,4 +194,33 @@ void MakeLowerCase(UniString & s)
|
|||
s.swap(r);
|
||||
}
|
||||
|
||||
// Returns how many leading symbols of s are covered by lowStr once each symbol of s has been
// passed through MakeLowerCase() and Normalize(); returns 0 when lowStr does not match the
// beginning of s (or when lowStr is empty).
size_t CountNormLowerSymbols(UniString const & s, UniString const & lowStr)
{
  size_t consumed = 0;  // symbols of s processed so far
  size_t matched = 0;   // symbols of lowStr matched so far

  while (matched < lowStr.size())
  {
    // s is exhausted while part of lowStr is still unmatched.
    if (consumed == s.size())
      return 0;

    // Convert exactly one source symbol; the result may hold several symbols.
    UniString folded;
    folded.push_back(s[consumed]);
    ++consumed;
    MakeLowerCase(folded);
    Normalize(folded);

    size_t const foldedSize = folded.size();
    for (size_t k = 0; k != foldedSize; ++k)
    {
      // lowStr ended in the middle of this symbol's expansion: the symbol still counts.
      if (matched == lowStr.size())
        return consumed;

      if (folded[k] != lowStr[matched])
        return 0;

      ++matched;
    }
  }

  return consumed;
}
|
||||
|
||||
} // namespace strings
|
||||
|
|
|
@ -36,6 +36,12 @@ UniString Normalize(UniString const & s);
|
|||
/// For implementation @see base/normilize_unicode.cpp
|
||||
void Normalize(UniString & s);
|
||||
|
||||
/// Counts number of start symbols in string s (that is not lower and not normalized) that matches
|
||||
/// to lower and normalized string lowStr. If s doesn't start with lowStr then returns 0; otherwise
|
||||
/// returns number of start symbols in s that are equivalent to lowStr
|
||||
/// For implementation @see base/lower_case.cpp
|
||||
size_t CountNormLowerSymbols(UniString const & s, UniString const & lowStr);
|
||||
|
||||
void AsciiToLower(string & s);
|
||||
void Trim(string & s);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue