[search] NormalizeAndSimplifyString: replace sequence of spaces with single one.

This commit is contained in:
tatiana-yan 2019-10-30 16:11:25 +03:00 committed by mpimenov
parent 9c2b372a04
commit e7650967bf
2 changed files with 9 additions and 4 deletions

View file

@ -92,6 +92,7 @@ UNIT_TEST(NormalizeAndSimplifyStringWithOurTambourines)
"aaaaaooooeeeeiduuuuyaadeoou", // Vietnamese
"ăâț", "aat", // Romanian
"Триу́мф-Пала́с", "триумф-палас", // Russian accent
" a b c d ", " a b c d ", // Multiple spaces
};
for (size_t i = 0; i < ARRAY_SIZE(arr); i += 2)
@ -257,9 +258,9 @@ UNIT_TEST(StreetTokensFilter)
// Checks how NormalizeAndSimplifyStringUtf8 treats the numero signs '№' and '#'.
// Per the expectations below, the result is lowercased, the sign is dropped when
// it is followed by a number or only by a space, and '#' is kept when a letter
// follows it directly (as in "Area #One").
// NOTE(review): the first four assertions appear twice in this view; presumably
// these are the removed/added sides of a diff hunk whose whitespace was collapsed
// during extraction -- confirm the exact spacing against the real file.
UNIT_TEST(NormalizeAndSimplifyString_Numero)
{
TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Зона №51"), "зона 51", ());
TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Зона № 51"), "зона 51", ());
TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Area #51"), "area 51", ());
TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Area # "), "area ", ());
TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Зона №51"), "зона 51", ());
TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Зона № 51"), "зона 51", ());
TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Area #51"), "area 51", ());
TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Area # "), "area ", ());
TEST_EQUAL(NormalizeAndSimplifyStringUtf8("Area #One"), "area #one", ());
}

View file

@ -141,6 +141,10 @@ UniString NormalizeAndSimplifyString(string const & s)
RemoveNumeroSigns(uniString);
// Replace each sequence of consecutive spaces with a single space.
auto const spacesChecker = [](UniChar lhs, UniChar rhs) { return (lhs == rhs) && (lhs == ' '); };
uniString.erase(unique(uniString.begin(), uniString.end(), spacesChecker), uniString.end());
return uniString;
/// @todo Restore this logic in the future to distinguish и from й.