diff --git a/indexer/indexer_tests/search_string_utils_test.cpp b/indexer/indexer_tests/search_string_utils_test.cpp index 97d8c250b2..94fe07f1ad 100644 --- a/indexer/indexer_tests/search_string_utils_test.cpp +++ b/indexer/indexer_tests/search_string_utils_test.cpp @@ -165,19 +165,20 @@ UNIT_TEST(Street_PrefixMatch) // TEST(TestStreetPrefixMatch("проезд"), ()); // TEST(!TestStreetPrefixMatch("проездд"), ()); -// TEST(TestStreetPrefixMatchWithMisprints("пр"), ()); -// TEST(!TestStreetPrefixMatch("пре"), ()); -// TEST(!TestStreetPrefixMatchWithMisprints("пре"), ()); -// TEST(!TestStreetPrefixMatch("преу"), ()); -// TEST(TestStreetPrefixMatchWithMisprints("преу"), ()); -// TEST(!TestStreetPrefixMatch("преул"), ()); -// TEST(TestStreetPrefixMatchWithMisprints("преул"), ()); -// TEST(!TestStreetPrefixMatch("преуло"), ()); -// TEST(TestStreetPrefixMatchWithMisprints("преуло"), ()); -// TEST(!TestStreetPrefixMatch("преулок"), ()); -// TEST(TestStreetPrefixMatchWithMisprints("преулок"), ()); -// TEST(!TestStreetPrefixMatch("преулак"), ()); -// TEST(!TestStreetPrefixMatchWithMisprints("преулак"), ()); + TEST(TestStreetPrefixMatchWithMisprints("ул"), ()); + TEST(!TestStreetPrefixMatch("уле"), ()); + TEST(!TestStreetPrefixMatchWithMisprints("уле"), ()); + TEST(!TestStreetPrefixMatch("улец"), ()); + TEST(TestStreetPrefixMatchWithMisprints("улец"), ()); + TEST(!TestStreetPrefixMatch("улеца"), ()); + TEST(TestStreetPrefixMatchWithMisprints("улеца"), ()); + + TEST(TestStreetPrefixMatchWithMisprints("roadx"), ()); + TEST(!TestStreetPrefixMatchWithMisprints("roadxx"), ()); + + TEST(!TestStreetPrefixMatchWithMisprints("groad"), ()); // "road" with an extra leading letter must not match + TEST(TestStreetPrefixMatchWithMisprints("karre"), ()); // misprinted prefix of "carrer" + TEST(!TestStreetPrefixMatchWithMisprints("karrerx"), ()); } UNIT_TEST(Street_TokensFilter) diff --git a/indexer/search_string_utils.cpp b/indexer/search_string_utils.cpp index 6f32e2a950..e525ce2309 100644 --- a/indexer/search_string_utils.cpp +++ 
b/indexer/search_string_utils.cpp @@ -27,9 +27,12 @@ std::vector const kAllowedMisprints = { MakeUniString("gh"), MakeUniString("pf"), MakeUniString("vw"), + + // Russian MakeUniString("ао"), MakeUniString("еиэ"), MakeUniString("шщ"), + // Spanish MakeUniString("jh"), // "Jose" <-> "Hose" MakeUniString("fh"), // "Hernández" <-> "Fernández" diff --git a/search/ranking_utils.cpp b/search/ranking_utils.cpp index 203406319e..f9fdd866b6 100644 --- a/search/ranking_utils.cpp +++ b/search/ranking_utils.cpp @@ -89,7 +89,7 @@ ErrorsMade GetPrefixErrorsMade(QueryParams::Token const & token, strings::UniStr if (token.AnyOfSynonyms([&text](strings::UniString const & s) { return StartsWith(text, s); })) return ErrorsMade(0); - auto const dfa = PrefixDFAModifier(BuildLevenshteinDFA(text)); + auto const dfa = BuildLevenshteinDFA(text); auto it = dfa.Begin(); strings::DFAMove(it, token.GetOriginal().begin(), token.GetOriginal().end()); if (!it.Rejects()) diff --git a/search/ranking_utils.hpp b/search/ranking_utils.hpp index 1983e4aef9..cf64669864 100644 --- a/search/ranking_utils.hpp +++ b/search/ranking_utils.hpp @@ -3,14 +3,11 @@ #include "search/common.hpp" #include "search/query_params.hpp" -#include "indexer/search_delimiters.hpp" #include "indexer/search_string_utils.hpp" -#include "base/stl_helpers.hpp" #include "base/string_utils.hpp" #include -#include #include #include #include diff --git a/search/search_tests/ranking_tests.cpp b/search/search_tests/ranking_tests.cpp index f8a7bc116c..e284ff3eab 100644 --- a/search/search_tests/ranking_tests.cpp +++ b/search/search_tests/ranking_tests.cpp @@ -96,6 +96,62 @@ UNIT_TEST(NameScore_Smoke) test("Зона №51", "зона №", NameScore::FULL_PREFIX, 0, 4); } +UNIT_TEST(ErrorsMade_Smoke) +{ + { + QueryParams::Token const searchToken = strings::MakeUniString("hairdressers"); + + auto nameToken = strings::MakeUniString("h"); + TEST(!search::impl::GetErrorsMade(searchToken, nameToken).IsValid(), ()); + 
TEST(!search::impl::GetPrefixErrorsMade(searchToken, nameToken).IsValid(), ()); + + nameToken = strings::MakeUniString("hair"); + TEST(!search::impl::GetErrorsMade(searchToken, nameToken).IsValid(), ()); + TEST(!search::impl::GetPrefixErrorsMade(searchToken, nameToken).IsValid(), ()); + } + + { + auto nameToken = strings::MakeUniString("hair"); + + QueryParams::Token searchToken = strings::MakeUniString("hair"); + TEST_EQUAL(search::impl::GetErrorsMade(searchToken, nameToken).m_errorsMade, 0, ()); + TEST_EQUAL(search::impl::GetPrefixErrorsMade(searchToken, nameToken).m_errorsMade, 0, ()); + + searchToken = strings::MakeUniString("gair"); + TEST_EQUAL(search::impl::GetErrorsMade(searchToken, nameToken).m_errorsMade, 1, ()); + TEST_EQUAL(search::impl::GetPrefixErrorsMade(searchToken, nameToken).m_errorsMade, 1, ()); + + searchToken = strings::MakeUniString("gai"); + TEST(!search::impl::GetErrorsMade(searchToken, nameToken).IsValid(), ()); + TEST_EQUAL(search::impl::GetPrefixErrorsMade(searchToken, nameToken).m_errorsMade, 1, ()); + + searchToken = strings::MakeUniString("hairrr"); + TEST(!search::impl::GetErrorsMade(searchToken, nameToken).IsValid(), ()); + TEST(!search::impl::GetPrefixErrorsMade(searchToken, nameToken).IsValid(), ()); + } + + { + auto nameToken = strings::MakeUniString("hairdresser"); + + QueryParams::Token searchToken = strings::MakeUniString("hair"); + TEST(!search::impl::GetErrorsMade(searchToken, nameToken).IsValid(), ()); + TEST_EQUAL(search::impl::GetPrefixErrorsMade(searchToken, nameToken).m_errorsMade, 0, ()); + + searchToken = strings::MakeUniString("gair"); + TEST_EQUAL(search::impl::GetPrefixErrorsMade(searchToken, nameToken).m_errorsMade, 1, ()); + + searchToken = strings::MakeUniString("gairdrese"); + TEST(!search::impl::GetErrorsMade(searchToken, nameToken).IsValid(), ()); + TEST_EQUAL(search::impl::GetPrefixErrorsMade(searchToken, nameToken).m_errorsMade, 2, ()); + } +} + +UNIT_TEST(NameScore_Prefix) +{ + TEST_EQUAL(GetScore("H 
Nicks", "hairdressers").m_nameScore, NameScore::ZERO, ()); + TEST_EQUAL(GetScore("Hair E14", "hairdressers").m_nameScore, NameScore::ZERO, ()); +} + UNIT_TEST(NameScore_SubstringVsErrors) { string const query = "Simon";