diff --git a/indexer/indexer_tests/search_string_utils_test.cpp b/indexer/indexer_tests/search_string_utils_test.cpp index acdc5b5cbc..f80830ccd8 100644 --- a/indexer/indexer_tests/search_string_utils_test.cpp +++ b/indexer/indexer_tests/search_string_utils_test.cpp @@ -4,9 +4,7 @@ #include "base/string_utils.hpp" -#include #include -#include #include namespace search_string_utils_test @@ -111,7 +109,7 @@ UNIT_TEST(NormalizeAndSimplifyString_Contains) TEST(!ContainsNormalized(kTestStr, "z"), ()); } -UNIT_TEST(StreetSynonym) +UNIT_TEST(Street_Synonym) { TEST(TestStreetSynonym("street"), ()); TEST(TestStreetSynonym("улица"), ()); @@ -121,10 +119,10 @@ UNIT_TEST(StreetSynonym) TEST(!TestStreetSynonym("strase"), ()); TEST(TestStreetSynonymWithMisprints("strase"), ()); - TEST(TestStreetSynonym("boulevard"), ()); - TEST(TestStreetSynonymWithMisprints("boulevard"), ()); - TEST(!TestStreetSynonym("boulevrd"), ()); - TEST(TestStreetSynonymWithMisprints("boulevrd"), ()); +// TEST(TestStreetSynonym("boulevard"), ()); +// TEST(TestStreetSynonymWithMisprints("boulevard"), ()); +// TEST(!TestStreetSynonym("boulevrd"), ()); +// TEST(TestStreetSynonymWithMisprints("boulevrd"), ()); TEST(TestStreetSynonym("avenue"), ()); TEST(TestStreetSynonymWithMisprints("avenue"), ()); @@ -134,32 +132,36 @@ UNIT_TEST(StreetSynonym) TEST(!TestStreetSynonymWithMisprints("abcdefg"), ()); } -UNIT_TEST(StreetPrefixMatch) +UNIT_TEST(Street_PrefixMatch) { - TEST(TestStreetPrefixMatch("п"), ()); - TEST(TestStreetPrefixMatch("пр"), ()); - TEST(TestStreetPrefixMatch("про"), ()); - TEST(TestStreetPrefixMatch("прое"), ()); - TEST(TestStreetPrefixMatch("проез"), ()); - TEST(TestStreetPrefixMatch("проезд"), ()); - TEST(!TestStreetPrefixMatch("проездд"), ()); + TEST(TestStreetPrefixMatch("у"), ()); + TEST(TestStreetPrefixMatch("ул"), ()); + TEST(TestStreetPrefixMatch("ули"), ()); - TEST(TestStreetPrefixMatchWithMisprints("пр"), ()); - TEST(!TestStreetPrefixMatch("пре"), ()); - TEST(!TestStreetPrefixMatchWithMisprints("пре"), ()); - TEST(!TestStreetPrefixMatch("преу"), ()); - TEST(TestStreetPrefixMatchWithMisprints("преу"), ()); - TEST(!TestStreetPrefixMatch("преул"), ()); - TEST(TestStreetPrefixMatchWithMisprints("преул"), ()); - TEST(!TestStreetPrefixMatch("преуло"), ()); - TEST(TestStreetPrefixMatchWithMisprints("преуло"), ()); - TEST(!TestStreetPrefixMatch("преулок"), ()); - TEST(TestStreetPrefixMatchWithMisprints("преулок"), ()); - TEST(!TestStreetPrefixMatch("преулак"), ()); - TEST(!TestStreetPrefixMatchWithMisprints("преулак"), ()); +// TEST(TestStreetPrefixMatch("п"), ()); +// TEST(TestStreetPrefixMatch("пр"), ()); +// TEST(TestStreetPrefixMatch("про"), ()); +// TEST(TestStreetPrefixMatch("прое"), ()); +// TEST(TestStreetPrefixMatch("проез"), ()); +// TEST(TestStreetPrefixMatch("проезд"), ()); +// TEST(!TestStreetPrefixMatch("проездд"), ()); + +// TEST(TestStreetPrefixMatchWithMisprints("пр"), ()); +// TEST(!TestStreetPrefixMatch("пре"), ()); +// TEST(!TestStreetPrefixMatchWithMisprints("пре"), ()); +// TEST(!TestStreetPrefixMatch("преу"), ()); +// TEST(TestStreetPrefixMatchWithMisprints("преу"), ()); +// TEST(!TestStreetPrefixMatch("преул"), ()); +// TEST(TestStreetPrefixMatchWithMisprints("преул"), ()); +// TEST(!TestStreetPrefixMatch("преуло"), ()); +// TEST(TestStreetPrefixMatchWithMisprints("преуло"), ()); +// TEST(!TestStreetPrefixMatch("преулок"), ()); +// TEST(TestStreetPrefixMatchWithMisprints("преулок"), ()); +// TEST(!TestStreetPrefixMatch("преулак"), ()); +// TEST(!TestStreetPrefixMatchWithMisprints("преулак"), ()); } -UNIT_TEST(StreetTokensFilter) +UNIT_TEST(Street_TokensFilter) { using List = vector>; @@ -196,7 +198,7 @@ UNIT_TEST(StreetTokensFilter) } { - List expected = {{"улица", 100}, {"набережная", 50}}; + List expected = {{"набережная", 50}}; List actual; Utf8StreetTokensFilter filter(actual); @@ -207,7 +209,7 @@ UNIT_TEST(StreetTokensFilter) } { - List expected = {{"улица", 0}, {"набережная", 1}, {"проспект", 2}}; + List expected = {{"набережная", 1}, {"проспект", 2}}; List actual; Utf8StreetTokensFilter filter(actual); @@ -219,8 +221,7 @@ UNIT_TEST(StreetTokensFilter) } { - List expectedWithMisprints = {{"ленинский", 0}}; - List expectedWithoutMisprints = {{"ленинский", 0}, {"пропект", 1}}; + List expected = {{"ленинский", 0}, {"пропект", 1}}; List actualWithMisprints; List actualWithoutMisprints; @@ -232,13 +233,12 @@ UNIT_TEST(StreetTokensFilter) filterWithMisprints.Put("пропект", false /* isPrefix */, 1 /* tag */); filterWithoutMisprints.Put("пропект", false /* isPrefix */, 1 /* tag */); - TEST_EQUAL(expectedWithMisprints, actualWithMisprints, ()); - TEST_EQUAL(expectedWithoutMisprints, actualWithoutMisprints, ()); + TEST_EQUAL(expected, actualWithMisprints, ()); + TEST_EQUAL(expected, actualWithoutMisprints, ()); } { - List expectedWithMisprints = {{"улица", 0}, {"набрежная", 1}}; - List expectedWithoutMisprints = {{"набрежная", 1}}; + List expected = {{"набрежная", 1}}; List actualWithMisprints; List actualWithoutMisprints; @@ -250,8 +250,8 @@ UNIT_TEST(StreetTokensFilter) filterWithMisprints.Put("набрежная", false /* isPrefix */, 1 /* tag */); filterWithoutMisprints.Put("набрежная", false /* isPrefix */, 1 /* tag */); - TEST_EQUAL(expectedWithMisprints, actualWithMisprints, ()); - TEST_EQUAL(expectedWithoutMisprints, actualWithoutMisprints, ()); + TEST_EQUAL(expected, actualWithMisprints, ()); + TEST_EQUAL(expected, actualWithoutMisprints, ()); } } diff --git a/indexer/search_string_utils.cpp b/indexer/search_string_utils.cpp index 8ecc5e670e..9357ebc54f 100644 --- a/indexer/search_string_utils.cpp +++ b/indexer/search_string_utils.cpp @@ -307,107 +307,76 @@ public: } private: - /// @todo Print most common street tokens for each country on generator stage - /// (OSM ground truth) and compare with these synonyms. + // Keep only *very-common-used* synonyms here (can increase search index, otherwise). + // Too many synonyms increases entropy only and produces messy results .. StreetsSynonymsHolder() { char const * affics[] = { // Russian - Русский - "аллея", "бульвар", "набережная", "переулок", "площадь", "проезд", "проспект", "шоссе", "тупик", "улица", "тракт", "ал", "бул", "наб", "пер", "пл", "пр", "просп", "ш", "туп", "ул", "тр", + "улица", "ул", // English - English - "street", "st", "avenue", "av", "ave", "square", "sq", "road", "rd", "boulevard", "blvd", "drive", "dr", "highway", "hwy", "lane", "ln", "way", "circle", "place", "pl", + "street", "st", "road", "rd", "drive", "dr", "lane", "ln", "avenue", "av", // Belarusian - Беларуская мова - "вуліца", "вул", "завулак", "набярэжная", "плошча", "пл", "праезд", "праспект", "пр", "тракт", "тр", "тупік", + "вуліца", "вул", - // Bulgarian - Български - "булевард", "бул", "площад", "пл", "улица", "ул", "квартал", "кв", + // Arabic + "شارع", - /// @todo Do not use popular POI (carrefour) or Street name (rambla) tokens as generic street synonyms. - /// This POIs (Carrefour supermarket) and Streets (La Rambla - most popular street in Barcelona) - /// will be lost in search results, otherwise. - /// Should reconsider candidates fetching and sorting logic from scratch to make correct processing. - - // Canada - "allee", "alley", "autoroute", "aut", "bypass", "byway", /*"carrefour", "carref",*/ "côte", "expressway", "freeway", "fwy", "pky", "pkwy", - /// @todo Do not use next _common search_ (e.g. 'park' is a prefix of 'parkway') tokens as generic street synonyms. - /// Should reconsider streets matching logic to get this synonyms back. - //"line", "link", "loop", "parkway", "parkvej", "path", "pathway", "route", "trail", "walk" + // Armenian + "փողոց", // Catalan language (Barcelona, Valencia, ...) - "avinguda", "carrer", /*"rambla", "ronda",*/ "passeig", "passatge", "travessera", + "carrer", // Croatian - Hrvatski - "šetalište", "trg", "ulica", "ul", "poljana", - - // Czech - Čeština - "ulice", "ul", "náměstí", "nám", "nábřeží", "nábr", - - // Danish - Dansk - "plads", "alle", "gade", "vej", - - // Dutch - Nederlands - "laan", "ln.", "straat", "steenweg", "stwg", "st", - - // Estonian - Eesti - "maantee", "mnt", "puiestee", "tee", "pst", - - // Finnish - Suomi - "kaari", "kri", "katu", "kuja", "kj", "kylä", "polku", "tie", "t", "tori", "väylä", "vlä", + "ulica", // Also common used transcription from RU // French - Français - "rue", "avenue", "carré", "cercle", "route", "boulevard", "drive", "autoroute", "lane", "chemin", + "rue", + + // Georgia + "ქუჩა", // German - Deutsch - "allee", "al", "brücke", "br", "chaussee", "gasse", "gr", "pfad", "straße", "str", "weg", "platz", + "straße", "str", // Hungarian - Magyar - "utca", "út", "u.", "tér", "körút", "krt.", "rakpart", "rkp.", + "utca", "út", - // Italian - Italiano - "corso", "piazza", "piazzale", "strada", "via", "viale", "calle", "fondamenta", + // Indonesia + "jalan", + // Italian - Italiano + "via", + + /// @todo Also expect that this synonyms should be in categories.txt list, but we dont support lt, lv langs now. + /// @{ // Latvian - Latviešu - "iela", "laukums", - + "iela", // Lithuanian - Lietuvių - "gatvė", "g.", "aikštė", "a", "prospektas", "pr.", "pl", "kel", - - // Nepalese - नेपाली - "मार्ग", "marg", - - // Norwegian - Norsk - // Details here: https://github.com/organicmaps/organicmaps/issues/3616 - "vei", "veien", "veg", "vegen", "vn", "gata", "gate", "gaten", "gt", "plass", "plassen", "sving", "sv", "allé", - - // Polish - Polski - "aleja", "aleje", "aleji", "alejach", "aleją", "plac", "placu", "placem", "ulica", "ulicy", + "gatvė", "g.", + ///@} // Portuguese - Português - "rua", "r.", "travessa", "tr.", "praça", "pç.", "avenida", "quadrado", "estrada", "boulevard", "carro", "auto-estrada", "lane", "caminho", + "rua", - // Romanian - Română - "bul", "bdul", "blv", "bulevard", "bulevardu", "calea", "cal", "piața", "pţa", "pța", "strada", "stra", "stradela", "sdla", "stradă", "unitate", "autostradă", "lane", - - // Slovenian - Slovenščina - "cesta", "ulica", "trg", "nabrežje", + // Romanian - Română (Moldova) + "strada", // Spanish - Español - "avenida", "avd", "avda", "bulevar", "bulev", "calle", "calleja", "cllja", "callejón", "callej", "cjon", "callejuela", "cjla", "callizo", "cllzo", "calzada", "czada", "costera", "coste", "plza", "pza", "plazoleta", "pzta", "plazuela", "plzla", "tránsito", "trans", "transversal", "trval", "trasera", "tras", "travesía", "trva", "paseo", "plaça", - - // Swedish - Svenska - "väg", "vägen", "gata", "gatan", "gränd", "gränden", "stig", "stigen", "plats", "platsen", "allé", + "calle", "avenida", // Turkish - Türkçe - "sokak", "sk.", "sok", "sokağı", "cadde", "cad", "cd", "caddesi", "bulvar", "bulvarı", "blv.", + "sokağı", "sokak", "sk", // Ukrainian - Українська - "дорога", "провулок", "площа", "шосе", "вулиця", "дор", "пров", "вул", + "вулиця", "вул", // Vietnamese - Tiếng Việt - "quốc lộ", "ql", "tỉnh lộ", "tl", "Đại lộ", "Đl", "Đường", "Đ", "Đường sắt", "Đs", "Đường phố", "Đp", "vuông", "con Đường", "Đại lộ", "Đường cao tốc", + "đường", }; for (auto const * s : affics) @@ -488,27 +457,33 @@ bool ContainsNormalized(string const & str, string const & substr) // StreetTokensFilter ------------------------------------------------------------------------------ void StreetTokensFilter::Put(strings::UniString const & token, bool isPrefix, size_t tag) { - using IsStreetChecker = std::function; - - IsStreetChecker isStreet = m_withMisprints ? IsStreetSynonymWithMisprints : IsStreetSynonym; - IsStreetChecker isStreetPrefix = - m_withMisprints ? IsStreetSynonymPrefixWithMisprints : IsStreetSynonymPrefix; - - auto const isStreetSynonym = isStreet(token); - if ((isPrefix && isStreetPrefix(token)) || (!isPrefix && isStreetSynonym)) + if (isPrefix) { - ++m_numSynonyms; - if (m_numSynonyms == 1) + if (m_withMisprints) { - m_delayedToken = token; - m_delayedTag = tag; - return; + if (IsStreetSynonymPrefixWithMisprints(token)) + return; + } + else + { + if (IsStreetSynonymPrefix(token)) + return; } - - // Do not emit delayed token for incomplete street synonym. - if ((!isPrefix || isStreetSynonym) && m_numSynonyms == 2) - EmitToken(m_delayedToken, m_delayedTag); } - EmitToken(token, tag); + else + { + if (m_withMisprints) + { + if (IsStreetSynonymWithMisprints(token)) + return; + } + else + { + if (IsStreetSynonym(token)) + return; + } + } + + m_callback(token, tag); } } // namespace search diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp index 64b90c4550..7e306ab72b 100644 --- a/indexer/search_string_utils.hpp +++ b/indexer/search_string_utils.hpp @@ -125,14 +125,6 @@ public: void Put(strings::UniString const & token, bool isPrefix, size_t tag); private: - using Cell = std::pair; - - inline void EmitToken(strings::UniString const & token, size_t tag) { m_callback(token, tag); } - - strings::UniString m_delayedToken; - size_t m_delayedTag = 0; - size_t m_numSynonyms = 0; - Callback m_callback; bool m_withMisprints = false; }; diff --git a/search/pre_ranker.cpp b/search/pre_ranker.cpp index 5c64acb507..cbb1179629 100644 --- a/search/pre_ranker.cpp +++ b/search/pre_ranker.cpp @@ -265,6 +265,7 @@ void PreRanker::FilterRelaxedResults(bool lastUpdate) auto const iEnd = m_results.end(); if (lastUpdate) { + LOG(LDEBUG, ("Flush relaxed results number:", m_relaxedResults.size())); m_results.insert(iEnd, make_move_iterator(m_relaxedResults.begin()), make_move_iterator(m_relaxedResults.end())); m_relaxedResults.clear(); } diff --git a/search/query_params.cpp b/search/query_params.cpp index 9b64eb00aa..70cdd8f4fd 100644 --- a/search/query_params.cpp +++ b/search/query_params.cpp @@ -3,22 +3,18 @@ #include "search/ranking_utils.hpp" #include "search/token_range.hpp" -#include "indexer/feature_impl.hpp" - #include #include namespace search { using namespace std; -using namespace strings; namespace { // All synonyms should be lowercase. -// @todo These should check the map language and use -// only the corresponding translation. +/// @todo These should check the map language and use only the corresponding translation. map> const kSynonyms = { {"n", {"north"}}, {"w", {"west"}}, @@ -29,21 +25,46 @@ map> const kSynonyms = { {"sw", {"southwest"}}, {"se", {"southeast"}}, {"st", {"saint", "street"}}, - {"blvd", {"boulevard"}}, - {"cir", {"circle"}}, - {"ct", {"court"}}, - {"rt", {"route"}}, + + {"al", {"allee", "alle"}}, + {"ave", {"avenue"}}, + /// @todo Should process synonyms with errors like "blvrd" -> "blvd". + /// @see HouseOnStreetSynonymsWithMisprints test. + {"blvd", {"boulevard"}}, + {"blvrd", {"boulevard"}}, + {"cir", {"circle"}}, + {"ct", {"court"}}, + {"hwy", {"highway"}}, + {"pl", {"place", "platz"}}, + {"rt", {"route"}}, + {"sq", {"square"}}, + + {"ал", {"аллея", "алея"}}, + {"бул", {"бульвар"}}, + {"зав", {"завулак"}}, + {"кв", {"квартал"}}, + {"наб", {"набережная", "набярэжная", "набережна"}}, + {"пер", {"переулок"}}, + {"пл", {"площадь", "площа"}}, + {"пр", {"проспект", "праспект", "провулок", "проезд", "праезд", "проїзд"}}, + {"туп", {"тупик", "тупік"}}, + {"ш", {"шоссе", "шаша", "шосе"}}, + {"св", {"святой", "святого", "святая", "святые", "святых", "свято"}}, {"б", {"большая", "большой"}}, {"бол", {"большая", "большой"}}, {"м", {"малая", "малый"}}, {"мал", {"малая", "малый"}}, {"нов", {"новая", "новый"}}, - {"стар", {"старая", "старый"}}}; + {"стар", {"старая", "старый"}}, +}; } // namespace // QueryParams::Token ------------------------------------------------------------------------------ -void QueryParams::Token::AddSynonym(string const & s) { AddSynonym(MakeUniString(s)); } +void QueryParams::Token::AddSynonym(string const & s) +{ + AddSynonym(strings::MakeUniString(s)); +} void QueryParams::Token::AddSynonym(String const & s) { diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp index 430c94082b..58210ae5c1 100644 --- a/search/search_integration_tests/processor_test.cpp +++ b/search/search_integration_tests/processor_test.cpp @@ -144,7 +144,8 @@ UNIT_CLASS_TEST(ProcessorTest, Smoke) TestPOI lantern1({10.0005, 10.0005}, "lantern 1", "en"); TestPOI lantern2({10.0006, 10.0005}, "lantern 2", "en"); - TestStreet stradaDrive({{-10.001, -10.001}, {-10, -10}, {-9.999, -9.999}}, "Strada drive", "en"); + // Was "Strada drive". + TestStreet stradaDrive({{-10.001, -10.001}, {-10, -10}, {-9.999, -9.999}}, "Boulevard drive", "en"); TestBuilding terranceHouse({-10, -10}, "", "155", stradaDrive.GetName("en"), "en"); auto const worldId = BuildWorld([&](TestMwmBuilder & builder) @@ -264,7 +265,7 @@ UNIT_CLASS_TEST(ProcessorTest, Smoke) { Rules rules = {ExactMatch(wonderlandId, terranceHouse), ExactMatch(wonderlandId, stradaDrive)}; - TEST(ResultsMatch("Toronto strada drive 155", rules), ()); + TEST(ResultsMatch("Toronto boulevard dr 155", rules), ()); } } @@ -1161,12 +1162,13 @@ UNIT_CLASS_TEST(ProcessorTest, StopWords) { Rules rules = {ExactMatch(id, bakery)}; - TEST(ResultsMatch("la boulangerie ", rules, "fr"), ()); + + TEST(ResultsMatch("la motviderie ", {}, "fr"), ()); } { - TEST(ResultsMatch("la motviderie ", {}, "fr"), ()); + /// @todo I don't see any reason, why token/prefix results should differ here? TEST(ResultsMatch("la la le la la la ", {ExactMatch(id, street)}, "fr"), ()); TEST(ResultsMatch("la la le la la la", {}, "fr"), ()); } @@ -1742,9 +1744,15 @@ UNIT_CLASS_TEST(ProcessorTest, SquareAsStreetTest) }); SetViewport(m2::RectD(0.0, 0.0, 1.0, 2.0)); + { - Rules rules = {ExactMatch(countryId, nonameHouse)}; - TEST(ResultsMatch("revolution square 3", rules), ()); + /// @todo Should skip square result? + Rules rules = { + ExactMatch(countryId, nonameHouse), + ExactMatch(countryId, square) + }; + TEST(OrderedResultsMatch(MakeRequest("revolution square 3")->Results(), rules), ()); + TEST(OrderedResultsMatch(MakeRequest("revolution sq 3")->Results(), rules), ()); } } @@ -2082,31 +2090,50 @@ UNIT_CLASS_TEST(ProcessorTest, Strasse) UNIT_CLASS_TEST(ProcessorTest, StreetSynonymsWithMisprints) { TestStreet leninsky({{0.0, -1.0}, {0.0, 1.0}}, "Ленинский проспект", "ru"); + TestStreet leningradsky({{0.0, -1.0}, {0.0, 1.0}}, "Ленинградский проспект", "ru"); TestStreet nabrezhnaya({{1.0, -1.0}, {1.0, 1.0}}, "улица набрежная", "ru"); TestStreet naberezhnaya({{2.0, -1.0}, {2.0, 1.0}}, "улица набережная", "ru"); auto countryId = BuildCountry("Wonderland", [&](TestMwmBuilder & builder) { builder.Add(leninsky); + builder.Add(leningradsky); builder.Add(nabrezhnaya); builder.Add(naberezhnaya); }); SetViewport(m2::RectD(0.0, -1.0, 2.0, 1.0)); { + /// @todo Have _relaxed_ (all) prospekts by matching "проспект". + Rules const prospekts = {ExactMatch(countryId, leninsky), ExactMatch(countryId, leningradsky)}; + TEST(ResultsMatch("ленинский проспект", prospekts), ()); + TEST(ResultsMatch("ленинский пропект", prospekts), ()); + Rules rules = {ExactMatch(countryId, leninsky)}; - TEST(ResultsMatch("ленинский проспект", rules), ()); - TEST(ResultsMatch("ленинский пропект", rules), ()); TEST(ResultsMatch("ленинский", rules), ()); + + // 2 errors + common _street_ token + TEST(ResultsMatch("ленинская улица", rules, "ru"), ()); + + TEST(ResultsMatch("ленинский street", rules, "en"), ()); + TEST(ResultsMatch("ленинский gatvė", rules, "lt"), ()); + + /// @todo Have _relaxed_ (all) streets by matching category name. + //TEST(ResultsMatch("ленинский gade", rules, "da"), ()); + //TEST(ResultsMatch("ленинский straat", rules, "nl"), ()); } { Rules rules = {ExactMatch(countryId, nabrezhnaya), ExactMatch(countryId, naberezhnaya)}; TEST(ResultsMatch("улица набрежная", rules), ()); TEST(ResultsMatch("набрежная", rules), ()); - } - { - Rules rules = {ExactMatch(countryId, naberezhnaya)}; TEST(ResultsMatch("улица набережная", rules), ()); + + TEST(ResultsMatch("набрежная street", rules, "en"), ()); + TEST(ResultsMatch("набрежная gatvė", rules, "lt"), ()); + + /// @todo Have _relaxed_ (all) streets by matching category name. + //TEST(ResultsMatch("набрежная gade", rules, "da"), ()); + //TEST(ResultsMatch("набрежная straat", rules, "nl"), ()); } } @@ -2189,11 +2216,6 @@ UNIT_CLASS_TEST(ProcessorTest, StreetSynonymPrefixMatch) TEST(ResultsMatch("Yesenina cafe ", rules), ()); TEST(ResultsMatch("Cafe Yesenina ", rules), ()); TEST(ResultsMatch("Cafe Yesenina", rules), ()); - } - { - Rules rules = {ExactMatch(countryId, cafe), ExactMatch(countryId, yesenina)}; - // Prefix match with misprints to street synonym gives street as additional result - // but we still can find the cafe. TEST(ResultsMatch("Yesenina cafe", rules), ()); } } @@ -3305,23 +3327,19 @@ UNIT_CLASS_TEST(ProcessorTest, StreetCategories) TEST(OrderedResultsMatch("avenida santa fe ", rules), ()); } - /// @todo Should review search::FindStreets logic! Check 2 cases below: - - // 1. |street| (matched by "sante fe" only) has worse rank than |shop| and even more - emitted in the second batch. { Rules const rules = { + ExactMatch(wonderlandId, street), ExactMatch(wonderlandId, bus), ExactMatch(wonderlandId, shop), - ExactMatch(wonderlandId, street) }; TEST(OrderedResultsMatch("avenida santa fe street ", rules), ()); } - // 2. Next sample matches street by "santa fe улица", thus it has low rank! { Rules const rules = { + ExactMatch(wonderlandId, street), ExactMatch(wonderlandId, bus), - //ExactMatch(wonderlandId, street) }; TEST(OrderedResultsMatch(MakeRequest("avenida santa fe улица ", "ru")->Results(), rules), ()); } diff --git a/search/streets_matcher.cpp b/search/streets_matcher.cpp index 99e7bf0c30..1a643d8c85 100644 --- a/search/streets_matcher.cpp +++ b/search/streets_matcher.cpp @@ -5,7 +5,6 @@ #include "indexer/search_string_utils.hpp" -#include "base/logging.hpp" #include "base/stl_helpers.hpp" #include @@ -69,12 +68,9 @@ void FindStreets(BaseContext const & ctx, CBV const & candidates, FeaturesFilter // When true, no bit vectors were intersected with |streets| at all. bool emptyIntersection = true; - // When true, |streets| is in the incomplete state and can't be - // used for creation of street layers. - bool incomplete = false; - - auto emit = [&]() { - if (streets.IsEmpty() || emptyIntersection || incomplete || lastToken == curToken) + auto emit = [&]() + { + if (streets.IsEmpty() || emptyIntersection || lastToken == curToken) return; CBV fs(streets); @@ -101,41 +97,27 @@ void FindStreets(BaseContext const & ctx, CBV const & candidates, FeaturesFilter ASSERT_LESS_OR_EQUAL(fs.PopCount(), fa.PopCount(), ()); prediction.m_prob = static_cast(fs.PopCount()) / static_cast(fa.PopCount()); - prediction.m_features = move(fs); + prediction.m_features = std::move(fs); prediction.m_hash = prediction.m_features.Hash(); prediction.m_withMisprints = withMisprints; }; - StreetTokensFilter streetsFilter( - [&](strings::UniString const & /* token */, size_t tag) { - auto buffer = streets.Intersect(ctx.m_features[tag].m_features); - if (tag < curToken) - { - // This is the case for delayed - // street synonym. Therefore, - // |streets| is temporarily in the - // incomplete state. - streets = buffer; - all = all.Intersect(ctx.m_features[tag].m_features); - emptyIntersection = false; + StreetTokensFilter streetsFilter([&](strings::UniString const &, size_t tag) + { + auto buffer = streets.Intersect(ctx.m_features[tag].m_features); + ASSERT_EQUAL(tag, curToken, ()); - incomplete = true; - return; - } - ASSERT_EQUAL(tag, curToken, ()); + // |streets| will become empty after + // the intersection. Therefore we need + // to create streets layer right now. + if (buffer.IsEmpty()) + emit(); - // |streets| will become empty after - // the intersection. Therefore we need - // to create streets layer right now. - if (buffer.IsEmpty()) - emit(); + streets = buffer; + all = all.Intersect(ctx.m_features[tag].m_features); + emptyIntersection = false; - streets = buffer; - all = all.Intersect(ctx.m_features[tag].m_features); - emptyIntersection = false; - incomplete = false; - }, - withMisprints); + }, withMisprints); for (; curToken < ctx.m_numTokens && !ctx.IsTokenUsed(curToken) && !streets.IsEmpty(); ++curToken) {