[search] Significantly reduced street synonyms.

Signed-off-by: Viktor Govako <viktor.govako@gmail.com>
This commit is contained in:
Viktor Govako 2023-02-03 12:04:59 -03:00
parent be65563fdf
commit d500788389
7 changed files with 187 additions and 198 deletions

View file

@ -4,9 +4,7 @@
#include "base/string_utils.hpp"
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
namespace search_string_utils_test
@ -111,7 +109,7 @@ UNIT_TEST(NormalizeAndSimplifyString_Contains)
TEST(!ContainsNormalized(kTestStr, "z"), ());
}
UNIT_TEST(StreetSynonym)
UNIT_TEST(Street_Synonym)
{
TEST(TestStreetSynonym("street"), ());
TEST(TestStreetSynonym("улица"), ());
@ -121,10 +119,10 @@ UNIT_TEST(StreetSynonym)
TEST(!TestStreetSynonym("strase"), ());
TEST(TestStreetSynonymWithMisprints("strase"), ());
TEST(TestStreetSynonym("boulevard"), ());
TEST(TestStreetSynonymWithMisprints("boulevard"), ());
TEST(!TestStreetSynonym("boulevrd"), ());
TEST(TestStreetSynonymWithMisprints("boulevrd"), ());
// TEST(TestStreetSynonym("boulevard"), ());
// TEST(TestStreetSynonymWithMisprints("boulevard"), ());
// TEST(!TestStreetSynonym("boulevrd"), ());
// TEST(TestStreetSynonymWithMisprints("boulevrd"), ());
TEST(TestStreetSynonym("avenue"), ());
TEST(TestStreetSynonymWithMisprints("avenue"), ());
@ -134,32 +132,36 @@ UNIT_TEST(StreetSynonym)
TEST(!TestStreetSynonymWithMisprints("abcdefg"), ());
}
UNIT_TEST(StreetPrefixMatch)
UNIT_TEST(Street_PrefixMatch)
{
TEST(TestStreetPrefixMatch("п"), ());
TEST(TestStreetPrefixMatch("пр"), ());
TEST(TestStreetPrefixMatch("про"), ());
TEST(TestStreetPrefixMatch("прое"), ());
TEST(TestStreetPrefixMatch("проез"), ());
TEST(TestStreetPrefixMatch("проезд"), ());
TEST(!TestStreetPrefixMatch("проездд"), ());
TEST(TestStreetPrefixMatch("у"), ());
TEST(TestStreetPrefixMatch("ул"), ());
TEST(TestStreetPrefixMatch("ули"), ());
TEST(TestStreetPrefixMatchWithMisprints("пр"), ());
TEST(!TestStreetPrefixMatch("пре"), ());
TEST(!TestStreetPrefixMatchWithMisprints("пре"), ());
TEST(!TestStreetPrefixMatch("преу"), ());
TEST(TestStreetPrefixMatchWithMisprints("преу"), ());
TEST(!TestStreetPrefixMatch("преул"), ());
TEST(TestStreetPrefixMatchWithMisprints("преул"), ());
TEST(!TestStreetPrefixMatch("преуло"), ());
TEST(TestStreetPrefixMatchWithMisprints("преуло"), ());
TEST(!TestStreetPrefixMatch("преулок"), ());
TEST(TestStreetPrefixMatchWithMisprints("преулок"), ());
TEST(!TestStreetPrefixMatch("преулак"), ());
TEST(!TestStreetPrefixMatchWithMisprints("преулак"), ());
// TEST(TestStreetPrefixMatch("п"), ());
// TEST(TestStreetPrefixMatch("пр"), ());
// TEST(TestStreetPrefixMatch("про"), ());
// TEST(TestStreetPrefixMatch("прое"), ());
// TEST(TestStreetPrefixMatch("проез"), ());
// TEST(TestStreetPrefixMatch("проезд"), ());
// TEST(!TestStreetPrefixMatch("проездд"), ());
// TEST(TestStreetPrefixMatchWithMisprints("пр"), ());
// TEST(!TestStreetPrefixMatch("пре"), ());
// TEST(!TestStreetPrefixMatchWithMisprints("пре"), ());
// TEST(!TestStreetPrefixMatch("преу"), ());
// TEST(TestStreetPrefixMatchWithMisprints("преу"), ());
// TEST(!TestStreetPrefixMatch("преул"), ());
// TEST(TestStreetPrefixMatchWithMisprints("преул"), ());
// TEST(!TestStreetPrefixMatch("преуло"), ());
// TEST(TestStreetPrefixMatchWithMisprints("преуло"), ());
// TEST(!TestStreetPrefixMatch("преулок"), ());
// TEST(TestStreetPrefixMatchWithMisprints("преулок"), ());
// TEST(!TestStreetPrefixMatch("преулак"), ());
// TEST(!TestStreetPrefixMatchWithMisprints("преулак"), ());
}
UNIT_TEST(StreetTokensFilter)
UNIT_TEST(Street_TokensFilter)
{
using List = vector<pair<string, size_t>>;
@ -196,7 +198,7 @@ UNIT_TEST(StreetTokensFilter)
}
{
List expected = {{"улица", 100}, {"набережная", 50}};
List expected = {{"набережная", 50}};
List actual;
Utf8StreetTokensFilter filter(actual);
@ -207,7 +209,7 @@ UNIT_TEST(StreetTokensFilter)
}
{
List expected = {{"улица", 0}, {"набережная", 1}, {"проспект", 2}};
List expected = {{"набережная", 1}, {"проспект", 2}};
List actual;
Utf8StreetTokensFilter filter(actual);
@ -219,8 +221,7 @@ UNIT_TEST(StreetTokensFilter)
}
{
List expectedWithMisprints = {{"ленинский", 0}};
List expectedWithoutMisprints = {{"ленинский", 0}, {"пропект", 1}};
List expected = {{"ленинский", 0}, {"пропект", 1}};
List actualWithMisprints;
List actualWithoutMisprints;
@ -232,13 +233,12 @@ UNIT_TEST(StreetTokensFilter)
filterWithMisprints.Put("пропект", false /* isPrefix */, 1 /* tag */);
filterWithoutMisprints.Put("пропект", false /* isPrefix */, 1 /* tag */);
TEST_EQUAL(expectedWithMisprints, actualWithMisprints, ());
TEST_EQUAL(expectedWithoutMisprints, actualWithoutMisprints, ());
TEST_EQUAL(expected, actualWithMisprints, ());
TEST_EQUAL(expected, actualWithoutMisprints, ());
}
{
List expectedWithMisprints = {{"улица", 0}, {"набрежная", 1}};
List expectedWithoutMisprints = {{"набрежная", 1}};
List expected = {{"набрежная", 1}};
List actualWithMisprints;
List actualWithoutMisprints;
@ -250,8 +250,8 @@ UNIT_TEST(StreetTokensFilter)
filterWithMisprints.Put("набрежная", false /* isPrefix */, 1 /* tag */);
filterWithoutMisprints.Put("набрежная", false /* isPrefix */, 1 /* tag */);
TEST_EQUAL(expectedWithMisprints, actualWithMisprints, ());
TEST_EQUAL(expectedWithoutMisprints, actualWithoutMisprints, ());
TEST_EQUAL(expected, actualWithMisprints, ());
TEST_EQUAL(expected, actualWithoutMisprints, ());
}
}

View file

@ -307,107 +307,76 @@ public:
}
private:
/// @todo Print most common street tokens for each country on generator stage
/// (OSM ground truth) and compare with these synonyms.
// Keep only the *most commonly used* synonyms here (otherwise the search index can grow).
// Too many synonyms only increase entropy and produce messy results.
StreetsSynonymsHolder()
{
char const * affics[] =
{
// Russian - Русский
"аллея", "бульвар", "набережная", "переулок", "площадь", "проезд", "проспект", "шоссе", "тупик", "улица", "тракт", "ал", "бул", "наб", "пер", "пл", "пр", "просп", "ш", "туп", "ул", "тр",
"улица", "ул",
// English - English
"street", "st", "avenue", "av", "ave", "square", "sq", "road", "rd", "boulevard", "blvd", "drive", "dr", "highway", "hwy", "lane", "ln", "way", "circle", "place", "pl",
"street", "st", "road", "rd", "drive", "dr", "lane", "ln", "avenue", "av",
// Belarusian - Беларуская мова
"вуліца", "вул", "завулак", "набярэжная", "плошча", "пл", "праезд", "праспект", "пр", "тракт", "тр", "тупік",
"вуліца", "вул",
// Bulgarian - Български
"булевард", "бул", "площад", "пл", "улица", "ул", "квартал", "кв",
// Arabic
"شارع",
/// @todo Do not use popular POI (carrefour) or street name (rambla) tokens as generic street synonyms.
/// Otherwise these POIs (Carrefour supermarket) and streets (La Rambla - the most popular street in Barcelona)
/// will be lost in search results.
/// Should reconsider the candidates fetching and sorting logic from scratch to process them correctly.
// Canada
"allee", "alley", "autoroute", "aut", "bypass", "byway", /*"carrefour", "carref",*/ "côte", "expressway", "freeway", "fwy", "pky", "pkwy",
/// @todo Do not use the following _common search_ tokens (e.g. 'park' is a prefix of 'parkway') as generic street synonyms.
/// Should reconsider the streets matching logic to get these synonyms back.
//"line", "link", "loop", "parkway", "parkvej", "path", "pathway", "route", "trail", "walk"
// Armenian
"փողոց",
// Catalan language (Barcelona, Valencia, ...)
"avinguda", "carrer", /*"rambla", "ronda",*/ "passeig", "passatge", "travessera",
"carrer",
// Croatian - Hrvatski
"šetalište", "trg", "ulica", "ul", "poljana",
// Czech - Čeština
"ulice", "ul", "náměstí", "nám", "nábřeží", "nábr",
// Danish - Dansk
"plads", "alle", "gade", "vej",
// Dutch - Nederlands
"laan", "ln.", "straat", "steenweg", "stwg", "st",
// Estonian - Eesti
"maantee", "mnt", "puiestee", "tee", "pst",
// Finnish - Suomi
"kaari", "kri", "katu", "kuja", "kj", "kylä", "polku", "tie", "t", "tori", "väylä", "vlä",
"ulica", // Also common used transcription from RU
// French - Français
"rue", "avenue", "carré", "cercle", "route", "boulevard", "drive", "autoroute", "lane", "chemin",
"rue",
// Georgia
"ქუჩა",
// German - Deutsch
"allee", "al", "brücke", "br", "chaussee", "gasse", "gr", "pfad", "straße", "str", "weg", "platz",
"straße", "str",
// Hungarian - Magyar
"utca", "út", "u.", "tér", "körút", "krt.", "rakpart", "rkp.",
"utca", "út",
// Italian - Italiano
"corso", "piazza", "piazzale", "strada", "via", "viale", "calle", "fondamenta",
// Indonesia
"jalan",
// Italian - Italiano
"via",
/// @todo These synonyms are also expected to be in the categories.txt list, but the lt and lv languages are not supported now.
/// @{
// Latvian - Latviešu
"iela", "laukums",
"iela",
// Lithuanian - Lietuvių
"gatvė", "g.", "aikštė", "a", "prospektas", "pr.", "pl", "kel",
// Nepalese - नेपाली
"मार्ग", "marg",
// Norwegian - Norsk
// Details here: https://github.com/organicmaps/organicmaps/issues/3616
"vei", "veien", "veg", "vegen", "vn", "gata", "gate", "gaten", "gt", "plass", "plassen", "sving", "sv", "allé",
// Polish - Polski
"aleja", "aleje", "aleji", "alejach", "aleją", "plac", "placu", "placem", "ulica", "ulicy",
"gatvė", "g.",
///@}
// Portuguese - Português
"rua", "r.", "travessa", "tr.", "praça", "pç.", "avenida", "quadrado", "estrada", "boulevard", "carro", "auto-estrada", "lane", "caminho",
"rua",
// Romanian - Română
"bul", "bdul", "blv", "bulevard", "bulevardu", "calea", "cal", "piața", "pţa", "pța", "strada", "stra", "stradela", "sdla", "stradă", "unitate", "autostradă", "lane",
// Slovenian - Slovenščina
"cesta", "ulica", "trg", "nabrežje",
// Romanian - Română (Moldova)
"strada",
// Spanish - Español
"avenida", "avd", "avda", "bulevar", "bulev", "calle", "calleja", "cllja", "callejón", "callej", "cjon", "callejuela", "cjla", "callizo", "cllzo", "calzada", "czada", "costera", "coste", "plza", "pza", "plazoleta", "pzta", "plazuela", "plzla", "tránsito", "trans", "transversal", "trval", "trasera", "tras", "travesía", "trva", "paseo", "plaça",
// Swedish - Svenska
"väg", "vägen", "gata", "gatan", "gränd", "gränden", "stig", "stigen", "plats", "platsen", "allé",
"calle", "avenida",
// Turkish - Türkçe
"sokak", "sk.", "sok", "sokağı", "cadde", "cad", "cd", "caddesi", "bulvar", "bulvarı", "blv.",
"sokağı", "sokak", "sk",
// Ukrainian - Українська
"дорога", "провулок", "площа", "шосе", "вулиця", "дор", "пров", "вул",
"вулиця", "вул",
// Vietnamese - Tiếng Việt
"quốc lộ", "ql", "tỉnh lộ", "tl", "Đại lộ", "Đl", "Đường", "Đ", "Đường sắt", "Đs", "Đường phố", "Đp", "vuông", "con Đường", "Đại lộ", "Đường cao tốc",
"đường",
};
for (auto const * s : affics)
@ -488,27 +457,33 @@ bool ContainsNormalized(string const & str, string const & substr)
// StreetTokensFilter ------------------------------------------------------------------------------
void StreetTokensFilter::Put(strings::UniString const & token, bool isPrefix, size_t tag)
{
using IsStreetChecker = std::function<bool(strings::UniString const &)>;
IsStreetChecker isStreet = m_withMisprints ? IsStreetSynonymWithMisprints : IsStreetSynonym;
IsStreetChecker isStreetPrefix =
m_withMisprints ? IsStreetSynonymPrefixWithMisprints : IsStreetSynonymPrefix;
auto const isStreetSynonym = isStreet(token);
if ((isPrefix && isStreetPrefix(token)) || (!isPrefix && isStreetSynonym))
if (isPrefix)
{
++m_numSynonyms;
if (m_numSynonyms == 1)
if (m_withMisprints)
{
m_delayedToken = token;
m_delayedTag = tag;
return;
if (IsStreetSynonymPrefixWithMisprints(token))
return;
}
else
{
if (IsStreetSynonymPrefix(token))
return;
}
// Do not emit delayed token for incomplete street synonym.
if ((!isPrefix || isStreetSynonym) && m_numSynonyms == 2)
EmitToken(m_delayedToken, m_delayedTag);
}
EmitToken(token, tag);
else
{
if (m_withMisprints)
{
if (IsStreetSynonymWithMisprints(token))
return;
}
else
{
if (IsStreetSynonym(token))
return;
}
}
m_callback(token, tag);
}
} // namespace search

View file

@ -125,14 +125,6 @@ public:
void Put(strings::UniString const & token, bool isPrefix, size_t tag);
private:
using Cell = std::pair<strings::UniString, size_t>;
inline void EmitToken(strings::UniString const & token, size_t tag) { m_callback(token, tag); }
strings::UniString m_delayedToken;
size_t m_delayedTag = 0;
size_t m_numSynonyms = 0;
Callback m_callback;
bool m_withMisprints = false;
};

View file

@ -265,6 +265,7 @@ void PreRanker::FilterRelaxedResults(bool lastUpdate)
auto const iEnd = m_results.end();
if (lastUpdate)
{
LOG(LDEBUG, ("Flush relaxed results number:", m_relaxedResults.size()));
m_results.insert(iEnd, make_move_iterator(m_relaxedResults.begin()), make_move_iterator(m_relaxedResults.end()));
m_relaxedResults.clear();
}

View file

@ -3,22 +3,18 @@
#include "search/ranking_utils.hpp"
#include "search/token_range.hpp"
#include "indexer/feature_impl.hpp"
#include <map>
#include <sstream>
namespace search
{
using namespace std;
using namespace strings;
namespace
{
// All synonyms should be lowercase.
// @todo These should check the map language and use
// only the corresponding translation.
/// @todo These should check the map language and use only the corresponding translation.
map<string, vector<string>> const kSynonyms = {
{"n", {"north"}},
{"w", {"west"}},
@ -29,21 +25,46 @@ map<string, vector<string>> const kSynonyms = {
{"sw", {"southwest"}},
{"se", {"southeast"}},
{"st", {"saint", "street"}},
{"blvd", {"boulevard"}},
{"cir", {"circle"}},
{"ct", {"court"}},
{"rt", {"route"}},
{"al", {"allee", "alle"}},
{"ave", {"avenue"}},
/// @todo Should process synonyms with errors like "blvrd" -> "blvd".
/// @see HouseOnStreetSynonymsWithMisprints test.
{"blvd", {"boulevard"}},
{"blvrd", {"boulevard"}},
{"cir", {"circle"}},
{"ct", {"court"}},
{"hwy", {"highway"}},
{"pl", {"place", "platz"}},
{"rt", {"route"}},
{"sq", {"square"}},
{"ал", {"аллея", "алея"}},
{"бул", {"бульвар"}},
{"зав", {"завулак"}},
{"кв", {"квартал"}},
{"наб", {"набережная", "набярэжная", "набережна"}},
{"пер", {"переулок"}},
{"пл", {"площадь", "площа"}},
{"пр", {"проспект", "праспект", "провулок", "проезд", "праезд", "проїзд"}},
{"туп", {"тупик", "тупік"}},
{"ш", {"шоссе", "шаша", "шосе"}},
{"св", {"святой", "святого", "святая", "святые", "святых", "свято"}},
{"б", {"большая", "большой"}},
{"бол", {"большая", "большой"}},
{"м", {"малая", "малый"}},
{"мал", {"малая", "малый"}},
{"нов", {"новая", "новый"}},
{"стар", {"старая", "старый"}}};
{"стар", {"старая", "старый"}},
};
} // namespace
// QueryParams::Token ------------------------------------------------------------------------------
void QueryParams::Token::AddSynonym(string const & s) { AddSynonym(MakeUniString(s)); }
// Adds a synonym given as a plain string: converts |s| with strings::MakeUniString
// and passes the result to another AddSynonym overload (presumably the String one
// defined right below - confirm against the class declaration).
void QueryParams::Token::AddSynonym(string const & s)
{
AddSynonym(strings::MakeUniString(s));
}
void QueryParams::Token::AddSynonym(String const & s)
{

View file

@ -144,7 +144,8 @@ UNIT_CLASS_TEST(ProcessorTest, Smoke)
TestPOI lantern1({10.0005, 10.0005}, "lantern 1", "en");
TestPOI lantern2({10.0006, 10.0005}, "lantern 2", "en");
TestStreet stradaDrive({{-10.001, -10.001}, {-10, -10}, {-9.999, -9.999}}, "Strada drive", "en");
// Was "Strada drive".
TestStreet stradaDrive({{-10.001, -10.001}, {-10, -10}, {-9.999, -9.999}}, "Boulevard drive", "en");
TestBuilding terranceHouse({-10, -10}, "", "155", stradaDrive.GetName("en"), "en");
auto const worldId = BuildWorld([&](TestMwmBuilder & builder)
@ -264,7 +265,7 @@ UNIT_CLASS_TEST(ProcessorTest, Smoke)
{
Rules rules = {ExactMatch(wonderlandId, terranceHouse), ExactMatch(wonderlandId, stradaDrive)};
TEST(ResultsMatch("Toronto strada drive 155", rules), ());
TEST(ResultsMatch("Toronto boulevard dr 155", rules), ());
}
}
@ -1161,12 +1162,13 @@ UNIT_CLASS_TEST(ProcessorTest, StopWords)
{
Rules rules = {ExactMatch(id, bakery)};
TEST(ResultsMatch("la boulangerie ", rules, "fr"), ());
TEST(ResultsMatch("la motviderie ", {}, "fr"), ());
}
{
TEST(ResultsMatch("la motviderie ", {}, "fr"), ());
/// @todo I don't see any reason why token/prefix results should differ here.
TEST(ResultsMatch("la la le la la la ", {ExactMatch(id, street)}, "fr"), ());
TEST(ResultsMatch("la la le la la la", {}, "fr"), ());
}
@ -1742,9 +1744,15 @@ UNIT_CLASS_TEST(ProcessorTest, SquareAsStreetTest)
});
SetViewport(m2::RectD(0.0, 0.0, 1.0, 2.0));
{
Rules rules = {ExactMatch(countryId, nonameHouse)};
TEST(ResultsMatch("revolution square 3", rules), ());
/// @todo Should skip square result?
Rules rules = {
ExactMatch(countryId, nonameHouse),
ExactMatch(countryId, square)
};
TEST(OrderedResultsMatch(MakeRequest("revolution square 3")->Results(), rules), ());
TEST(OrderedResultsMatch(MakeRequest("revolution sq 3")->Results(), rules), ());
}
}
@ -2082,31 +2090,50 @@ UNIT_CLASS_TEST(ProcessorTest, Strasse)
UNIT_CLASS_TEST(ProcessorTest, StreetSynonymsWithMisprints)
{
TestStreet leninsky({{0.0, -1.0}, {0.0, 1.0}}, "Ленинский проспект", "ru");
TestStreet leningradsky({{0.0, -1.0}, {0.0, 1.0}}, "Ленинградский проспект", "ru");
TestStreet nabrezhnaya({{1.0, -1.0}, {1.0, 1.0}}, "улица набрежная", "ru");
TestStreet naberezhnaya({{2.0, -1.0}, {2.0, 1.0}}, "улица набережная", "ru");
auto countryId = BuildCountry("Wonderland", [&](TestMwmBuilder & builder)
{
builder.Add(leninsky);
builder.Add(leningradsky);
builder.Add(nabrezhnaya);
builder.Add(naberezhnaya);
});
SetViewport(m2::RectD(0.0, -1.0, 2.0, 1.0));
{
/// @todo Have _relaxed_ (all) prospekts by matching "проспект".
Rules const prospekts = {ExactMatch(countryId, leninsky), ExactMatch(countryId, leningradsky)};
TEST(ResultsMatch("ленинский проспект", prospekts), ());
TEST(ResultsMatch("ленинский пропект", prospekts), ());
Rules rules = {ExactMatch(countryId, leninsky)};
TEST(ResultsMatch("ленинский проспект", rules), ());
TEST(ResultsMatch("ленинский пропект", rules), ());
TEST(ResultsMatch("ленинский", rules), ());
// 2 errors + common _street_ token
TEST(ResultsMatch("ленинская улица", rules, "ru"), ());
TEST(ResultsMatch("ленинский street", rules, "en"), ());
TEST(ResultsMatch("ленинский gatvė", rules, "lt"), ());
/// @todo Have _relaxed_ (all) streets by matching category name.
//TEST(ResultsMatch("ленинский gade", rules, "da"), ());
//TEST(ResultsMatch("ленинский straat", rules, "nl"), ());
}
{
Rules rules = {ExactMatch(countryId, nabrezhnaya), ExactMatch(countryId, naberezhnaya)};
TEST(ResultsMatch("улица набрежная", rules), ());
TEST(ResultsMatch("набрежная", rules), ());
}
{
Rules rules = {ExactMatch(countryId, naberezhnaya)};
TEST(ResultsMatch("улица набережная", rules), ());
TEST(ResultsMatch("набрежная street", rules, "en"), ());
TEST(ResultsMatch("набрежная gatvė", rules, "lt"), ());
/// @todo Have _relaxed_ (all) streets by matching category name.
//TEST(ResultsMatch("набрежная gade", rules, "da"), ());
//TEST(ResultsMatch("набрежная straat", rules, "nl"), ());
}
}
@ -2189,11 +2216,6 @@ UNIT_CLASS_TEST(ProcessorTest, StreetSynonymPrefixMatch)
TEST(ResultsMatch("Yesenina cafe ", rules), ());
TEST(ResultsMatch("Cafe Yesenina ", rules), ());
TEST(ResultsMatch("Cafe Yesenina", rules), ());
}
{
Rules rules = {ExactMatch(countryId, cafe), ExactMatch(countryId, yesenina)};
// Prefix match with misprints to street synonym gives street as additional result
// but we still can find the cafe.
TEST(ResultsMatch("Yesenina cafe", rules), ());
}
}
@ -3305,23 +3327,19 @@ UNIT_CLASS_TEST(ProcessorTest, StreetCategories)
TEST(OrderedResultsMatch("avenida santa fe ", rules), ());
}
/// @todo Should review search::FindStreets logic! Check 2 cases below:
// 1. |street| (matched by "santa fe" only) has a worse rank than |shop| and, moreover, is emitted in the second batch.
{
Rules const rules = {
ExactMatch(wonderlandId, street),
ExactMatch(wonderlandId, bus),
ExactMatch(wonderlandId, shop),
ExactMatch(wonderlandId, street)
};
TEST(OrderedResultsMatch("avenida santa fe street ", rules), ());
}
// 2. Next sample matches street by "santa fe улица", thus it has low rank!
{
Rules const rules = {
ExactMatch(wonderlandId, street),
ExactMatch(wonderlandId, bus),
//ExactMatch(wonderlandId, street)
};
TEST(OrderedResultsMatch(MakeRequest("avenida santa fe улица ", "ru")->Results(), rules), ());
}

View file

@ -5,7 +5,6 @@
#include "indexer/search_string_utils.hpp"
#include "base/logging.hpp"
#include "base/stl_helpers.hpp"
#include <algorithm>
@ -69,12 +68,9 @@ void FindStreets(BaseContext const & ctx, CBV const & candidates, FeaturesFilter
// When true, no bit vectors were intersected with |streets| at all.
bool emptyIntersection = true;
// When true, |streets| is in the incomplete state and can't be
// used for creation of street layers.
bool incomplete = false;
auto emit = [&]() {
if (streets.IsEmpty() || emptyIntersection || incomplete || lastToken == curToken)
auto emit = [&]()
{
if (streets.IsEmpty() || emptyIntersection || lastToken == curToken)
return;
CBV fs(streets);
@ -101,41 +97,27 @@ void FindStreets(BaseContext const & ctx, CBV const & candidates, FeaturesFilter
ASSERT_LESS_OR_EQUAL(fs.PopCount(), fa.PopCount(), ());
prediction.m_prob = static_cast<double>(fs.PopCount()) / static_cast<double>(fa.PopCount());
prediction.m_features = move(fs);
prediction.m_features = std::move(fs);
prediction.m_hash = prediction.m_features.Hash();
prediction.m_withMisprints = withMisprints;
};
StreetTokensFilter streetsFilter(
[&](strings::UniString const & /* token */, size_t tag) {
auto buffer = streets.Intersect(ctx.m_features[tag].m_features);
if (tag < curToken)
{
// This is the case for delayed
// street synonym. Therefore,
// |streets| is temporarily in the
// incomplete state.
streets = buffer;
all = all.Intersect(ctx.m_features[tag].m_features);
emptyIntersection = false;
StreetTokensFilter streetsFilter([&](strings::UniString const &, size_t tag)
{
auto buffer = streets.Intersect(ctx.m_features[tag].m_features);
ASSERT_EQUAL(tag, curToken, ());
incomplete = true;
return;
}
ASSERT_EQUAL(tag, curToken, ());
// |streets| will become empty after
// the intersection. Therefore we need
// to create streets layer right now.
if (buffer.IsEmpty())
emit();
// |streets| will become empty after
// the intersection. Therefore we need
// to create streets layer right now.
if (buffer.IsEmpty())
emit();
streets = buffer;
all = all.Intersect(ctx.m_features[tag].m_features);
emptyIntersection = false;
streets = buffer;
all = all.Intersect(ctx.m_features[tag].m_features);
emptyIntersection = false;
incomplete = false;
},
withMisprints);
}, withMisprints);
for (; curToken < ctx.m_numTokens && !ctx.IsTokenUsed(curToken) && !streets.IsEmpty(); ++curToken)
{