forked from organicmaps/organicmaps-tmp
[search] Significantly reduced street synonyms.
Signed-off-by: Viktor Govako <viktor.govako@gmail.com>
This commit is contained in:
parent
be65563fdf
commit
d500788389
7 changed files with 187 additions and 198 deletions
|
@ -4,9 +4,7 @@
|
|||
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace search_string_utils_test
|
||||
|
@ -111,7 +109,7 @@ UNIT_TEST(NormalizeAndSimplifyString_Contains)
|
|||
TEST(!ContainsNormalized(kTestStr, "z"), ());
|
||||
}
|
||||
|
||||
UNIT_TEST(StreetSynonym)
|
||||
UNIT_TEST(Street_Synonym)
|
||||
{
|
||||
TEST(TestStreetSynonym("street"), ());
|
||||
TEST(TestStreetSynonym("улица"), ());
|
||||
|
@ -121,10 +119,10 @@ UNIT_TEST(StreetSynonym)
|
|||
TEST(!TestStreetSynonym("strase"), ());
|
||||
TEST(TestStreetSynonymWithMisprints("strase"), ());
|
||||
|
||||
TEST(TestStreetSynonym("boulevard"), ());
|
||||
TEST(TestStreetSynonymWithMisprints("boulevard"), ());
|
||||
TEST(!TestStreetSynonym("boulevrd"), ());
|
||||
TEST(TestStreetSynonymWithMisprints("boulevrd"), ());
|
||||
// TEST(TestStreetSynonym("boulevard"), ());
|
||||
// TEST(TestStreetSynonymWithMisprints("boulevard"), ());
|
||||
// TEST(!TestStreetSynonym("boulevrd"), ());
|
||||
// TEST(TestStreetSynonymWithMisprints("boulevrd"), ());
|
||||
|
||||
TEST(TestStreetSynonym("avenue"), ());
|
||||
TEST(TestStreetSynonymWithMisprints("avenue"), ());
|
||||
|
@ -134,32 +132,36 @@ UNIT_TEST(StreetSynonym)
|
|||
TEST(!TestStreetSynonymWithMisprints("abcdefg"), ());
|
||||
}
|
||||
|
||||
UNIT_TEST(StreetPrefixMatch)
|
||||
UNIT_TEST(Street_PrefixMatch)
|
||||
{
|
||||
TEST(TestStreetPrefixMatch("п"), ());
|
||||
TEST(TestStreetPrefixMatch("пр"), ());
|
||||
TEST(TestStreetPrefixMatch("про"), ());
|
||||
TEST(TestStreetPrefixMatch("прое"), ());
|
||||
TEST(TestStreetPrefixMatch("проез"), ());
|
||||
TEST(TestStreetPrefixMatch("проезд"), ());
|
||||
TEST(!TestStreetPrefixMatch("проездд"), ());
|
||||
TEST(TestStreetPrefixMatch("у"), ());
|
||||
TEST(TestStreetPrefixMatch("ул"), ());
|
||||
TEST(TestStreetPrefixMatch("ули"), ());
|
||||
|
||||
TEST(TestStreetPrefixMatchWithMisprints("пр"), ());
|
||||
TEST(!TestStreetPrefixMatch("пре"), ());
|
||||
TEST(!TestStreetPrefixMatchWithMisprints("пре"), ());
|
||||
TEST(!TestStreetPrefixMatch("преу"), ());
|
||||
TEST(TestStreetPrefixMatchWithMisprints("преу"), ());
|
||||
TEST(!TestStreetPrefixMatch("преул"), ());
|
||||
TEST(TestStreetPrefixMatchWithMisprints("преул"), ());
|
||||
TEST(!TestStreetPrefixMatch("преуло"), ());
|
||||
TEST(TestStreetPrefixMatchWithMisprints("преуло"), ());
|
||||
TEST(!TestStreetPrefixMatch("преулок"), ());
|
||||
TEST(TestStreetPrefixMatchWithMisprints("преулок"), ());
|
||||
TEST(!TestStreetPrefixMatch("преулак"), ());
|
||||
TEST(!TestStreetPrefixMatchWithMisprints("преулак"), ());
|
||||
// TEST(TestStreetPrefixMatch("п"), ());
|
||||
// TEST(TestStreetPrefixMatch("пр"), ());
|
||||
// TEST(TestStreetPrefixMatch("про"), ());
|
||||
// TEST(TestStreetPrefixMatch("прое"), ());
|
||||
// TEST(TestStreetPrefixMatch("проез"), ());
|
||||
// TEST(TestStreetPrefixMatch("проезд"), ());
|
||||
// TEST(!TestStreetPrefixMatch("проездд"), ());
|
||||
|
||||
// TEST(TestStreetPrefixMatchWithMisprints("пр"), ());
|
||||
// TEST(!TestStreetPrefixMatch("пре"), ());
|
||||
// TEST(!TestStreetPrefixMatchWithMisprints("пре"), ());
|
||||
// TEST(!TestStreetPrefixMatch("преу"), ());
|
||||
// TEST(TestStreetPrefixMatchWithMisprints("преу"), ());
|
||||
// TEST(!TestStreetPrefixMatch("преул"), ());
|
||||
// TEST(TestStreetPrefixMatchWithMisprints("преул"), ());
|
||||
// TEST(!TestStreetPrefixMatch("преуло"), ());
|
||||
// TEST(TestStreetPrefixMatchWithMisprints("преуло"), ());
|
||||
// TEST(!TestStreetPrefixMatch("преулок"), ());
|
||||
// TEST(TestStreetPrefixMatchWithMisprints("преулок"), ());
|
||||
// TEST(!TestStreetPrefixMatch("преулак"), ());
|
||||
// TEST(!TestStreetPrefixMatchWithMisprints("преулак"), ());
|
||||
}
|
||||
|
||||
UNIT_TEST(StreetTokensFilter)
|
||||
UNIT_TEST(Street_TokensFilter)
|
||||
{
|
||||
using List = vector<pair<string, size_t>>;
|
||||
|
||||
|
@ -196,7 +198,7 @@ UNIT_TEST(StreetTokensFilter)
|
|||
}
|
||||
|
||||
{
|
||||
List expected = {{"улица", 100}, {"набережная", 50}};
|
||||
List expected = {{"набережная", 50}};
|
||||
List actual;
|
||||
|
||||
Utf8StreetTokensFilter filter(actual);
|
||||
|
@ -207,7 +209,7 @@ UNIT_TEST(StreetTokensFilter)
|
|||
}
|
||||
|
||||
{
|
||||
List expected = {{"улица", 0}, {"набережная", 1}, {"проспект", 2}};
|
||||
List expected = {{"набережная", 1}, {"проспект", 2}};
|
||||
List actual;
|
||||
|
||||
Utf8StreetTokensFilter filter(actual);
|
||||
|
@ -219,8 +221,7 @@ UNIT_TEST(StreetTokensFilter)
|
|||
}
|
||||
|
||||
{
|
||||
List expectedWithMisprints = {{"ленинский", 0}};
|
||||
List expectedWithoutMisprints = {{"ленинский", 0}, {"пропект", 1}};
|
||||
List expected = {{"ленинский", 0}, {"пропект", 1}};
|
||||
List actualWithMisprints;
|
||||
List actualWithoutMisprints;
|
||||
|
||||
|
@ -232,13 +233,12 @@ UNIT_TEST(StreetTokensFilter)
|
|||
filterWithMisprints.Put("пропект", false /* isPrefix */, 1 /* tag */);
|
||||
filterWithoutMisprints.Put("пропект", false /* isPrefix */, 1 /* tag */);
|
||||
|
||||
TEST_EQUAL(expectedWithMisprints, actualWithMisprints, ());
|
||||
TEST_EQUAL(expectedWithoutMisprints, actualWithoutMisprints, ());
|
||||
TEST_EQUAL(expected, actualWithMisprints, ());
|
||||
TEST_EQUAL(expected, actualWithoutMisprints, ());
|
||||
}
|
||||
|
||||
{
|
||||
List expectedWithMisprints = {{"улица", 0}, {"набрежная", 1}};
|
||||
List expectedWithoutMisprints = {{"набрежная", 1}};
|
||||
List expected = {{"набрежная", 1}};
|
||||
List actualWithMisprints;
|
||||
List actualWithoutMisprints;
|
||||
|
||||
|
@ -250,8 +250,8 @@ UNIT_TEST(StreetTokensFilter)
|
|||
filterWithMisprints.Put("набрежная", false /* isPrefix */, 1 /* tag */);
|
||||
filterWithoutMisprints.Put("набрежная", false /* isPrefix */, 1 /* tag */);
|
||||
|
||||
TEST_EQUAL(expectedWithMisprints, actualWithMisprints, ());
|
||||
TEST_EQUAL(expectedWithoutMisprints, actualWithoutMisprints, ());
|
||||
TEST_EQUAL(expected, actualWithMisprints, ());
|
||||
TEST_EQUAL(expected, actualWithoutMisprints, ());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -307,107 +307,76 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
/// @todo Print most common street tokens for each country on generator stage
|
||||
/// (OSM ground truth) and compare with these synonyms.
|
||||
// Keep only *very commonly used* synonyms here (otherwise the search index can grow).
|
||||
// Too many synonyms only increase entropy and produce messy results.
|
||||
StreetsSynonymsHolder()
|
||||
{
|
||||
char const * affics[] =
|
||||
{
|
||||
// Russian - Русский
|
||||
"аллея", "бульвар", "набережная", "переулок", "площадь", "проезд", "проспект", "шоссе", "тупик", "улица", "тракт", "ал", "бул", "наб", "пер", "пл", "пр", "просп", "ш", "туп", "ул", "тр",
|
||||
"улица", "ул",
|
||||
|
||||
// English - English
|
||||
"street", "st", "avenue", "av", "ave", "square", "sq", "road", "rd", "boulevard", "blvd", "drive", "dr", "highway", "hwy", "lane", "ln", "way", "circle", "place", "pl",
|
||||
"street", "st", "road", "rd", "drive", "dr", "lane", "ln", "avenue", "av",
|
||||
|
||||
// Belarusian - Беларуская мова
|
||||
"вуліца", "вул", "завулак", "набярэжная", "плошча", "пл", "праезд", "праспект", "пр", "тракт", "тр", "тупік",
|
||||
"вуліца", "вул",
|
||||
|
||||
// Bulgarian - Български
|
||||
"булевард", "бул", "площад", "пл", "улица", "ул", "квартал", "кв",
|
||||
// Arabic
|
||||
"شارع",
|
||||
|
||||
/// @todo Do not use popular POI (carrefour) or Street name (rambla) tokens as generic street synonyms.
|
||||
/// These POIs (Carrefour supermarket) and streets (La Rambla - the most popular street in Barcelona)
|
||||
/// will be lost in search results, otherwise.
|
||||
/// Should reconsider candidates fetching and sorting logic from scratch to make correct processing.
|
||||
|
||||
// Canada
|
||||
"allee", "alley", "autoroute", "aut", "bypass", "byway", /*"carrefour", "carref",*/ "côte", "expressway", "freeway", "fwy", "pky", "pkwy",
|
||||
/// @todo Do not use next _common search_ (e.g. 'park' is a prefix of 'parkway') tokens as generic street synonyms.
|
||||
/// Should reconsider the street-matching logic to get these synonyms back.
|
||||
//"line", "link", "loop", "parkway", "parkvej", "path", "pathway", "route", "trail", "walk"
|
||||
// Armenian
|
||||
"փողոց",
|
||||
|
||||
// Catalan language (Barcelona, Valencia, ...)
|
||||
"avinguda", "carrer", /*"rambla", "ronda",*/ "passeig", "passatge", "travessera",
|
||||
"carrer",
|
||||
|
||||
// Croatian - Hrvatski
|
||||
"šetalište", "trg", "ulica", "ul", "poljana",
|
||||
|
||||
// Czech - Čeština
|
||||
"ulice", "ul", "náměstí", "nám", "nábřeží", "nábr",
|
||||
|
||||
// Danish - Dansk
|
||||
"plads", "alle", "gade", "vej",
|
||||
|
||||
// Dutch - Nederlands
|
||||
"laan", "ln.", "straat", "steenweg", "stwg", "st",
|
||||
|
||||
// Estonian - Eesti
|
||||
"maantee", "mnt", "puiestee", "tee", "pst",
|
||||
|
||||
// Finnish - Suomi
|
||||
"kaari", "kri", "katu", "kuja", "kj", "kylä", "polku", "tie", "t", "tori", "väylä", "vlä",
|
||||
"ulica", // Also common used transcription from RU
|
||||
|
||||
// French - Français
|
||||
"rue", "avenue", "carré", "cercle", "route", "boulevard", "drive", "autoroute", "lane", "chemin",
|
||||
"rue",
|
||||
|
||||
// Georgia
|
||||
"ქუჩა",
|
||||
|
||||
// German - Deutsch
|
||||
"allee", "al", "brücke", "br", "chaussee", "gasse", "gr", "pfad", "straße", "str", "weg", "platz",
|
||||
"straße", "str",
|
||||
|
||||
// Hungarian - Magyar
|
||||
"utca", "út", "u.", "tér", "körút", "krt.", "rakpart", "rkp.",
|
||||
"utca", "út",
|
||||
|
||||
// Italian - Italiano
|
||||
"corso", "piazza", "piazzale", "strada", "via", "viale", "calle", "fondamenta",
|
||||
// Indonesia
|
||||
"jalan",
|
||||
|
||||
// Italian - Italiano
|
||||
"via",
|
||||
|
||||
/// @todo These synonyms are also expected to be in the categories.txt list, but we don't support the lt, lv languages now.
|
||||
/// @{
|
||||
// Latvian - Latviešu
|
||||
"iela", "laukums",
|
||||
|
||||
"iela",
|
||||
// Lithuanian - Lietuvių
|
||||
"gatvė", "g.", "aikštė", "a", "prospektas", "pr.", "pl", "kel",
|
||||
|
||||
// Nepalese - नेपाली
|
||||
"मार्ग", "marg",
|
||||
|
||||
// Norwegian - Norsk
|
||||
// Details here: https://github.com/organicmaps/organicmaps/issues/3616
|
||||
"vei", "veien", "veg", "vegen", "vn", "gata", "gate", "gaten", "gt", "plass", "plassen", "sving", "sv", "allé",
|
||||
|
||||
// Polish - Polski
|
||||
"aleja", "aleje", "aleji", "alejach", "aleją", "plac", "placu", "placem", "ulica", "ulicy",
|
||||
"gatvė", "g.",
|
||||
///@}
|
||||
|
||||
// Portuguese - Português
|
||||
"rua", "r.", "travessa", "tr.", "praça", "pç.", "avenida", "quadrado", "estrada", "boulevard", "carro", "auto-estrada", "lane", "caminho",
|
||||
"rua",
|
||||
|
||||
// Romanian - Română
|
||||
"bul", "bdul", "blv", "bulevard", "bulevardu", "calea", "cal", "piața", "pţa", "pța", "strada", "stra", "stradela", "sdla", "stradă", "unitate", "autostradă", "lane",
|
||||
|
||||
// Slovenian - Slovenščina
|
||||
"cesta", "ulica", "trg", "nabrežje",
|
||||
// Romanian - Română (Moldova)
|
||||
"strada",
|
||||
|
||||
// Spanish - Español
|
||||
"avenida", "avd", "avda", "bulevar", "bulev", "calle", "calleja", "cllja", "callejón", "callej", "cjon", "callejuela", "cjla", "callizo", "cllzo", "calzada", "czada", "costera", "coste", "plza", "pza", "plazoleta", "pzta", "plazuela", "plzla", "tránsito", "trans", "transversal", "trval", "trasera", "tras", "travesía", "trva", "paseo", "plaça",
|
||||
|
||||
// Swedish - Svenska
|
||||
"väg", "vägen", "gata", "gatan", "gränd", "gränden", "stig", "stigen", "plats", "platsen", "allé",
|
||||
"calle", "avenida",
|
||||
|
||||
// Turkish - Türkçe
|
||||
"sokak", "sk.", "sok", "sokağı", "cadde", "cad", "cd", "caddesi", "bulvar", "bulvarı", "blv.",
|
||||
"sokağı", "sokak", "sk",
|
||||
|
||||
// Ukrainian - Українська
|
||||
"дорога", "провулок", "площа", "шосе", "вулиця", "дор", "пров", "вул",
|
||||
"вулиця", "вул",
|
||||
|
||||
// Vietnamese - Tiếng Việt
|
||||
"quốc lộ", "ql", "tỉnh lộ", "tl", "Đại lộ", "Đl", "Đường", "Đ", "Đường sắt", "Đs", "Đường phố", "Đp", "vuông", "con Đường", "Đại lộ", "Đường cao tốc",
|
||||
"đường",
|
||||
};
|
||||
|
||||
for (auto const * s : affics)
|
||||
|
@ -488,27 +457,33 @@ bool ContainsNormalized(string const & str, string const & substr)
|
|||
// StreetTokensFilter ------------------------------------------------------------------------------
|
||||
void StreetTokensFilter::Put(strings::UniString const & token, bool isPrefix, size_t tag)
|
||||
{
|
||||
using IsStreetChecker = std::function<bool(strings::UniString const &)>;
|
||||
|
||||
IsStreetChecker isStreet = m_withMisprints ? IsStreetSynonymWithMisprints : IsStreetSynonym;
|
||||
IsStreetChecker isStreetPrefix =
|
||||
m_withMisprints ? IsStreetSynonymPrefixWithMisprints : IsStreetSynonymPrefix;
|
||||
|
||||
auto const isStreetSynonym = isStreet(token);
|
||||
if ((isPrefix && isStreetPrefix(token)) || (!isPrefix && isStreetSynonym))
|
||||
if (isPrefix)
|
||||
{
|
||||
++m_numSynonyms;
|
||||
if (m_numSynonyms == 1)
|
||||
if (m_withMisprints)
|
||||
{
|
||||
m_delayedToken = token;
|
||||
m_delayedTag = tag;
|
||||
return;
|
||||
if (IsStreetSynonymPrefixWithMisprints(token))
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IsStreetSynonymPrefix(token))
|
||||
return;
|
||||
}
|
||||
|
||||
// Do not emit delayed token for incomplete street synonym.
|
||||
if ((!isPrefix || isStreetSynonym) && m_numSynonyms == 2)
|
||||
EmitToken(m_delayedToken, m_delayedTag);
|
||||
}
|
||||
EmitToken(token, tag);
|
||||
else
|
||||
{
|
||||
if (m_withMisprints)
|
||||
{
|
||||
if (IsStreetSynonymWithMisprints(token))
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IsStreetSynonym(token))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
m_callback(token, tag);
|
||||
}
|
||||
} // namespace search
|
||||
|
|
|
@ -125,14 +125,6 @@ public:
|
|||
void Put(strings::UniString const & token, bool isPrefix, size_t tag);
|
||||
|
||||
private:
|
||||
using Cell = std::pair<strings::UniString, size_t>;
|
||||
|
||||
inline void EmitToken(strings::UniString const & token, size_t tag) { m_callback(token, tag); }
|
||||
|
||||
strings::UniString m_delayedToken;
|
||||
size_t m_delayedTag = 0;
|
||||
size_t m_numSynonyms = 0;
|
||||
|
||||
Callback m_callback;
|
||||
bool m_withMisprints = false;
|
||||
};
|
||||
|
|
|
@ -265,6 +265,7 @@ void PreRanker::FilterRelaxedResults(bool lastUpdate)
|
|||
auto const iEnd = m_results.end();
|
||||
if (lastUpdate)
|
||||
{
|
||||
LOG(LDEBUG, ("Flush relaxed results number:", m_relaxedResults.size()));
|
||||
m_results.insert(iEnd, make_move_iterator(m_relaxedResults.begin()), make_move_iterator(m_relaxedResults.end()));
|
||||
m_relaxedResults.clear();
|
||||
}
|
||||
|
|
|
@ -3,22 +3,18 @@
|
|||
#include "search/ranking_utils.hpp"
|
||||
#include "search/token_range.hpp"
|
||||
|
||||
#include "indexer/feature_impl.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
|
||||
namespace search
|
||||
{
|
||||
using namespace std;
|
||||
using namespace strings;
|
||||
|
||||
namespace
|
||||
{
|
||||
// All synonyms should be lowercase.
|
||||
|
||||
// @todo These should check the map language and use
|
||||
// only the corresponding translation.
|
||||
/// @todo These should check the map language and use only the corresponding translation.
|
||||
map<string, vector<string>> const kSynonyms = {
|
||||
{"n", {"north"}},
|
||||
{"w", {"west"}},
|
||||
|
@ -29,21 +25,46 @@ map<string, vector<string>> const kSynonyms = {
|
|||
{"sw", {"southwest"}},
|
||||
{"se", {"southeast"}},
|
||||
{"st", {"saint", "street"}},
|
||||
{"blvd", {"boulevard"}},
|
||||
{"cir", {"circle"}},
|
||||
{"ct", {"court"}},
|
||||
{"rt", {"route"}},
|
||||
|
||||
{"al", {"allee", "alle"}},
|
||||
{"ave", {"avenue"}},
|
||||
/// @todo Should process synonyms with errors like "blvrd" -> "blvd".
|
||||
/// @see HouseOnStreetSynonymsWithMisprints test.
|
||||
{"blvd", {"boulevard"}},
|
||||
{"blvrd", {"boulevard"}},
|
||||
{"cir", {"circle"}},
|
||||
{"ct", {"court"}},
|
||||
{"hwy", {"highway"}},
|
||||
{"pl", {"place", "platz"}},
|
||||
{"rt", {"route"}},
|
||||
{"sq", {"square"}},
|
||||
|
||||
{"ал", {"аллея", "алея"}},
|
||||
{"бул", {"бульвар"}},
|
||||
{"зав", {"завулак"}},
|
||||
{"кв", {"квартал"}},
|
||||
{"наб", {"набережная", "набярэжная", "набережна"}},
|
||||
{"пер", {"переулок"}},
|
||||
{"пл", {"площадь", "площа"}},
|
||||
{"пр", {"проспект", "праспект", "провулок", "проезд", "праезд", "проїзд"}},
|
||||
{"туп", {"тупик", "тупік"}},
|
||||
{"ш", {"шоссе", "шаша", "шосе"}},
|
||||
|
||||
{"св", {"святой", "святого", "святая", "святые", "святых", "свято"}},
|
||||
{"б", {"большая", "большой"}},
|
||||
{"бол", {"большая", "большой"}},
|
||||
{"м", {"малая", "малый"}},
|
||||
{"мал", {"малая", "малый"}},
|
||||
{"нов", {"новая", "новый"}},
|
||||
{"стар", {"старая", "старый"}}};
|
||||
{"стар", {"старая", "старый"}},
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// QueryParams::Token ------------------------------------------------------------------------------
|
||||
void QueryParams::Token::AddSynonym(string const & s) { AddSynonym(MakeUniString(s)); }
|
||||
void QueryParams::Token::AddSynonym(string const & s)
|
||||
{
|
||||
AddSynonym(strings::MakeUniString(s));
|
||||
}
|
||||
|
||||
void QueryParams::Token::AddSynonym(String const & s)
|
||||
{
|
||||
|
|
|
@ -144,7 +144,8 @@ UNIT_CLASS_TEST(ProcessorTest, Smoke)
|
|||
TestPOI lantern1({10.0005, 10.0005}, "lantern 1", "en");
|
||||
TestPOI lantern2({10.0006, 10.0005}, "lantern 2", "en");
|
||||
|
||||
TestStreet stradaDrive({{-10.001, -10.001}, {-10, -10}, {-9.999, -9.999}}, "Strada drive", "en");
|
||||
// Was "Strada drive".
|
||||
TestStreet stradaDrive({{-10.001, -10.001}, {-10, -10}, {-9.999, -9.999}}, "Boulevard drive", "en");
|
||||
TestBuilding terranceHouse({-10, -10}, "", "155", stradaDrive.GetName("en"), "en");
|
||||
|
||||
auto const worldId = BuildWorld([&](TestMwmBuilder & builder)
|
||||
|
@ -264,7 +265,7 @@ UNIT_CLASS_TEST(ProcessorTest, Smoke)
|
|||
|
||||
{
|
||||
Rules rules = {ExactMatch(wonderlandId, terranceHouse), ExactMatch(wonderlandId, stradaDrive)};
|
||||
TEST(ResultsMatch("Toronto strada drive 155", rules), ());
|
||||
TEST(ResultsMatch("Toronto boulevard dr 155", rules), ());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1161,12 +1162,13 @@ UNIT_CLASS_TEST(ProcessorTest, StopWords)
|
|||
|
||||
{
|
||||
Rules rules = {ExactMatch(id, bakery)};
|
||||
|
||||
TEST(ResultsMatch("la boulangerie ", rules, "fr"), ());
|
||||
|
||||
TEST(ResultsMatch("la motviderie ", {}, "fr"), ());
|
||||
}
|
||||
|
||||
{
|
||||
TEST(ResultsMatch("la motviderie ", {}, "fr"), ());
|
||||
/// @todo I don't see any reason why token/prefix results should differ here.
|
||||
TEST(ResultsMatch("la la le la la la ", {ExactMatch(id, street)}, "fr"), ());
|
||||
TEST(ResultsMatch("la la le la la la", {}, "fr"), ());
|
||||
}
|
||||
|
@ -1742,9 +1744,15 @@ UNIT_CLASS_TEST(ProcessorTest, SquareAsStreetTest)
|
|||
});
|
||||
|
||||
SetViewport(m2::RectD(0.0, 0.0, 1.0, 2.0));
|
||||
|
||||
{
|
||||
Rules rules = {ExactMatch(countryId, nonameHouse)};
|
||||
TEST(ResultsMatch("revolution square 3", rules), ());
|
||||
/// @todo Should skip square result?
|
||||
Rules rules = {
|
||||
ExactMatch(countryId, nonameHouse),
|
||||
ExactMatch(countryId, square)
|
||||
};
|
||||
TEST(OrderedResultsMatch(MakeRequest("revolution square 3")->Results(), rules), ());
|
||||
TEST(OrderedResultsMatch(MakeRequest("revolution sq 3")->Results(), rules), ());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2082,31 +2090,50 @@ UNIT_CLASS_TEST(ProcessorTest, Strasse)
|
|||
UNIT_CLASS_TEST(ProcessorTest, StreetSynonymsWithMisprints)
|
||||
{
|
||||
TestStreet leninsky({{0.0, -1.0}, {0.0, 1.0}}, "Ленинский проспект", "ru");
|
||||
TestStreet leningradsky({{0.0, -1.0}, {0.0, 1.0}}, "Ленинградский проспект", "ru");
|
||||
TestStreet nabrezhnaya({{1.0, -1.0}, {1.0, 1.0}}, "улица набрежная", "ru");
|
||||
TestStreet naberezhnaya({{2.0, -1.0}, {2.0, 1.0}}, "улица набережная", "ru");
|
||||
|
||||
auto countryId = BuildCountry("Wonderland", [&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(leninsky);
|
||||
builder.Add(leningradsky);
|
||||
builder.Add(nabrezhnaya);
|
||||
builder.Add(naberezhnaya);
|
||||
});
|
||||
|
||||
SetViewport(m2::RectD(0.0, -1.0, 2.0, 1.0));
|
||||
{
|
||||
/// @todo Have _relaxed_ (all) prospekts by matching "проспект".
|
||||
Rules const prospekts = {ExactMatch(countryId, leninsky), ExactMatch(countryId, leningradsky)};
|
||||
TEST(ResultsMatch("ленинский проспект", prospekts), ());
|
||||
TEST(ResultsMatch("ленинский пропект", prospekts), ());
|
||||
|
||||
Rules rules = {ExactMatch(countryId, leninsky)};
|
||||
TEST(ResultsMatch("ленинский проспект", rules), ());
|
||||
TEST(ResultsMatch("ленинский пропект", rules), ());
|
||||
TEST(ResultsMatch("ленинский", rules), ());
|
||||
|
||||
// 2 errors + common _street_ token
|
||||
TEST(ResultsMatch("ленинская улица", rules, "ru"), ());
|
||||
|
||||
TEST(ResultsMatch("ленинский street", rules, "en"), ());
|
||||
TEST(ResultsMatch("ленинский gatvė", rules, "lt"), ());
|
||||
|
||||
/// @todo Have _relaxed_ (all) streets by matching category name.
|
||||
//TEST(ResultsMatch("ленинский gade", rules, "da"), ());
|
||||
//TEST(ResultsMatch("ленинский straat", rules, "nl"), ());
|
||||
}
|
||||
{
|
||||
Rules rules = {ExactMatch(countryId, nabrezhnaya), ExactMatch(countryId, naberezhnaya)};
|
||||
TEST(ResultsMatch("улица набрежная", rules), ());
|
||||
TEST(ResultsMatch("набрежная", rules), ());
|
||||
}
|
||||
{
|
||||
Rules rules = {ExactMatch(countryId, naberezhnaya)};
|
||||
TEST(ResultsMatch("улица набережная", rules), ());
|
||||
|
||||
TEST(ResultsMatch("набрежная street", rules, "en"), ());
|
||||
TEST(ResultsMatch("набрежная gatvė", rules, "lt"), ());
|
||||
|
||||
/// @todo Have _relaxed_ (all) streets by matching category name.
|
||||
//TEST(ResultsMatch("набрежная gade", rules, "da"), ());
|
||||
//TEST(ResultsMatch("набрежная straat", rules, "nl"), ());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2189,11 +2216,6 @@ UNIT_CLASS_TEST(ProcessorTest, StreetSynonymPrefixMatch)
|
|||
TEST(ResultsMatch("Yesenina cafe ", rules), ());
|
||||
TEST(ResultsMatch("Cafe Yesenina ", rules), ());
|
||||
TEST(ResultsMatch("Cafe Yesenina", rules), ());
|
||||
}
|
||||
{
|
||||
Rules rules = {ExactMatch(countryId, cafe), ExactMatch(countryId, yesenina)};
|
||||
// Prefix match with misprints to street synonym gives street as additional result
|
||||
// but we still can find the cafe.
|
||||
TEST(ResultsMatch("Yesenina cafe", rules), ());
|
||||
}
|
||||
}
|
||||
|
@ -3305,23 +3327,19 @@ UNIT_CLASS_TEST(ProcessorTest, StreetCategories)
|
|||
TEST(OrderedResultsMatch("avenida santa fe ", rules), ());
|
||||
}
|
||||
|
||||
/// @todo Should review search::FindStreets logic! Check 2 cases below:
|
||||
|
||||
// 1. |street| (matched by "santa fe" only) has a worse rank than |shop| and, moreover, is emitted in the second batch.
|
||||
{
|
||||
Rules const rules = {
|
||||
ExactMatch(wonderlandId, street),
|
||||
ExactMatch(wonderlandId, bus),
|
||||
ExactMatch(wonderlandId, shop),
|
||||
ExactMatch(wonderlandId, street)
|
||||
};
|
||||
TEST(OrderedResultsMatch("avenida santa fe street ", rules), ());
|
||||
}
|
||||
|
||||
// 2. Next sample matches street by "santa fe улица", thus it has low rank!
|
||||
{
|
||||
Rules const rules = {
|
||||
ExactMatch(wonderlandId, street),
|
||||
ExactMatch(wonderlandId, bus),
|
||||
//ExactMatch(wonderlandId, street)
|
||||
};
|
||||
TEST(OrderedResultsMatch(MakeRequest("avenida santa fe улица ", "ru")->Results(), rules), ());
|
||||
}
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/stl_helpers.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
@ -69,12 +68,9 @@ void FindStreets(BaseContext const & ctx, CBV const & candidates, FeaturesFilter
|
|||
// When true, no bit vectors were intersected with |streets| at all.
|
||||
bool emptyIntersection = true;
|
||||
|
||||
// When true, |streets| is in the incomplete state and can't be
|
||||
// used for creation of street layers.
|
||||
bool incomplete = false;
|
||||
|
||||
auto emit = [&]() {
|
||||
if (streets.IsEmpty() || emptyIntersection || incomplete || lastToken == curToken)
|
||||
auto emit = [&]()
|
||||
{
|
||||
if (streets.IsEmpty() || emptyIntersection || lastToken == curToken)
|
||||
return;
|
||||
|
||||
CBV fs(streets);
|
||||
|
@ -101,41 +97,27 @@ void FindStreets(BaseContext const & ctx, CBV const & candidates, FeaturesFilter
|
|||
ASSERT_LESS_OR_EQUAL(fs.PopCount(), fa.PopCount(), ());
|
||||
prediction.m_prob = static_cast<double>(fs.PopCount()) / static_cast<double>(fa.PopCount());
|
||||
|
||||
prediction.m_features = move(fs);
|
||||
prediction.m_features = std::move(fs);
|
||||
prediction.m_hash = prediction.m_features.Hash();
|
||||
prediction.m_withMisprints = withMisprints;
|
||||
};
|
||||
|
||||
StreetTokensFilter streetsFilter(
|
||||
[&](strings::UniString const & /* token */, size_t tag) {
|
||||
auto buffer = streets.Intersect(ctx.m_features[tag].m_features);
|
||||
if (tag < curToken)
|
||||
{
|
||||
// This is the case for delayed
|
||||
// street synonym. Therefore,
|
||||
// |streets| is temporarily in the
|
||||
// incomplete state.
|
||||
streets = buffer;
|
||||
all = all.Intersect(ctx.m_features[tag].m_features);
|
||||
emptyIntersection = false;
|
||||
StreetTokensFilter streetsFilter([&](strings::UniString const &, size_t tag)
|
||||
{
|
||||
auto buffer = streets.Intersect(ctx.m_features[tag].m_features);
|
||||
ASSERT_EQUAL(tag, curToken, ());
|
||||
|
||||
incomplete = true;
|
||||
return;
|
||||
}
|
||||
ASSERT_EQUAL(tag, curToken, ());
|
||||
// |streets| will become empty after
|
||||
// the intersection. Therefore we need
|
||||
// to create streets layer right now.
|
||||
if (buffer.IsEmpty())
|
||||
emit();
|
||||
|
||||
// |streets| will become empty after
|
||||
// the intersection. Therefore we need
|
||||
// to create streets layer right now.
|
||||
if (buffer.IsEmpty())
|
||||
emit();
|
||||
streets = buffer;
|
||||
all = all.Intersect(ctx.m_features[tag].m_features);
|
||||
emptyIntersection = false;
|
||||
|
||||
streets = buffer;
|
||||
all = all.Intersect(ctx.m_features[tag].m_features);
|
||||
emptyIntersection = false;
|
||||
incomplete = false;
|
||||
},
|
||||
withMisprints);
|
||||
}, withMisprints);
|
||||
|
||||
for (; curToken < ctx.m_numTokens && !ctx.IsTokenUsed(curToken) && !streets.IsEmpty(); ++curToken)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue