forked from organicmaps/organicmaps
[search] Fixed discrepancy in tokens between geocoder and ranker.
parent d719020c9c
commit e09b14f438
6 changed files with 120 additions and 62 deletions
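As the hunks below show, the geocoder previously stripped stop words from the query on its own in Geocoder::SetParams, while the ranker tokenized feature names with a plain SplitUniString that kept stop words, so the two sides compared different token sets. The filtering now happens once in Processor::InitParams (via RemoveStopWordsIfNeeded) and the ranker prepares feature names with PrepareStringForMatching, so query tokens and name tokens go through the same stop-word removal.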
@@ -218,21 +218,6 @@ MwmSet::MwmHandle FindWorld(Index const & index, vector<shared_ptr<MwmInfo>> con
   return handle;
 }

-UniString AsciiToUniString(char const * s) { return UniString(s, s + strlen(s)); }
-
-bool IsStopWord(UniString const & s)
-{
-  /// @todo Get all commonly used stop words and factor out this array into
-  /// search_string_utils.cpp module for example.
-  static char const * arr[] = {"a", "de", "da", "la"};
-
-  static set<UniString> const kStopWords(
-      make_transform_iterator(arr, &AsciiToUniString),
-      make_transform_iterator(arr + ARRAY_SIZE(arr), &AsciiToUniString));
-
-  return kStopWords.count(s) > 0;
-}
-
 double Area(m2::RectD const & rect) { return rect.IsValid() ? rect.SizeX() * rect.SizeY() : 0; }

 // Computes an average similarity between |rect| and |pivot|. By
@@ -375,43 +360,6 @@ void Geocoder::SetParams(Params const & params)
 {
   m_params = params;

-  // Filter stop words.
-  if (m_params.GetNumTokens() > 1)
-  {
-    for (size_t i = 0; i < m_params.GetNumTokens();)
-    {
-      if (m_params.IsPrefixToken(i))
-      {
-        ++i;
-        continue;
-      }
-
-      auto & token = m_params.GetToken(i);
-      if (IsStopWord(token.m_original))
-      {
-        m_params.RemoveToken(i);
-      }
-      else
-      {
-        my::EraseIf(token.m_synonyms, &IsStopWord);
-        ++i;
-      }
-    }
-
-    // If all tokens are stop words - give up.
-    if (m_params.GetNumTokens() == 0)
-      m_params = params;
-  }
-
-  // Remove all category synonyms for streets, as they're extracted
-  // individually.
-  for (size_t i = 0; i < m_params.GetNumTokens(); ++i)
-  {
-    auto & token = m_params.GetToken(i);
-    if (IsStreetSynonym(token.m_original))
-      m_params.GetTypeIndices(i).clear();
-  }
-
   m_tokenRequests.clear();
   m_prefixTokenRequest.Clear();
   for (size_t i = 0; i < m_params.GetNumTokens(); ++i)
@@ -129,6 +129,42 @@ void SendStatistics(SearchParams const & params, m2::RectD const & viewport, Res
   alohalytics::LogEvent("searchEmitResultsAndCoords", stats);
   GetPlatform().GetMarketingService().SendMarketingEvent(marketing::kSearchEmitResultsAndCoords, {});
 }
+
+// Removes all full-token stop words from |params|, unless |params|
+// consists of all such tokens.
+void RemoveStopWordsIfNeeded(QueryParams & params)
+{
+  size_t numStopWords = 0;
+  for (size_t i = 0; i < params.GetNumTokens(); ++i)
+  {
+    auto & token = params.GetToken(i);
+    if (!params.IsPrefixToken(i) && IsStopWord(token.m_original))
+      ++numStopWords;
+  }
+
+  if (numStopWords == params.GetNumTokens())
+    return;
+
+  for (size_t i = 0; i < params.GetNumTokens();)
+  {
+    if (params.IsPrefixToken(i))
+    {
+      ++i;
+      continue;
+    }
+
+    auto & token = params.GetToken(i);
+    if (IsStopWord(token.m_original))
+    {
+      params.RemoveToken(i);
+    }
+    else
+    {
+      my::EraseIf(token.m_synonyms, &IsStopWord);
+      ++i;
+    }
+  }
+}
 } // namespace

 // static
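A minimal standalone sketch of the same filtering rule, for illustration only: it uses std::vector<std::string> instead of the real QueryParams and ignores prefix tokens and synonyms, so all names and types here are simplifications rather than the actual omim API.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <set>
#include <string>
#include <vector>

// Simplified stand-in for the stop-word list from ranking_utils.cpp.
bool IsStopWord(std::string const & s)
{
  static std::set<std::string> const kStopWords = {"a", "de", "da", "la"};
  return kStopWords.count(s) > 0;
}

// Drops full-token stop words unless the query consists only of them.
void RemoveStopWordsIfNeeded(std::vector<std::string> & tokens)
{
  size_t const numStopWords =
      static_cast<size_t>(std::count_if(tokens.begin(), tokens.end(), &IsStopWord));
  if (numStopWords == tokens.size())
    return;  // An all-stop-word query is left untouched.
  tokens.erase(std::remove_if(tokens.begin(), tokens.end(), &IsStopWord), tokens.end());
}

int main()
{
  std::vector<std::string> query = {"rue", "de", "la", "paix"};
  RemoveStopWordsIfNeeded(query);  // -> {"rue", "paix"}

  std::vector<std::string> allStopWords = {"de", "la"};
  RemoveStopWordsIfNeeded(allStopWords);  // left as {"de", "la"}

  for (auto const & t : query)
    std::cout << t << ' ';
  std::cout << '\n';
  for (auto const & t : allStopWords)
    std::cout << t << ' ';
  std::cout << '\n';
  return 0;
}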
@@ -642,6 +678,17 @@ void Processor::InitParams(QueryParams & params)
   auto & langs = params.GetLangs();
   for (int i = 0; i < LANG_COUNT; ++i)
     langs.insert(GetLanguage(i));
+
+  RemoveStopWordsIfNeeded(params);
+
+  // Remove all type indices for streets, as they're considered
+  // individually.
+  for (size_t i = 0; i < params.GetNumTokens(); ++i)
+  {
+    auto & token = params.GetToken(i);
+    if (IsStreetSynonym(token.m_original))
+      params.GetTypeIndices(i).clear();
+  }
 }

 void Processor::InitGeocoder(Geocoder::Params & params)
@@ -188,7 +188,7 @@ class PreResult2Maker
       if (!ft.GetName(lang, name))
         continue;
       vector<strings::UniString> tokens;
-      SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters());
+      PrepareStringForMatching(name, tokens);

       UpdateNameScore(tokens, slice, info.m_nameScore);
       UpdateNameScore(tokens, sliceNoCategories, info.m_nameScore);
@@ -1,11 +1,18 @@
 #include "search/ranking_utils.hpp"

-#include "std/algorithm.hpp"
+#include "std/transform_iterator.hpp"
+
+#include <algorithm>

 using namespace strings;

 namespace search
 {
+namespace
+{
+UniString AsciiToUniString(char const * s) { return UniString(s, s + strlen(s)); }
+} // namespace
+
 namespace impl
 {
 bool FullMatch(QueryParams::Token const & token, UniString const & text)
@@ -30,6 +37,29 @@ bool PrefixMatch(QueryParams::Token const & token, UniString const & text)
 }
 } // namespace impl
+
+bool IsStopWord(UniString const & s)
+{
+  /// @todo Get all commonly used stop words and factor out this array into
+  /// search_string_utils.cpp module for example.
+  static char const * arr[] = {"a", "de", "da", "la"};
+
+  static std::set<UniString> const kStopWords(
+      make_transform_iterator(arr, &AsciiToUniString),
+      make_transform_iterator(arr + ARRAY_SIZE(arr), &AsciiToUniString));
+
+  return kStopWords.count(s) > 0;
+}
+
+void PrepareStringForMatching(std::string const & name, std::vector<strings::UniString> & tokens)
+{
+  auto filter = [&tokens](strings::UniString const & token)
+  {
+    if (!IsStopWord(token))
+      tokens.push_back(token);
+  };
+  SplitUniString(NormalizeAndSimplifyString(name), filter, Delimiters());
+}

 string DebugPrint(NameScore score)
 {
   switch (score)
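For a rough picture of what PrepareStringForMatching produces, the standalone sketch below lowercases a name and splits it on spaces as a stand-in for NormalizeAndSimplifyString, SplitUniString and Delimiters (which do full Unicode normalization in the real code), then drops stop-word tokens; it approximates the behaviour and is not the actual implementation.

#include <cctype>
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <vector>

bool IsStopWord(std::string const & s)
{
  static std::set<std::string> const kStopWords = {"a", "de", "da", "la"};
  return kStopWords.count(s) > 0;
}

// Crude stand-in for the real pipeline: lowercase the name, split it on
// spaces and keep only the tokens that are not stop words.
void PrepareStringForMatching(std::string const & name, std::vector<std::string> & tokens)
{
  std::string lowered;
  for (char c : name)
    lowered.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(c))));

  std::istringstream stream(lowered);
  std::string token;
  while (stream >> token)
  {
    if (!IsStopWord(token))
      tokens.push_back(token);
  }
}

int main()
{
  std::vector<std::string> tokens;
  PrepareStringForMatching("Rue de la Paix", tokens);
  // Prints "rue" and "paix": the ranker now scores the same tokens that
  // survive stop-word removal on the query side.
  for (auto const & t : tokens)
    std::cout << t << '\n';
  return 0;
}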
@@ -43,5 +73,4 @@ string DebugPrint(NameScore score)
   }
   return "Unknown";
 }
-
 } // namespace search
@@ -9,10 +9,10 @@
 #include "base/stl_add.hpp"
 #include "base/string_utils.hpp"

-#include "std/cstdint.hpp"
-#include "std/limits.hpp"
-#include "std/string.hpp"
-#include "std/vector.hpp"
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <vector>

 namespace search
 {
@@ -38,19 +38,25 @@ enum NameScore
   NAME_SCORE_COUNT
 };

+// Returns true when |s| is a stop-word and may be removed from a query.
+bool IsStopWord(strings::UniString const & s);
+
+// Normalizes, simplifies and splits string, removes stop-words.
+void PrepareStringForMatching(std::string const & name, std::vector<strings::UniString> & tokens);
+
 template <typename TSlice>
-NameScore GetNameScore(string const & name, TSlice const & slice)
+NameScore GetNameScore(std::string const & name, TSlice const & slice)
 {
   if (slice.Empty())
     return NAME_SCORE_ZERO;

-  vector<strings::UniString> tokens;
+  std::vector<strings::UniString> tokens;
   SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters());
   return GetNameScore(tokens, slice);
 }

 template <typename TSlice>
-NameScore GetNameScore(vector<strings::UniString> const & tokens, TSlice const & slice)
+NameScore GetNameScore(std::vector<strings::UniString> const & tokens, TSlice const & slice)
 {
   if (slice.Empty())
     return NAME_SCORE_ZERO;
@@ -805,5 +805,33 @@ UNIT_CLASS_TEST(ProcessorTest, SpacesInCategories)
     TEST(ResultsMatch("Москва ночной клуб", "ru", rules), ());
   }
 }
+
+UNIT_CLASS_TEST(ProcessorTest, StopWords)
+{
+  TestCountry country(m2::PointD(0, 0), "France", "en");
+  TestCity city(m2::PointD(0, 0), "Paris", "en", 100 /* rank */);
+  TestStreet street(
+      vector<m2::PointD>{m2::PointD(-0.001, -0.001), m2::PointD(0, 0), m2::PointD(0.001, 0.001)},
+      "Rue de la Paix", "en");
+
+  BuildWorld([&](TestMwmBuilder & builder) {
+    builder.Add(country);
+    builder.Add(city);
+  });
+
+  auto id = BuildCountry(country.GetName(), [&](TestMwmBuilder & builder) { builder.Add(street); });
+
+  {
+    auto request = MakeRequest("la France à Paris Rue de la Paix");
+
+    TRules rules = {ExactMatch(id, street)};
+
+    auto const & results = request->Results();
+    TEST(MatchResults(rules, results), ());
+
+    auto const & info = results[0].GetRankingInfo();
+    TEST_EQUAL(info.m_nameScore, NAME_SCORE_FULL_MATCH, ());
+  }
+}
 } // namespace
 } // namespace search