forked from organicmaps/organicmaps
[search] Fixed name score calculation.
This commit is contained in:
parent
8958fefd1a
commit
15de1fd09b
8 changed files with 169 additions and 83 deletions
|
@ -181,6 +181,32 @@ m2::RectD GetRectAroundPosition(m2::PointD const & position)
|
|||
double constexpr kMaxPositionRadiusM = 50.0 * 1000;
|
||||
return MercatorBounds::RectByCenterXYAndSizeInMeters(position, kMaxPositionRadiusM);
|
||||
}
|
||||
|
||||
template <typename TSlice>
|
||||
void UpdateNameScore(string const & name, TSlice const & slice, search::v2::NameScore & bestScore)
|
||||
{
|
||||
auto const score = v2::GetNameScore(name, slice);
|
||||
if (score > bestScore)
|
||||
bestScore = score;
|
||||
}
|
||||
|
||||
template <typename TSlice>
|
||||
void UpdateNameScore(vector<strings::UniString> const & tokens, TSlice const & slice,
|
||||
search::v2::NameScore & bestScore, double & bestCoverage)
|
||||
{
|
||||
auto const score = v2::GetNameScore(tokens, slice);
|
||||
auto const coverage =
|
||||
tokens.empty() ? 0 : static_cast<double>(slice.Size()) / static_cast<double>(tokens.size());
|
||||
if (score > bestScore)
|
||||
{
|
||||
bestScore = score;
|
||||
bestCoverage = coverage;
|
||||
}
|
||||
else if (score == bestScore && coverage > bestCoverage)
|
||||
{
|
||||
bestCoverage = coverage;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// static
|
||||
|
@ -591,6 +617,9 @@ class PreResult2Maker
|
|||
info.m_searchType = preInfo.m_searchType;
|
||||
|
||||
info.m_nameScore = v2::NAME_SCORE_ZERO;
|
||||
|
||||
v2::TokensSliceNoCategories slice(m_params, preInfo.m_startToken, preInfo.m_endToken);
|
||||
|
||||
for (auto const & lang : m_params.m_langs)
|
||||
{
|
||||
string name;
|
||||
|
@ -599,28 +628,11 @@ class PreResult2Maker
|
|||
vector<strings::UniString> tokens;
|
||||
SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters());
|
||||
|
||||
auto score = GetNameScore(tokens, m_params, preInfo.m_startToken, preInfo.m_endToken);
|
||||
auto coverage =
|
||||
tokens.empty() ? 0 : static_cast<double>(preInfo.m_endToken - preInfo.m_startToken) /
|
||||
static_cast<double>(tokens.size());
|
||||
if (score > info.m_nameScore)
|
||||
{
|
||||
info.m_nameScore = score;
|
||||
info.m_nameCoverage = coverage;
|
||||
}
|
||||
else if (score == info.m_nameScore && coverage > info.m_nameCoverage)
|
||||
{
|
||||
info.m_nameCoverage = coverage;
|
||||
}
|
||||
UpdateNameScore(tokens, slice, info.m_nameScore, info.m_nameCoverage);
|
||||
}
|
||||
|
||||
if (info.m_searchType == v2::SearchModel::SEARCH_TYPE_BUILDING)
|
||||
{
|
||||
string const houseNumber = ft.GetHouseNumber();
|
||||
auto score = GetNameScore(houseNumber, m_params, preInfo.m_startToken, preInfo.m_endToken);
|
||||
if (score > info.m_nameScore)
|
||||
info.m_nameScore = score;
|
||||
}
|
||||
UpdateNameScore(ft.GetHouseNumber(), slice, info.m_nameScore);
|
||||
}
|
||||
|
||||
uint8_t NormalizeRank(uint8_t rank, v2::SearchModel::SearchType type, m2::PointD const & center,
|
||||
|
@ -1164,6 +1176,8 @@ void Query::InitParams(bool localitySearch, SearchQueryParams & params)
|
|||
for (size_t i = 0; i < tokensCount; ++i)
|
||||
params.m_tokens[i].push_back(m_tokens[i]);
|
||||
|
||||
params.m_isCategorySynonym.assign(tokensCount + (m_prefix.empty() ? 0 : 1), false);
|
||||
|
||||
// Add names of categories (and synonyms).
|
||||
if (!localitySearch)
|
||||
{
|
||||
|
@ -1175,6 +1189,7 @@ void Query::InitParams(bool localitySearch, SearchQueryParams & params)
|
|||
|
||||
uint32_t const index = cl.GetIndexForType(t);
|
||||
v.push_back(FeatureTypeToString(index));
|
||||
params.m_isCategorySynonym[i] = true;
|
||||
|
||||
// v2-version MWM has raw classificator types in search index prefix, so
|
||||
// do the hack: add synonyms for old convention if needed.
|
||||
|
|
|
@ -63,6 +63,7 @@ void SearchQueryParams::Clear()
|
|||
{
|
||||
m_tokens.clear();
|
||||
m_prefixTokens.clear();
|
||||
m_isCategorySynonym.clear();
|
||||
m_langs.clear();
|
||||
m_scale = scales::GetUpperScale();
|
||||
}
|
||||
|
|
|
@ -16,6 +16,8 @@ struct SearchQueryParams
|
|||
|
||||
vector<TSynonymsVector> m_tokens;
|
||||
TSynonymsVector m_prefixTokens;
|
||||
vector<bool> m_isCategorySynonym;
|
||||
|
||||
TLangsSet m_langs;
|
||||
int m_scale;
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ NameScore GetScore(string const & name, string const & query, size_t startToken,
|
|||
params.m_prefixTokens.swap(params.m_tokens.back());
|
||||
params.m_tokens.pop_back();
|
||||
}
|
||||
return GetNameScore(name, params, startToken, endToken);
|
||||
return GetNameScore(name, TokensSlice(params, startToken, endToken));
|
||||
}
|
||||
|
||||
UNIT_TEST(NameTest_Smoke)
|
||||
|
|
|
@ -99,8 +99,8 @@ void LocalityScorer::SortByName(vector<ExLocality> & ls) const
|
|||
auto score = NAME_SCORE_ZERO;
|
||||
for (auto const & name : names)
|
||||
{
|
||||
score = max(score,
|
||||
GetNameScore(name, m_params, l.m_locality.m_startToken, l.m_locality.m_endToken));
|
||||
score = max(score, GetNameScore(name, v2::TokensSlice(m_params, l.m_locality.m_startToken,
|
||||
l.m_locality.m_endToken)));
|
||||
}
|
||||
l.m_nameScore = score;
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ namespace
|
|||
double const kDistanceToPivot = 24.443;
|
||||
double const kRank = 11.010;
|
||||
double const kNameScore = 1.0;
|
||||
double const kNameCoverage = 0.0;
|
||||
double const kNameCoverage = 1.0;
|
||||
double const kSearchType = 22.378;
|
||||
|
||||
double TransformDistance(double distance)
|
||||
|
|
|
@ -1,12 +1,5 @@
|
|||
#include "search/v2/ranking_utils.hpp"
|
||||
|
||||
#include "search/search_query_params.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/stl_add.hpp"
|
||||
|
||||
#include "std/algorithm.hpp"
|
||||
|
||||
using namespace strings;
|
||||
|
@ -15,7 +8,7 @@ namespace search
|
|||
{
|
||||
namespace v2
|
||||
{
|
||||
namespace
|
||||
namespace impl
|
||||
{
|
||||
bool Match(vector<UniString> const & tokens, UniString const & token)
|
||||
{
|
||||
|
@ -31,54 +24,7 @@ bool PrefixMatch(vector<UniString> const & prefixes, UniString const & token)
|
|||
}
|
||||
return false;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Splits |name| into tokens (NormalizeAndSimplifyString + SplitUniString with
// Delimiters()) and scores them against the query tokens of |params| in the
// half-open range [startToken, endToken).
//
// Returns NAME_SCORE_ZERO without tokenizing when the range is empty.
NameScore GetNameScore(string const & name, SearchQueryParams const & params, size_t startToken,
                       size_t endToken)
{
  if (endToken <= startToken)
    return NAME_SCORE_ZERO;

  vector<UniString> nameTokens;
  auto const normalized = NormalizeAndSimplifyString(name);
  SplitUniString(normalized, MakeBackInsertFunctor(nameTokens), Delimiters());

  return GetNameScore(nameTokens, params, startToken, endToken);
}
|
||||
|
||||
// Scores the name |tokens| against the query tokens of |params| in the half-open
// range [startToken, endToken).
//
// The query range (m tokens) is slid over every offset of the name (n tokens).
// At each offset the first m - 1 query tokens must Match() the corresponding name
// tokens; the last query token is then tried with Match() and, when it is the
// prefix token of the query, with PrefixMatch().
//
// Returns:
// * NAME_SCORE_ZERO when the range is empty or nothing matches;
// * NAME_SCORE_FULL_MATCH / NAME_SCORE_FULL_MATCH_PREFIX as soon as a match is
//   found and the range is as long as the whole name (m == n);
// * otherwise the maximum of NAME_SCORE_SUBSTRING / NAME_SCORE_SUBSTRING_PREFIX
//   collected over all offsets.
NameScore GetNameScore(vector<UniString> const & tokens, SearchQueryParams const & params,
                       size_t startToken, size_t endToken)
{
  if (startToken >= endToken)
    return NAME_SCORE_ZERO;

  size_t const n = tokens.size();
  size_t const m = endToken - startToken;

  // The token at index m_tokens.size() is the one past the full tokens, so the
  // range ends on it exactly when endToken is m_tokens.size() + 1.
  bool const lastTokenIsPrefix = (endToken == params.m_tokens.size() + 1);

  NameScore score = NAME_SCORE_ZERO;
  // Indices are size_t to agree with n, m and the container index type; the
  // previous int counters were silently converted in every comparison.
  for (size_t offset = 0; offset + m <= n; ++offset)
  {
    bool match = true;
    for (size_t i = 0; i + 1 < m && match; ++i)
      match = Match(params.GetTokens(startToken + i), tokens[offset + i]);
    if (!match)
      continue;

    auto const & lastQueryToken = params.GetTokens(endToken - 1);
    auto const & lastNameToken = tokens[offset + m - 1];

    if (Match(lastQueryToken, lastNameToken))
    {
      if (m == n)
        return NAME_SCORE_FULL_MATCH;
      score = max(score, NAME_SCORE_SUBSTRING);
    }
    if (lastTokenIsPrefix && PrefixMatch(lastQueryToken, lastNameToken))
    {
      if (m == n)
        return NAME_SCORE_FULL_MATCH_PREFIX;
      score = max(score, NAME_SCORE_SUBSTRING_PREFIX);
    }
  }
  return score;
}
|
||||
} // namespace impl
|
||||
|
||||
string DebugPrint(NameScore score)
|
||||
{
|
||||
|
|
|
@ -1,8 +1,14 @@
|
|||
#pragma once
|
||||
|
||||
#include "search/search_query_params.hpp"
|
||||
#include "search/v2/geocoder.hpp"
|
||||
#include "search/v2/search_model.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/stl_add.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/cstdint.hpp"
|
||||
|
@ -16,6 +22,13 @@ struct SearchQueryParams;
|
|||
|
||||
namespace v2
|
||||
{
|
||||
namespace impl
|
||||
{
|
||||
bool Match(vector<strings::UniString> const & tokens, strings::UniString const & token);
|
||||
|
||||
bool PrefixMatch(vector<strings::UniString> const & prefixes, strings::UniString const & token);
|
||||
} // namespace impl
|
||||
|
||||
// The order and numeric values are important here. Please, check all
|
||||
// use-cases before changing this enum.
|
||||
enum NameScore
|
||||
|
@ -29,11 +42,120 @@ enum NameScore
|
|||
NAME_SCORE_COUNT
|
||||
};
|
||||
|
||||
NameScore GetNameScore(string const & name, SearchQueryParams const & params, size_t startToken,
|
||||
size_t endToken);
|
||||
// A read-only view of the query tokens of |params| in the half-open range
// [startToken, endToken). Stores a reference to |params| (not a copy) together
// with the start offset and the length of the range.
class TokensSlice
|
||||
{
|
||||
public:
|
||||
// Requires startToken <= endToken (checked by the assert in the body).
TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken)
|
||||
// Note: m_size is computed as endToken - startToken here, before the assert
// in the constructor body runs.
: m_params(params), m_offset(startToken), m_size(endToken - startToken)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
|
||||
}
|
||||
|
||||
// NOTE(review): this four-argument GetNameScore declaration sits between the
// constructor and Get() in this view. It looks like a leftover of the
// pre-slice API rather than an intended member — confirm before relying on it.
NameScore GetNameScore(vector<strings::UniString> const & tokens, SearchQueryParams const & params,
|
||||
size_t startToken, size_t endToken);
|
||||
// Returns the synonyms of the i-th token of the slice, i.e. token
// m_offset + i of |params|. |i| must be less than Size().
inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_params.GetTokens(m_offset + i);
|
||||
}
|
||||
|
||||
// Number of tokens in the slice.
inline size_t Size() const { return m_size; }
|
||||
|
||||
// True when the slice contains no tokens.
inline bool Empty() const { return Size() == 0; }
|
||||
|
||||
// True when the i-th token of the slice is at index m_tokens.size() of
// |params|, i.e. one past the last entry of m_tokens. Presumably that index
// denotes the query's prefix token — confirm against SearchQueryParams::GetTokens.
// |i| must be less than Size().
inline bool IsPrefix(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_offset + i == m_params.m_tokens.size();
|
||||
}
|
||||
|
||||
private:
|
||||
// Referenced, not owned.
SearchQueryParams const & m_params;
|
||||
// Index in |m_params| of the first token of the slice.
size_t const m_offset;
|
||||
// Number of tokens in the slice.
size_t const m_size;
|
||||
};
|
||||
|
||||
class TokensSliceNoCategories
|
||||
{
|
||||
public:
|
||||
TokensSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken)
|
||||
: m_params(params)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
|
||||
|
||||
m_indexes.reserve(endToken - startToken);
|
||||
for (size_t i = startToken; i < endToken; ++i)
|
||||
{
|
||||
if (!m_params.m_isCategorySynonym[i])
|
||||
m_indexes.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_params.GetTokens(m_indexes[i]);
|
||||
}
|
||||
|
||||
inline size_t Size() const { return m_indexes.size(); }
|
||||
|
||||
inline bool Empty() const { return Size() == 0; }
|
||||
|
||||
inline bool IsPrefix(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, Size(), ());
|
||||
return m_indexes[i] == m_params.m_tokens.size();
|
||||
}
|
||||
|
||||
private:
|
||||
SearchQueryParams const & m_params;
|
||||
vector<size_t> m_indexes;
|
||||
};
|
||||
|
||||
template <typename TSlice>
|
||||
NameScore GetNameScore(string const & name, TSlice const & slice)
|
||||
{
|
||||
if (slice.Empty())
|
||||
return NAME_SCORE_ZERO;
|
||||
|
||||
vector<strings::UniString> tokens;
|
||||
SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters());
|
||||
return GetNameScore(tokens, slice);
|
||||
}
|
||||
|
||||
template <typename TSlice>
|
||||
NameScore GetNameScore(vector<strings::UniString> const & tokens, TSlice const & slice)
|
||||
{
|
||||
if (slice.Empty())
|
||||
return NAME_SCORE_ZERO;
|
||||
|
||||
size_t const n = tokens.size();
|
||||
size_t const m = slice.Size();
|
||||
|
||||
bool const lastTokenIsPrefix = slice.IsPrefix(m - 1);
|
||||
|
||||
NameScore score = NAME_SCORE_ZERO;
|
||||
for (int offset = 0; offset + m <= n; ++offset)
|
||||
{
|
||||
bool match = true;
|
||||
for (int i = 0; i < m - 1 && match; ++i)
|
||||
match = match && impl::Match(slice.Get(i), tokens[offset + i]);
|
||||
if (!match)
|
||||
continue;
|
||||
|
||||
if (impl::Match(slice.Get(m - 1), tokens[offset + m - 1]))
|
||||
{
|
||||
if (m == n)
|
||||
return NAME_SCORE_FULL_MATCH;
|
||||
score = max(score, NAME_SCORE_SUBSTRING);
|
||||
}
|
||||
if (lastTokenIsPrefix && impl::PrefixMatch(slice.Get(m - 1), tokens[offset + m - 1]))
|
||||
{
|
||||
if (m == n)
|
||||
return NAME_SCORE_FULL_MATCH_PREFIX;
|
||||
score = max(score, NAME_SCORE_SUBSTRING_PREFIX);
|
||||
}
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
string DebugPrint(NameScore score);
|
||||
} // namespace v2
|
||||
|
|
Loading…
Add table
Reference in a new issue