[search] Used linear model for rank calculation.

This commit is contained in:
Yuri Gorshenin 2016-03-11 17:44:01 +03:00 committed by Sergey Yershov
parent bc44c63d03
commit 575d9eb26d
7 changed files with 105 additions and 16 deletions

View file

@ -6,7 +6,20 @@
#include "std/utility.hpp"
#include "std/vector.hpp"
UNIT_TEST(CompareBy_Smoke)
namespace
{
// Minimal wrapper around an int. The stored value can be read only
// through the const getter Get(), which makes this class suitable
// for checking comparers built from a pointer to a member function.
class Int
{
  int m_v;

public:
  explicit Int(int v) : m_v(v) {}

  int Get() const { return m_v; }
};
UNIT_TEST(CompareBy_Field)
{
vector<pair<int, int>> v = {{2, 2}, {0, 4}, {3, 1}, {4, 0}, {1, 3}};
sort(v.begin(), v.end(), my::CompareBy(&pair<int, int>::first));
@ -21,3 +34,15 @@ UNIT_TEST(CompareBy_Smoke)
for (size_t i = 0; i < pv.size(); ++i)
TEST_EQUAL(i, pv[i]->second, ());
}
// Checks that my::CompareBy accepts a pointer to a const member
// function: sorting with a comparer built from &Int::Get must
// arrange the wrapped values in ascending order.
UNIT_TEST(CompareBy_Method)
{
// Fill |v| with the values 9, 8, ..., 0, i.e. in descending order.
vector<Int> v;
for (int i = 9; i >= 0; --i)
v.emplace_back(i);
sort(v.begin(), v.end(), my::CompareBy(&Int::Get));
// After sorting, the element at index i must hold the value i.
for (size_t i = 0; i < v.size(); ++i)
TEST_EQUAL(v[i].Get(), static_cast<int>(i), ());
}
} // namespace

View file

@ -7,8 +7,11 @@ namespace my
{
namespace impl
{
template <typename T, typename C>
struct Comparer
// Primary template, declared but never defined: only the
// specializations are usable. |isField| selects the specialization:
// true for a comparer built from a pointer to a data member, false
// for a comparer built from a pointer to a const member function.
template <bool isField, typename T, typename C>
struct Comparer;
template<typename T, typename C>
struct Comparer<true, T, C>
{
Comparer(T(C::*p)) : p_(p) {}
@ -21,6 +24,21 @@ struct Comparer
T(C::*p_);
};
// Specialization for comparison by the result of a const member
// function. Keeps a pointer to the method and orders objects of
// class C by the values the method returns, compared with operator<.
template <typename T, typename C>
struct Comparer<false, T, C>
{
  Comparer(T (C::*p)() const) : p_(p) {}

  // Compares two objects passed by reference.
  inline bool operator()(C const & lhs, C const & rhs) const
  {
    return (lhs.*p_)() < (rhs.*p_)();
  }

  // Compares two objects passed by pointer. The pointers are
  // dereferenced without any null check.
  inline bool operator()(C const * const lhs, C const * const rhs) const
  {
    return (*this)(*lhs, *rhs);
  }

  // Pointer to the const member function whose results are compared.
  T (C::*p_)() const;
};
} // namespace impl
// Sorts and removes duplicate entries from |v|.
@ -36,8 +54,14 @@ void SortUnique(std::vector<T> & v)
// create comparer that is able to compare pairs of ints by second
// component, it's enough to call CompareBy(&pair<int, int>::second).
template <typename T, typename C>
impl::Comparer<T, C> CompareBy(T(C::*p))
impl::Comparer<true, T, C> CompareBy(T(C::*p))
{
return impl::Comparer<T, C>(p);
return impl::Comparer<true, T, C>(p);
}
// Overload of CompareBy for const member functions: creates a
// comparer that orders objects of class C by the values returned
// from the method |p|.
template <typename T, typename C>
impl::Comparer<false, T, C> CompareBy(T (C::*p)() const)
{
  using TComparer = impl::Comparer<false, T, C>;
  return TComparer(p);
}
} // namespace my

View file

@ -43,6 +43,7 @@
#include "base/macros.hpp"
#include "base/scope_guard.hpp"
#include "base/stl_add.hpp"
#include "base/stl_helpers.hpp"
#include "base/string_utils.hpp"
#include "std/algorithm.hpp"
@ -132,10 +133,17 @@ class IndexedValue : public search::IndexedValueBase<Query::kQueuesCount>
/// Need to rewrite std::unique algorithm.
shared_ptr<impl::PreResult2> m_val;
double m_rank;
public:
explicit IndexedValue(unique_ptr<impl::PreResult2> v) : m_val(move(v)) {}
explicit IndexedValue(unique_ptr<impl::PreResult2> v)
: m_val(move(v)), m_rank(m_val ? m_val->GetRankingInfo().GetLinearModelRank() : 0)
{
}
impl::PreResult2 const & operator*() const { return *m_val; }
inline double GetRank() const { return m_rank; }
};
string DebugPrint(IndexedValue const & value)
@ -842,7 +850,7 @@ void Query::FlushResults(v2::Geocoder::Params const & params, Results & res, boo
RemoveDuplicatingLinear(indV);
SortByIndexedValue(indV, CompFactory2());
sort(indV.rbegin(), indV.rend(), my::CompareBy(&IndexedValue::GetRank));
// Do not process suggestions in additional search.
if (!allMWMs || res.GetCount() == 0)

View file

@ -1,5 +1,8 @@
#include "search/v2/ranking_info.hpp"
#include "std/cmath.hpp"
#include "std/limits.hpp"
namespace search
{
namespace v2
@ -38,5 +41,27 @@ void RankingInfo::ToCSV(ostream & os) const
<< "," << DebugPrint(m_nameScore) << "," << m_nameCoverage << "," << DebugPrint(m_searchType)
<< "," << m_positionInViewport;
}
double RankingInfo::GetLinearModelRank() const
{
// See search/search_quality/scoring_model.py for details. In
// short, these coeffs correspond to coeffs in a linear model.
// NOTE: this code must be consistent with scoring_model.py. Keep
// this in mind when you're going to change scoring_model.py or this
// code. We're working on automatic rank calculation code generator
// integrated in the build system.
static double const kCoeffs[] = {0.98369469, 0.40219458, 0.97463078, 0.21027244, 0.07368054};
double const minDistance =
exp(-min(m_distanceToViewport, m_distanceToPosition) / PreRankingInfo::kMaxDistMeters);
double const rank = static_cast<double>(m_rank) / numeric_limits<uint8_t>::max();
double const nameScore = static_cast<double>(m_nameScore) / NAME_SCORE_COUNT;
double const nameCoverage = m_nameCoverage;
double const searchType = static_cast<double>(m_searchType) / SearchModel::SEARCH_TYPE_COUNT;
return kCoeffs[0] * minDistance + kCoeffs[1] * rank + kCoeffs[2] * nameScore +
kCoeffs[3] * nameCoverage + kCoeffs[4] * searchType;
}
} // namespace v2
} // namespace search

View file

@ -37,6 +37,10 @@ struct RankingInfo
static void PrintCSVHeader(ostream & os);
void ToCSV(ostream & os) const;
// Returns the rank calculated by a linear model, i.e. a weighted
// sum of the normalized ranking features stored in this info.
// Larger values correspond to more important features.
double GetLinearModelRank() const;
};
string DebugPrint(RankingInfo const & info);

View file

@ -24,7 +24,9 @@ enum NameScore
NAME_SCORE_SUBSTRING_PREFIX = 1,
NAME_SCORE_SUBSTRING = 2,
NAME_SCORE_FULL_MATCH_PREFIX = 3,
NAME_SCORE_FULL_MATCH = 4
NAME_SCORE_FULL_MATCH = 4,
NAME_SCORE_COUNT
};
NameScore GetNameScore(string const & name, SearchQueryParams const & params, size_t startToken,

View file

@ -26,20 +26,21 @@ public:
{
// Low-level features such as amenities, offices, shops, buildings
// without house number, etc.
SEARCH_TYPE_POI,
SEARCH_TYPE_POI = 0,
// All features with set house number.
SEARCH_TYPE_BUILDING,
SEARCH_TYPE_BUILDING = 1,
SEARCH_TYPE_STREET,
SEARCH_TYPE_STREET = 2,
// All low-level features except POI, BUILDING and STREET.
SEARCH_TYPE_UNCLASSIFIED,
SEARCH_TYPE_UNCLASSIFIED = 3,
SEARCH_TYPE_VILLAGE = 4,
SEARCH_TYPE_CITY = 5,
SEARCH_TYPE_STATE = 6, // US or Canadian states
SEARCH_TYPE_COUNTRY = 7,
SEARCH_TYPE_VILLAGE,
SEARCH_TYPE_CITY,
SEARCH_TYPE_STATE, // US or Canadian states
SEARCH_TYPE_COUNTRY,
SEARCH_TYPE_COUNT
};