From 575d9eb26d3870b94be38cba5abb48c33b33a45f Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Fri, 11 Mar 2016 17:44:01 +0300 Subject: [PATCH] [search] Used linear model for rank calculation. --- base/base_tests/stl_helpers_test.cpp | 27 ++++++++++++++++++++++- base/stl_helpers.hpp | 32 ++++++++++++++++++++++++---- search/search_query.cpp | 12 +++++++++-- search/v2/ranking_info.cpp | 25 ++++++++++++++++++++++ search/v2/ranking_info.hpp | 4 ++++ search/v2/ranking_utils.hpp | 4 +++- search/v2/search_model.hpp | 17 ++++++++------- 7 files changed, 105 insertions(+), 16 deletions(-) diff --git a/base/base_tests/stl_helpers_test.cpp b/base/base_tests/stl_helpers_test.cpp index 00b59ab302..7d23bdd3d2 100644 --- a/base/base_tests/stl_helpers_test.cpp +++ b/base/base_tests/stl_helpers_test.cpp @@ -6,7 +6,20 @@ #include "std/utility.hpp" #include "std/vector.hpp" -UNIT_TEST(CompareBy_Smoke) +namespace +{ +class Int +{ +public: + explicit Int(int v) : m_v(v) {} + + inline int Get() const { return m_v; } + +private: + int m_v; +}; + +UNIT_TEST(CompareBy_Field) { vector> v = {{2, 2}, {0, 4}, {3, 1}, {4, 0}, {1, 3}}; sort(v.begin(), v.end(), my::CompareBy(&pair::first)); @@ -21,3 +34,15 @@ UNIT_TEST(CompareBy_Smoke) for (size_t i = 0; i < pv.size(); ++i) TEST_EQUAL(i, pv[i]->second, ()); } + +UNIT_TEST(CompareBy_Method) +{ + vector v; + for (int i = 9; i >= 0; --i) + v.emplace_back(i); + + sort(v.begin(), v.end(), my::CompareBy(&Int::Get)); + for (size_t i = 0; i < v.size(); ++i) + TEST_EQUAL(v[i].Get(), static_cast(i), ()); +} +} // namespace diff --git a/base/stl_helpers.hpp b/base/stl_helpers.hpp index 7dcc4f5beb..916989a040 100644 --- a/base/stl_helpers.hpp +++ b/base/stl_helpers.hpp @@ -7,8 +7,11 @@ namespace my { namespace impl { -template -struct Comparer +template +struct Comparer; + +template +struct Comparer { Comparer(T(C::*p)) : p_(p) {} @@ -21,6 +24,21 @@ struct Comparer T(C::*p_); }; + +template +struct Comparer +{ + Comparer(T (C::*p)() const) : p_(p) {} + + inline bool operator()(C const & lhs, C const & rhs) const { return (lhs.*p_)() < (rhs.*p_)(); } + + inline bool operator()(C const * const lhs, C const * const rhs) const + { + return (lhs->*p_)() < (rhs->*p_)(); + } + + T(C::*p_)() const; +}; } // namespace impl // Sorts and removes duplicate entries from |v|. @@ -36,8 +54,14 @@ void SortUnique(std::vector & v) // create comparer that is able to compare pairs of ints by second // component, it's enough to call CompareBy(&pair::second). template -impl::Comparer CompareBy(T(C::*p)) +impl::Comparer CompareBy(T(C::*p)) { - return impl::Comparer(p); + return impl::Comparer(p); +} + +template +impl::Comparer CompareBy(T (C::*p)() const) +{ + return impl::Comparer(p); } } // namespace my diff --git a/search/search_query.cpp b/search/search_query.cpp index 046fda0c9a..dc7574ea36 100644 --- a/search/search_query.cpp +++ b/search/search_query.cpp @@ -43,6 +43,7 @@ #include "base/macros.hpp" #include "base/scope_guard.hpp" #include "base/stl_add.hpp" +#include "base/stl_helpers.hpp" #include "base/string_utils.hpp" #include "std/algorithm.hpp" @@ -132,10 +133,17 @@ class IndexedValue : public search::IndexedValueBase /// Need to rewrite std::unique algorithm. shared_ptr m_val; + double m_rank; + public: - explicit IndexedValue(unique_ptr v) : m_val(move(v)) {} + explicit IndexedValue(unique_ptr v) + : m_val(move(v)), m_rank(m_val ? m_val->GetRankingInfo().GetLinearModelRank() : 0) + { + } impl::PreResult2 const & operator*() const { return *m_val; } + + inline double GetRank() const { return m_rank; } }; string DebugPrint(IndexedValue const & value) @@ -842,7 +850,7 @@ void Query::FlushResults(v2::Geocoder::Params const & params, Results & res, boo RemoveDuplicatingLinear(indV); - SortByIndexedValue(indV, CompFactory2()); + sort(indV.rbegin(), indV.rend(), my::CompareBy(&IndexedValue::GetRank)); // Do not process suggestions in additional search. if (!allMWMs || res.GetCount() == 0) diff --git a/search/v2/ranking_info.cpp b/search/v2/ranking_info.cpp index 35a3fa1d42..8d0f270747 100644 --- a/search/v2/ranking_info.cpp +++ b/search/v2/ranking_info.cpp @@ -1,5 +1,8 @@ #include "search/v2/ranking_info.hpp" +#include "std/cmath.hpp" +#include "std/limits.hpp" + namespace search { namespace v2 @@ -38,5 +41,27 @@ void RankingInfo::ToCSV(ostream & os) const << "," << DebugPrint(m_nameScore) << "," << m_nameCoverage << "," << DebugPrint(m_searchType) << "," << m_positionInViewport; } + +double RankingInfo::GetLinearModelRank() const +{ + // See search/search_quality/scoring_model.py for details. In + // short, these coeffs correspond to coeffs in a linear model. + + // NOTE: this code must be consistent with scoring_model.py. Keep + // this in mind when you're going to change scoring_model.py or this + // code. We're working on automatic rank calculation code generator + // integrated in the build system. + static double const kCoeffs[] = {0.98369469, 0.40219458, 0.97463078, 0.21027244, 0.07368054}; + + double const minDistance = + exp(-min(m_distanceToViewport, m_distanceToPosition) / PreRankingInfo::kMaxDistMeters); + double const rank = static_cast(m_rank) / numeric_limits::max(); + double const nameScore = static_cast(m_nameScore) / NAME_SCORE_COUNT; + double const nameCoverage = m_nameCoverage; + double const searchType = static_cast(m_searchType) / SearchModel::SEARCH_TYPE_COUNT; + + return kCoeffs[0] * minDistance + kCoeffs[1] * rank + kCoeffs[2] * nameScore + + kCoeffs[3] * nameCoverage + kCoeffs[4] * searchType; +} } // namespace v2 } // namespace search diff --git a/search/v2/ranking_info.hpp b/search/v2/ranking_info.hpp index 7cd6eeeba3..8c52f71838 100644 --- a/search/v2/ranking_info.hpp +++ b/search/v2/ranking_info.hpp @@ -37,6 +37,10 @@ struct RankingInfo static void PrintCSVHeader(ostream & os); void ToCSV(ostream & os) const; + + // Returns rank calculated by a linear model. Large values + // correspond to important features. + double GetLinearModelRank() const; }; string DebugPrint(RankingInfo const & info); diff --git a/search/v2/ranking_utils.hpp b/search/v2/ranking_utils.hpp index 40514f92fa..4dd5099c6c 100644 --- a/search/v2/ranking_utils.hpp +++ b/search/v2/ranking_utils.hpp @@ -24,7 +24,9 @@ enum NameScore NAME_SCORE_SUBSTRING_PREFIX = 1, NAME_SCORE_SUBSTRING = 2, NAME_SCORE_FULL_MATCH_PREFIX = 3, - NAME_SCORE_FULL_MATCH = 4 + NAME_SCORE_FULL_MATCH = 4, + + NAME_SCORE_COUNT }; NameScore GetNameScore(string const & name, SearchQueryParams const & params, size_t startToken, diff --git a/search/v2/search_model.hpp b/search/v2/search_model.hpp index a6a80efae0..394d6d1ba7 100644 --- a/search/v2/search_model.hpp +++ b/search/v2/search_model.hpp @@ -26,20 +26,21 @@ public: { // Low-level features such as amenities, offices, shops, buildings // without house number, etc. - SEARCH_TYPE_POI, + SEARCH_TYPE_POI = 0, // All features with set house number. - SEARCH_TYPE_BUILDING, + SEARCH_TYPE_BUILDING = 1, - SEARCH_TYPE_STREET, + SEARCH_TYPE_STREET = 2, // All low-level features except POI, BUILDING and STREET. - SEARCH_TYPE_UNCLASSIFIED, + SEARCH_TYPE_UNCLASSIFIED = 3, + + SEARCH_TYPE_VILLAGE = 4, + SEARCH_TYPE_CITY = 5, + SEARCH_TYPE_STATE = 6, // US or Canadian states + SEARCH_TYPE_COUNTRY = 7, - SEARCH_TYPE_VILLAGE, - SEARCH_TYPE_CITY, - SEARCH_TYPE_STATE, // US or Canadian states - SEARCH_TYPE_COUNTRY, SEARCH_TYPE_COUNT };