From ad0bca72b5f0adad982fffee1b5bf7e75d87f439 Mon Sep 17 00:00:00 2001
From: tatiana-yan
Date: Tue, 23 Apr 2019 16:23:46 +0300
Subject: [PATCH] [search] Heuristics for rating.

---
 search/intermediate_result.hpp         |  1 +
 search/pre_ranker.cpp                  |  7 +++++++
 search/pre_ranking_info.cpp            |  9 +++++----
 search/pre_ranking_info.hpp            | 11 +++++++++++
 search/ranker.cpp                      |  1 +
 search/ranking_info.cpp                | 25 +++++++++++++++++++++++++
 search/ranking_info.hpp                |  4 ++++
 search/search_quality/scoring_model.py |  2 +-
 8 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/search/intermediate_result.hpp b/search/intermediate_result.hpp
index b8ff62c324..6a865354f1 100644
--- a/search/intermediate_result.hpp
+++ b/search/intermediate_result.hpp
@@ -51,6 +51,7 @@ public:
   double GetDistance() const { return m_info.m_distanceToPivot; }
   uint8_t GetRank() const { return m_info.m_rank; }
   uint8_t GetPopularity() const { return m_info.m_popularity; }
+  std::pair<uint8_t, float> GetRating() const { return m_info.m_rating; }
   PreRankingInfo & GetInfo() { return m_info; }
   PreRankingInfo const & GetInfo() const { return m_info; }
   std::vector<ResultTracer::Branch> const & GetProvenance() const { return m_provenance; }
diff --git a/search/pre_ranker.cpp b/search/pre_ranker.cpp
index 711526d17c..5d6c16b309 100644
--- a/search/pre_ranker.cpp
+++ b/search/pre_ranker.cpp
@@ -5,6 +5,8 @@
 #include "search/pre_ranking_info.hpp"
 #include "search/tracer.hpp"
 
+#include "ugc/types.hpp"
+
 #include "indexer/data_source.hpp"
 #include "indexer/mwm_set.hpp"
 #include "indexer/rank_table.hpp"
@@ -77,6 +79,7 @@ void PreRanker::FillMissingFieldsInPreResults()
   MwmSet::MwmHandle mwmHandle;
   unique_ptr<RankTable> ranks = make_unique<DummyRankTable>();
   unique_ptr<RankTable> popularityRanks = make_unique<DummyRankTable>();
+  unique_ptr<RankTable> ratings = make_unique<DummyRankTable>();
   unique_ptr<LazyCentersTable> centers;
   bool pivotFeaturesInitialized = false;
 
@@ -94,16 +97,20 @@ void PreRanker::FillMissingFieldsInPreResults()
         ranks = RankTable::Load(mwmHandle.GetValue()->m_cont, SEARCH_RANKS_FILE_TAG);
         popularityRanks =
             RankTable::Load(mwmHandle.GetValue()->m_cont, POPULARITY_RANKS_FILE_TAG);
+        ratings = RankTable::Load(mwmHandle.GetValue()->m_cont, RATINGS_FILE_TAG);
         centers = make_unique<LazyCentersTable>(*mwmHandle.GetValue());
       }
       if (!ranks)
         ranks = make_unique<DummyRankTable>();
       if (!popularityRanks)
         popularityRanks = make_unique<DummyRankTable>();
+      if (!ratings)
+        ratings = make_unique<DummyRankTable>();
     }
 
     info.m_rank = ranks->Get(id.m_index);
     info.m_popularity = popularityRanks->Get(id.m_index);
+    info.m_rating = ugc::UGC::UnpackRating(ratings->Get(id.m_index));
 
     m2::PointD center;
     if (centers && centers->Get(id.m_index, center))
diff --git a/search/pre_ranking_info.cpp b/search/pre_ranking_info.cpp
index 2b05f68d5c..c5a4afa668 100644
--- a/search/pre_ranking_info.cpp
+++ b/search/pre_ranking_info.cpp
@@ -8,17 +8,18 @@ std::string DebugPrint(PreRankingInfo const & info)
 {
   std::ostringstream os;
   os << "PreRankingInfo [";
-  os << "m_distanceToPivot:" << info.m_distanceToPivot << ",";
+  os << "m_distanceToPivot:" << info.m_distanceToPivot << ", ";
   for (size_t i = 0; i < static_cast<size_t>(Model::TYPE_COUNT); ++i)
   {
     if (info.m_tokenRange[i].Empty())
       continue;
     auto const type = static_cast<Model::Type>(i);
-    os << "m_tokenRange[" << DebugPrint(type) << "]:" << DebugPrint(info.m_tokenRange[i]) << ",";
+    os << "m_tokenRange[" << DebugPrint(type) << "]:" << DebugPrint(info.m_tokenRange[i]) << ", ";
   }
 
-  os << "m_rank:" << static_cast<int>(info.m_rank) << ",";
-  os << "m_popularity:" << static_cast<int>(info.m_popularity) << ",";
+  os << "m_rank:" << static_cast<int>(info.m_rank) << ", ";
+  os << "m_popularity:" << static_cast<int>(info.m_popularity) << ", ";
+  os << "m_rating: [" << static_cast<int>(info.m_rating.first) << ", "<< info.m_rating.second << "], ";
   os << "m_type:" << info.m_type;
   os << "]";
   return os.str();
diff --git a/search/pre_ranking_info.hpp b/search/pre_ranking_info.hpp
index b19a76dd1d..8150bf0e36 100644
--- a/search/pre_ranking_info.hpp
+++ b/search/pre_ranking_info.hpp
@@ -12,6 +12,7 @@
 
 #include <array>
 #include <cstdint>
+#include <utility>
 
 namespace search
 {
@@ -59,6 +60,16 @@ struct PreRankingInfo
   // Popularity rank of the feature.
   uint8_t m_popularity = 0;
 
+  // Confidence and UGC rating.
+  // Confidence: 0 - no information
+  //             1 - based on few reviews
+  //             2 - based on average reviews number
+  //             3 - based on large number of reviews.
+  // Rating [4.0 ... 10.0]:
+  //   4.0 and lower represented as 4.0
+  //   higher ratings saved as is from UGC.
+  std::pair<uint8_t, float> m_rating = {0, 0.0f};
+
   // Search type for the feature.
   Model::Type m_type = Model::TYPE_COUNT;
 };
diff --git a/search/ranker.cpp b/search/ranker.cpp
index 8a1c3fada5..2e5bb0fd73 100644
--- a/search/ranker.cpp
+++ b/search/ranker.cpp
@@ -298,6 +298,7 @@ class RankerResultMaker
     info.m_distanceToPivot = MercatorBounds::DistanceOnEarth(center, pivot);
     info.m_rank = preInfo.m_rank;
     info.m_popularity = preInfo.m_popularity;
+    info.m_rating = preInfo.m_rating;
     info.m_type = preInfo.m_type;
     info.m_allTokensUsed = preInfo.m_allTokensUsed;
     info.m_categorialRequest = m_params.IsCategorialRequest();
diff --git a/search/ranking_info.cpp b/search/ranking_info.cpp
index fdb4f675aa..a80cd70cb2 100644
--- a/search/ranking_info.cpp
+++ b/search/ranking_info.cpp
@@ -1,5 +1,7 @@
 #include "search/ranking_info.hpp"
 
+#include "ugc/types.hpp"
+
 #include <algorithm>
 #include <limits>
 #include <sstream>
@@ -16,10 +18,13 @@ double constexpr kDistanceToPivot = -1.0000000;
 double constexpr kRank = 1.0000000;
 // todo: (@t.yan) Adjust.
 double constexpr kPopularity = 0.0500000;
+// todo: (@t.yan) Adjust.
+double constexpr kRating = 0.0500000;
 double constexpr kFalseCats = -0.3691859;
 double constexpr kErrorsMade = -0.0579812;
 double constexpr kAllTokensUsed = 0.0000000;
 double constexpr kHasName = 0.5;
+
 double constexpr kNameScore[NameScore::NAME_SCORE_COUNT] = {
     -0.7245815 /* Zero */,
     0.1853727 /* Substring */,
@@ -48,6 +53,20 @@ double TransformDistance(double distance)
 {
   return min(distance, RankingInfo::kMaxDistMeters) / RankingInfo::kMaxDistMeters;
 }
+
+double TransformRating(pair<uint8_t, float> const & rating)
+{
+  double r = 0.0;
+  // From statistics.
+  double constexpr kAverageRating = 7.6;
+  if (rating.first != 0)
+  {
+    r = (static_cast<double>(rating.second) - kAverageRating) /
+        (ugc::UGC::kMaxRating - ugc::UGC::kRatingDetalizationThreshold);
+    r *= static_cast<double>(rating.first) / 3.0 /* maximal confidence */;
+  }
+  return r;
+}
 }  // namespace
 
 // static
@@ -59,6 +78,7 @@ void RankingInfo::PrintCSVHeader(ostream & os)
   os << "DistanceToPivot"
      << ",Rank"
      << ",Popularity"
+     << ",Rating"
      << ",NameScore"
      << ",ErrorsMade"
      << ",SearchType"
@@ -75,6 +95,8 @@ string DebugPrint(RankingInfo const & info)
   os << "m_distanceToPivot:" << info.m_distanceToPivot;
   os << ", m_rank:" << static_cast<int>(info.m_rank);
   os << ", m_popularity:" << static_cast<int>(info.m_popularity);
+  os << ", m_rating:[" << static_cast<int>(info.m_rating.first) << ", " << info.m_rating.second
+     << "]";
   os << ", m_nameScore:" << DebugPrint(info.m_nameScore);
   os << ", m_errorsMade:" << DebugPrint(info.m_errorsMade);
   os << ", m_type:" << DebugPrint(info.m_type);
@@ -93,6 +115,7 @@ void RankingInfo::ToCSV(ostream & os) const
   os << m_distanceToPivot << ",";
   os << static_cast<int>(m_rank) << ",";
   os << static_cast<int>(m_popularity) << ",";
+  os << TransformRating(m_rating) << ",";
   os << DebugPrint(m_nameScore) << ",";
   os << GetErrorsMade() << ",";
   os << DebugPrint(m_type) << ",";
@@ -112,6 +135,7 @@ double RankingInfo::GetLinearModelRank() const
   double const distanceToPivot = TransformDistance(m_distanceToPivot);
   double const rank = static_cast<double>(m_rank) / numeric_limits<uint8_t>::max();
   double const popularity = static_cast<double>(m_popularity) / numeric_limits<uint8_t>::max();
+  double const rating = TransformRating(m_rating);
 
   auto nameScore = m_nameScore;
   if (m_pureCats || m_falseCats)
@@ -129,6 +153,7 @@ double RankingInfo::GetLinearModelRank() const
   result += kDistanceToPivot * distanceToPivot;
   result += kRank * rank;
   result += kPopularity * popularity;
+  result += kRating * rating;
   result += m_falseCats * kFalseCats;
   if (!m_categorialRequest)
   {
diff --git a/search/ranking_info.hpp b/search/ranking_info.hpp
index 2b47b23632..2668ea0f45 100644
--- a/search/ranking_info.hpp
+++ b/search/ranking_info.hpp
@@ -8,6 +8,7 @@
 #include <cstdint>
 #include <iosfwd>
 #include <string>
+#include <utility>
 
 class FeatureType;
 
@@ -36,6 +37,9 @@ struct RankingInfo
   // Popularity rank of the feature.
   uint8_t m_popularity = 0;
 
+  // Confidence and UGC rating.
+  std::pair<uint8_t, float> m_rating = {0, 0.0f};
+
   // Score for the feature's name.
   NameScore m_nameScore = NAME_SCORE_ZERO;
 
diff --git a/search/search_quality/scoring_model.py b/search/search_quality/scoring_model.py
index ab010dfd64..20b77314d6 100755
--- a/search/search_quality/scoring_model.py
+++ b/search/search_quality/scoring_model.py
@@ -20,7 +20,7 @@ MAX_POPULARITY = 255
 RELEVANCES = {'Harmful': -3, 'Irrelevant': 0, 'Relevant': 1, 'Vital': 3}
 NAME_SCORES = ['Zero', 'Substring', 'Prefix', 'Full Match']
 SEARCH_TYPES = ['POI', 'Building', 'Street', 'Unclassified', 'Village', 'City', 'State', 'Country']
-FEATURES = ['DistanceToPivot', 'Rank', 'Popularity', 'FalseCats', 'ErrorsMade', 'AllTokensUsed',
+FEATURES = ['DistanceToPivot', 'Rank', 'Popularity', 'Rating', 'FalseCats', 'ErrorsMade', 'AllTokensUsed',
             'CategorialRequest', 'HasName'] + NAME_SCORES + SEARCH_TYPES
 
 BOOTSTRAP_ITERATIONS = 10000
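
Note, not part of the patch: TransformRating() centers the UGC rating on an empirical average of 7.6, scales it by the stored rating span, and damps it by the review-count confidence. The standalone sketch below reproduces that arithmetic so the resulting feature range is easy to see. It assumes ugc::UGC::kMaxRating == 10.0 and ugc::UGC::kRatingDetalizationThreshold == 4.0, matching the "[4.0 ... 10.0]" range documented in pre_ranking_info.hpp; the real constants live in ugc/types.hpp and are not shown in this patch.

// Standalone sketch, illustration only. Mirrors TransformRating() from
// search/ranking_info.cpp with the ugc constants replaced by assumed values
// (kMaxRating = 10.0, kRatingDetalizationThreshold = 4.0) taken from the
// comment in pre_ranking_info.hpp.
#include <cstdint>
#include <iostream>
#include <utility>

double TransformRatingSketch(std::pair<uint8_t, float> const & rating)
{
  double constexpr kAssumedMaxRating = 10.0;  // assumed value of ugc::UGC::kMaxRating
  double constexpr kAssumedThreshold = 4.0;   // assumed value of ugc::UGC::kRatingDetalizationThreshold
  double constexpr kAverageRating = 7.6;      // "From statistics." in the patch
  double constexpr kMaxConfidence = 3.0;      // confidence 3 = large number of reviews

  double r = 0.0;
  if (rating.first != 0)
  {
    // Center on the average rating, scale by the stored span (10.0 - 4.0 = 6.0),
    // then damp by confidence so ratings backed by few reviews move the score less.
    r = (static_cast<double>(rating.second) - kAverageRating) /
        (kAssumedMaxRating - kAssumedThreshold);
    r *= static_cast<double>(rating.first) / kMaxConfidence;
  }
  return r;
}

int main()
{
  std::cout << TransformRatingSketch({0, 0.0f}) << "\n";   // no rating information: 0
  std::cout << TransformRatingSketch({3, 10.0f}) << "\n";  // (10.0 - 7.6) / 6.0 = 0.4
  std::cout << TransformRatingSketch({3, 4.0f}) << "\n";   // (4.0 - 7.6) / 6.0 = -0.6
  std::cout << TransformRatingSketch({1, 4.0f}) << "\n";   // few reviews: -0.6 / 3 = -0.2
  return 0;
}

Under these assumptions the transformed value stays within roughly [-0.6, 0.4], so with kRating = 0.0500000 the new term moves GetLinearModelRank() by at most about 0.03; the "todo: (@t.yan) Adjust." marker indicates the coefficient is a placeholder until scoring_model.py is retrained with the new Rating column.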
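A second sketch, also illustration only: the pre-ranker reads the rating from a RankTable section (RATINGS_FILE_TAG), which stores one uint8_t per feature, and decodes it with ugc::UGC::UnpackRating. The actual packing is defined in ugc/types.hpp and is not part of this patch; the layout below (2 bits of confidence, 6 bits of 0.1 rating steps above 4.0) is hypothetical and only shows why a single byte per feature is enough to carry the (confidence, rating) pair.

// Hypothetical byte layout, illustration only; the real ugc::UGC packing
// in ugc/types.hpp may differ.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>

// Low 2 bits: confidence (0..3). High 6 bits: rating in 0.1 steps above 4.0,
// which covers the documented [4.0 ... 10.0] range with 61 values.
uint8_t PackRatingSketch(uint8_t confidence, float rating)
{
  float const clamped = std::max(4.0f, std::min(10.0f, rating));
  auto const steps = static_cast<uint8_t>((clamped - 4.0f) * 10.0f + 0.5f);  // 0..60
  return static_cast<uint8_t>((confidence & 0x3) | (steps << 2));
}

std::pair<uint8_t, float> UnpackRatingSketch(uint8_t packed)
{
  auto const confidence = static_cast<uint8_t>(packed & 0x3);
  float const rating = 4.0f + static_cast<float>(packed >> 2) / 10.0f;
  return {confidence, confidence == 0 ? 0.0f : rating};
}

int main()
{
  auto const unpacked = UnpackRatingSketch(PackRatingSketch(3 /* confidence */, 8.7f));
  std::cout << static_cast<int>(unpacked.first) << " " << unpacked.second << "\n";  // 3 8.7
  return 0;
}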