[search] Heuristics for rating.

This commit is contained in:
tatiana-yan 2019-04-23 16:23:46 +03:00 committed by mpimenov
parent e77592b68e
commit ad0bca72b5
8 changed files with 55 additions and 5 deletions

View file

@ -51,6 +51,7 @@ public:
double GetDistance() const { return m_info.m_distanceToPivot; }
uint8_t GetRank() const { return m_info.m_rank; }
uint8_t GetPopularity() const { return m_info.m_popularity; }
std::pair<uint8_t, float> GetRating() const { return m_info.m_rating; }
PreRankingInfo & GetInfo() { return m_info; }
PreRankingInfo const & GetInfo() const { return m_info; }
std::vector<ResultTracer::Branch> const & GetProvenance() const { return m_provenance; }

View file

@ -5,6 +5,8 @@
#include "search/pre_ranking_info.hpp"
#include "search/tracer.hpp"
#include "ugc/types.hpp"
#include "indexer/data_source.hpp"
#include "indexer/mwm_set.hpp"
#include "indexer/rank_table.hpp"
@ -77,6 +79,7 @@ void PreRanker::FillMissingFieldsInPreResults()
MwmSet::MwmHandle mwmHandle;
unique_ptr<RankTable> ranks = make_unique<DummyRankTable>();
unique_ptr<RankTable> popularityRanks = make_unique<DummyRankTable>();
unique_ptr<RankTable> ratings = make_unique<DummyRankTable>();
unique_ptr<LazyCentersTable> centers;
bool pivotFeaturesInitialized = false;
@ -94,16 +97,20 @@ void PreRanker::FillMissingFieldsInPreResults()
ranks = RankTable::Load(mwmHandle.GetValue<MwmValue>()->m_cont, SEARCH_RANKS_FILE_TAG);
popularityRanks = RankTable::Load(mwmHandle.GetValue<MwmValue>()->m_cont,
POPULARITY_RANKS_FILE_TAG);
ratings = RankTable::Load(mwmHandle.GetValue<MwmValue>()->m_cont, RATINGS_FILE_TAG);
centers = make_unique<LazyCentersTable>(*mwmHandle.GetValue<MwmValue>());
}
if (!ranks)
ranks = make_unique<DummyRankTable>();
if (!popularityRanks)
popularityRanks = make_unique<DummyRankTable>();
if (!ratings)
ratings = make_unique<DummyRankTable>();
}
info.m_rank = ranks->Get(id.m_index);
info.m_popularity = popularityRanks->Get(id.m_index);
info.m_rating = ugc::UGC::UnpackRating(ratings->Get(id.m_index));
m2::PointD center;
if (centers && centers->Get(id.m_index, center))

View file

@ -8,17 +8,18 @@ std::string DebugPrint(PreRankingInfo const & info)
{
std::ostringstream os;
os << "PreRankingInfo [";
os << "m_distanceToPivot:" << info.m_distanceToPivot << ",";
os << "m_distanceToPivot:" << info.m_distanceToPivot << ", ";
for (size_t i = 0; i < static_cast<size_t>(Model::TYPE_COUNT); ++i)
{
if (info.m_tokenRange[i].Empty())
continue;
auto const type = static_cast<Model::Type>(i);
os << "m_tokenRange[" << DebugPrint(type) << "]:" << DebugPrint(info.m_tokenRange[i]) << ",";
os << "m_tokenRange[" << DebugPrint(type) << "]:" << DebugPrint(info.m_tokenRange[i]) << ", ";
}
os << "m_rank:" << static_cast<int>(info.m_rank) << ",";
os << "m_popularity:" << static_cast<int>(info.m_popularity) << ",";
os << "m_rank:" << static_cast<int>(info.m_rank) << ", ";
os << "m_popularity:" << static_cast<int>(info.m_popularity) << ", ";
os << "m_rating: [" << static_cast<int>(info.m_rating.first) << ", "<< info.m_rating.second << "], ";
os << "m_type:" << info.m_type;
os << "]";
return os.str();

View file

@ -12,6 +12,7 @@
#include <cstdint>
#include <string>
#include <utility>
namespace search
{
@ -59,6 +60,16 @@ struct PreRankingInfo
// Popularity rank of the feature.
uint8_t m_popularity = 0;
// Confidence and UGC rating.
// Confidence: 0 - no information
// 1 - based on few reviews
// 2 - based on an average number of reviews
// 3 - based on large number of reviews.
// Rating [4.0 ... 10.0]:
// 4.0 and lower represented as 4.0
// higher ratings saved as is from UGC.
std::pair<uint8_t, float> m_rating = {0, 0.0f};
// Search type for the feature.
Model::Type m_type = Model::TYPE_COUNT;
};

View file

@ -298,6 +298,7 @@ class RankerResultMaker
info.m_distanceToPivot = MercatorBounds::DistanceOnEarth(center, pivot);
info.m_rank = preInfo.m_rank;
info.m_popularity = preInfo.m_popularity;
info.m_rating = preInfo.m_rating;
info.m_type = preInfo.m_type;
info.m_allTokensUsed = preInfo.m_allTokensUsed;
info.m_categorialRequest = m_params.IsCategorialRequest();

View file

@ -1,5 +1,7 @@
#include "search/ranking_info.hpp"
#include "ugc/types.hpp"
#include <iomanip>
#include <limits>
#include <sstream>
@ -16,10 +18,13 @@ double constexpr kDistanceToPivot = -1.0000000;
double constexpr kRank = 1.0000000;
// todo: (@t.yan) Adjust.
double constexpr kPopularity = 0.0500000;
// todo: (@t.yan) Adjust.
double constexpr kRating = 0.0500000;
double constexpr kFalseCats = -0.3691859;
double constexpr kErrorsMade = -0.0579812;
double constexpr kAllTokensUsed = 0.0000000;
double constexpr kHasName = 0.5;
double constexpr kNameScore[NameScore::NAME_SCORE_COUNT] = {
-0.7245815 /* Zero */,
0.1853727 /* Substring */,
@ -48,6 +53,20 @@ double TransformDistance(double distance)
{
return min(distance, RankingInfo::kMaxDistMeters) / RankingInfo::kMaxDistMeters;
}
// Converts a (confidence, rating) pair into a signed feature value for the
// linear ranking model.
//
// |rating.first| is the confidence and |rating.second| is the rating value.
// The result is the rating's offset from the average rating, divided by the
// span between the UGC maximum rating and the UGC detalization threshold,
// and then scaled by confidence / 3.0.
//
// Returns 0.0 when the confidence is zero.
double TransformRating(pair<uint8_t, float> const & rating)
{
  // Zero confidence: contribute nothing to the rank.
  if (rating.first == 0)
    return 0.0;

  // From statistics.
  double constexpr kAverageRating = 7.6;
  double constexpr kMaxConfidence = 3.0 /* maximal confidence */;

  double const offsetFromAverage = static_cast<double>(rating.second) - kAverageRating;
  double const ratingSpan = ugc::UGC::kMaxRating - ugc::UGC::kRatingDetalizationThreshold;
  double const confidenceWeight = static_cast<double>(rating.first) / kMaxConfidence;

  // Same evaluation order as normalising first and then applying the weight.
  return offsetFromAverage / ratingSpan * confidenceWeight;
}
} // namespace
// static
@ -59,6 +78,7 @@ void RankingInfo::PrintCSVHeader(ostream & os)
os << "DistanceToPivot"
<< ",Rank"
<< ",Popularity"
<< ",Rating"
<< ",NameScore"
<< ",ErrorsMade"
<< ",SearchType"
@ -75,6 +95,8 @@ string DebugPrint(RankingInfo const & info)
os << "m_distanceToPivot:" << info.m_distanceToPivot;
os << ", m_rank:" << static_cast<int>(info.m_rank);
os << ", m_popularity:" << static_cast<int>(info.m_popularity);
os << ", m_rating:[" << static_cast<int>(info.m_rating.first) << ", " << info.m_rating.second
<< "]";
os << ", m_nameScore:" << DebugPrint(info.m_nameScore);
os << ", m_errorsMade:" << DebugPrint(info.m_errorsMade);
os << ", m_type:" << DebugPrint(info.m_type);
@ -93,6 +115,7 @@ void RankingInfo::ToCSV(ostream & os) const
os << m_distanceToPivot << ",";
os << static_cast<int>(m_rank) << ",";
os << static_cast<int>(m_popularity) << ",";
os << TransformRating(m_rating) << ",";
os << DebugPrint(m_nameScore) << ",";
os << GetErrorsMade() << ",";
os << DebugPrint(m_type) << ",";
@ -112,6 +135,7 @@ double RankingInfo::GetLinearModelRank() const
double const distanceToPivot = TransformDistance(m_distanceToPivot);
double const rank = static_cast<double>(m_rank) / numeric_limits<uint8_t>::max();
double const popularity = static_cast<double>(m_popularity) / numeric_limits<uint8_t>::max();
double const rating = TransformRating(m_rating);
auto nameScore = m_nameScore;
if (m_pureCats || m_falseCats)
@ -129,6 +153,7 @@ double RankingInfo::GetLinearModelRank() const
result += kDistanceToPivot * distanceToPivot;
result += kRank * rank;
result += kPopularity * popularity;
result += kRating * rating;
result += m_falseCats * kFalseCats;
if (!m_categorialRequest)
{

View file

@ -8,6 +8,7 @@
#include <cstdint>
#include <ostream>
#include <string>
#include <utility>
class FeatureType;
@ -36,6 +37,9 @@ struct RankingInfo
// Popularity rank of the feature.
uint8_t m_popularity = 0;
// Confidence and UGC rating.
std::pair<uint8_t, float> m_rating = {0, 0.0f};
// Score for the feature's name.
NameScore m_nameScore = NAME_SCORE_ZERO;

View file

@ -20,7 +20,7 @@ MAX_POPULARITY = 255
RELEVANCES = {'Harmful': -3, 'Irrelevant': 0, 'Relevant': 1, 'Vital': 3}
NAME_SCORES = ['Zero', 'Substring', 'Prefix', 'Full Match']
SEARCH_TYPES = ['POI', 'Building', 'Street', 'Unclassified', 'Village', 'City', 'State', 'Country']
FEATURES = ['DistanceToPivot', 'Rank', 'Popularity', 'FalseCats', 'ErrorsMade', 'AllTokensUsed',
FEATURES = ['DistanceToPivot', 'Rank', 'Popularity', 'Rating', 'FalseCats', 'ErrorsMade', 'AllTokensUsed',
'CategorialRequest', 'HasName'] + NAME_SCORES + SEARCH_TYPES
BOOTSTRAP_ITERATIONS = 10000