forked from organicmaps/organicmaps
[search] Heuristics for rating.
This commit is contained in:
parent
e77592b68e
commit
ad0bca72b5
8 changed files with 55 additions and 5 deletions
|
@ -51,6 +51,7 @@ public:
|
|||
// Returns the distance-to-pivot value stored in the pre-ranking info.
double GetDistance() const
{
  return m_info.m_distanceToPivot;
}
|
||||
// Returns the rank stored in the pre-ranking info.
uint8_t GetRank() const
{
  return m_info.m_rank;
}
|
||||
// Returns the popularity rank stored in the pre-ranking info.
uint8_t GetPopularity() const
{
  return m_info.m_popularity;
}
|
||||
// Returns the (confidence, UGC rating) pair stored in the pre-ranking info.
std::pair<uint8_t, float> GetRating() const
{
  return m_info.m_rating;
}
|
||||
// Gives mutable access to the stored pre-ranking info.
PreRankingInfo & GetInfo()
{
  return m_info;
}
|
||||
// Gives read-only access to the stored pre-ranking info.
PreRankingInfo const & GetInfo() const
{
  return m_info;
}
|
||||
// Returns the stored provenance branches by const reference (no copy is made).
std::vector<ResultTracer::Branch> const & GetProvenance() const
{
  return m_provenance;
}
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#include "search/pre_ranking_info.hpp"
|
||||
#include "search/tracer.hpp"
|
||||
|
||||
#include "ugc/types.hpp"
|
||||
|
||||
#include "indexer/data_source.hpp"
|
||||
#include "indexer/mwm_set.hpp"
|
||||
#include "indexer/rank_table.hpp"
|
||||
|
@ -77,6 +79,7 @@ void PreRanker::FillMissingFieldsInPreResults()
|
|||
MwmSet::MwmHandle mwmHandle;
|
||||
unique_ptr<RankTable> ranks = make_unique<DummyRankTable>();
|
||||
unique_ptr<RankTable> popularityRanks = make_unique<DummyRankTable>();
|
||||
unique_ptr<RankTable> ratings = make_unique<DummyRankTable>();
|
||||
unique_ptr<LazyCentersTable> centers;
|
||||
bool pivotFeaturesInitialized = false;
|
||||
|
||||
|
@ -94,16 +97,20 @@ void PreRanker::FillMissingFieldsInPreResults()
|
|||
ranks = RankTable::Load(mwmHandle.GetValue<MwmValue>()->m_cont, SEARCH_RANKS_FILE_TAG);
|
||||
popularityRanks = RankTable::Load(mwmHandle.GetValue<MwmValue>()->m_cont,
|
||||
POPULARITY_RANKS_FILE_TAG);
|
||||
ratings = RankTable::Load(mwmHandle.GetValue<MwmValue>()->m_cont, RATINGS_FILE_TAG);
|
||||
centers = make_unique<LazyCentersTable>(*mwmHandle.GetValue<MwmValue>());
|
||||
}
|
||||
if (!ranks)
|
||||
ranks = make_unique<DummyRankTable>();
|
||||
if (!popularityRanks)
|
||||
popularityRanks = make_unique<DummyRankTable>();
|
||||
if (!ratings)
|
||||
ratings = make_unique<DummyRankTable>();
|
||||
}
|
||||
|
||||
info.m_rank = ranks->Get(id.m_index);
|
||||
info.m_popularity = popularityRanks->Get(id.m_index);
|
||||
info.m_rating = ugc::UGC::UnpackRating(ratings->Get(id.m_index));
|
||||
|
||||
m2::PointD center;
|
||||
if (centers && centers->Get(id.m_index, center))
|
||||
|
|
|
@ -8,17 +8,18 @@ std::string DebugPrint(PreRankingInfo const & info)
|
|||
{
|
||||
std::ostringstream os;
|
||||
os << "PreRankingInfo [";
|
||||
os << "m_distanceToPivot:" << info.m_distanceToPivot << ",";
|
||||
os << "m_distanceToPivot:" << info.m_distanceToPivot << ", ";
|
||||
for (size_t i = 0; i < static_cast<size_t>(Model::TYPE_COUNT); ++i)
|
||||
{
|
||||
if (info.m_tokenRange[i].Empty())
|
||||
continue;
|
||||
|
||||
auto const type = static_cast<Model::Type>(i);
|
||||
os << "m_tokenRange[" << DebugPrint(type) << "]:" << DebugPrint(info.m_tokenRange[i]) << ",";
|
||||
os << "m_tokenRange[" << DebugPrint(type) << "]:" << DebugPrint(info.m_tokenRange[i]) << ", ";
|
||||
}
|
||||
os << "m_rank:" << static_cast<int>(info.m_rank) << ",";
|
||||
os << "m_popularity:" << static_cast<int>(info.m_popularity) << ",";
|
||||
os << "m_rank:" << static_cast<int>(info.m_rank) << ", ";
|
||||
os << "m_popularity:" << static_cast<int>(info.m_popularity) << ", ";
|
||||
os << "m_rating: [" << static_cast<int>(info.m_rating.first) << ", "<< info.m_rating.second << "], ";
|
||||
os << "m_type:" << info.m_type;
|
||||
os << "]";
|
||||
return os.str();
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
namespace search
|
||||
{
|
||||
|
@ -59,6 +60,16 @@ struct PreRankingInfo
|
|||
// Popularity rank of the feature.
|
||||
uint8_t m_popularity = 0;
|
||||
|
||||
// Confidence and UGC rating.
|
||||
// Confidence: 0 - no information
|
||||
// 1 - based on few reviews
|
||||
// 2 - based on an average number of reviews
|
||||
// 3 - based on large number of reviews.
|
||||
// Rating [4.0 ... 10.0]:
|
||||
// 4.0 and lower are represented as 4.0
|
||||
// higher ratings are saved as is from UGC.
|
||||
std::pair<uint8_t, float> m_rating = {0, 0.0f};
|
||||
|
||||
// Search type for the feature.
|
||||
Model::Type m_type = Model::TYPE_COUNT;
|
||||
};
|
||||
|
|
|
@ -298,6 +298,7 @@ class RankerResultMaker
|
|||
info.m_distanceToPivot = MercatorBounds::DistanceOnEarth(center, pivot);
|
||||
info.m_rank = preInfo.m_rank;
|
||||
info.m_popularity = preInfo.m_popularity;
|
||||
info.m_rating = preInfo.m_rating;
|
||||
info.m_type = preInfo.m_type;
|
||||
info.m_allTokensUsed = preInfo.m_allTokensUsed;
|
||||
info.m_categorialRequest = m_params.IsCategorialRequest();
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
#include "search/ranking_info.hpp"
|
||||
|
||||
#include "ugc/types.hpp"
|
||||
|
||||
#include <iomanip>
|
||||
#include <limits>
|
||||
#include <sstream>
|
||||
|
@ -16,10 +18,13 @@ double constexpr kDistanceToPivot = -1.0000000;
|
|||
double constexpr kRank = 1.0000000;
|
||||
// TODO(@t.yan): Adjust the popularity weight.
|
||||
double constexpr kPopularity = 0.0500000;
|
||||
// TODO(@t.yan): Adjust the rating weight.
|
||||
double constexpr kRating = 0.0500000;
|
||||
double constexpr kFalseCats = -0.3691859;
|
||||
double constexpr kErrorsMade = -0.0579812;
|
||||
double constexpr kAllTokensUsed = 0.0000000;
|
||||
double constexpr kHasName = 0.5;
|
||||
|
||||
double constexpr kNameScore[NameScore::NAME_SCORE_COUNT] = {
|
||||
-0.7245815 /* Zero */,
|
||||
0.1853727 /* Substring */,
|
||||
|
@ -48,6 +53,20 @@ double TransformDistance(double distance)
|
|||
{
|
||||
return min(distance, RankingInfo::kMaxDistMeters) / RankingInfo::kMaxDistMeters;
|
||||
}
|
||||
|
||||
// Converts a (confidence, rating) pair into a signed ranking feature.
//
// |rating.first| is the confidence and |rating.second| is the rating value.
// A confidence of 0 yields 0.0. Otherwise the result is the deviation of the
// rating from the average rating, divided by the width of the rating scale
// (ugc::UGC::kMaxRating - ugc::UGC::kRatingDetalizationThreshold) and then
// weighted by confidence / 3, so ratings below the average come out negative
// and lower confidence shrinks the value towards zero.
double TransformRating(pair<uint8_t, float> const & rating)
{
  // Average rating, taken from statistics.
  double constexpr kAverageRating = 7.6;
  // Maximal confidence value.
  double constexpr kMaxConfidence = 3.0;

  // Zero confidence: the rating contributes nothing.
  if (rating.first == 0)
    return 0.0;

  double const deviation = static_cast<double>(rating.second) - kAverageRating;
  double const scaleWidth = ugc::UGC::kMaxRating - ugc::UGC::kRatingDetalizationThreshold;
  double const confidenceWeight = static_cast<double>(rating.first) / kMaxConfidence;

  // Same evaluation order as normalising first and then weighting by confidence.
  return deviation / scaleWidth * confidenceWeight;
}
|
||||
} // namespace
|
||||
|
||||
// static
|
||||
|
@ -59,6 +78,7 @@ void RankingInfo::PrintCSVHeader(ostream & os)
|
|||
os << "DistanceToPivot"
|
||||
<< ",Rank"
|
||||
<< ",Popularity"
|
||||
<< ",Rating"
|
||||
<< ",NameScore"
|
||||
<< ",ErrorsMade"
|
||||
<< ",SearchType"
|
||||
|
@ -75,6 +95,8 @@ string DebugPrint(RankingInfo const & info)
|
|||
os << "m_distanceToPivot:" << info.m_distanceToPivot;
|
||||
os << ", m_rank:" << static_cast<int>(info.m_rank);
|
||||
os << ", m_popularity:" << static_cast<int>(info.m_popularity);
|
||||
os << ", m_rating:[" << static_cast<int>(info.m_rating.first) << ", " << info.m_rating.second
|
||||
<< "]";
|
||||
os << ", m_nameScore:" << DebugPrint(info.m_nameScore);
|
||||
os << ", m_errorsMade:" << DebugPrint(info.m_errorsMade);
|
||||
os << ", m_type:" << DebugPrint(info.m_type);
|
||||
|
@ -93,6 +115,7 @@ void RankingInfo::ToCSV(ostream & os) const
|
|||
os << m_distanceToPivot << ",";
|
||||
os << static_cast<int>(m_rank) << ",";
|
||||
os << static_cast<int>(m_popularity) << ",";
|
||||
os << TransformRating(m_rating) << ",";
|
||||
os << DebugPrint(m_nameScore) << ",";
|
||||
os << GetErrorsMade() << ",";
|
||||
os << DebugPrint(m_type) << ",";
|
||||
|
@ -112,6 +135,7 @@ double RankingInfo::GetLinearModelRank() const
|
|||
double const distanceToPivot = TransformDistance(m_distanceToPivot);
|
||||
double const rank = static_cast<double>(m_rank) / numeric_limits<uint8_t>::max();
|
||||
double const popularity = static_cast<double>(m_popularity) / numeric_limits<uint8_t>::max();
|
||||
double const rating = TransformRating(m_rating);
|
||||
|
||||
auto nameScore = m_nameScore;
|
||||
if (m_pureCats || m_falseCats)
|
||||
|
@ -129,6 +153,7 @@ double RankingInfo::GetLinearModelRank() const
|
|||
result += kDistanceToPivot * distanceToPivot;
|
||||
result += kRank * rank;
|
||||
result += kPopularity * popularity;
|
||||
result += kRating * rating;
|
||||
result += m_falseCats * kFalseCats;
|
||||
if (!m_categorialRequest)
|
||||
{
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <cstdint>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
class FeatureType;
|
||||
|
||||
|
@ -36,6 +37,9 @@ struct RankingInfo
|
|||
// Popularity rank of the feature.
|
||||
uint8_t m_popularity = 0;
|
||||
|
||||
// Confidence and UGC rating.
|
||||
std::pair<uint8_t, float> m_rating = {0, 0.0f};
|
||||
|
||||
// Score for the feature's name.
|
||||
NameScore m_nameScore = NAME_SCORE_ZERO;
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ MAX_POPULARITY = 255
|
|||
RELEVANCES = {'Harmful': -3, 'Irrelevant': 0, 'Relevant': 1, 'Vital': 3}
|
||||
NAME_SCORES = ['Zero', 'Substring', 'Prefix', 'Full Match']
|
||||
SEARCH_TYPES = ['POI', 'Building', 'Street', 'Unclassified', 'Village', 'City', 'State', 'Country']
|
||||
FEATURES = ['DistanceToPivot', 'Rank', 'Popularity', 'FalseCats', 'ErrorsMade', 'AllTokensUsed',
|
||||
FEATURES = ['DistanceToPivot', 'Rank', 'Popularity', 'Rating', 'FalseCats', 'ErrorsMade', 'AllTokensUsed',
|
||||
'CategorialRequest', 'HasName'] + NAME_SCORES + SEARCH_TYPES
|
||||
|
||||
BOOTSTRAP_ITERATIONS = 10000
|
||||
|
|
Loading…
Add table
Reference in a new issue