[search] Prefer objects with higher popularity

This commit is contained in:
tatiana-yan 2018-07-11 17:51:38 +03:00 committed by mpimenov
parent fd8fde4210
commit b5e49972d6
5 changed files with 14 additions and 5 deletions

View file

@ -69,10 +69,12 @@ PreRankerResult::PreRankerResult(FeatureID const & id, PreRankingInfo const & in
}
// Comparator for pre-ranker results: returns true when |r1| should be ordered before |r2|.
// Keys are compared in this order, falling through to the next one only on a tie:
//   1. m_rank            - the larger value goes first;
//   2. m_popularity      - the larger value goes first;
//   3. m_distanceToPivot - the smaller value goes first.
// NOTE(review): two signature lines (LessRank and LessRankAndPopularity) precede the single body
// below; presumably LessRank is the name left over from before the rename - confirm.
// static
bool PreRankerResult::LessRank(PreRankerResult const & r1, PreRankerResult const & r2)
bool PreRankerResult::LessRankAndPopularity(PreRankerResult const & r1, PreRankerResult const & r2)
{
// Primary key: rank, descending.
if (r1.m_info.m_rank != r2.m_info.m_rank)
return r1.m_info.m_rank > r2.m_info.m_rank;
// Secondary key: popularity, descending; only reached when the ranks are equal.
if (r1.m_info.m_popularity != r2.m_info.m_popularity)
return r1.m_info.m_popularity > r2.m_info.m_popularity;
// Final tie-breaker: distance to the pivot, ascending.
return r1.m_info.m_distanceToPivot < r2.m_info.m_distanceToPivot;
}

View file

@ -31,7 +31,7 @@ class PreRankerResult
public:
PreRankerResult(FeatureID const & id, PreRankingInfo const & info);
static bool LessRank(PreRankerResult const & r1, PreRankerResult const & r2);
static bool LessRankAndPopularity(PreRankerResult const & r1, PreRankerResult const & r2);
static bool LessDistance(PreRankerResult const & r1, PreRankerResult const & r2);
FeatureID const & GetId() const { return m_id; }

View file

@ -30,8 +30,9 @@ void SweepNearbyResults(double eps, set<FeatureID> const & prevEmit, vector<PreR
{
auto const & p = results[i].GetInfo().m_center;
uint8_t const rank = results[i].GetInfo().m_rank;
uint8_t const popularity = results[i].GetInfo().m_popularity;
uint8_t const prevCount = prevEmit.count(results[i].GetId()) ? 1 : 0;
uint8_t const priority = max(rank, prevCount);
uint8_t const priority = max({rank, prevCount, popularity});
sweeper.Add(p.x, p.y, i, priority);
}
@ -195,7 +196,7 @@ void PreRanker::Filter(bool viewportSearch)
{
size_t n = min(m_results.size(), BatchSize());
nth_element(m_results.begin(), m_results.begin() + n, m_results.end(),
&PreRankerResult::LessRank);
&PreRankerResult::LessRankAndPopularity);
filtered.insert(m_results.begin(), m_results.begin() + n);
}

View file

@ -14,6 +14,8 @@ namespace
// these coeffs correspond to coeffs in a linear model.
// GetLinearModelRank() multiplies kDistanceToPivot, kRank and kPopularity by the corresponding
// feature values and adds the products to the result, so the sign of a coefficient decides
// whether a larger feature value raises or lowers the score.
double const kDistanceToPivot = -1.0000000;
double const kRank = 1.0000000;
// todo: (@t.yan) Adjust.
// NOTE(review): kPopularity currently has the same value as kRank; per the todo above it is
// presumably a provisional value - confirm before relying on it.
double const kPopularity = 1.0000000;
double const kFalseCats = -0.0839847;
double const kErrorsMade = 0.0066984;
double const kAllTokensUsed = 0.0000000;
@ -96,6 +98,7 @@ double RankingInfo::GetLinearModelRank() const
// integrated in the build system.
double const distanceToPivot = TransformDistance(m_distanceToPivot);
double const rank = static_cast<double>(m_rank) / numeric_limits<uint8_t>::max();
double const popularity = static_cast<double>(m_popularity) / numeric_limits<uint8_t>::max();
auto nameScore = m_nameScore;
if (m_pureCats || m_falseCats)
@ -112,6 +115,7 @@ double RankingInfo::GetLinearModelRank() const
double result = 0.0;
result += kDistanceToPivot * distanceToPivot;
result += kRank * rank;
result += kPopularity * popularity;
result += kNameScore[nameScore];
result += kErrorsMade * GetErrorsMade();
result += kType[m_type];

View file

@ -16,10 +16,11 @@ import sys
MAX_DISTANCE_METERS = 2e6
# Divisor applied to the 'Rank' column in normalize_data.
MAX_RANK = 255
# Divisor applied to the 'Popularity' column in normalize_data.
MAX_POPULARITY = 255
# Maps the textual 'Relevance' label of a sample to its numeric value.
RELEVANCES = {'Irrelevant': 0, 'Relevant': 1, 'Vital': 3}
NAME_SCORES = ['Zero', 'Substring', 'Prefix', 'Full Match']
SEARCH_TYPES = ['POI', 'Building', 'Street', 'Unclassified', 'Village', 'City', 'State', 'Country']
# NOTE(review): FEATURES is assigned twice in a row; the second assignment (the one that
# includes 'Popularity') is the value in effect - the first looks like a leftover, confirm.
FEATURES = ['DistanceToPivot', 'Rank', 'FalseCats', 'ErrorsMade', 'AllTokensUsed'] + NAME_SCORES + SEARCH_TYPES
FEATURES = ['DistanceToPivot', 'Rank', 'Popularity', 'FalseCats', 'ErrorsMade', 'AllTokensUsed'] + NAME_SCORES + SEARCH_TYPES
BOOTSTRAP_ITERATIONS = 10000
@ -36,6 +37,7 @@ def normalize_data(data):
data['DistanceToPivot'] = data['DistanceToPivot'].apply(transform_distance)
data['Rank'] = data['Rank'].apply(lambda v: v / MAX_RANK)
data['Popularity'] = data['Popularity'].apply(lambda v: v / MAX_POPULARITY)
data['Relevance'] = data['Relevance'].apply(lambda v: RELEVANCES[v])
cats = data['PureCats'].combine(data['FalseCats'], max)