[search] Fixed ranking.

This commit is contained in:
Yuri Gorshenin 2016-03-16 18:48:10 +03:00 committed by Sergey Yershov
parent bbcdfa919a
commit 75f23a048c
14 changed files with 311 additions and 115 deletions

View file

@ -42,6 +42,7 @@ public:
static bool LessPointsForViewport(PreResult1 const & r1, PreResult1 const & r2);
inline FeatureID GetID() const { return m_id; }
inline double GetPriority() const { return m_priority; }
inline uint8_t GetRank() const { return m_info.m_rank; }
inline int8_t GetViewportID() const { return m_viewportID; }
inline v2::PreRankingInfo const & GetInfo() const { return m_info; }

View file

@ -52,6 +52,7 @@ HEADERS += \
v2/intersection_result.hpp \
v2/locality_scorer.hpp \
v2/mwm_context.hpp \
v2/nested_rects_cache.hpp \
v2/pre_ranking_info.hpp \
v2/rank_table_cache.hpp \
v2/ranking_info.hpp \
@ -94,7 +95,7 @@ SOURCES += \
v2/intersection_result.cpp \
v2/locality_scorer.cpp \
v2/mwm_context.cpp \
v2/pre_ranking_info.cpp \
v2/nested_rects_cache.cpp \
v2/rank_table_cache.cpp \
v2/ranking_info.cpp \
v2/ranking_utils.cpp \

View file

@ -9,24 +9,33 @@ import numpy as np
import pandas as pd
import sys
FEATURES = ['MinDistance', 'Rank', 'SearchType', 'NameScore', 'NameCoverage']
FEATURES = ['DistanceToViewport', 'DistanceToPosition', 'MinDistance', 'Rank', 'SearchType', 'NameScore', 'PositionInViewport']
DISTANCE_WINDOW = 1e9
MAX_RANK = 256
MAX_RANK = 255
RELEVANCES = {'Irrelevant': 0, 'Relevant': 1, 'Vital': 3}
NAME_SCORES = ['Zero', 'Substring Prefix', 'Substring', 'Full Match Prefix', 'Full Match']
SEARCH_TYPES = ['POI', 'BUILDING', 'STREET', 'UNCLASSIFIED', 'VILLAGE', 'CITY', 'STATE', 'COUNTRY']
SEARCH_TYPES = {'POI': 0,
'BUILDING': 0,
'STREET': 1,
'UNCLASSIFIED': 2,
'VILLAGE': 3,
'CITY': 4,
'STATE': 5,
'COUNTRY': 6}
def normalize_data(data):
transform_distance = lambda d: exp(-d / DISTANCE_WINDOW)
transform_distance = lambda d: exp(- d * 1000 / DISTANCE_WINDOW)
max_name_score = len(NAME_SCORES) - 1
max_search_type = SEARCH_TYPES['COUNTRY']
data['DistanceToViewport'] = data['DistanceToViewport'].apply(transform_distance)
data['DistanceToPosition'] = data['DistanceToPosition'].apply(transform_distance)
data['Rank'] = data['Rank'].apply(lambda rank: rank / MAX_RANK)
data['NameScore'] = data['NameScore'].apply(lambda s: NAME_SCORES.index(s) / len(NAME_SCORES))
data['SearchType'] = data['SearchType'].apply(
lambda t: SEARCH_TYPES.index(t) / len(SEARCH_TYPES))
data['NameScore'] = data['NameScore'].apply(lambda s: NAME_SCORES.index(s) / max_name_score)
data['SearchType'] = data['SearchType'].apply(lambda t: SEARCH_TYPES[t] / max_search_type)
data['Relevance'] = data['Relevance'].apply(lambda r: RELEVANCES[r])
data['MinDistance'] = pd.Series(np.minimum(data['DistanceToViewport'], data['DistanceToPosition']))

View file

@ -63,17 +63,6 @@ namespace search
namespace
{
using TCompareFunction1 = function<bool(impl::PreResult1 const &, impl::PreResult1 const &)>;
using TCompareFunction2 = function<bool(impl::PreResult2 const &, impl::PreResult2 const &)>;
TCompareFunction1 const g_arrCompare1[] = {
&impl::PreResult1::LessPriority, &impl::PreResult1::LessRank,
};
TCompareFunction2 const g_arrCompare2[] = {
&impl::PreResult2::LessDistance, &impl::PreResult2::LessRank,
};
/// This indexes should match the initialization routine below.
int const g_arrLang1[] = {0, 1, 2, 2, 3};
int const g_arrLang2[] = {0, 0, 0, 1, 0};
@ -193,18 +182,9 @@ Query::Query(Index & index, CategoriesHolder const & categories, vector<Suggest>
, m_mode(Mode::Everywhere)
, m_worldSearch(true)
, m_suggestsEnabled(true)
, m_viewportSearch(false)
, m_keepHouseNumberInQuery(false)
{
// Results queue's initialization.
static_assert(kQueuesCount == ARRAY_SIZE(g_arrCompare1), "");
static_assert(kQueuesCount == ARRAY_SIZE(g_arrCompare2), "");
for (size_t i = 0; i < kQueuesCount; ++i)
{
m_results[i] = TQueue(kPreResultsCount, TQueueCompare(g_arrCompare1[i]));
m_results[i].reserve(kPreResultsCount);
}
// Initialize keywords scorer.
// Note! This order should match the indexes arrays above.
vector<vector<int8_t> > langPriorities(4);
@ -358,29 +338,13 @@ void Query::Init(bool viewportSearch)
m_streetID.clear();
#endif
ClearQueues();
if (viewportSearch)
{
// Special case to change comparator in viewport search
// (more uniform results distribution on the map).
m_queuesCount = 1;
m_results[0] =
TQueue(kPreResultsCount, TQueueCompare(&impl::PreResult1::LessPointsForViewport));
}
else
{
m_queuesCount = kQueuesCount;
m_results[DISTANCE_TO_PIVOT] =
TQueue(kPreResultsCount, TQueueCompare(g_arrCompare1[DISTANCE_TO_PIVOT]));
}
m_viewportSearch = viewportSearch;
ClearResults();
}
void Query::ClearQueues()
void Query::ClearResults()
{
for (size_t i = 0; i < kQueuesCount; ++i)
m_results[i].clear();
m_results.clear();
}
int Query::GetCategoryLocales(int8_t (&arr) [3]) const
@ -637,7 +601,7 @@ class PreResult2Maker
auto const & position = m_params.m_position;
info.m_distanceToViewport = viewport.IsEmptyInterior()
? v2::PreRankingInfo::kMaxDistMeters
? v2::RankingInfo::kMaxDistMeters
: feature::GetMinDistanceMeters(ft, viewport.Center());
info.m_distanceToPosition = feature::GetMinDistanceMeters(ft, position);
@ -768,11 +732,47 @@ void Query::MakePreResult2(v2::Geocoder::Params const & params, vector<T> & cont
using TPreResultSet = set<impl::PreResult1, LessFeatureID>;
TPreResultSet theSet;
for (size_t i = 0; i < m_queuesCount; ++i)
vector<impl::PreResult1> results;
results.reserve(m_results.size());
for (auto const & p : m_results)
results.emplace_back(p.second);
sort(results.begin(), results.end(), &impl::PreResult1::LessPriority);
if (kPreResultsCount != 0 && results.size() > kPreResultsCount)
{
theSet.insert(m_results[i].begin(), m_results[i].end());
m_results[i].clear();
// Priority is some kind of distance from the viewport or
// position, therefore if we have a bunch of results with the same
// priority, we have no idea here which results are relevant. To
// prevent bias from previous search routines (like sorting by
// feature id) this code randomly selects tail of the
// sorted-by-priority list of pre-results.
double const lastPriority = results[kPreResultsCount - 1].GetPriority();
auto b = results.begin() + kPreResultsCount - 1;
for (; b != results.begin() && b->GetPriority() == lastPriority; --b)
;
if (b->GetPriority() != lastPriority)
++b;
auto e = results.begin() + kPreResultsCount;
for (; e != results.end() && e->GetPriority() == lastPriority; ++e)
;
// TODO (@y, @m, @vng): this method is deprecated, need to rewrite
// it.
random_shuffle(b, e);
}
theSet.insert(results.begin(), results.begin() + min(results.size(), kPreResultsCount));
if (!m_viewportSearch)
{
size_t n = min(results.size(), kPreResultsCount);
nth_element(results.begin(), results.begin() + n, results.end(), &impl::PreResult1::LessRank);
theSet.insert(results.begin(), results.begin() + n);
}
ClearResults();
// Makes PreResult2 vector.
impl::PreResult2Maker maker(*this, params);
@ -957,17 +957,9 @@ void Query::ProcessSuggestions(vector<T> & vec, Results & res) const
void Query::AddPreResult1(MwmSet::MwmId const & mwmId, uint32_t featureId, double priority,
v2::PreRankingInfo const & info, ViewportID viewportId /* = DEFAULT_V */)
{
impl::PreResult1 res(FeatureID(mwmId, featureId), priority, viewportId, info);
for (size_t i = 0; i < m_queuesCount; ++i)
{
// here can be the duplicates because of different language match (for suggest token)
if (m_results[i].end() ==
find_if(m_results[i].begin(), m_results[i].end(), EqualFeatureID(res)))
{
m_results[i].push(res);
}
}
FeatureID id(mwmId, featureId);
impl::PreResult1 res(id, priority, viewportId, info);
m_results.emplace(make_pair(id, res));
}
namespace impl

View file

@ -142,7 +142,7 @@ protected:
friend class impl::DoFindLocality;
friend class impl::HouseCompFactory;
void ClearQueues();
void ClearResults();
int GetCategoryLocales(int8_t (&arr) [3]) const;
template <class ToDo> void ForEachCategoryTypes(ToDo toDo) const;
@ -281,8 +281,8 @@ protected:
DISTANCE_TO_PIVOT, // LessDistance
FEATURE_RANK // LessRank
};
TQueue m_results[kQueuesCount];
size_t m_queuesCount;
map<FeatureID, impl::PreResult1> m_results;
bool m_viewportSearch;
bool m_keepHouseNumberInQuery;
//@}
};

View file

@ -336,12 +336,24 @@ m2::RectD GetRectAroundPoistion(m2::PointD const & position)
return MercatorBounds::RectByCenterXYAndSizeInMeters(position, kMaxPositionRadiusM);
}
double GetSquaredDistance(vector<m2::RectD> const & pivots, m2::RectD const & rect)
double Area(m2::RectD const & rect)
{
double distance = numeric_limits<double>::max();
auto const center = rect.Center();
return rect.IsValid() ? rect.SizeX() * rect.SizeY() : 0;
}
// Computes an average similarity between |rect| and |pivots|. By
// similarity between two rects we mean the ratio of the area of
// their intersection to the area of the smaller rect.
double GetSimilarity(vector<m2::RectD> const & pivots, m2::RectD const & rect)
{
double distance = 0;
for (auto const & pivot : pivots)
distance = min(distance, center.SquareLength(pivot.Center()));
{
double const area = min(Area(pivot), Area(rect));
m2::RectD p = pivot;
p.Intersect(rect);
distance += area == 0.0 ? 0.0 : Area(p) / area;
}
return distance;
}
@ -354,10 +366,9 @@ TIt OrderCountries(Geocoder::Params const & params, TIt begin, TIt end)
{
vector<m2::RectD> const pivots = {NormalizeViewport(params.m_viewport),
GetRectAroundPoistion(params.m_position)};
auto compareByDistance = [&](shared_ptr<MwmInfo> const & lhs, shared_ptr<MwmInfo> const & rhs)
auto compareBySimilarity = [&](shared_ptr<MwmInfo> const & lhs, shared_ptr<MwmInfo> const & rhs)
{
return GetSquaredDistance(pivots, lhs->m_limitRect) <
GetSquaredDistance(pivots, rhs->m_limitRect);
return GetSimilarity(pivots, lhs->m_limitRect) > GetSimilarity(pivots, rhs->m_limitRect);
};
auto intersects = [&](shared_ptr<MwmInfo> const & info) -> bool
{
@ -368,7 +379,7 @@ TIt OrderCountries(Geocoder::Params const & params, TIt begin, TIt end)
}
return false;
};
sort(begin, end, compareByDistance);
sort(begin, end, compareBySimilarity);
return stable_partition(begin, end, intersects);
}
@ -403,6 +414,8 @@ Geocoder::Geocoder(Index & index, storage::CountryInfoGetter const & infoGetter)
, m_infoGetter(infoGetter)
, m_numTokens(0)
, m_model(SearchModel::Instance())
, m_viewportFeatures(index)
, m_positionFeatures(index)
, m_streets(nullptr)
, m_villages(nullptr)
, m_filter(nullptr)
@ -615,7 +628,7 @@ void Geocoder::GoImpl(vector<shared_ptr<MwmInfo>> & infos, bool inViewport)
}
// Fill results ranks, as they were missed.
FillResultRanks();
FillMissingFieldsInResults();
}
void Geocoder::ClearCaches()
@ -625,6 +638,8 @@ void Geocoder::ClearCaches()
m_matchersCache.clear();
m_streetsCache.clear();
m_villages.reset();
m_viewportFeatures.ClearCaches();
m_positionFeatures.ClearCaches();
}
void Geocoder::PrepareRetrievalParams(size_t curToken, size_t endToken)
@ -1266,15 +1281,8 @@ void Geocoder::EmitResult(MwmSet::MwmId const & mwmId, uint32_t ftId, SearchMode
info.m_searchType = type;
info.m_startToken = startToken;
info.m_endToken = endToken;
if (auto const & mwmInfo = mwmId.GetInfo())
{
auto const center = mwmInfo->m_limitRect.Center();
info.m_mwmDistanceToViewport =
MercatorBounds::DistanceOnEarth(center, m_params.m_viewport.Center());
info.m_mwmDistanceToPosition = MercatorBounds::DistanceOnEarth(center, m_params.m_position);
}
// info.m_ranks will be filled at the end, for all results at once.
// Other fields will be filled at the end, for all results at once.
m_results->emplace_back(move(id), move(info));
}
@ -1295,8 +1303,11 @@ void Geocoder::EmitResult(City const & city, size_t startToken, size_t endToken)
EmitResult(city.m_countryId, city.m_featureId, city.m_type, startToken, endToken);
}
void Geocoder::FillResultRanks()
void Geocoder::FillMissingFieldsInResults()
{
m_viewportFeatures.SetPosition(m_params.m_viewport.Center(), m_params.m_scale);
m_positionFeatures.SetPosition(m_params.m_position, m_params.m_scale);
sort(m_results->begin(), m_results->end(), my::CompareBy(&TResult::first));
auto ib = m_results->begin();
@ -1320,6 +1331,8 @@ void Geocoder::FillResultRanks()
auto & info = ii->second;
info.m_rank = rankTable->Get(id.m_index);
info.m_distanceToViewport = m_viewportFeatures.GetDistanceToFeatureMeters(id);
info.m_distanceToPosition = m_positionFeatures.GetDistanceToFeatureMeters(id);
}
}
ib = ie;

View file

@ -6,6 +6,7 @@
#include "search/v2/features_layer.hpp"
#include "search/v2/features_layer_path_finder.hpp"
#include "search/v2/mwm_context.hpp"
#include "search/v2/nested_rects_cache.hpp"
#include "search/v2/pre_ranking_info.hpp"
#include "search/v2/ranking_utils.hpp"
#include "search/v2/search_model.hpp"
@ -229,8 +230,8 @@ private:
void EmitResult(Region const & region, size_t startToken, size_t endToken);
void EmitResult(City const & city, size_t startToken, size_t endToken);
// Computes rank for all results in |m_results|.
void FillResultRanks();
// Computes missing fields for all results in |m_results|.
void FillMissingFieldsInResults();
// Tries to match unclassified objects from lower layers, like
// parks, forests, lakes, rivers, etc. This method finds all
@ -303,6 +304,9 @@ private:
};
map<MwmSet::MwmId, vector<FeaturesInRect>> m_geometryFeatures;
NestedRectsCache m_viewportFeatures;
NestedRectsCache m_positionFeatures;
// Cache of posting lists for each token in the query. TODO (@y,
// @m, @vng): consider to update this cache lazily, as user inputs
// tokens one-by-one.

View file

@ -0,0 +1,104 @@
#include "search/v2/nested_rects_cache.hpp"
#include "search/v2/ranking_info.hpp"
#include "indexer/index.hpp"
#include "geometry/mercator.hpp"
#include "geometry/rect2d.hpp"
#include "base/assert.hpp"
#include "base/stl_add.hpp"
#include "std/algorithm.hpp"
namespace search
{
namespace v2
{
namespace
{
double const kPositionToleranceMeters = 30.0;
} // namespace
// Binds the cache to |index|. The cache starts out invalid (no
// buckets are filled) with a zero scale and a (0, 0) position; the
// first SetPosition() call populates it.
NestedRectsCache::NestedRectsCache(Index & index)
  : m_index(index)
  , m_scale(0)
  , m_position(0, 0)
  , m_valid(false)
{
}
// Re-centers the cache at |position| for the given |scale|.
//
// The buckets are rebuilt unless the cache is already valid, |scale|
// is unchanged and |position| is closer than kPositionToleranceMeters
// to the position the cache was built for.
void NestedRectsCache::SetPosition(m2::PointD const & position, int scale)
{
  double const shiftMeters = MercatorBounds::DistanceOnEarth(position, m_position);
  bool const upToDate = m_valid && scale == m_scale && shiftMeters < kPositionToleranceMeters;
  if (!upToDate)
  {
    m_position = position;
    m_scale = scale;
    UpdateCaches();
  }
}
// Returns a bucketed estimate of the distance from the cached
// position to the feature |id|:
// * RankingInfo::kMaxDistMeters when the cache is not valid;
// * GetRadiusMeters() of the first bucket that contains |id|;
// * otherwise, when the mwm info of |id| is available, the larger of
//   the distance to the center of the mwm limit rect and
//   GetRadiusMeters(RECT_SCALE_COUNT);
// * RankingInfo::kMaxDistMeters when the mwm info is not available.
double NestedRectsCache::GetDistanceToFeatureMeters(FeatureID const & id) const
{
  if (!m_valid)
    return RankingInfo::kMaxDistMeters;

  // Buckets are kept sorted by UpdateCaches(), so binary search is valid here.
  for (int i = 0; i != RECT_SCALE_COUNT; ++i)
  {
    auto const & features = m_features[i];
    if (binary_search(features.begin(), features.end(), id))
      return GetRadiusMeters(static_cast<RectScale>(i));
  }

  // The feature is in none of the buckets: fall back to the mwm it belongs to.
  auto const & info = id.m_mwmId.GetInfo();
  if (!info)
    return RankingInfo::kMaxDistMeters;

  double const toMwmCenterMeters =
      MercatorBounds::DistanceOnEarth(info->m_limitRect.Center(), m_position);
  return max(toMwmCenterMeters, GetRadiusMeters(RECT_SCALE_COUNT));
}
void NestedRectsCache::ClearCaches()
{
for (int scale = 0; scale != RECT_SCALE_COUNT; ++scale)
{
m_features[scale].clear();
m_features[scale].shrink_to_fit();
}
m_valid = false;
}
// static
// Returns the size in meters associated with the bucket |scale|.
// RECT_SCALE_COUNT is not a real bucket; its value is used by
// GetDistanceToFeatureMeters() as the lower bound for features that
// were not found in any bucket.
double NestedRectsCache::GetRadiusMeters(RectScale scale)
{
  // No |default| label on purpose: the compiler can then warn when a
  // new RectScale enumerator is added but not handled here.
  switch (scale)
  {
  case RECT_SCALE_TINY: return 100.0;
  case RECT_SCALE_SMALL: return 300.0;
  case RECT_SCALE_MEDIUM: return 1000.0;
  case RECT_SCALE_LARGE: return 2500.0;
  case RECT_SCALE_COUNT: return 5000.0;
  }
  // Reached only for a value that is not a RectScale enumerator (e.g.
  // produced by a bad static_cast). Flowing off the end of a non-void
  // function is undefined behaviour, so treat such a value like the
  // outermost scale.
  return 5000.0;
}
void NestedRectsCache::UpdateCaches()
{
for (int scale = 0; scale != RECT_SCALE_COUNT; ++scale)
{
auto & features = m_features[scale];
features.clear();
m2::RectD const rect = MercatorBounds::RectByCenterXYAndSizeInMeters(
m_position, GetRadiusMeters(static_cast<RectScale>(scale)));
auto addId = MakeBackInsertFunctor(features);
m_index.ForEachFeatureIDInRect(addId, rect, m_scale);
sort(features.begin(), features.end());
}
m_valid = true;
}
} // namespace v2
} // namespace search

View file

@ -0,0 +1,48 @@
#pragma once
#include "indexer/feature_decl.hpp"
#include "geometry/point2d.hpp"
class Index;
namespace search
{
namespace v2
{
// Caches ids of features located in several nested rects built around
// a single point, and uses them to give a coarse, bucketed estimate
// of the distance from that point to a feature.
class NestedRectsCache
{
public:
// Creates an empty (invalid) cache that reads features from |index|.
explicit NestedRectsCache(Index & index);
// Sets the point and the scale the cache is built for. The cache is
// rebuilt unless it is already valid, |scale| did not change and
// |position| stays within a small tolerance of the previous one.
void SetPosition(m2::PointD const & position, int scale);
// Returns the size (see GetRadiusMeters()) of the first bucket that
// contains |id|, or a fallback value when the cache is invalid or the
// feature is in none of the buckets.
double GetDistanceToFeatureMeters(FeatureID const & id) const;
// Drops all cached feature ids and invalidates the cache.
void ClearCaches();
private:
// Buckets, listed from the smallest size to the largest.
// RECT_SCALE_COUNT is the number of buckets, not a bucket itself.
enum RectScale
{
RECT_SCALE_TINY,
RECT_SCALE_SMALL,
RECT_SCALE_MEDIUM,
RECT_SCALE_LARGE,
RECT_SCALE_COUNT
};
// Returns the size in meters associated with |scale|.
static double GetRadiusMeters(RectScale scale);
// Refills and sorts every bucket for the current position and scale.
void UpdateCaches();
Index & m_index;
// Scale passed to the index when features are collected.
int m_scale;
// Point the buckets are built around.
m2::PointD m_position;
// True when the buckets correspond to |m_position| and |m_scale|.
bool m_valid;
// Sorted lists of feature ids, one list per bucket.
vector<FeatureID> m_features[RECT_SCALE_COUNT];
};
} // namespace v2
} // namespace search

View file

@ -1,10 +0,0 @@
#include "search/v2/pre_ranking_info.hpp"
namespace search
{
namespace v2
{
// static
double const PreRankingInfo::kMaxDistMeters = 1e9;
} // namespace v2
} // namespace search

View file

@ -10,13 +10,13 @@ namespace v2
{
struct PreRankingInfo
{
static double const kMaxDistMeters;
// An abstract distance from the feature to the viewport.
// Measurement units do not matter here.
double m_distanceToViewport = 0;
// Distance from the mwm center to the current viewport's center.
double m_mwmDistanceToViewport = kMaxDistMeters;
// Distance from the feature to the current user's position.
double m_mwmDistanceToPosition = kMaxDistMeters;
// An abstract distance from the feature to the user's position.
// Measurement units do not matter here.
double m_distanceToPosition = 0;
// Tokens [m_startToken, m_endToken) match to the feature name or
// house number.

View file

@ -7,6 +7,25 @@ namespace search
{
namespace v2
{
namespace
{
double const kDistanceToViewport = 1.850;
double const kDistanceToPosition = 85.898;
double const kMinDistance = 6.908;
double const kRank = 78.441;
double const kNameScore = 1.0;
double const kNameCoverage = 0.0;
double const kPositionInViewport = 0.0;
// Maps |distance| to exp(-distance * 1000 / RankingInfo::kMaxDistMeters),
// so that a zero distance gives 1 and the value decreases as the
// distance grows.
double TransformDistance(double distance)
{
  double const scaled = distance * 1000 / RankingInfo::kMaxDistMeters;
  return exp(-scaled);
}
} // namespace
// static
double const RankingInfo::kMaxDistMeters = 1e9;
// static
void RankingInfo::PrintCSVHeader(ostream & os)
{
@ -51,17 +70,30 @@ double RankingInfo::GetLinearModelRank() const
// this in mind when you're going to change scoring_model.py or this
// code. We're working on automatic rank calculation code generator
// integrated in the build system.
static double const kCoeffs[] = {0.98369469, 0.40219458, 0.97463078, 0.21027244, 0.07368054};
double const minDistance =
exp(-min(m_distanceToViewport, m_distanceToPosition) / PreRankingInfo::kMaxDistMeters);
double const distanceToViewport = TransformDistance(m_distanceToViewport);
double const distanceToPosition = TransformDistance(m_distanceToPosition);
double const minDistance = min(distanceToViewport, distanceToPosition);
double const rank = static_cast<double>(m_rank) / numeric_limits<uint8_t>::max();
double const nameScore = static_cast<double>(m_nameScore) / NAME_SCORE_COUNT;
double const nameScore = static_cast<double>(m_nameScore) / NAME_SCORE_FULL_MATCH;
double const nameCoverage = m_nameCoverage;
double const searchType = static_cast<double>(m_searchType) / SearchModel::SEARCH_TYPE_COUNT;
double const positionInViewport = static_cast<double>(m_positionInViewport);
return kCoeffs[0] * minDistance + kCoeffs[1] * rank + kCoeffs[2] * nameScore +
kCoeffs[3] * nameCoverage + kCoeffs[4] * searchType;
double searchType;
switch (m_searchType)
{
case SearchModel::SEARCH_TYPE_POI:
case SearchModel::SEARCH_TYPE_BUILDING:
searchType = 0;
break;
default:
searchType = m_searchType - 1;
break;
}
searchType = searchType / (SearchModel::SEARCH_TYPE_COUNTRY - 1);
return kDistanceToViewport * distanceToViewport + kDistanceToPosition * distanceToPosition +
kMinDistance * minDistance + kRank * rank + kNameScore * nameScore +
kNameCoverage * nameCoverage + kPositionInViewport * positionInViewport;
}
} // namespace v2
} // namespace search

View file

@ -13,11 +13,13 @@ namespace v2
{
struct RankingInfo
{
static double const kMaxDistMeters;
// Distance from the feature to the current viewport's center.
double m_distanceToViewport = PreRankingInfo::kMaxDistMeters;
double m_distanceToViewport = kMaxDistMeters;
// Distance from the feature to the current user's position.
double m_distanceToPosition = PreRankingInfo::kMaxDistMeters;
double m_distanceToPosition = kMaxDistMeters;
// Rank of the feature.
uint8_t m_rank = 0;

View file

@ -79,9 +79,9 @@ void SearchQueryV2::AddPreResults1(Geocoder::TResultList & results, bool viewpor
auto const & id = result.first;
auto const & info = result.second;
if (viewportSearch)
AddPreResult1(id.m_mwmId, id.m_index, info.m_mwmDistanceToViewport /* priority */, info);
AddPreResult1(id.m_mwmId, id.m_index, info.m_distanceToViewport /* priority */, info);
else
AddPreResult1(id.m_mwmId, id.m_index, info.m_mwmDistanceToPosition /* priority */, info);
AddPreResult1(id.m_mwmId, id.m_index, info.m_distanceToPosition /* priority */, info);
}
}
} // namespace v2