[search] Tracing the provenance of results.

This commit is contained in:
Maxim Pimenov 2019-02-04 19:25:12 +03:00 committed by Tatiana Yan
parent 98453f92cf
commit a39b1257c8
11 changed files with 161 additions and 19 deletions

View file

@ -312,6 +312,22 @@ size_t OrderCountries(boost::optional<m2::PointD> const & position, m2::RectD co
auto const sep = stable_partition(infos.begin(), infos.end(), intersects);
return distance(infos.begin(), sep);
}
void TraceEntrance(shared_ptr<Tracer> tracer, Tracer::Branch branch)
{
if (tracer != nullptr)
tracer->CallMethod(branch);
}
void TraceExit(shared_ptr<Tracer> tracer, Tracer::Branch branch)
{
if (tracer != nullptr)
tracer->LeaveMethod(branch);
}
// Intended for the first lines of a Geocoder method: calls TraceEntrance() for
// Tracer::Branch::|branch| on m_params.m_tracer and hands SCOPE_GUARD a lambda
// that calls TraceExit() for the same branch (presumably run when the enclosing
// scope is left -- confirm against the SCOPE_GUARD definition).
// |branch| is the unqualified name of a Tracer::Branch enumerator.
// Note: expands to two statements and declares a local named |tracerGuard|.
#define TRACE(branch) \
TraceEntrance(m_params.m_tracer, Tracer::Branch::branch); \
SCOPE_GUARD(tracerGuard, [&] { TraceExit(m_params.m_tracer, Tracer::Branch::branch); });
} // namespace
// Geocoder::Geocoder ------------------------------------------------------------------------------
@ -394,6 +410,8 @@ void Geocoder::GoEverywhere()
});
#endif
TRACE(GoEverywhere);
if (m_params.GetNumTokens() == 0)
return;
@ -405,6 +423,8 @@ void Geocoder::GoEverywhere()
void Geocoder::GoInViewport()
{
TRACE(GoInViewport);
if (m_params.GetNumTokens() == 0)
return;
@ -769,6 +789,8 @@ void Geocoder::ForEachCountry(vector<shared_ptr<MwmInfo>> const & infos, Fn && f
void Geocoder::MatchCategories(BaseContext & ctx, bool aroundPivot)
{
TRACE(MatchCategories);
auto features = ctx.m_features[0];
if (aroundPivot)
@ -796,6 +818,8 @@ void Geocoder::MatchCategories(BaseContext & ctx, bool aroundPivot)
void Geocoder::MatchRegions(BaseContext & ctx, Region::Type type)
{
TRACE(MatchRegions);
switch (type)
{
case Region::TYPE_STATE:
@ -870,6 +894,8 @@ void Geocoder::MatchRegions(BaseContext & ctx, Region::Type type)
void Geocoder::MatchCities(BaseContext & ctx)
{
TRACE(MatchCities);
ASSERT(!ctx.m_city, ());
// Localities are ordered by (m_startToken, m_endToken) pairs.
@ -917,6 +943,8 @@ void Geocoder::MatchCities(BaseContext & ctx)
void Geocoder::MatchAroundPivot(BaseContext & ctx)
{
TRACE(MatchAroundPivot);
auto const features = RetrieveGeometryFeatures(*m_context, m_params.m_pivot, RECT_ID_PIVOT);
ViewportFilter filter(features, m_preRanker.Limit() /* threshold */);
LimitedSearch(ctx, filter);
@ -944,6 +972,8 @@ void Geocoder::LimitedSearch(BaseContext & ctx, FeaturesFilter const & filter)
template <typename Fn>
void Geocoder::WithPostcodes(BaseContext & ctx, Fn && fn)
{
TRACE(WithPostcodes);
size_t const maxPostcodeTokens = GetMaxNumTokensInPostcode();
for (size_t startToken = 0; startToken != ctx.m_numTokens; ++startToken)
@ -982,6 +1012,8 @@ void Geocoder::WithPostcodes(BaseContext & ctx, Fn && fn)
void Geocoder::GreedilyMatchStreets(BaseContext & ctx)
{
TRACE(GreedilyMatchStreets);
vector<StreetsMatcher::Prediction> predictions;
StreetsMatcher::Go(ctx, *m_filter, m_params, predictions);
@ -1019,6 +1051,8 @@ void Geocoder::CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx,
void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
{
TRACE(MatchPOIsAndBuildings);
BailIfCancelled();
auto & layers = ctx.m_layers;
@ -1340,7 +1374,10 @@ void Geocoder::EmitResult(BaseContext & ctx, MwmSet::MwmId const & mwmId, uint32
info.m_allTokensUsed = allTokensUsed;
m_preRanker.Emplace(id, info);
if (m_params.m_tracer == nullptr)
m_preRanker.Emplace(id, info, Tracer::Provenance{});
else
m_preRanker.Emplace(id, info, m_params.m_tracer->GetProvenance());
// ++ctx.m_numEmitted;
}
@ -1362,6 +1399,8 @@ void Geocoder::EmitResult(BaseContext & ctx, City const & city, TokenRange const
void Geocoder::MatchUnclassified(BaseContext & ctx, size_t curToken)
{
TRACE(MatchUnclassified);
ASSERT(ctx.m_layers.empty(), ());
// We need to match all unused tokens to UNCLASSIFIED features,
@ -1443,3 +1482,5 @@ bool Geocoder::GetTypeInGeocoding(BaseContext const & ctx, uint32_t featureId, M
return false;
}
} // namespace search
#undef TRACE

View file

@ -72,6 +72,13 @@ PreRankerResult::PreRankerResult(FeatureID const & id, PreRankingInfo const & in
ASSERT(m_id.IsValid(), ());
}
// Creates a pre-ranker result for the feature |id| carrying its pre-ranking
// |info| and a copy of |provenance|. |id| is asserted to be valid.
PreRankerResult::PreRankerResult(FeatureID const & id, PreRankingInfo const & info,
                                 vector<Tracer::Branch> const & provenance)
  : m_id(id)
  , m_info(info)
  , m_provenance(provenance)
{
  ASSERT(m_id.IsValid(), ());
}
// static
bool PreRankerResult::LessRankAndPopularity(PreRankerResult const & r1, PreRankerResult const & r2)
{
@ -259,8 +266,12 @@ string DebugPrint(RankerResult const & r)
stringstream ss;
ss << "RankerResult ["
<< "Name: " << r.GetName()
<< "; Type: " << r.GetBestType()
<< "; " << DebugPrint(r.GetRankingInfo())
<< "; Type: " << r.GetBestType();
if (!r.GetProvenance().empty())
ss << "; Provenance: " << ::DebugPrint(r.GetProvenance());
ss << "; " << DebugPrint(r.GetRankingInfo())
<< "; Linear model rank: " << r.GetLinearModelRank()
<< "]";
return ss.str();

View file

@ -11,6 +11,7 @@
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
class FeatureType;
@ -33,6 +34,9 @@ class PreRankerResult
public:
PreRankerResult(FeatureID const & id, PreRankingInfo const & info);
PreRankerResult(FeatureID const & id, PreRankingInfo const & info,
std::vector<Tracer::Branch> const & provenance);
static bool LessRankAndPopularity(PreRankerResult const & r1, PreRankerResult const & r2);
static bool LessDistance(PreRankerResult const & r1, PreRankerResult const & r2);
@ -51,12 +55,16 @@ public:
// Popularity value stored in the pre-ranking info.
uint8_t GetPopularity() const
{
  return m_info.m_popularity;
}
// Mutable access to the pre-ranking info held by this result.
PreRankingInfo & GetInfo()
{
  return m_info;
}
// Read-only access to the pre-ranking info held by this result.
PreRankingInfo const & GetInfo() const
{
  return m_info;
}
// Read-only access to the stored provenance (a sequence of Tracer::Branch values).
std::vector<Tracer::Branch> const & GetProvenance() const
{
  return m_provenance;
}
private:
friend class RankerResult;
FeatureID m_id;
PreRankingInfo m_info;
// The call path in the Geocoder that leads to this result.
std::vector<Tracer::Branch> m_provenance;
};
// Second result class. Objects are created during reading of features.
@ -107,6 +115,8 @@ public:
uint32_t GetBestType(std::vector<uint32_t> const & preferredTypes = {}) const;
// Read-only access to the stored provenance (a sequence of Tracer::Branch values).
std::vector<Tracer::Branch> const & GetProvenance() const
{
  return m_provenance;
}
private:
friend class RankerResultMaker;
@ -134,6 +144,9 @@ private:
RankingInfo m_info;
feature::EGeomType m_geomType;
Result::Metadata m_metadata;
// The call path in the Geocoder that leads to this result.
std::vector<Tracer::Branch> m_provenance;
};
void ProcessMetadata(FeatureType & ft, Result::Metadata & meta);

View file

@ -3,6 +3,7 @@
#include "search/intersection_result.hpp"
#include "search/model.hpp"
#include "search/token_range.hpp"
#include "search/tracer.hpp"
#include "indexer/feature_decl.hpp"

View file

@ -549,6 +549,8 @@ void Processor::InitGeocoder(Geocoder::Params & geocoderParams, SearchParams con
geocoderParams.m_cuisineTypes = m_cuisineTypes;
geocoderParams.m_preferredTypes = m_preferredTypes;
geocoderParams.m_tracer = searchParams.m_tracer;
if (geocoderParams.m_tracer == nullptr)
geocoderParams.m_tracer = make_shared<Tracer>();
m_geocoder.SetParams(geocoderParams);
}

View file

@ -400,6 +400,7 @@ public:
InitRankingInfo(ft, center, preRankerResult, info);
info.m_rank = NormalizeRank(info.m_rank, info.m_type, center, country);
r.SetRankingInfo(move(info));
r.m_provenance = move(preRankerResult.GetProvenance());
return r;
}
@ -494,6 +495,7 @@ Result Ranker::MakeResult(RankerResult const & rankerResult, bool needAddress,
HighlightResult(m_params.m_tokens, m_params.m_prefix, res);
res.SetRankingInfo(rankerResult.GetRankingInfo());
res.SetProvenance(rankerResult.GetProvenance());
return res;
}

View file

@ -17,6 +17,16 @@ struct RankingInfo
{
static double const kMaxDistMeters;
static void PrintCSVHeader(std::ostream & os);
void ToCSV(std::ostream & os) const;
// Returns rank calculated by a linear model. Large values
// correspond to important features.
double GetLinearModelRank() const;
size_t GetErrorsMade() const;
// Distance from the feature to the pivot point.
double m_distanceToPivot = kMaxDistMeters;
@ -53,16 +63,6 @@ struct RankingInfo
// True iff the feature has a name.
bool m_hasName = false;
static void PrintCSVHeader(std::ostream & os);
void ToCSV(std::ostream & os) const;
// Returns rank calculated by a linear model. Large values
// correspond to important features.
double GetLinearModelRank() const;
size_t GetErrorsMade() const;
};
std::string DebugPrint(RankingInfo const & info);

View file

@ -166,7 +166,9 @@ string DebugPrint(Result const & result)
os << "Result [";
os << "name: " << result.GetString() << ", ";
os << "type: " << readableType << ", ";
os << "info: " << DebugPrint(result.GetRankingInfo());
os << "info: " << DebugPrint(result.GetRankingInfo()) << ", ";
if (!result.GetProvenance().empty())
os << "provenance: " << ::DebugPrint(result.GetProvenance());
os << "]";
return os.str();
}

View file

@ -3,6 +3,7 @@
#include "search/bookmarks/results.hpp"
#include "search/hotels_classifier.hpp"
#include "search/ranking_info.hpp"
#include "search/tracer.hpp"
#include "indexer/feature_decl.hpp"
@ -130,12 +131,20 @@ public:
// Read-only access to the ranking info stored in this result.
RankingInfo const & GetRankingInfo() const
{
  return m_info;
}
// Read-only access to the stored provenance (a sequence of Tracer::Branch values).
std::vector<Tracer::Branch> const & GetProvenance() const
{
  return m_provenance;
}
// Replaces the stored ranking info with |info|. The argument is perfectly
// forwarded into the assignment, so an rvalue is passed on as an rvalue.
template <typename T>
void SetRankingInfo(T && info)
{
  m_info = std::forward<T>(info);
}
// Replaces the stored provenance with |prov|. The argument is perfectly
// forwarded into the assignment, so an rvalue is passed on as an rvalue.
template <typename T>
void SetProvenance(T && prov)
{
  m_provenance = std::forward<T>(prov);
}
// Returns a representation of this result that is sent to the
// statistics servers and later used to measure the quality of our
// search engine.
@ -158,7 +167,10 @@ private:
// a search query. -1 if undefined.
int32_t m_positionInResults = -1;
std::vector<Tracer::Branch> m_provenance;
public:
// Careful when moving: the order of destructors is important.
Metadata m_metadata;
};

View file

@ -1,10 +1,12 @@
#include "search/tracer.hpp"
#include "base/assert.hpp"
#include "base/stl_helpers.hpp"
#include <cstddef>
#include <iomanip>
#include <sstream>
#include <string>
using namespace std;
@ -32,6 +34,27 @@ Tracer::Parse::Parse(vector<pair<TokenType, TokenRange>> const & ranges, bool ca
m_ranges[kv.first] = kv.second;
}
// Tracer ------------------------------------------------------------------------------------------
// Returns a copy of the recorded parses after passing it through
// base::SortUnique. The parses stored in the tracer are not modified.
vector<Tracer::Parse> Tracer::GetUniqueParses() const
{
  vector<Parse> uniqueParses(m_parses);
  base::SortUnique(uniqueParses);
  return uniqueParses;
}
// Appends |branch| to the end of the current call path.
void Tracer::CallMethod(Branch branch)
{
  m_provenance.push_back(branch);
}
// Removes the last entry from the current call path.
// Both preconditions are enforced with CHECKs: the path must be non-empty and
// its last entry must be equal to |branch|.
void Tracer::LeaveMethod(Branch branch)
{
CHECK(!m_provenance.empty(), ());
CHECK_EQUAL(m_provenance.back(), branch, ());
m_provenance.pop_back();
}
// Functions ---------------------------------------------------------------------------------------
string DebugPrint(Tracer::Parse const & parse)
{
using TokenType = Tracer::Parse::TokenType;
@ -59,11 +82,21 @@ string DebugPrint(Tracer::Parse const & parse)
return os.str();
}
// Tracer ------------------------------------------------------------------------------------------
vector<Tracer::Parse> Tracer::GetUniqueParses() const
// Returns the name of the |branch| enumerator as a string.
// The switch lists every Tracer::Branch value and has no default case;
// control reaching the end of the switch is marked with UNREACHABLE().
string DebugPrint(Tracer::Branch branch)
{
auto parses = m_parses;
base::SortUnique(parses);
return parses;
switch (branch)
{
case Tracer::Branch::GoEverywhere: return "GoEverywhere";
case Tracer::Branch::GoInViewport: return "GoInViewport";
case Tracer::Branch::MatchCategories: return "MatchCategories";
case Tracer::Branch::MatchRegions: return "MatchRegions";
case Tracer::Branch::MatchCities: return "MatchCities";
case Tracer::Branch::MatchAroundPivot: return "MatchAroundPivot";
case Tracer::Branch::MatchPOIsAndBuildings: return "MatchPOIsAndBuildings";
case Tracer::Branch::GreedilyMatchStreets: return "GreedilyMatchStreets";
case Tracer::Branch::WithPostcodes: return "WithPostcodes";
case Tracer::Branch::MatchUnclassified: return "MatchUnclassified";
}
UNREACHABLE();
}
} // namespace search

View file

@ -13,6 +13,23 @@ namespace search
class Tracer
{
public:
// Identifies the Geocoder method that is being traced. Each enumerator is
// named after the Geocoder method of the same name.
enum class Branch
{
  GoEverywhere,           // Geocoder::GoEverywhere
  GoInViewport,           // Geocoder::GoInViewport
  MatchCategories,        // Geocoder::MatchCategories
  MatchRegions,           // Geocoder::MatchRegions
  MatchCities,            // Geocoder::MatchCities
  MatchAroundPivot,       // Geocoder::MatchAroundPivot
  MatchPOIsAndBuildings,  // Geocoder::MatchPOIsAndBuildings
  GreedilyMatchStreets,   // Geocoder::GreedilyMatchStreets
  WithPostcodes,          // Geocoder::WithPostcodes
  MatchUnclassified       // Geocoder::MatchUnclassified
};
using Provenance = std::vector<Branch>;
struct Parse
{
using TokenType = BaseContext::TokenType;
@ -45,9 +62,17 @@ public:
std::vector<Parse> GetUniqueParses() const;
void CallMethod(Branch branch);
void LeaveMethod(Branch branch);
// Read-only access to the call path recorded so far via CallMethod() and
// LeaveMethod().
Provenance const & GetProvenance() const
{
  return m_provenance;
}
private:
std::vector<Parse> m_parses;
// Traces the Geocoder call tree that leads to emitting the current result.
Provenance m_provenance;
};
std::string DebugPrint(Tracer::Parse const & parse);
std::string DebugPrint(Tracer::Branch branch);
} // namespace search