From a39b1257c883eced6cb270587b2d2b4047655ac8 Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Mon, 4 Feb 2019 19:25:12 +0300 Subject: [PATCH] [search] Tracing the provenance of results. --- search/geocoder.cpp | 43 +++++++++++++++++++++++++++++++++- search/intermediate_result.cpp | 15 ++++++++++-- search/intermediate_result.hpp | 13 ++++++++++ search/pre_ranking_info.hpp | 1 + search/processor.cpp | 2 ++ search/ranker.cpp | 2 ++ search/ranking_info.hpp | 20 ++++++++-------- search/result.cpp | 4 +++- search/result.hpp | 12 ++++++++++ search/tracer.cpp | 43 ++++++++++++++++++++++++++++++---- search/tracer.hpp | 25 ++++++++++++++++++++ 11 files changed, 161 insertions(+), 19 deletions(-) diff --git a/search/geocoder.cpp b/search/geocoder.cpp index eae9eb673c..e22b3367f0 100644 --- a/search/geocoder.cpp +++ b/search/geocoder.cpp @@ -312,6 +312,22 @@ size_t OrderCountries(boost::optional const & position, m2::RectD co auto const sep = stable_partition(infos.begin(), infos.end(), intersects); return distance(infos.begin(), sep); } + +void TraceEntrance(shared_ptr<Tracer> const & tracer, Tracer::Branch branch) /* no-op if null */ +{ + if (tracer != nullptr) + tracer->CallMethod(branch); +} + +void TraceExit(shared_ptr<Tracer> const & tracer, Tracer::Branch branch) /* no-op if null */ +{ + if (tracer != nullptr) + tracer->LeaveMethod(branch); +} + +#define TRACE(branch) \ + TraceEntrance(m_params.m_tracer, Tracer::Branch::branch); \ + SCOPE_GUARD(tracerGuard, [&] { TraceExit(m_params.m_tracer, Tracer::Branch::branch); }); } // namespace // Geocoder::Geocoder ------------------------------------------------------------------------------ @@ -394,6 +410,8 @@ void Geocoder::GoEverywhere() }); #endif + TRACE(GoEverywhere); + if (m_params.GetNumTokens() == 0) return; @@ -405,6 +423,8 @@ void Geocoder::GoEverywhere() void Geocoder::GoInViewport() { + TRACE(GoInViewport); + if (m_params.GetNumTokens() == 0) return; @@ -769,6 +789,8 @@ void Geocoder::ForEachCountry(vector> const & infos, Fn && f void Geocoder::MatchCategories(BaseContext & ctx, 
bool aroundPivot) { + TRACE(MatchCategories); + auto features = ctx.m_features[0]; if (aroundPivot) @@ -796,6 +818,8 @@ void Geocoder::MatchCategories(BaseContext & ctx, bool aroundPivot) void Geocoder::MatchRegions(BaseContext & ctx, Region::Type type) { + TRACE(MatchRegions); + switch (type) { case Region::TYPE_STATE: @@ -870,6 +894,8 @@ void Geocoder::MatchRegions(BaseContext & ctx, Region::Type type) void Geocoder::MatchCities(BaseContext & ctx) { + TRACE(MatchCities); + ASSERT(!ctx.m_city, ()); // Localities are ordered my (m_startToken, m_endToken) pairs. @@ -917,6 +943,8 @@ void Geocoder::MatchCities(BaseContext & ctx) void Geocoder::MatchAroundPivot(BaseContext & ctx) { + TRACE(MatchAroundPivot); + auto const features = RetrieveGeometryFeatures(*m_context, m_params.m_pivot, RECT_ID_PIVOT); ViewportFilter filter(features, m_preRanker.Limit() /* threshold */); LimitedSearch(ctx, filter); @@ -944,6 +972,8 @@ void Geocoder::LimitedSearch(BaseContext & ctx, FeaturesFilter const & filter) template void Geocoder::WithPostcodes(BaseContext & ctx, Fn && fn) { + TRACE(WithPostcodes); + size_t const maxPostcodeTokens = GetMaxNumTokensInPostcode(); for (size_t startToken = 0; startToken != ctx.m_numTokens; ++startToken) @@ -982,6 +1012,8 @@ void Geocoder::WithPostcodes(BaseContext & ctx, Fn && fn) void Geocoder::GreedilyMatchStreets(BaseContext & ctx) { + TRACE(GreedilyMatchStreets); + vector predictions; StreetsMatcher::Go(ctx, *m_filter, m_params, predictions); @@ -1019,6 +1051,8 @@ void Geocoder::CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx, void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) { + TRACE(MatchPOIsAndBuildings); + BailIfCancelled(); auto & layers = ctx.m_layers; @@ -1340,7 +1374,10 @@ void Geocoder::EmitResult(BaseContext & ctx, MwmSet::MwmId const & mwmId, uint32 info.m_allTokensUsed = allTokensUsed; - m_preRanker.Emplace(id, info); + if (m_params.m_tracer == nullptr) + m_preRanker.Emplace(id, info, 
Tracer::Provenance{}); + else + m_preRanker.Emplace(id, info, m_params.m_tracer->GetProvenance()); // ++ctx.m_numEmitted; } @@ -1362,6 +1399,8 @@ void Geocoder::EmitResult(BaseContext & ctx, City const & city, TokenRange const void Geocoder::MatchUnclassified(BaseContext & ctx, size_t curToken) { + TRACE(MatchUnclassified); + ASSERT(ctx.m_layers.empty(), ()); // We need to match all unused tokens to UNCLASSIFIED features, @@ -1443,3 +1482,5 @@ bool Geocoder::GetTypeInGeocoding(BaseContext const & ctx, uint32_t featureId, M return false; } } // namespace search + +#undef TRACE diff --git a/search/intermediate_result.cpp b/search/intermediate_result.cpp index dee62bcb5d..ce201796ad 100644 --- a/search/intermediate_result.cpp +++ b/search/intermediate_result.cpp @@ -72,6 +72,13 @@ PreRankerResult::PreRankerResult(FeatureID const & id, PreRankingInfo const & in ASSERT(m_id.IsValid(), ()); } +PreRankerResult::PreRankerResult(FeatureID const & id, PreRankingInfo const & info, + vector const & provenance) + : m_id(id), m_info(info), m_provenance(provenance) +{ + ASSERT(m_id.IsValid(), ()); +} + // static bool PreRankerResult::LessRankAndPopularity(PreRankerResult const & r1, PreRankerResult const & r2) { @@ -259,8 +266,12 @@ string DebugPrint(RankerResult const & r) stringstream ss; ss << "RankerResult [" << "Name: " << r.GetName() - << "; Type: " << r.GetBestType() - << "; " << DebugPrint(r.GetRankingInfo()) + << "; Type: " << r.GetBestType(); + + if (!r.GetProvenance().empty()) + ss << "; Provenance: " << ::DebugPrint(r.GetProvenance()); + + ss << "; " << DebugPrint(r.GetRankingInfo()) << "; Linear model rank: " << r.GetLinearModelRank() << "]"; return ss.str(); diff --git a/search/intermediate_result.hpp b/search/intermediate_result.hpp index 3068cb5497..856d9c74da 100644 --- a/search/intermediate_result.hpp +++ b/search/intermediate_result.hpp @@ -11,6 +11,7 @@ #include #include +#include #include class FeatureType; @@ -33,6 +34,9 @@ class PreRankerResult public: 
PreRankerResult(FeatureID const & id, PreRankingInfo const & info); + PreRankerResult(FeatureID const & id, PreRankingInfo const & info, + std::vector const & provenance); + static bool LessRankAndPopularity(PreRankerResult const & r1, PreRankerResult const & r2); static bool LessDistance(PreRankerResult const & r1, PreRankerResult const & r2); @@ -51,12 +55,16 @@ public: uint8_t GetPopularity() const { return m_info.m_popularity; } PreRankingInfo & GetInfo() { return m_info; } PreRankingInfo const & GetInfo() const { return m_info; } + std::vector const & GetProvenance() const { return m_provenance; } private: friend class RankerResult; FeatureID m_id; PreRankingInfo m_info; + + // The call path in the Geocoder that leads to this result. + std::vector m_provenance; }; // Second result class. Objects are created during reading of features. @@ -107,6 +115,8 @@ public: uint32_t GetBestType(std::vector const & preferredTypes = {}) const; + std::vector const & GetProvenance() const { return m_provenance; } + private: friend class RankerResultMaker; @@ -134,6 +144,9 @@ private: RankingInfo m_info; feature::EGeomType m_geomType; Result::Metadata m_metadata; + + // The call path in the Geocoder that leads to this result. 
+ std::vector m_provenance; }; void ProcessMetadata(FeatureType & ft, Result::Metadata & meta); diff --git a/search/pre_ranking_info.hpp b/search/pre_ranking_info.hpp index b19a76dd1d..cd5d6add9e 100644 --- a/search/pre_ranking_info.hpp +++ b/search/pre_ranking_info.hpp @@ -3,6 +3,7 @@ #include "search/intersection_result.hpp" #include "search/model.hpp" #include "search/token_range.hpp" +#include "search/tracer.hpp" #include "indexer/feature_decl.hpp" diff --git a/search/processor.cpp b/search/processor.cpp index 61dc41fd6c..001bb059fd 100644 --- a/search/processor.cpp +++ b/search/processor.cpp @@ -549,6 +549,8 @@ void Processor::InitGeocoder(Geocoder::Params & geocoderParams, SearchParams con geocoderParams.m_cuisineTypes = m_cuisineTypes; geocoderParams.m_preferredTypes = m_preferredTypes; geocoderParams.m_tracer = searchParams.m_tracer; + if (geocoderParams.m_tracer == nullptr) + geocoderParams.m_tracer = make_shared(); m_geocoder.SetParams(geocoderParams); } diff --git a/search/ranker.cpp b/search/ranker.cpp index abb7a00ebc..0fd446290d 100644 --- a/search/ranker.cpp +++ b/search/ranker.cpp @@ -400,6 +400,7 @@ public: InitRankingInfo(ft, center, preRankerResult, info); info.m_rank = NormalizeRank(info.m_rank, info.m_type, center, country); r.SetRankingInfo(move(info)); + r.m_provenance = preRankerResult.GetProvenance(); /* copy: the getter returns a const & */ return r; } @@ -494,6 +495,7 @@ Result Ranker::MakeResult(RankerResult const & rankerResult, bool needAddress, HighlightResult(m_params.m_tokens, m_params.m_prefix, res); res.SetRankingInfo(rankerResult.GetRankingInfo()); + res.SetProvenance(rankerResult.GetProvenance()); return res; } diff --git a/search/ranking_info.hpp b/search/ranking_info.hpp index f89755ded0..2b47b23632 100644 --- a/search/ranking_info.hpp +++ b/search/ranking_info.hpp @@ -17,6 +17,16 @@ struct RankingInfo { static double const kMaxDistMeters; + static void PrintCSVHeader(std::ostream & os); + + void ToCSV(std::ostream & os) const; + + // Returns rank calculated by 
a linear model. Large values + // correspond to important features. + double GetLinearModelRank() const; + + size_t GetErrorsMade() const; + // Distance from the feature to the pivot point. double m_distanceToPivot = kMaxDistMeters; @@ -53,16 +63,6 @@ struct RankingInfo // True iff the feature has a name. bool m_hasName = false; - - static void PrintCSVHeader(std::ostream & os); - - void ToCSV(std::ostream & os) const; - - // Returns rank calculated by a linear model. Large values - // correspond to important features. - double GetLinearModelRank() const; - - size_t GetErrorsMade() const; }; std::string DebugPrint(RankingInfo const & info); diff --git a/search/result.cpp b/search/result.cpp index e614968781..fe9f2e7506 100644 --- a/search/result.cpp +++ b/search/result.cpp @@ -166,7 +166,10 @@ string DebugPrint(Result const & result) os << "Result ["; os << "name: " << result.GetString() << ", "; os << "type: " << readableType << ", "; os << "info: " << DebugPrint(result.GetRankingInfo()); + // Emit the separator together with the field so that no dangling ", " precedes "]". + if (!result.GetProvenance().empty()) + os << ", provenance: " << ::DebugPrint(result.GetProvenance()); os << "]"; return os.str(); } diff --git a/search/result.hpp b/search/result.hpp index 5c69f92497..450417621f 100644 --- a/search/result.hpp +++ b/search/result.hpp @@ -3,6 +3,7 @@ #include "search/bookmarks/results.hpp" #include "search/hotels_classifier.hpp" #include "search/ranking_info.hpp" +#include "search/tracer.hpp" #include "indexer/feature_decl.hpp" @@ -130,12 +131,20 @@ public: RankingInfo const & GetRankingInfo() const { return m_info; } + std::vector const & GetProvenance() const { return m_provenance; } + template void SetRankingInfo(Info && info) { m_info = std::forward(info); } + template + void SetProvenance(Prov && prov) + { + m_provenance = std::forward(prov); + } + // Returns a representation of this result that is sent to the // statistics servers and later used to measure the quality of our 
// search engine. @@ -158,7 +167,10 @@ private: // a search query. -1 if undefined. int32_t m_positionInResults = -1; + std::vector m_provenance; + public: + // Careful when moving: the order of destructors is important. Metadata m_metadata; }; diff --git a/search/tracer.cpp b/search/tracer.cpp index 7b178ce949..27121a6bea 100644 --- a/search/tracer.cpp +++ b/search/tracer.cpp @@ -1,10 +1,12 @@ #include "search/tracer.hpp" +#include "base/assert.hpp" #include "base/stl_helpers.hpp" #include #include #include +#include using namespace std; @@ -32,6 +34,27 @@ Tracer::Parse::Parse(vector> const & ranges, bool ca m_ranges[kv.first] = kv.second; } +// Tracer ------------------------------------------------------------------------------------------ +vector Tracer::GetUniqueParses() const +{ + auto parses = m_parses; + base::SortUnique(parses); + return parses; +} + +void Tracer::CallMethod(Branch branch) +{ + m_provenance.emplace_back(branch); +} + +void Tracer::LeaveMethod(Branch branch) +{ + CHECK(!m_provenance.empty(), ()); + CHECK_EQUAL(m_provenance.back(), branch, ()); + m_provenance.pop_back(); +} + +// Functions --------------------------------------------------------------------------------------- string DebugPrint(Tracer::Parse const & parse) { using TokenType = Tracer::Parse::TokenType; @@ -59,11 +82,21 @@ string DebugPrint(Tracer::Parse const & parse) return os.str(); } -// Tracer ------------------------------------------------------------------------------------------ -vector Tracer::GetUniqueParses() const +string DebugPrint(Tracer::Branch branch) { - auto parses = m_parses; - base::SortUnique(parses); - return parses; + switch (branch) + { + case Tracer::Branch::GoEverywhere: return "GoEverywhere"; + case Tracer::Branch::GoInViewport: return "GoInViewport"; + case Tracer::Branch::MatchCategories: return "MatchCategories"; + case Tracer::Branch::MatchRegions: return "MatchRegions"; + case Tracer::Branch::MatchCities: return "MatchCities"; + case 
Tracer::Branch::MatchAroundPivot: return "MatchAroundPivot"; + case Tracer::Branch::MatchPOIsAndBuildings: return "MatchPOIsAndBuildings"; + case Tracer::Branch::GreedilyMatchStreets: return "GreedilyMatchStreets"; + case Tracer::Branch::WithPostcodes: return "WithPostcodes"; + case Tracer::Branch::MatchUnclassified: return "MatchUnclassified"; + } + UNREACHABLE(); } } // namespace search diff --git a/search/tracer.hpp b/search/tracer.hpp index f92e7b1037..7e62f1127b 100644 --- a/search/tracer.hpp +++ b/search/tracer.hpp @@ -13,6 +13,23 @@ namespace search class Tracer { public: + // Mimics the Geocoder methods. + enum class Branch + { + GoEverywhere, + GoInViewport, + MatchCategories, + MatchRegions, + MatchCities, + MatchAroundPivot, + MatchPOIsAndBuildings, + GreedilyMatchStreets, + WithPostcodes, + MatchUnclassified, + }; + + using Provenance = std::vector; + struct Parse { using TokenType = BaseContext::TokenType; @@ -45,9 +62,17 @@ public: std::vector GetUniqueParses() const; + void CallMethod(Branch branch); + void LeaveMethod(Branch branch); + Provenance const & GetProvenance() const { return m_provenance; } + private: std::vector m_parses; + + // Traces the Geocoder call tree that leads to emitting the current result. + Provenance m_provenance; }; std::string DebugPrint(Tracer::Parse const & parse); +std::string DebugPrint(Tracer::Branch branch); } // namespace search