From 5fb6f781efb8c528efecb639dec5dafa84e54182 Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Thu, 19 Dec 2019 20:41:59 +0300 Subject: [PATCH] [search] Show types of matched tokens in logs. --- search/geocoder.cpp | 12 ++++++------ search/geocoder.hpp | 4 ++-- search/intermediate_result.cpp | 4 ++-- search/pre_ranking_info.cpp | 4 ++-- search/pre_ranking_info.hpp | 9 +++++---- search/ranker.cpp | 7 ++++--- search/ranking_info.cpp | 29 ++++++++++++++++++++++++++++- search/ranking_info.hpp | 7 +++++++ 8 files changed, 56 insertions(+), 20 deletions(-) diff --git a/search/geocoder.cpp b/search/geocoder.cpp index 4e09844aca..8e187e7a2e 100644 --- a/search/geocoder.cpp +++ b/search/geocoder.cpp @@ -1198,7 +1198,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) if (!(layers.size() == 1 && layers[0].m_type == Model::TYPE_STREET)) return FindPaths(ctx); - // If there're only one street layer but user also entered a + // If there's only one street layer but user also entered a // postcode, we need to emit all features matching to postcode on // the given street, including the street itself. @@ -1357,7 +1357,7 @@ bool Geocoder::IsLayerSequenceSane(vector const & layers) const size_t buildingIndex = layers.size(); size_t streetIndex = layers.size(); - // Following loop returns false iff there're two different layers + // Following loop returns false iff there are two different layers // of the same search type. for (size_t i = 0; i < layers.size(); ++i) { @@ -1498,23 +1498,23 @@ void Geocoder::EmitResult(BaseContext & ctx, MwmSet::MwmId const & mwmId, uint32 // Distance and rank will be filled at the end, for all results at once. // // TODO (@y, @m): need to skip zero rank features that are too - // distant from the pivot when there're enough results close to the + // distant from the pivot when there are enough results close to the // pivot. 
PreRankingInfo info(type, tokenRange); for (auto const & layer : ctx.m_layers) - info.m_tokenRange[layer.m_type] = layer.m_tokenRange; + info.m_tokenRanges[layer.m_type] = layer.m_tokenRange; for (auto const * region : ctx.m_regions) { auto const regionType = Region::ToModelType(region->m_type); ASSERT_NOT_EQUAL(regionType, Model::TYPE_COUNT, ()); - info.m_tokenRange[regionType] = region->m_tokenRange; + info.m_tokenRanges[regionType] = region->m_tokenRange; } if (ctx.m_city) { auto const & city = *ctx.m_city; - info.m_tokenRange[Model::TYPE_CITY] = city.m_tokenRange; + info.m_tokenRanges[Model::TYPE_CITY] = city.m_tokenRange; info.m_cityId = FeatureID(city.m_countryId, city.m_featureId); } diff --git a/search/geocoder.hpp b/search/geocoder.hpp index a566c4bed1..3a194b2618 100644 --- a/search/geocoder.hpp +++ b/search/geocoder.hpp @@ -110,12 +110,12 @@ public: void GoInViewport(); // Ends geocoding and informs the following stages - // of the pipeline (PreRanker). + // of the pipeline (PreRanker and further). // This method must be called from the previous stage // of the pipeline (the Processor). // If |cancelled| is true, the reason for calling Finish must // be the cancellation of processing the search request, otherwise - // the reason must be the normal exit from GoEverywhere of GoInViewport. + // the reason must be the normal exit from GoEverywhere or GoInViewport. 
// // *NOTE* The caller assumes that a call to this method will never // result in search::CancelException even if the shutdown takes diff --git a/search/intermediate_result.cpp b/search/intermediate_result.cpp index 982019cb4a..c5acf756ca 100644 --- a/search/intermediate_result.cpp +++ b/search/intermediate_result.cpp @@ -73,7 +73,7 @@ PreRankerResult::PreRankerResult(FeatureID const & id, PreRankingInfo const & in ASSERT(m_id.IsValid(), ()); m_matchedTokensNumber = 0; - for (auto const & r : m_info.m_tokenRange) + for (auto const & r : m_info.m_tokenRanges) m_matchedTokensNumber += r.Size(); } @@ -297,7 +297,7 @@ string DebugPrint(RankerResult const & r) stringstream ss; ss << "RankerResult [" << "Name: " << r.GetName() - << "; Type: " << r.GetBestType(); + << "; Type: " << classif().GetReadableObjectName(r.GetBestType()); if (!r.GetProvenance().empty()) ss << "; Provenance: " << ::DebugPrint(r.GetProvenance()); diff --git a/search/pre_ranking_info.cpp b/search/pre_ranking_info.cpp index c42af8add3..a6b813f90c 100644 --- a/search/pre_ranking_info.cpp +++ b/search/pre_ranking_info.cpp @@ -11,11 +11,11 @@ std::string DebugPrint(PreRankingInfo const & info) os << "m_distanceToPivot: " << info.m_distanceToPivot << ", "; for (size_t i = 0; i < static_cast(Model::TYPE_COUNT); ++i) { - if (info.m_tokenRange[i].Empty()) + if (info.m_tokenRanges[i].Empty()) continue; auto const type = static_cast(i); - os << "m_tokenRange[" << DebugPrint(type) << "]:" << DebugPrint(info.m_tokenRange[i]) << ", "; + os << "m_tokenRanges[" << DebugPrint(type) << "]:" << DebugPrint(info.m_tokenRanges[i]) << ", "; } os << "m_allTokensUsed: " << info.m_allTokensUsed << ", "; os << "m_exactMatch: " << info.m_exactMatch << ", "; diff --git a/search/pre_ranking_info.hpp b/search/pre_ranking_info.hpp index e472ce449c..7af979e632 100644 --- a/search/pre_ranking_info.hpp +++ b/search/pre_ranking_info.hpp @@ -10,6 +10,7 @@ #include "base/assert.hpp" +#include #include #include #include @@ -22,13 
+23,13 @@ struct PreRankingInfo { ASSERT_LESS(type, Model::TYPE_COUNT, ()); m_type = type; - m_tokenRange[m_type] = range; + m_tokenRanges[m_type] = range; } TokenRange const & InnermostTokenRange() const { ASSERT_LESS(m_type, Model::TYPE_COUNT, ()); - return m_tokenRange[m_type]; + return m_tokenRanges[m_type]; } // An abstract distance from the feature to the pivot. Measurement @@ -38,8 +39,8 @@ struct PreRankingInfo m2::PointD m_center = m2::PointD::Zero(); bool m_centerLoaded = false; - // Tokens match to the feature name or house number. - TokenRange m_tokenRange[Model::TYPE_COUNT]; + // Matched parts of the query. + std::array<TokenRange, Model::TYPE_COUNT> m_tokenRanges; // Different geo-parts extracted from query. Currently only poi, // building and street ids are in |m_geoParts|. diff --git a/search/ranker.cpp b/search/ranker.cpp index 517258aa26..c7873a7b52 100644 --- a/search/ranker.cpp +++ b/search/ranker.cpp @@ -357,6 +357,7 @@ class RankerResultMaker info.m_allTokensUsed = preInfo.m_allTokensUsed; info.m_exactMatch = preInfo.m_exactMatch; info.m_categorialRequest = m_params.IsCategorialRequest(); + info.m_tokenRanges = preInfo.m_tokenRanges; // We do not compare result name and request for categorial requests but we prefer named // features.
@@ -386,7 +387,7 @@ class RankerResultMaker if (street) { auto const type = Model::TYPE_STREET; - auto const & range = preInfo.m_tokenRange[type]; + auto const & range = preInfo.m_tokenRanges[type]; auto const streetScores = GetNameScores(*street, m_params, range, type); nameScore = min(nameScore, streetScores.first.m_nameScore); @@ -403,7 +404,7 @@ class RankerResultMaker if (suburb) { auto const type = Model::TYPE_SUBURB; - auto const & range = preInfo.m_tokenRange[type]; + auto const & range = preInfo.m_tokenRanges[type]; auto const matchingResult = MatchTokenRange(*suburb, m_params, range, type); errorsMade += matchingResult.first; matchedLength += matchingResult.second; @@ -416,7 +417,7 @@ class RankerResultMaker if (city) { auto const type = Model::TYPE_CITY; - auto const & range = preInfo.m_tokenRange[type]; + auto const & range = preInfo.m_tokenRanges[type]; auto const matchingResult = MatchTokenRange(*city, m_params, range, type); errorsMade += matchingResult.first; matchedLength += matchingResult.second; diff --git a/search/ranking_info.cpp b/search/ranking_info.cpp index 3b972d61be..d740241e06 100644 --- a/search/ranking_info.cpp +++ b/search/ranking_info.cpp @@ -4,6 +4,8 @@ #include "indexer/search_string_utils.hpp" +#include "base/assert.hpp" + #include #include #include @@ -74,6 +76,30 @@ double TransformRating(pair const & rating) } return r; } + +void PrintParse(ostringstream & oss, array const & ranges, + size_t numTokens) +{ + vector types(numTokens, Model::Type::TYPE_COUNT); + for (size_t i = 0; i < ranges.size(); ++i) + { + for (size_t pos : ranges[i]) + { + CHECK_LESS(pos, numTokens, ()); + CHECK_EQUAL(types[pos], Model::Type::TYPE_COUNT, ()); + types[pos] = static_cast(i); + } + } + + oss << "Parse ["; + for (size_t i = 0; i < numTokens; ++i) + { + if (i > 0) + oss << " "; + oss << DebugPrint(types[i]); + } + oss << "]"; +} } // namespace // static @@ -101,7 +127,8 @@ string DebugPrint(RankingInfo const & info) { ostringstream os; os << 
boolalpha; - os << "RankingInfo ["; + PrintParse(os, info.m_tokenRanges, info.m_numTokens); + os << ", RankingInfo ["; os << "m_distanceToPivot:" << info.m_distanceToPivot; os << ", m_rank:" << static_cast(info.m_rank); os << ", m_popularity:" << static_cast(info.m_popularity); diff --git a/search/ranking_info.hpp b/search/ranking_info.hpp index e871a4f1c5..59c0132bfe 100644 --- a/search/ranking_info.hpp +++ b/search/ranking_info.hpp @@ -4,6 +4,7 @@ #include "search/pre_ranking_info.hpp" #include "search/ranking_utils.hpp" +#include #include #include #include @@ -45,9 +46,15 @@ struct RankingInfo // Number of misprints. ErrorsMade m_errorsMade; + // Number of query tokens. size_t m_numTokens; + // Matched parts of the query. + // todo(@m) Using TokenType instead of ModelType here would + // allow us to distinguish postcodes too. + std::array<TokenRange, Model::TYPE_COUNT> m_tokenRanges; + // Fraction of characters from original query matched to feature. double m_matchedFraction = 0.0;