From f95d5485eda8e25d0ac21fdff570677cf806c01e Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Tue, 29 Mar 2016 17:24:51 +0300 Subject: [PATCH] [search] Added stats to features collector tool. --- .../features_collector_tool.cpp | 104 ++++++++++++++---- 1 file changed, 83 insertions(+), 21 deletions(-) diff --git a/search/search_quality/features_collector_tool/features_collector_tool.cpp b/search/search_quality/features_collector_tool/features_collector_tool.cpp index 25defc8b05..cb44c76742 100644 --- a/search/search_quality/features_collector_tool/features_collector_tool.cpp +++ b/search/search_quality/features_collector_tool/features_collector_tool.cpp @@ -37,18 +37,16 @@ using namespace storage; DEFINE_string(data_path, "", "Path to data directory (resources dir)"); DEFINE_string(mwm_path, "", "Path to mwm files (writable dir)"); +DEFINE_string(stats_path, "", "Path to store stats about queries results (default: stderr)"); DEFINE_string(json_in, "", "Path to the json file with samples (default: stdin)"); -void DidDownload(TCountryId const & /* countryId */, - shared_ptr const & /* localFile */) -{ -} +size_t constexpr kInvalidId = numeric_limits::max(); -bool WillDelete(TCountryId const & /* countryId */, - shared_ptr const & /* localFile */) +struct Stats { - return false; -} + // Indexes of not-found VITAL or RELEVANT results. + vector m_notFound; +}; struct Context { @@ -95,35 +93,68 @@ bool Matches(Context & context, Sample::Result const & golden, search::Result co my::AlmostEqualAbs(golden.m_pos, center, kEps); } -void SetRelevanceValues(Context & context, vector const & golden, - vector const & actual, - vector & relevances) +void MatchResults(Context & context, vector const & golden, + vector const & actual, vector & goldenMatching, + vector & actualMatching) { auto const n = golden.size(); auto const m = actual.size(); - relevances.assign(m, Sample::Result::RELEVANCE_IRRELEVANT); - vector matched(m); + goldenMatching.assign(n, kInvalidId); + actualMatching.assign(m, kInvalidId); // TODO (@y, @m): use Kuhn algorithm here for maximum matching. for (size_t i = 0; i < n; ++i) { + if (goldenMatching[i] != kInvalidId) + continue; auto const & g = golden[i]; + for (size_t j = 0; j < m; ++j) { - if (matched[j]) + if (actualMatching[j] != kInvalidId) continue; + auto const & a = actual[j]; if (Matches(context, g, a)) { - matched[j] = true; - relevances[j] = g.m_relevance; + goldenMatching[i] = j; + actualMatching[j] = i; break; } } } } +void DidDownload(TCountryId const & /* countryId */, + shared_ptr const & /* localFile */) +{ +} + +bool WillDelete(TCountryId const & /* countryId */, + shared_ptr const & /* localFile */) +{ + return false; +} + +void DisplayStats(ostream & os, vector const & samples, vector const & stats) +{ + auto const n = samples.size(); + ASSERT_EQUAL(stats.size(), n, ()); + for (size_t i = 0; i < n; ++i) + { + os << "Query #" << i << " \"" << strings::ToUtf8(samples[i].m_query) << "\""; + if (stats[i].m_notFound.empty()) + { + os << ": OK" << endl; + continue; + } + os << ": WARNING" << endl; + for (auto const & j : stats[i].m_notFound) + os << "Not found: " << DebugPrint(samples[i].m_results[j]) << endl; + } +} + int main(int argc, char * argv[]) { ChangeMaxNumberOfOpenFiles(kMaxOpenFiles); @@ -186,11 +217,13 @@ int main(int argc, char * argv[]) engine.RegisterMap(mwm); } + vector stats(samples.size()); + Context context(engine); + cout << "SampleId,"; v2::RankingInfo::PrintCSVHeader(cout); cout << ",Relevance" << endl; - Context context(engine); for (size_t i = 0; i < samples.size(); ++i) { auto const & sample = samples[i]; @@ -210,10 +243,10 @@ int main(int argc, char * argv[]) auto const & results = request.Results(); - vector relevances; - SetRelevanceValues(context, sample.m_results, results, relevances); + vector goldenMatching; + vector actualMatching; + MatchResults(context, sample.m_results, results, goldenMatching, actualMatching); - ASSERT_EQUAL(results.size(), relevances.size(), ()); for (size_t j = 0; j < results.size(); ++j) { if (results[j].GetResultType() != Result::RESULT_FEATURE) @@ -221,8 +254,37 @@ int main(int argc, char * argv[]) auto const & info = results[j].GetRankingInfo(); cout << i << ","; info.ToCSV(cout); - cout << "," << DebugPrint(relevances[j]) << endl; + + auto relevance = Sample::Result::RELEVANCE_IRRELEVANT; + if (actualMatching[j] != kInvalidId) + relevance = sample.m_results[actualMatching[j]].m_relevance; + cout << "," << DebugPrint(relevance) << endl; } + + auto & s = stats[i]; + for (size_t j = 0; j < goldenMatching.size(); ++j) + { + if (goldenMatching[j] == kInvalidId && + sample.m_results[j].m_relevance != Sample::Result::RELEVANCE_IRRELEVANT) + { + s.m_notFound.push_back(j); + } + } + } + + if (FLAGS_stats_path.empty()) + { + DisplayStats(cerr, samples, stats); + } + else + { + ofstream ofs(FLAGS_stats_path); + if (!ofs.is_open()) + { + cerr << "Can't open output file for stats." << endl; + return -1; + } + DisplayStats(ofs, samples, stats); } return 0; }