From ddbedfde555ecf4ea13c529fe1e90e3801c8018a Mon Sep 17 00:00:00 2001
From: Yuri Gorshenin <y@maps.me>
Date: Thu, 19 May 2016 14:26:44 +0300
Subject: [PATCH] Review fixes.

---
 base/stl_helpers.hpp                          |  6 --
 .../search_query_v2_test.cpp                  | 45 ++++++----
 search/search_quality/download-maps.sh        | 82 +++++++++++++++----
 .../features_collector_tool.cpp               |  5 +-
 search/search_quality/scoring_model.py        |  4 +-
 search/search_query.cpp                       | 33 +++-----
 search/v2/geocoder.cpp                        |  4 +-
 search/v2/ranking_info.cpp                    | 23 +++---
 search/v2/ranking_info.hpp                    | 16 ++--
 9 files changed, 134 insertions(+), 84 deletions(-)
diff --git a/base/stl_helpers.hpp b/base/stl_helpers.hpp
index 704c487c2e..3375867f75 100644
--- a/base/stl_helpers.hpp
+++ b/base/stl_helpers.hpp
@@ -73,10 +73,4 @@ impl::Comparer<false, T, C> CompareBy(T (C::*p)() const)
 {
   return impl::Comparer<false, T, C>(p);
 }
-
-template <typename T>
-struct Id
-{
-  T const & operator()(T const & t) const { return t; }
-};
 }  // namespace my
diff --git a/search/search_integration_tests/search_query_v2_test.cpp b/search/search_integration_tests/search_query_v2_test.cpp
index d156d75b2f..3d2b498057 100644
--- a/search/search_integration_tests/search_query_v2_test.cpp
+++ b/search/search_integration_tests/search_query_v2_test.cpp
@@ -33,7 +33,7 @@ namespace
 class SearchQueryV2Test : public SearchTest
 {
 public:
-  unique_ptr<TestSearchRequest> DoRequest(string const & query)
+  unique_ptr<TestSearchRequest> MakeRequest(string const & query)
   {
     SearchParams params;
     params.m_query = query;
@@ -332,7 +332,7 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
 
   SetViewport(m2::RectD(m2::PointD(-0.5, -0.5), m2::PointD(0.5, 0.5)));
   {
-    auto request = DoRequest("golden gate bridge ");
+    auto request = MakeRequest("golden gate bridge ");
 
     TRules rules = {ExactMatch(wonderlandId, goldenGateBridge),
                     ExactMatch(wonderlandId, goldenGateStreet)};
@@ -342,15 +342,14 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
     {
       auto const & info = result.GetRankingInfo();
       TEST_EQUAL(NAME_SCORE_FULL_MATCH, info.m_nameScore, (result));
-      TEST(!info.m_matchByTrueCats, (result));
-      TEST(!info.m_matchByFalseCats, (result));
-      TEST(my::AlmostEqualAbs(1.0, info.m_nameCoverage, 1e-6), (info.m_nameCoverage));
+      TEST(!info.m_pureCats, (result));
+      TEST(!info.m_falseCats, (result));
     }
   }
 
   // This test is quite important and must always pass.
   {
-    auto request = DoRequest("cafe лермонтов");
+    auto request = MakeRequest("cafe лермонтов");
     auto const & results = request->Results();
 
     TRules rules{ExactMatch(wonderlandId, cafe1), ExactMatch(wonderlandId, cafe2),
@@ -475,6 +474,9 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestCategories)
   TestPOI named(m2::PointD(0.0001, 0.0001), "ATM", "en");
   named.SetTypes({{"amenity", "atm"}});
 
+  TestPOI busStop(m2::PointD(0.00005, 0.0005), "ATM Bus Stop", "en");
+  busStop.SetTypes({{"highway", "bus_stop"}});
+
   BuildWorld([&](TestMwmBuilder & builder)
              {
                builder.Add(sanFrancisco);
@@ -483,24 +485,42 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestCategories)
                                    {
                                      builder.Add(named);
                                      builder.Add(noname);
+                                     builder.Add(busStop);
                                    });
 
   SetViewport(m2::RectD(m2::PointD(-0.5, -0.5), m2::PointD(0.5, 0.5)));
-  TRules const rules = {ExactMatch(wonderlandId, noname), ExactMatch(wonderlandId, named)};
 
   {
-    auto request = DoRequest("atm");
+    TRules const rules = {ExactMatch(wonderlandId, noname), ExactMatch(wonderlandId, named),
+                          ExactMatch(wonderlandId, busStop)};
+
+    auto request = MakeRequest("atm");
     TEST(MatchResults(rules, request->Results()), ());
     for (auto const & result : request->Results())
     {
+      Index::FeaturesLoaderGuard loader(m_engine, wonderlandId);
+      FeatureType ft;
+      loader.GetFeatureByIndex(result.GetFeatureID().m_index, ft);
+
       auto const & info = result.GetRankingInfo();
-      TEST(info.m_matchByTrueCats, (result));
-      TEST(!info.m_matchByFalseCats, (result));
+
+      if (busStop.Matches(ft))
+      {
+        TEST(!info.m_pureCats, (result));
+        TEST(info.m_falseCats, (result));
+      }
+      else
+      {
+        TEST(info.m_pureCats, (result));
+        TEST(!info.m_falseCats, (result));
+      }
     }
   }
 
   {
-    auto request = DoRequest("#atm");
+    TRules const rules = {ExactMatch(wonderlandId, noname), ExactMatch(wonderlandId, named)};
+
+    auto request = MakeRequest("#atm");
 
     TEST(MatchResults(rules, request->Results()), ());
     for (auto const & result : request->Results())
@@ -510,9 +530,6 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestCategories)
       // Token with a hashtag should not participate in name-score
       // calculations.
       TEST_EQUAL(NAME_SCORE_ZERO, info.m_nameScore, (result));
-
-      // TODO (@y): fix this. Name coverage calculations are flawed.
-      // TEST(my::AlmostEqualAbs(0.0, info.m_nameCoverage, 1e-6), (info.m_nameCoverage));
     }
   }
 
diff --git a/search/search_quality/download-maps.sh b/search/search_quality/download-maps.sh
index 85ea1ab758..c41ac333eb 100755
--- a/search/search_quality/download-maps.sh
+++ b/search/search_quality/download-maps.sh
@@ -3,25 +3,77 @@
 # Downloads all maps necessary for learning to rank to the current
 # directory.
 
-case $# in
-    1) VERSION="$1"
-       ;;
-    *) echo "Usage: $0 version" 2>&1
-       exit -1
-       ;;
-esac
+ALL=
+VERSION=
+BASE="http://direct.mapswithme.com/direct"
+
+display_usage() {
+    echo "Usage: $0 -v [version] -a -h"
+    echo "    -v  version of maps to download"
+    echo "    -a  download all maps of the specified version"
+    echo "    -h  display this message"
+}
+
+while getopts ":av:h" opt
+do
+    case "$opt" in
+        a) ALL=1
+           ;;
+        v) VERSION="$OPTARG"
+           ;;
+        h) display_usage
+           exit -1
+           ;;
+        \?) echo "Invalid option: -$OPTARG" 1>&2
+            ;;
+        :) echo "Option -$OPTARG requires an argument" 1>&2
+           ;;
+    esac
+done
+
+if [ -z "$VERSION" ]
+then
+    echo "Version of maps is not specified." 1>&2
+    exit -1
+fi
+
+if ! curl "$BASE/" 2>/dev/null |
+        sed -n 's/^.*href="\(.*\)\/".*$/\1/p' |
+        grep -v "^../$" | grep -q "$VERSION"
+then
+    echo "Invalid version: $VERSION" 1>&2
+    exit -1
+fi
 
-BASE="http://direct.mapswithme.com/direct/$VERSION/"
 NAMES=("Australia_Brisbane.mwm"
        "Belarus_Minsk*.mwm"
        "Germany_*.mwm"
        "Russia_*.mwm"
        "UK_England_*.mwm"
-       "US_California_*.mwm" "US_Maryland_*.mwm")
+       "US_California_*.mwm"
+       "US_Maryland_*.mwm")
 
-set -e
-set -x
-for name in ${NAMES[@]}
-do
-    wget -r -np -nd -A "$name" "$BASE"
-done
+DIR="$BASE/$VERSION"
+
+if [ "$ALL" ]
+then
+    echo "Downloading all maps..."
+
+    files=$(curl "$DIR/" 2>/dev/null | sed -n 's/^.*href="\(.*\.mwm\)".*$/\1/p')
+
+    set -e
+    set -x
+    for file in $files
+    do
+        wget -np -nd "$DIR/$file"
+    done
+else
+    echo "Downloading maps..."
+
+    set -e
+    set -x
+    for name in ${NAMES[@]}
+    do
+        wget -r -np -nd -A "$name" "$DIR/"
+    done
+fi
diff --git a/search/search_quality/features_collector_tool/features_collector_tool.cpp b/search/search_quality/features_collector_tool/features_collector_tool.cpp
index 9529b9d3fb..bf33021ab6 100644
--- a/search/search_quality/features_collector_tool/features_collector_tool.cpp
+++ b/search/search_quality/features_collector_tool/features_collector_tool.cpp
@@ -143,14 +143,15 @@ void DisplayStats(ostream & os, vector<Sample> const & samples, vector<Stats> co
   ASSERT_EQUAL(stats.size(), n, ());
 
   size_t numWarnings = 0;
-  for (auto const & stat : stats) {
+  for (auto const & stat : stats)
+  {
     if (!stat.m_notFound.empty())
       ++numWarnings;
   }
 
   if (numWarnings == 0)
   {
-    os << "All " << stats.size() << " queries OK." << endl;
+    os << "All " << stats.size() << " queries are OK." << endl;
     return;
   }
 
diff --git a/search/search_quality/scoring_model.py b/search/search_quality/scoring_model.py
index 244688d890..2409e3d715 100755
--- a/search/search_quality/scoring_model.py
+++ b/search/search_quality/scoring_model.py
@@ -38,7 +38,7 @@ def normalize_data(data):
     data['Rank'] = data['Rank'].apply(lambda v: v / MAX_RANK)
     data['Relevance'] = data['Relevance'].apply(lambda v: RELEVANCES[v])
 
-    cats = data['MatchByTrueCats'].combine(data['MatchByFalseCats'], max)
+    cats = data['PureCats'].combine(data['FalseCats'], max)
 
     # Full prefix match is unified with a full match as these features
     # are collinear. But we need both of them as they're also used in
@@ -49,8 +49,6 @@ def normalize_data(data):
     # the features too.
     data['NameScore'] = data['NameScore'].combine(cats, transform_name_score)
 
-    data['NameCoverage'] = data['NameCoverage'].combine(cats, lambda v, c: v if c == 0 else 0.0)
-
     # Adds dummy variables to data for NAME_SCORES.
     for ns in NAME_SCORES:
         data[ns] = data['NameScore'].apply(lambda v: int(ns == v))
diff --git a/search/search_query.cpp b/search/search_query.cpp
index cfbf486c89..0f78a1c64f 100644
--- a/search/search_query.cpp
+++ b/search/search_query.cpp
@@ -190,20 +190,11 @@ void UpdateNameScore(string const & name, TSlice const & slice, v2::NameScore &
 
 template <typename TSlice>
 void UpdateNameScore(vector<strings::UniString> const & tokens, TSlice const & slice,
-                     v2::NameScore & bestScore, double & bestCoverage)
+                     v2::NameScore & bestScore)
 {
   auto const score = v2::GetNameScore(tokens, slice);
-  auto const coverage =
-      tokens.empty() ? 0 : static_cast<double>(slice.Size()) / static_cast<double>(tokens.size());
   if (score > bestScore)
-  {
     bestScore = score;
-    bestCoverage = coverage;
-  }
-  else if (score == bestScore && coverage > bestCoverage)
-  {
-    bestCoverage = coverage;
-  }
 }
 
 inline bool IsHashtagged(strings::UniString const & s) { return !s.empty() && s[0] == '#'; }
@@ -663,8 +654,8 @@ class PreResult2Maker
       vector<strings::UniString> tokens;
       SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters());
 
-      UpdateNameScore(tokens, slice, info.m_nameScore, info.m_nameCoverage);
-      UpdateNameScore(tokens, sliceNoCategories, info.m_nameScore, info.m_nameCoverage);
+      UpdateNameScore(tokens, slice, info.m_nameScore);
+      UpdateNameScore(tokens, sliceNoCategories, info.m_nameScore);
     }
 
     if (info.m_searchType == v2::SearchModel::SEARCH_TYPE_BUILDING)
@@ -679,16 +670,14 @@ class PreResult2Maker
         ++matched[i].first;
     });
 
-    info.m_matchByTrueCats =
-        all_of(matched.begin(), matched.end(), [](pair<size_t, size_t> const & m)
-               {
-                 return m.first != 0;
-               });
-    info.m_matchByFalseCats =
-        all_of(matched.begin(), matched.end(), [](pair<size_t, size_t> const & m)
-               {
-                 return m.first == 0 && m.second != 0;
-               });
+    info.m_pureCats = all_of(matched.begin(), matched.end(), [](pair<size_t, size_t> const & m)
+                             {
+                               return m.first != 0;
+                             });
+    info.m_falseCats = all_of(matched.begin(), matched.end(), [](pair<size_t, size_t> const & m)
+                              {
+                                return m.first == 0 && m.second != 0;
+                              });
   }
 
   uint8_t NormalizeRank(uint8_t rank, v2::SearchModel::SearchType type, m2::PointD const & center,
diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp
index be83577b74..3f3a91b7dd 100644
--- a/search/v2/geocoder.cpp
+++ b/search/v2/geocoder.cpp
@@ -1557,12 +1557,12 @@ SearchModel::SearchType Geocoder::GetSearchTypeInGeocoding(uint32_t featureId)
 
 bool Geocoder::AllTokensUsed() const
 {
-  return all_of(m_usedTokens.begin(), m_usedTokens.end(), my::Id<bool>());
+  return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor());
 }
 
 bool Geocoder::HasUsedTokensInRange(size_t from, size_t to) const
 {
-  return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, my::Id<bool>());
+  return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, IdFunctor());
 }
 
 size_t Geocoder::NumUnusedTokensGroups() const
diff --git a/search/v2/ranking_info.cpp b/search/v2/ranking_info.cpp
index 1a4cbb42bd..1e4dbbaf32 100644
--- a/search/v2/ranking_info.cpp
+++ b/search/v2/ranking_info.cpp
@@ -47,10 +47,9 @@ void RankingInfo::PrintCSVHeader(ostream & os)
   os << "DistanceToPivot"
      << ",Rank"
      << ",NameScore"
-     << ",NameCoverage"
      << ",SearchType"
-     << ",MatchByTrueCats"
-     << ",MatchByFalseCats";
+     << ",PureCats"
+     << ",FalseCats";
 }
 
 string DebugPrint(RankingInfo const & info)
@@ -60,10 +59,9 @@ string DebugPrint(RankingInfo const & info)
   os << "m_distanceToPivot:" << info.m_distanceToPivot << ",";
   os << "m_rank:" << static_cast<int>(info.m_rank) << ",";
   os << "m_nameScore:" << DebugPrint(info.m_nameScore) << ",";
-  os << "m_nameCoverage:" << info.m_nameCoverage << ",";
   os << "m_searchType:" << DebugPrint(info.m_searchType) << ",";
-  os << "m_matchByTrueCats:" << info.m_matchByTrueCats << ",";
-  os << "m_matchByFalseCats:" << info.m_matchByFalseCats;
+  os << "m_pureCats:" << info.m_pureCats << ",";
+  os << "m_falseCats:" << info.m_falseCats;
   os << "]";
   return os.str();
 }
@@ -72,8 +70,7 @@ void RankingInfo::ToCSV(ostream & os) const
 {
   os << fixed;
   os << m_distanceToPivot << "," << static_cast<int>(m_rank) << "," << DebugPrint(m_nameScore)
-     << "," << m_nameCoverage << "," << DebugPrint(m_searchType) << "," << m_matchByTrueCats << ","
-     << m_matchByFalseCats;
+     << "," << DebugPrint(m_searchType) << "," << m_pureCats << "," << m_falseCats;
 }
 
 double RankingInfo::GetLinearModelRank() const
@@ -86,11 +83,15 @@ double RankingInfo::GetLinearModelRank() const
   double const rank = static_cast<double>(m_rank) / numeric_limits<uint8_t>::max();
 
   auto nameScore = m_nameScore;
-  auto nameCoverage = m_nameCoverage;
-  if (m_matchByTrueCats || m_matchByFalseCats)
+  if (m_pureCats || m_falseCats)
   {
+    // If the feature was matched only by categorial tokens, it's
+    // better for ranking to set name score to zero.  For example,
+    // when we're looking for a "cafe", cafes "Cafe Pushkin" and
+    // "Lermontov" both match to the request, but must be ranked in
+    // accordance to their distances to the user position or viewport,
+    // in spite of "Cafe Pushkin" has a non-zero name rank.
     nameScore = NAME_SCORE_ZERO;
-    nameCoverage = 0.0;
   }
 
   return kDistanceToPivot * distanceToPivot + kRank * rank + kNameScore[nameScore] +
diff --git a/search/v2/ranking_info.hpp b/search/v2/ranking_info.hpp
index 23d5b000c9..e86811a3ee 100644
--- a/search/v2/ranking_info.hpp
+++ b/search/v2/ranking_info.hpp
@@ -24,19 +24,17 @@ struct RankingInfo
   // Score for the feature's name.
   NameScore m_nameScore = NAME_SCORE_ZERO;
 
-  // Fraction of tokens from the query matched to a feature name.
-  double m_nameCoverage = 0;
-
   // Search type for the feature.
   SearchModel::SearchType m_searchType = SearchModel::SEARCH_TYPE_COUNT;
 
-  // True if the feature was matched only by tokens corresponding to
-  // it's categories.
-  bool m_matchByTrueCats = false;
+  // True if all of the tokens that the feature was matched by
+  // correspond to this feature's categories.
+  bool m_pureCats = false;
 
-  // True if the feature was matched only by tokens don't
-  // corresponding to it's categories.
-  bool m_matchByFalseCats = false;
+  // True if none of the tokens that the feature was matched by
+  // corresponds to this feature's categories although all of the
+  // tokens are categorial ones.
+  bool m_falseCats = false;
 
   static void PrintCSVHeader(ostream & os);