forked from organicmaps/organicmaps
[search] Use indices vector to sort and select results in PreRanker.
Signed-off-by: Viktor Govako <viktor.govako@gmail.com>
This commit is contained in:
parent
ecf603e06a
commit
00366cc5d8
5 changed files with 81 additions and 92 deletions
|
@ -1,12 +1,12 @@
|
|||
#include "indexer/feature_decl.hpp"
|
||||
|
||||
#include <boost/container_hash/hash.hpp>
|
||||
|
||||
#include <sstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace feature
|
||||
{
|
||||
string DebugPrint(GeomType type)
|
||||
std::string DebugPrint(GeomType type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
|
@ -19,7 +19,7 @@ string DebugPrint(GeomType type)
|
|||
}
|
||||
} // namespace feature
|
||||
|
||||
string DebugPrint(FeatureID const & id)
|
||||
std::string DebugPrint(FeatureID const & id)
|
||||
{
|
||||
return "{ " + DebugPrint(id.m_mwmId) + ", " + std::to_string(id.m_index) + " }";
|
||||
}
|
||||
|
@ -30,7 +30,7 @@ char const * const FeatureID::kInvalidFileName = "INVALID";
|
|||
int64_t const FeatureID::kInvalidMwmVersion = -1;
|
||||
|
||||
|
||||
string FeatureID::GetMwmName() const
|
||||
std::string FeatureID::GetMwmName() const
|
||||
{
|
||||
return IsValid() ? m_mwmId.GetInfo()->GetCountryName() : kInvalidFileName;
|
||||
}
|
||||
|
@ -39,3 +39,11 @@ int64_t FeatureID::GetMwmVersion() const
|
|||
{
|
||||
return IsValid() ? m_mwmId.GetInfo()->GetVersion() : kInvalidMwmVersion;
|
||||
}
|
||||
|
||||
size_t std::hash<FeatureID>::operator()(FeatureID const & fID) const
|
||||
{
|
||||
size_t seed = 0;
|
||||
boost::hash_combine(seed, fID.m_mwmId.GetInfo());
|
||||
boost::hash_combine(seed, fID.m_index);
|
||||
return seed;
|
||||
}
|
||||
|
|
|
@ -50,3 +50,12 @@ struct FeatureID
|
|||
};
|
||||
|
||||
std::string DebugPrint(FeatureID const & id);
|
||||
|
||||
namespace std
|
||||
{
|
||||
template <>
|
||||
struct hash<FeatureID>
|
||||
{
|
||||
size_t operator()(FeatureID const & fID) const;
|
||||
};
|
||||
} // namespace std
|
||||
|
|
|
@ -24,7 +24,7 @@ using namespace std;
|
|||
|
||||
namespace
|
||||
{
|
||||
void SweepNearbyResults(m2::PointD const & eps, set<FeatureID> const & prevEmit,
|
||||
void SweepNearbyResults(m2::PointD const & eps, unordered_set<FeatureID> const & prevEmit,
|
||||
vector<PreRankerResult> & results)
|
||||
{
|
||||
m2::NearbyPointsSweeper sweeper(eps.x, eps.y);
|
||||
|
@ -143,6 +143,22 @@ void PreRanker::FillMissingFieldsInPreResults()
|
|||
});
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
template <class CompT, class ContT> class CompareIndices
|
||||
{
|
||||
CompT m_cmp;
|
||||
ContT const & m_cont;
|
||||
|
||||
public:
|
||||
CompareIndices(CompT const & cmp, ContT const & cont) : m_cmp(cmp), m_cont(cont) {}
|
||||
bool operator()(size_t l, size_t r) const
|
||||
{
|
||||
return m_cmp(m_cont[l], m_cont[r]);
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void PreRanker::Filter(bool viewportSearch)
|
||||
{
|
||||
auto const lessForUnique = [](PreRankerResult const & lhs, PreRankerResult const & rhs)
|
||||
|
@ -167,64 +183,22 @@ void PreRanker::Filter(bool viewportSearch)
|
|||
if (m_results.size() <= BatchSize())
|
||||
return;
|
||||
|
||||
sort(m_results.begin(), m_results.end(), &PreRankerResult::LessDistance);
|
||||
vector<size_t> indices(m_results.size());
|
||||
generate(indices.begin(), indices.end(), [n = 0] () mutable { return n++; });
|
||||
unordered_set<size_t> filtered;
|
||||
|
||||
/// @todo To have any benefit from the next sort-shuffle code block, we should have at least 2 *strictly* equal
|
||||
/// (distance in meters) results in the middle of m_results vector. The probability of that is -> 0.
|
||||
/// This code had sence, when we had some approximated viewport distance before centers table.
|
||||
/*{
|
||||
// Priority is some kind of distance from the viewport or
|
||||
// position, therefore if we have a bunch of results with the same
|
||||
// priority, we have no idea here which results are relevant. To
|
||||
// prevent bias from previous search routines (like sorting by
|
||||
// feature id) this code randomly selects tail of the
|
||||
// sorted-by-priority list of pre-results.
|
||||
|
||||
double const last = m_results[BatchSize()].GetDistance();
|
||||
|
||||
auto b = m_results.begin() + BatchSize();
|
||||
for (; b != m_results.begin() && b->GetDistance() == last; --b)
|
||||
;
|
||||
if (b->GetDistance() != last)
|
||||
++b;
|
||||
|
||||
auto e = m_results.begin() + BatchSize();
|
||||
for (; e != m_results.end() && e->GetDistance() == last; ++e)
|
||||
;
|
||||
|
||||
// The main reason of shuffling here is to select a random subset
|
||||
// from the low-priority results. We're using a linear
|
||||
// congruential method with default seed because it is fast,
|
||||
// simple and doesn't need an external entropy source.
|
||||
//
|
||||
// TODO (@y, @m, @vng): consider to take some kind of hash from
|
||||
// features and then select a subset with smallest values of this
|
||||
// hash. In this case this subset of results will be persistent
|
||||
// to small changes in the original set.
|
||||
shuffle(b, e, m_rng);
|
||||
}*/
|
||||
|
||||
struct LessFeatureID
|
||||
{
|
||||
inline bool operator()(PreRankerResult const & lhs, PreRankerResult const & rhs) const
|
||||
{
|
||||
return lhs.GetId() < rhs.GetId();
|
||||
}
|
||||
};
|
||||
set<PreRankerResult, LessFeatureID> filtered;
|
||||
|
||||
auto const numResults = min(m_results.size(), BatchSize());
|
||||
auto const iBeg = m_results.begin();
|
||||
auto const iMiddle = iBeg + numResults;
|
||||
auto const iEnd = m_results.end();
|
||||
auto const iBeg = indices.begin();
|
||||
auto const iMiddle = iBeg + BatchSize();
|
||||
auto const iEnd = indices.end();
|
||||
|
||||
nth_element(iBeg, iMiddle, iEnd, CompareIndices(&PreRankerResult::LessDistance, m_results));
|
||||
filtered.insert(iBeg, iMiddle);
|
||||
|
||||
if (!m_params.m_categorialRequest)
|
||||
{
|
||||
nth_element(iBeg, iMiddle, iEnd, &PreRankerResult::LessRankAndPopularity);
|
||||
nth_element(iBeg, iMiddle, iEnd, CompareIndices(&PreRankerResult::LessRankAndPopularity, m_results));
|
||||
filtered.insert(iBeg, iMiddle);
|
||||
nth_element(iBeg, iMiddle, iEnd, &PreRankerResult::LessByExactMatch);
|
||||
nth_element(iBeg, iMiddle, iEnd, CompareIndices(&PreRankerResult::LessByExactMatch, m_results));
|
||||
filtered.insert(iBeg, iMiddle);
|
||||
}
|
||||
else
|
||||
|
@ -238,11 +212,15 @@ void PreRanker::Filter(bool viewportSearch)
|
|||
2 * kPedestrianRadiusMeters;
|
||||
comparator.m_viewport = m_params.m_viewport;
|
||||
|
||||
nth_element(iBeg, iMiddle, iEnd, comparator);
|
||||
nth_element(iBeg, iMiddle, iEnd, CompareIndices(comparator, m_results));
|
||||
filtered.insert(iBeg, iMiddle);
|
||||
}
|
||||
|
||||
m_results.assign(make_move_iterator(filtered.begin()), make_move_iterator(filtered.end()));
|
||||
PreResultsContainerT newResults;
|
||||
newResults.reserve(filtered.size());
|
||||
for (size_t idx : filtered)
|
||||
newResults.push_back(m_results[idx]);
|
||||
m_results.swap(newResults);
|
||||
}
|
||||
|
||||
void PreRanker::UpdateResults(bool lastUpdate)
|
||||
|
|
|
@ -12,10 +12,9 @@
|
|||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <random>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
class DataSource;
|
||||
|
@ -144,8 +143,10 @@ private:
|
|||
|
||||
DataSource const & m_dataSource;
|
||||
Ranker & m_ranker;
|
||||
std::vector<PreRankerResult> m_results;
|
||||
std::vector<PreRankerResult> m_relaxedResults;
|
||||
|
||||
using PreResultsContainerT = std::vector<PreRankerResult>;
|
||||
PreResultsContainerT m_results, m_relaxedResults;
|
||||
|
||||
Params m_params;
|
||||
|
||||
// Amount of results sent up the pipeline.
|
||||
|
@ -161,12 +162,10 @@ private:
|
|||
/// search session. They're used for filtering of current search, because we need to give more priority
|
||||
/// to results that were on map previously, to avoid result's annoying blinking/flickering on map.
|
||||
/// @{
|
||||
std::set<FeatureID> m_currEmit;
|
||||
std::set<FeatureID> m_prevEmit;
|
||||
std::unordered_set<FeatureID> m_currEmit;
|
||||
std::unordered_set<FeatureID> m_prevEmit;
|
||||
/// @}
|
||||
|
||||
std::minstd_rand m_rng;
|
||||
|
||||
DISALLOW_COPY_AND_MOVE(PreRanker);
|
||||
};
|
||||
} // namespace search
|
||||
|
|
|
@ -27,31 +27,19 @@
|
|||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/cancellable.hpp"
|
||||
#include "base/limited_priority_queue.hpp"
|
||||
#include "base/math.hpp"
|
||||
#include "base/stl_helpers.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <iterator>
|
||||
#include <vector>
|
||||
|
||||
namespace pre_ranker_test
|
||||
{
|
||||
using namespace generator::tests_support;
|
||||
using namespace search::tests_support;
|
||||
using namespace search;
|
||||
using namespace std;
|
||||
|
||||
class DataSource;
|
||||
|
||||
namespace storage
|
||||
{
|
||||
class CountryInfoGetter;
|
||||
}
|
||||
|
||||
namespace search
|
||||
{
|
||||
namespace
|
||||
{
|
||||
class TestRanker : public Ranker
|
||||
{
|
||||
public:
|
||||
|
@ -105,12 +93,15 @@ UNIT_CLASS_TEST(PreRankerTest, Smoke)
|
|||
// emits results nearest to the pivot.
|
||||
|
||||
m2::PointD const kPivot(0, 0);
|
||||
m2::RectD const kViewport(m2::PointD(-5, -5), m2::PointD(5, 5));
|
||||
m2::RectD const kViewport(-5, -5, 5, 5);
|
||||
|
||||
size_t const kBatchSize = 50;
|
||||
/// @todo Well, I'm not sure that 50 results will have unique distances to pivot.
|
||||
/// 7x7 grid is 49, so potentially it can be 51 (north and south) or (east and west).
|
||||
/// But we should consider circle (ellipse) around pivot and I can't say,
|
||||
/// how it goes in meters radius on integer mercator grid.
|
||||
size_t constexpr kBatchSize = 50;
|
||||
|
||||
vector<TestPOI> pois;
|
||||
|
||||
for (int x = -5; x <= 5; ++x)
|
||||
{
|
||||
for (int y = -5; y <= 5; ++y)
|
||||
|
@ -122,7 +113,8 @@ UNIT_CLASS_TEST(PreRankerTest, Smoke)
|
|||
|
||||
TEST_LESS(kBatchSize, pois.size(), ());
|
||||
|
||||
auto mwmId = BuildCountry("Cafeland", [&](TestMwmBuilder & builder) {
|
||||
auto mwmId = BuildCountry("Cafeland", [&](TestMwmBuilder & builder)
|
||||
{
|
||||
for (auto const & poi : pois)
|
||||
builder.Add(poi);
|
||||
});
|
||||
|
@ -150,7 +142,8 @@ UNIT_CLASS_TEST(PreRankerTest, Smoke)
|
|||
vector<bool> emit(pois.size());
|
||||
|
||||
FeaturesVectorTest fv(mwmId.GetInfo()->GetLocalFile().GetPath(MapFileType::Map));
|
||||
fv.GetVector().ForEach([&](FeatureType & ft, uint32_t index) {
|
||||
fv.GetVector().ForEach([&](FeatureType & ft, uint32_t index)
|
||||
{
|
||||
FeatureID id(mwmId, index);
|
||||
ResultTracer::Provenance provenance;
|
||||
preRanker.Emplace(id, PreRankingInfo(Model::TYPE_SUBPOI, TokenRange(0, 1)), provenance);
|
||||
|
@ -164,19 +157,21 @@ UNIT_CLASS_TEST(PreRankerTest, Smoke)
|
|||
|
||||
TEST(all_of(emit.begin(), emit.end(), base::IdFunctor()), (emit));
|
||||
TEST(ranker.Finished(), ());
|
||||
TEST_EQUAL(results.size(), kBatchSize, ());
|
||||
|
||||
size_t const count = results.size();
|
||||
// See todo comment above for details.
|
||||
TEST(count == kBatchSize || count == kBatchSize + 1, (count));
|
||||
|
||||
vector<bool> checked(pois.size());
|
||||
for (size_t i = 0; i < results.size(); ++i)
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
size_t const index = results[i].GetId().m_index;
|
||||
TEST_LESS(index, pois.size(), ());
|
||||
|
||||
TEST(!checked[index], (index));
|
||||
TEST(base::AlmostEqualAbs(distances[index], results[i].GetDistance(), 1.0),
|
||||
TEST(base::AlmostEqualAbs(distances[index], results[i].GetDistance(), 1.0 /* 1 meter epsilon */),
|
||||
(distances[index], results[i].GetDistance()));
|
||||
checked[index] = true;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
} // namespace search
|
||||
} // namespace pre_ranker_test
|
||||
|
|
Loading…
Add table
Reference in a new issue