[search] LocalityScorer: Prefer nearest locality.

This commit is contained in:
tatiana-yan 2020-04-16 12:24:17 +03:00 committed by mpimenov
parent 38d2660f47
commit d779cde557
4 changed files with 138 additions and 25 deletions

View file

@ -162,7 +162,7 @@ private:
class LocalityScorerDelegate : public LocalityScorer::Delegate
{
public:
LocalityScorerDelegate(MwmContext const & context, Geocoder::Params const & params,
LocalityScorerDelegate(MwmContext & context, Geocoder::Params const & params,
base::Cancellable const & cancellable)
: m_context(context)
, m_params(params)
@ -188,8 +188,24 @@ public:
uint8_t GetRank(uint32_t featureId) const override { return m_ranks.Get(featureId); }
// Returns the center of the feature |featureId|, or an empty optional when the feature cannot be
// loaded from the context.
optional<m2::PointD> GetCenter(uint32_t featureId) override
{
  // MwmContext::GetCenter is faster but may not work for editor created features, so the feature
  // itself is loaded only when the fast lookup fails.
  m2::PointD fastCenter;
  if (m_context.GetCenter(featureId, fastCenter))
    return fastCenter;

  auto feature = m_context.GetFeature(featureId);
  if (!feature)
    return {};

  return feature::GetCenter(*feature);
}
private:
MwmContext const & m_context;
MwmContext & m_context;
Geocoder::Params const & m_params;
base::Cancellable const & m_cancellable;
@ -657,7 +673,7 @@ void Geocoder::FillLocalityCandidates(BaseContext const & ctx, CBV const & filte
}
LocalityScorerDelegate delegate(*m_context, m_params, m_cancellable);
LocalityScorer scorer(m_params, delegate);
LocalityScorer scorer(m_params, m_params.m_pivot.Center(), delegate);
scorer.GetTopLocalities(m_context->GetId(), ctx, filter, maxNumLocalities, preLocalities);
}

View file

@ -80,14 +80,15 @@ LocalityScorer::ExLocality::ExLocality(Locality const & locality, double queryNo
}
// LocalityScorer ----------------------------------------------------------------------------------
LocalityScorer::LocalityScorer(QueryParams const & params, Delegate const & delegate)
: m_params(params), m_delegate(delegate)
LocalityScorer::LocalityScorer(QueryParams const & params, m2::PointD const & pivot,
Delegate & delegate)
: m_params(params), m_pivot(pivot), m_delegate(delegate)
{
}
void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseContext const & ctx,
CBV const & filter, size_t limit,
vector<Locality> & localities) const
vector<Locality> & localities)
{
double const kUnknownIdf = 1.0;
@ -165,8 +166,7 @@ void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseConte
LeaveTopLocalities(idfs, limit, localities);
}
void LocalityScorer::LeaveTopLocalities(IdfMap & idfs, size_t limit,
vector<Locality> & localities) const
void LocalityScorer::LeaveTopLocalities(IdfMap & idfs, size_t limit, vector<Locality> & localities)
{
vector<ExLocality> els;
els.reserve(localities.size());
@ -199,11 +199,15 @@ void LocalityScorer::LeaveTopLocalities(IdfMap & idfs, size_t limit,
// for all localities tokens. Therefore, for tokens not in the
// query, some default IDF value will be used.
GetDocVecs(els[i].GetId(), dvs);
auto const distance = GetDistanceToPivot(els[i].GetId());
for (; i < j; ++i)
{
els[i].m_similarity = GetSimilarity(els[i].m_locality.m_queryVec, idfs, dvs);
els[i].m_distanceToPivot = distance;
}
}
LeaveTopBySimilarityAndRank(limit, els);
LeaveTopBySimilarityAndOther(limit, els);
ASSERT_LESS_OR_EQUAL(els.size(), limit, ());
localities.clear();
@ -234,7 +238,7 @@ void LocalityScorer::LeaveTopByExactMatchNormAndRank(size_t limitUniqueIds,
base::EraseIf(els, [&](ExLocality const & el) { return seen.find(el.GetId()) == seen.cend(); });
}
void LocalityScorer::LeaveTopBySimilarityAndRank(size_t limit, vector<ExLocality> & els) const
void LocalityScorer::LeaveTopBySimilarityAndOther(size_t limit, vector<ExLocality> & els) const
{
sort(els.begin(), els.end(), [](ExLocality const & lhs, ExLocality const & rhs) {
if (lhs.m_similarity != rhs.m_similarity)
@ -246,19 +250,49 @@ void LocalityScorer::LeaveTopBySimilarityAndRank(size_t limit, vector<ExLocality
return lhs.m_locality.m_featureId < rhs.m_locality.m_featureId;
});
auto lessDistance = [](ExLocality const & lhs, ExLocality const & rhs) {
return lhs.m_distanceToPivot < rhs.m_distanceToPivot;
};
auto const compareSimilarityAndSize = [](ExLocality const & lhs, ExLocality const & rhs) {
if (lhs.m_similarity != rhs.m_similarity)
return lhs.m_similarity > rhs.m_similarity;
if (lhs.m_locality.m_tokenRange.Size() != rhs.m_locality.m_tokenRange.Size())
return lhs.m_locality.m_tokenRange.Size() > rhs.m_locality.m_tokenRange.Size();
return false;
};
vector<ExLocality> tmp;
tmp.reserve(els.size());
auto begin = els.begin();
auto const end = els.end();
while (begin != end)
{
// We can split |els| into equal ranges by similarity and size because |els| was sorted by
// similarity and size first.
auto const range = equal_range(begin, end, *begin, compareSimilarityAndSize);
auto const closest = min_element(range.first, range.second, lessDistance);
tmp.emplace_back(std::move(*closest));
for (auto it = range.first; it != range.second; ++it)
{
if (it != closest)
tmp.emplace_back(move(*it));
}
begin = range.second;
}
unordered_set<uint32_t> seen;
size_t n = 0;
for (size_t i = 0; i < els.size() && n < limit; ++i)
els.clear();
els.reserve(limit);
for (size_t i = 0; i < tmp.size() && els.size() < limit; ++i)
{
auto const id = els[i].GetId();
auto const id = tmp[i].GetId();
if (seen.insert(id).second)
{
els[n] = els[i];
++n;
els.emplace_back(move(tmp[i]));
}
}
els.erase(els.begin() + n, els.end());
}
void LocalityScorer::GetDocVecs(uint32_t localityId, vector<DocVec> & dvs) const
@ -279,6 +313,17 @@ void LocalityScorer::GetDocVecs(uint32_t localityId, vector<DocVec> & dvs) const
}
}
double LocalityScorer::GetDistanceToPivot(uint32_t localityId)
{
auto distance = numeric_limits<double>::max();
auto const center = m_delegate.GetCenter(localityId);
if (center)
{
distance = mercator::DistanceOnEarth(m_pivot, *center);
}
return distance;
}
double LocalityScorer::GetSimilarity(QueryVec & qv, IdfMap & docIdfs, vector<DocVec> & dvc) const
{
double const kScale = 1e6;

View file

@ -8,6 +8,8 @@
#include <cstddef>
#include <cstdint>
#include <limits>
#include <optional>
#include <string>
#include <vector>
@ -30,14 +32,17 @@ public:
virtual void GetNames(uint32_t featureId, std::vector<std::string> & names) const = 0;
virtual uint8_t GetRank(uint32_t featureId) const = 0;
virtual std::optional<m2::PointD> GetCenter(uint32_t featureId) = 0;
};
LocalityScorer(QueryParams const & params, Delegate const & delegate);
LocalityScorer(QueryParams const & params, m2::PointD const & pivot, Delegate & delegate);
void SetPivotForTests(m2::PointD const & pivot) { m_pivot = pivot; }
// Leaves at most |limit| elements of |localities|, ordered by their
// features.
void GetTopLocalities(MwmSet::MwmId const & countryId, BaseContext const & ctx,
CBV const & filter, size_t limit, std::vector<Locality> & localities) const;
CBV const & filter, size_t limit, std::vector<Locality> & localities);
private:
struct ExLocality
@ -50,27 +55,31 @@ private:
double m_queryNorm = 0.0;
double m_similarity = 0.0;
uint8_t m_rank = 0;
double m_distanceToPivot = std::numeric_limits<double>::max();
};
friend std::string DebugPrint(ExLocality const & locality);
// Leaves at most |limit| elements of |localities|, ordered by some
// combination of ranks and number of matched tokens.
void LeaveTopLocalities(IdfMap & idfs, size_t limit, std::vector<Locality> & localities) const;
void LeaveTopLocalities(IdfMap & idfs, size_t limit, std::vector<Locality> & localities);
// Selects at most |limitUniqueIds| best features by exact match, query norm and
// rank, and then leaves only localities corresponding to those
// features in |els|.
void LeaveTopByExactMatchNormAndRank(size_t limitUniqueIds, std::vector<ExLocality> & els) const;
// Leaves at most |limit| unique best localities by similarity to
// the query and rank.
void LeaveTopBySimilarityAndRank(size_t limit, std::vector<ExLocality> & els) const;
// Leaves at most |limit| unique best localities by similarity and matched token range size. Among
// elements with the same similarity and matched range size, the one closest to the pivot goes
// first; the rest of the elements are sorted by rank.
void LeaveTopBySimilarityAndOther(size_t limit, std::vector<ExLocality> & els) const;
void GetDocVecs(uint32_t localityId, std::vector<DocVec> & dvs) const;
double GetDistanceToPivot(uint32_t localityId);
double GetSimilarity(QueryVec & qv, IdfMap & docIdfs, std::vector<DocVec> & dvs) const;
QueryParams const & m_params;
Delegate const & m_delegate;
m2::PointD m_pivot;
Delegate & m_delegate;
};
} // namespace search

View file

@ -34,12 +34,22 @@ class LocalityScorerTest : public LocalityScorer::Delegate
public:
using Ids = vector<uint32_t>;
LocalityScorerTest() : m_scorer(m_params, static_cast<LocalityScorer::Delegate &>(*this)) {}
// Builds the scorer under test with a default-constructed pivot and this fixture as its delegate.
// The pivot is replaced later by InitParams() through SetPivotForTests().
LocalityScorerTest()
: m_scorer(m_params, m2::PointD(), static_cast<LocalityScorer::Delegate &>(*this))
{
}
// Convenience overload for tests that do not depend on the pivot: forwards to the three-argument
// InitParams() with a default-constructed pivot.
void InitParams(string const & query, bool lastTokenIsPrefix)
{
InitParams(query, m2::PointD(), lastTokenIsPrefix);
}
void InitParams(string const & query, m2::PointD const & pivot, bool lastTokenIsPrefix)
{
m_params.Clear();
m_scorer.SetPivotForTests(pivot);
vector<UniString> tokens;
Delimiters delims;
SplitUniString(NormalizeAndSimplifyString(query), base::MakeBackInsertFunctor(tokens), delims);
@ -57,7 +67,8 @@ public:
}
}
void AddLocality(string const & name, uint32_t featureId, uint8_t rank = 0)
void AddLocality(string const & name, uint32_t featureId, uint8_t rank = 0,
m2::PointD const & center = {})
{
set<UniString> tokens;
Delimiters delims;
@ -68,6 +79,7 @@ public:
m_names[featureId].push_back(name);
m_ranks[featureId] = rank;
m_centers[featureId] = center;
}
Ids GetTopLocalities(size_t limit)
@ -128,10 +140,17 @@ public:
return it == m_ranks.end() ? 0 : it->second;
}
// LocalityScorer::Delegate override: returns the center registered for |featureId| by
// AddLocality(), or an empty optional when no center is known for this feature.
optional<m2::PointD> GetCenter(uint32_t featureId) override
{
  auto const found = m_centers.find(featureId);
  if (found == m_centers.end())
    return {};

  return found->second;
}
protected:
QueryParams m_params;
unordered_map<uint32_t, vector<string>> m_names;
unordered_map<uint32_t, uint8_t> m_ranks;
unordered_map<uint32_t, m2::PointD> m_centers;
LocalityScorer m_scorer;
base::MemTrie<UniString, base::VectorValues<uint32_t>> m_searchIndex;
@ -263,3 +282,27 @@ UNIT_CLASS_TEST(LocalityScorerTest, Similarity)
InitParams("San Carlos de Apoquindo", false /* lastTokenIsPrefix */);
TEST_EQUAL(GetTopLocalities(1 /* limit */), Ids({ID_SAN_CARLOS_APOQUINDO}), ());
}
// Checks that among localities matching the query equally well the one closest to the pivot is
// returned first, and the remaining ones follow in descending rank order.
UNIT_CLASS_TEST(LocalityScorerTest, DistanceToPivot)
{
  enum
  {
    ID_ABERDEEN_CLOSE,
    ID_ABERDEEN_RANK1,
    ID_ABERDEEN_RANK2,
    ID_ABERDEEN_RANK3
  };

  // Every locality gets its own id: reusing an id would overwrite the rank and the center
  // registered for it by a previous AddLocality() call.
  AddLocality("Aberdeen", ID_ABERDEEN_CLOSE, 10 /* rank */, m2::PointD(11.0, 11.0));
  AddLocality("Aberdeen", ID_ABERDEEN_RANK1, 100 /* rank */, m2::PointD(0.0, 0.0));
  AddLocality("Aberdeen", ID_ABERDEEN_RANK2, 50 /* rank */, m2::PointD(2.0, 2.0));
  AddLocality("Aberdeen", ID_ABERDEEN_RANK3, 5 /* rank */, m2::PointD(4.0, 4.0));

  InitParams("Aberdeen", m2::PointD(10.0, 10.0) /* pivot */, false /* lastTokenIsPrefix */);

  // Expected order is: the closest one (ID_ABERDEEN_CLOSE) first, then sorted by rank.
  TEST_EQUAL(GetTopLocalities(1 /* limit */), Ids({ID_ABERDEEN_CLOSE}), ());
  TEST_EQUAL(GetTopLocalities(2 /* limit */), Ids({ID_ABERDEEN_CLOSE, ID_ABERDEEN_RANK1}), ());
  TEST_EQUAL(GetTopLocalities(3 /* limit */),
             Ids({ID_ABERDEEN_CLOSE, ID_ABERDEEN_RANK1, ID_ABERDEEN_RANK2}), ());
}