forked from organicmaps/organicmaps
[search] LocalityScorer: Prefer nearest locality.
This commit is contained in:
parent
38d2660f47
commit
d779cde557
4 changed files with 138 additions and 25 deletions
|
@ -162,7 +162,7 @@ private:
|
|||
class LocalityScorerDelegate : public LocalityScorer::Delegate
|
||||
{
|
||||
public:
|
||||
LocalityScorerDelegate(MwmContext const & context, Geocoder::Params const & params,
|
||||
LocalityScorerDelegate(MwmContext & context, Geocoder::Params const & params,
|
||||
base::Cancellable const & cancellable)
|
||||
: m_context(context)
|
||||
, m_params(params)
|
||||
|
@ -188,8 +188,24 @@ public:
|
|||
|
||||
uint8_t GetRank(uint32_t featureId) const override { return m_ranks.Get(featureId); }
|
||||
|
||||
// Returns the center of the feature |featureId|. An empty optional is returned when the context
// has no stored center for the feature and the feature itself can't be loaded.
optional<m2::PointD> GetCenter(uint32_t featureId) override
{
  // Asking the context for the center is faster but may not work for editor created features,
  // so the feature is loaded as a fallback.
  m2::PointD point;
  if (m_context.GetCenter(featureId, point))
    return point;

  auto ft = m_context.GetFeature(featureId);
  if (ft)
  {
    point = feature::GetCenter(*ft);
    return point;
  }

  return {};
}
|
||||
|
||||
private:
|
||||
MwmContext const & m_context;
|
||||
MwmContext & m_context;
|
||||
Geocoder::Params const & m_params;
|
||||
base::Cancellable const & m_cancellable;
|
||||
|
||||
|
@ -657,7 +673,7 @@ void Geocoder::FillLocalityCandidates(BaseContext const & ctx, CBV const & filte
|
|||
}
|
||||
|
||||
LocalityScorerDelegate delegate(*m_context, m_params, m_cancellable);
|
||||
LocalityScorer scorer(m_params, delegate);
|
||||
LocalityScorer scorer(m_params, m_params.m_pivot.Center(), delegate);
|
||||
scorer.GetTopLocalities(m_context->GetId(), ctx, filter, maxNumLocalities, preLocalities);
|
||||
}
|
||||
|
||||
|
|
|
@ -80,14 +80,15 @@ LocalityScorer::ExLocality::ExLocality(Locality const & locality, double queryNo
|
|||
}
|
||||
|
||||
// LocalityScorer ----------------------------------------------------------------------------------
|
||||
LocalityScorer::LocalityScorer(QueryParams const & params, Delegate const & delegate)
|
||||
: m_params(params), m_delegate(delegate)
|
||||
LocalityScorer::LocalityScorer(QueryParams const & params, m2::PointD const & pivot,
|
||||
Delegate & delegate)
|
||||
: m_params(params), m_pivot(pivot), m_delegate(delegate)
|
||||
{
|
||||
}
|
||||
|
||||
void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseContext const & ctx,
|
||||
CBV const & filter, size_t limit,
|
||||
vector<Locality> & localities) const
|
||||
vector<Locality> & localities)
|
||||
{
|
||||
double const kUnknownIdf = 1.0;
|
||||
|
||||
|
@ -165,8 +166,7 @@ void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseConte
|
|||
LeaveTopLocalities(idfs, limit, localities);
|
||||
}
|
||||
|
||||
void LocalityScorer::LeaveTopLocalities(IdfMap & idfs, size_t limit,
|
||||
vector<Locality> & localities) const
|
||||
void LocalityScorer::LeaveTopLocalities(IdfMap & idfs, size_t limit, vector<Locality> & localities)
|
||||
{
|
||||
vector<ExLocality> els;
|
||||
els.reserve(localities.size());
|
||||
|
@ -199,11 +199,15 @@ void LocalityScorer::LeaveTopLocalities(IdfMap & idfs, size_t limit,
|
|||
// for all localities tokens. Therefore, for tokens not in the
|
||||
// query, some default IDF value will be used.
|
||||
GetDocVecs(els[i].GetId(), dvs);
|
||||
auto const distance = GetDistanceToPivot(els[i].GetId());
|
||||
for (; i < j; ++i)
|
||||
{
|
||||
els[i].m_similarity = GetSimilarity(els[i].m_locality.m_queryVec, idfs, dvs);
|
||||
els[i].m_distanceToPivot = distance;
|
||||
}
|
||||
}
|
||||
|
||||
LeaveTopBySimilarityAndRank(limit, els);
|
||||
LeaveTopBySimilarityAndOther(limit, els);
|
||||
ASSERT_LESS_OR_EQUAL(els.size(), limit, ());
|
||||
|
||||
localities.clear();
|
||||
|
@ -234,7 +238,7 @@ void LocalityScorer::LeaveTopByExactMatchNormAndRank(size_t limitUniqueIds,
|
|||
base::EraseIf(els, [&](ExLocality const & el) { return seen.find(el.GetId()) == seen.cend(); });
|
||||
}
|
||||
|
||||
void LocalityScorer::LeaveTopBySimilarityAndRank(size_t limit, vector<ExLocality> & els) const
|
||||
void LocalityScorer::LeaveTopBySimilarityAndOther(size_t limit, vector<ExLocality> & els) const
|
||||
{
|
||||
sort(els.begin(), els.end(), [](ExLocality const & lhs, ExLocality const & rhs) {
|
||||
if (lhs.m_similarity != rhs.m_similarity)
|
||||
|
@ -246,19 +250,49 @@ void LocalityScorer::LeaveTopBySimilarityAndRank(size_t limit, vector<ExLocality
|
|||
return lhs.m_locality.m_featureId < rhs.m_locality.m_featureId;
|
||||
});
|
||||
|
||||
auto lessDistance = [](ExLocality const & lhs, ExLocality const & rhs) {
|
||||
return lhs.m_distanceToPivot < rhs.m_distanceToPivot;
|
||||
};
|
||||
|
||||
auto const compareSimilarityAndSize = [](ExLocality const & lhs, ExLocality const & rhs) {
|
||||
if (lhs.m_similarity != rhs.m_similarity)
|
||||
return lhs.m_similarity > rhs.m_similarity;
|
||||
if (lhs.m_locality.m_tokenRange.Size() != rhs.m_locality.m_tokenRange.Size())
|
||||
return lhs.m_locality.m_tokenRange.Size() > rhs.m_locality.m_tokenRange.Size();
|
||||
return false;
|
||||
};
|
||||
|
||||
vector<ExLocality> tmp;
|
||||
tmp.reserve(els.size());
|
||||
auto begin = els.begin();
|
||||
auto const end = els.end();
|
||||
while (begin != end)
|
||||
{
|
||||
// We can split els to equal ranges by similarity and size because we sorted els by similarity
|
||||
// and size first.
|
||||
auto const range = equal_range(begin, end, *begin, compareSimilarityAndSize);
|
||||
auto const closest = min_element(range.first, range.second, lessDistance);
|
||||
tmp.emplace_back(std::move(*closest));
|
||||
for (auto it = range.first; it != range.second; ++it)
|
||||
{
|
||||
if (it != closest)
|
||||
tmp.emplace_back(move(*it));
|
||||
}
|
||||
begin = range.second;
|
||||
}
|
||||
|
||||
unordered_set<uint32_t> seen;
|
||||
|
||||
size_t n = 0;
|
||||
for (size_t i = 0; i < els.size() && n < limit; ++i)
|
||||
els.clear();
|
||||
els.reserve(limit);
|
||||
for (size_t i = 0; i < tmp.size() && els.size() < limit; ++i)
|
||||
{
|
||||
auto const id = els[i].GetId();
|
||||
auto const id = tmp[i].GetId();
|
||||
if (seen.insert(id).second)
|
||||
{
|
||||
els[n] = els[i];
|
||||
++n;
|
||||
els.emplace_back(move(tmp[i]));
|
||||
}
|
||||
}
|
||||
els.erase(els.begin() + n, els.end());
|
||||
}
|
||||
|
||||
void LocalityScorer::GetDocVecs(uint32_t localityId, vector<DocVec> & dvs) const
|
||||
|
@ -279,6 +313,17 @@ void LocalityScorer::GetDocVecs(uint32_t localityId, vector<DocVec> & dvs) const
|
|||
}
|
||||
}
|
||||
|
||||
double LocalityScorer::GetDistanceToPivot(uint32_t localityId)
|
||||
{
|
||||
auto distance = numeric_limits<double>::max();
|
||||
auto const center = m_delegate.GetCenter(localityId);
|
||||
if (center)
|
||||
{
|
||||
distance = mercator::DistanceOnEarth(m_pivot, *center);
|
||||
}
|
||||
return distance;
|
||||
}
|
||||
|
||||
double LocalityScorer::GetSimilarity(QueryVec & qv, IdfMap & docIdfs, vector<DocVec> & dvc) const
|
||||
{
|
||||
double const kScale = 1e6;
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
@ -30,14 +32,17 @@ public:
|
|||
|
||||
virtual void GetNames(uint32_t featureId, std::vector<std::string> & names) const = 0;
|
||||
virtual uint8_t GetRank(uint32_t featureId) const = 0;
|
||||
virtual std::optional<m2::PointD> GetCenter(uint32_t featureId) = 0;
|
||||
};
|
||||
|
||||
LocalityScorer(QueryParams const & params, Delegate const & delegate);
|
||||
LocalityScorer(QueryParams const & params, m2::PointD const & pivot, Delegate & delegate);
|
||||
|
||||
void SetPivotForTests(m2::PointD const & pivot) { m_pivot = pivot; }
|
||||
|
||||
// Leaves at most |limit| elements of |localities|, ordered by their
|
||||
// features.
|
||||
void GetTopLocalities(MwmSet::MwmId const & countryId, BaseContext const & ctx,
|
||||
CBV const & filter, size_t limit, std::vector<Locality> & localities) const;
|
||||
CBV const & filter, size_t limit, std::vector<Locality> & localities);
|
||||
|
||||
private:
|
||||
struct ExLocality
|
||||
|
@ -50,27 +55,31 @@ private:
|
|||
double m_queryNorm = 0.0;
|
||||
double m_similarity = 0.0;
|
||||
uint8_t m_rank = 0;
|
||||
double m_distanceToPivot = std::numeric_limits<double>::max();
|
||||
};
|
||||
|
||||
friend std::string DebugPrint(ExLocality const & locality);
|
||||
|
||||
// Leaves at most |limit| elements of |localities|, ordered by some
|
||||
// combination of ranks and number of matched tokens.
|
||||
void LeaveTopLocalities(IdfMap & idfs, size_t limit, std::vector<Locality> & localities) const;
|
||||
void LeaveTopLocalities(IdfMap & idfs, size_t limit, std::vector<Locality> & localities);
|
||||
|
||||
// Selects at most |limitUniqueIds| best features by exact match, query norm and
|
||||
// rank, and then leaves only localities corresponding to those
|
||||
// features in |els|.
|
||||
void LeaveTopByExactMatchNormAndRank(size_t limitUniqueIds, std::vector<ExLocality> & els) const;
|
||||
|
||||
// Leaves at most |limit| unique best localities by similarity to
|
||||
// the query and rank.
|
||||
void LeaveTopBySimilarityAndRank(size_t limit, std::vector<ExLocality> & els) const;
|
||||
// Leaves at most |limit| unique best localities by similarity and matched tokens range size. For
|
||||
// elements with the same similarity and matched range size selects the closest one (by distance to
|
||||
// pivot), rest of elements are sorted by rank.
|
||||
void LeaveTopBySimilarityAndOther(size_t limit, std::vector<ExLocality> & els) const;
|
||||
|
||||
void GetDocVecs(uint32_t localityId, std::vector<DocVec> & dvs) const;
|
||||
double GetDistanceToPivot(uint32_t localityId);
|
||||
double GetSimilarity(QueryVec & qv, IdfMap & docIdfs, std::vector<DocVec> & dvs) const;
|
||||
|
||||
QueryParams const & m_params;
|
||||
Delegate const & m_delegate;
|
||||
m2::PointD m_pivot;
|
||||
Delegate & m_delegate;
|
||||
};
|
||||
} // namespace search
|
||||
|
|
|
@ -34,12 +34,22 @@ class LocalityScorerTest : public LocalityScorer::Delegate
|
|||
public:
|
||||
using Ids = vector<uint32_t>;
|
||||
|
||||
LocalityScorerTest() : m_scorer(m_params, static_cast<LocalityScorer::Delegate &>(*this)) {}
|
||||
LocalityScorerTest()
|
||||
: m_scorer(m_params, m2::PointD(), static_cast<LocalityScorer::Delegate &>(*this))
|
||||
{
|
||||
}
|
||||
|
||||
void InitParams(string const & query, bool lastTokenIsPrefix)
|
||||
{
|
||||
InitParams(query, m2::PointD(), lastTokenIsPrefix);
|
||||
}
|
||||
|
||||
void InitParams(string const & query, m2::PointD const & pivot, bool lastTokenIsPrefix)
|
||||
{
|
||||
m_params.Clear();
|
||||
|
||||
m_scorer.SetPivotForTests(pivot);
|
||||
|
||||
vector<UniString> tokens;
|
||||
Delimiters delims;
|
||||
SplitUniString(NormalizeAndSimplifyString(query), base::MakeBackInsertFunctor(tokens), delims);
|
||||
|
@ -57,7 +67,8 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void AddLocality(string const & name, uint32_t featureId, uint8_t rank = 0)
|
||||
void AddLocality(string const & name, uint32_t featureId, uint8_t rank = 0,
|
||||
m2::PointD const & center = {})
|
||||
{
|
||||
set<UniString> tokens;
|
||||
Delimiters delims;
|
||||
|
@ -68,6 +79,7 @@ public:
|
|||
|
||||
m_names[featureId].push_back(name);
|
||||
m_ranks[featureId] = rank;
|
||||
m_centers[featureId] = center;
|
||||
}
|
||||
|
||||
Ids GetTopLocalities(size_t limit)
|
||||
|
@ -128,10 +140,17 @@ public:
|
|||
return it == m_ranks.end() ? 0 : it->second;
|
||||
}
|
||||
|
||||
// Test implementation of the delegate: returns the center registered for |featureId|, or an
// empty optional when no center was registered for it.
optional<m2::PointD> GetCenter(uint32_t featureId) override
{
  auto const found = m_centers.find(featureId);
  if (found == m_centers.end())
    return {};

  return found->second;
}
|
||||
|
||||
protected:
|
||||
QueryParams m_params;
|
||||
unordered_map<uint32_t, vector<string>> m_names;
|
||||
unordered_map<uint32_t, uint8_t> m_ranks;
|
||||
unordered_map<uint32_t, m2::PointD> m_centers;
|
||||
LocalityScorer m_scorer;
|
||||
|
||||
base::MemTrie<UniString, base::VectorValues<uint32_t>> m_searchIndex;
|
||||
|
@ -263,3 +282,27 @@ UNIT_CLASS_TEST(LocalityScorerTest, Similarity)
|
|||
InitParams("San Carlos de Apoquindo", false /* lastTokenIsPrefix */);
|
||||
TEST_EQUAL(GetTopLocalities(1 /* limit */), Ids({ID_SAN_CARLOS_APOQUINDO}), ());
|
||||
}
|
||||
|
||||
// Checks that among localities matching the query equally well the one closest to the pivot goes
// first, and the rest follow sorted by rank.
UNIT_CLASS_TEST(LocalityScorerTest, DistanceToPivot)
{
  enum
  {
    ID_ABERDEEN_CLOSE,
    ID_ABERDEEN_RANK1,
    ID_ABERDEEN_RANK2,
    ID_ABERDEEN_RANK3
  };

  // Four distinct features share the same name; they differ only by rank and center.
  AddLocality("Aberdeen", ID_ABERDEEN_CLOSE, 10 /* rank */, m2::PointD(11.0, 11.0));
  AddLocality("Aberdeen", ID_ABERDEEN_RANK1, 100 /* rank */, m2::PointD(0.0, 0.0));
  AddLocality("Aberdeen", ID_ABERDEEN_RANK2, 50 /* rank */, m2::PointD(2.0, 2.0));
  // Each locality needs its own id: AddLocality stores rank and center keyed by feature id, so
  // reusing ID_ABERDEEN_RANK2 here would overwrite the values registered on the previous line.
  AddLocality("Aberdeen", ID_ABERDEEN_RANK3, 5 /* rank */, m2::PointD(4.0, 4.0));

  InitParams("Aberdeen", m2::PointD(10.0, 10.0) /* pivot */, false /* lastTokenIsPrefix */);

  // Expected order is: the closest one (ID_ABERDEEN_CLOSE) first, then sorted by rank.
  TEST_EQUAL(GetTopLocalities(1 /* limit */), Ids({ID_ABERDEEN_CLOSE}), ());
  TEST_EQUAL(GetTopLocalities(2 /* limit */), Ids({ID_ABERDEEN_CLOSE, ID_ABERDEEN_RANK1}), ());
  TEST_EQUAL(GetTopLocalities(3 /* limit */),
             Ids({ID_ABERDEEN_CLOSE, ID_ABERDEEN_RANK1, ID_ABERDEEN_RANK2}), ());
}
|
||||
|
|
Loading…
Add table
Reference in a new issue