diff --git a/search/geocoder.cpp b/search/geocoder.cpp index b0a673a9e3..13bbe448ca 100644 --- a/search/geocoder.cpp +++ b/search/geocoder.cpp @@ -162,7 +162,7 @@ private: class LocalityScorerDelegate : public LocalityScorer::Delegate { public: - LocalityScorerDelegate(MwmContext const & context, Geocoder::Params const & params, + LocalityScorerDelegate(MwmContext & context, Geocoder::Params const & params, base::Cancellable const & cancellable) : m_context(context) , m_params(params) @@ -188,8 +188,24 @@ public: uint8_t GetRank(uint32_t featureId) const override { return m_ranks.Get(featureId); } + optional GetCenter(uint32_t featureId) override + { + m2::PointD center; + // m_context.GetCenter is faster but may not work for editor created features. + if (!m_context.GetCenter(featureId, center)) + { + auto ft = m_context.GetFeature(featureId); + if (!ft) + return {}; + + center = feature::GetCenter(*ft); + } + + return center; + } + private: - MwmContext const & m_context; + MwmContext & m_context; Geocoder::Params const & m_params; base::Cancellable const & m_cancellable; @@ -657,7 +673,7 @@ void Geocoder::FillLocalityCandidates(BaseContext const & ctx, CBV const & filte } LocalityScorerDelegate delegate(*m_context, m_params, m_cancellable); - LocalityScorer scorer(m_params, delegate); + LocalityScorer scorer(m_params, m_params.m_pivot.Center(), delegate); scorer.GetTopLocalities(m_context->GetId(), ctx, filter, maxNumLocalities, preLocalities); } diff --git a/search/locality_scorer.cpp b/search/locality_scorer.cpp index 802563d14f..d4a59b77cc 100644 --- a/search/locality_scorer.cpp +++ b/search/locality_scorer.cpp @@ -80,14 +80,15 @@ LocalityScorer::ExLocality::ExLocality(Locality const & locality, double queryNo } // LocalityScorer ---------------------------------------------------------------------------------- -LocalityScorer::LocalityScorer(QueryParams const & params, Delegate const & delegate) - : m_params(params), m_delegate(delegate) 
+LocalityScorer::LocalityScorer(QueryParams const & params, m2::PointD const & pivot, + Delegate & delegate) + : m_params(params), m_pivot(pivot), m_delegate(delegate) { } void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseContext const & ctx, CBV const & filter, size_t limit, - vector & localities) const + vector & localities) { double const kUnknownIdf = 1.0; @@ -165,8 +166,7 @@ void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseConte LeaveTopLocalities(idfs, limit, localities); } -void LocalityScorer::LeaveTopLocalities(IdfMap & idfs, size_t limit, - vector & localities) const +void LocalityScorer::LeaveTopLocalities(IdfMap & idfs, size_t limit, vector & localities) { vector els; els.reserve(localities.size()); @@ -199,11 +199,15 @@ void LocalityScorer::LeaveTopLocalities(IdfMap & idfs, size_t limit, // for all localities tokens. Therefore, for tokens not in the // query, some default IDF value will be used. GetDocVecs(els[i].GetId(), dvs); + auto const distance = GetDistanceToPivot(els[i].GetId()); for (; i < j; ++i) + { els[i].m_similarity = GetSimilarity(els[i].m_locality.m_queryVec, idfs, dvs); + els[i].m_distanceToPivot = distance; + } } - LeaveTopBySimilarityAndRank(limit, els); + LeaveTopBySimilarityAndOther(limit, els); ASSERT_LESS_OR_EQUAL(els.size(), limit, ()); localities.clear(); @@ -234,7 +238,7 @@ void LocalityScorer::LeaveTopByExactMatchNormAndRank(size_t limitUniqueIds, base::EraseIf(els, [&](ExLocality const & el) { return seen.find(el.GetId()) == seen.cend(); }); } -void LocalityScorer::LeaveTopBySimilarityAndRank(size_t limit, vector & els) const +void LocalityScorer::LeaveTopBySimilarityAndOther(size_t limit, vector & els) const { sort(els.begin(), els.end(), [](ExLocality const & lhs, ExLocality const & rhs) { if (lhs.m_similarity != rhs.m_similarity) @@ -246,19 +250,49 @@ void LocalityScorer::LeaveTopBySimilarityAndRank(size_t limit, vector rhs.m_similarity; + if 
(lhs.m_locality.m_tokenRange.Size() != rhs.m_locality.m_tokenRange.Size()) + return lhs.m_locality.m_tokenRange.Size() > rhs.m_locality.m_tokenRange.Size(); + return false; + }; + + vector tmp; + tmp.reserve(els.size()); + auto begin = els.begin(); + auto const end = els.end(); + while (begin != end) + { + // We can split els to equal ranges by similarity and size because we sorted els by similarity + // and size first. + auto const range = equal_range(begin, end, *begin, compareSimilarityAndSize); + auto const closest = min_element(range.first, range.second, lessDistance); + tmp.emplace_back(std::move(*closest)); + for (auto it = range.first; it != range.second; ++it) + { + if (it != closest) + tmp.emplace_back(move(*it)); + } + begin = range.second; + } + unordered_set seen; - size_t n = 0; - for (size_t i = 0; i < els.size() && n < limit; ++i) + els.clear(); + els.reserve(limit); + for (size_t i = 0; i < tmp.size() && els.size() < limit; ++i) { - auto const id = els[i].GetId(); + auto const id = tmp[i].GetId(); if (seen.insert(id).second) { - els[n] = els[i]; - ++n; + els.emplace_back(move(tmp[i])); } } - els.erase(els.begin() + n, els.end()); } void LocalityScorer::GetDocVecs(uint32_t localityId, vector & dvs) const @@ -279,6 +313,17 @@ void LocalityScorer::GetDocVecs(uint32_t localityId, vector & dvs) const } } +double LocalityScorer::GetDistanceToPivot(uint32_t localityId) +{ + auto distance = numeric_limits::max(); + auto const center = m_delegate.GetCenter(localityId); + if (center) + { + distance = mercator::DistanceOnEarth(m_pivot, *center); + } + return distance; +} + double LocalityScorer::GetSimilarity(QueryVec & qv, IdfMap & docIdfs, vector & dvc) const { double const kScale = 1e6; diff --git a/search/locality_scorer.hpp b/search/locality_scorer.hpp index 786ea3f417..a5da0d0097 100644 --- a/search/locality_scorer.hpp +++ b/search/locality_scorer.hpp @@ -8,6 +8,8 @@ #include #include +#include +#include #include #include @@ -30,14 +32,17 @@ public: 
virtual void GetNames(uint32_t featureId, std::vector & names) const = 0; virtual uint8_t GetRank(uint32_t featureId) const = 0; + virtual std::optional GetCenter(uint32_t featureId) = 0; }; - LocalityScorer(QueryParams const & params, Delegate const & delegate); + LocalityScorer(QueryParams const & params, m2::PointD const & pivot, Delegate & delegate); + + void SetPivotForTests(m2::PointD const & pivot) { m_pivot = pivot; } // Leaves at most |limit| elements of |localities|, ordered by their // features. void GetTopLocalities(MwmSet::MwmId const & countryId, BaseContext const & ctx, - CBV const & filter, size_t limit, std::vector & localities) const; + CBV const & filter, size_t limit, std::vector & localities); private: struct ExLocality @@ -50,27 +55,31 @@ private: double m_queryNorm = 0.0; double m_similarity = 0.0; uint8_t m_rank = 0; + double m_distanceToPivot = std::numeric_limits::max(); }; friend std::string DebugPrint(ExLocality const & locality); // Leaves at most |limit| elements of |localities|, ordered by some // combination of ranks and number of matched tokens. - void LeaveTopLocalities(IdfMap & idfs, size_t limit, std::vector & localities) const; + void LeaveTopLocalities(IdfMap & idfs, size_t limit, std::vector & localities); // Selects at most |limitUniqueIds| best features by exact match, query norm and // rank, and then leaves only localities corresponding to those // features in |els|. void LeaveTopByExactMatchNormAndRank(size_t limitUniqueIds, std::vector & els) const; - // Leaves at most |limit| unique best localities by similarity to - // the query and rank. - void LeaveTopBySimilarityAndRank(size_t limit, std::vector & els) const; + // Leaves at most |limit| unique best localities by similarity and matched tokens range size. For + // elements with the same similarity and matched range size selects the closest one (by distance to + // pivot), rest of elements are sorted by rank. 
+ void LeaveTopBySimilarityAndOther(size_t limit, std::vector & els) const; void GetDocVecs(uint32_t localityId, std::vector & dvs) const; + double GetDistanceToPivot(uint32_t localityId); double GetSimilarity(QueryVec & qv, IdfMap & docIdfs, std::vector & dvs) const; QueryParams const & m_params; - Delegate const & m_delegate; + m2::PointD m_pivot; + Delegate & m_delegate; }; } // namespace search diff --git a/search/search_tests/locality_scorer_test.cpp b/search/search_tests/locality_scorer_test.cpp index 4bb7962453..feb377748c 100644 --- a/search/search_tests/locality_scorer_test.cpp +++ b/search/search_tests/locality_scorer_test.cpp @@ -34,12 +34,22 @@ class LocalityScorerTest : public LocalityScorer::Delegate public: using Ids = vector; - LocalityScorerTest() : m_scorer(m_params, static_cast(*this)) {} + LocalityScorerTest() + : m_scorer(m_params, m2::PointD(), static_cast(*this)) + { + } void InitParams(string const & query, bool lastTokenIsPrefix) + { + InitParams(query, m2::PointD(), lastTokenIsPrefix); + } + + void InitParams(string const & query, m2::PointD const & pivot, bool lastTokenIsPrefix) { m_params.Clear(); + m_scorer.SetPivotForTests(pivot); + vector tokens; Delimiters delims; SplitUniString(NormalizeAndSimplifyString(query), base::MakeBackInsertFunctor(tokens), delims); @@ -57,7 +67,8 @@ public: } } - void AddLocality(string const & name, uint32_t featureId, uint8_t rank = 0) + void AddLocality(string const & name, uint32_t featureId, uint8_t rank = 0, + m2::PointD const & center = {}) { set tokens; Delimiters delims; @@ -68,6 +79,7 @@ public: m_names[featureId].push_back(name); m_ranks[featureId] = rank; + m_centers[featureId] = center; } Ids GetTopLocalities(size_t limit) @@ -128,10 +140,17 @@ public: return it == m_ranks.end() ? 0 : it->second; } + optional GetCenter(uint32_t featureId) override + { + auto it = m_centers.find(featureId); + return it == m_centers.end() ? 
optional() : it->second; + } + protected: QueryParams m_params; unordered_map> m_names; unordered_map m_ranks; + unordered_map m_centers; LocalityScorer m_scorer; base::MemTrie> m_searchIndex; @@ -263,3 +282,27 @@ UNIT_CLASS_TEST(LocalityScorerTest, Similarity) InitParams("San Carlos de Apoquindo", false /* lastTokenIsPrefix */); TEST_EQUAL(GetTopLocalities(1 /* limit */), Ids({ID_SAN_CARLOS_APOQUINDO}), ()); } + +UNIT_CLASS_TEST(LocalityScorerTest, DistanceToPivot) +{ + enum + { + ID_ABERDEEN_CLOSE, + ID_ABERDEEN_RANK1, + ID_ABERDEEN_RANK2, + ID_ABERDEEN_RANK3 + }; + + AddLocality("Aberdeen", ID_ABERDEEN_CLOSE, 10 /* rank */, m2::PointD(11.0, 11.0)); + AddLocality("Aberdeen", ID_ABERDEEN_RANK1, 100 /* rank */, m2::PointD(0.0, 0.0)); + AddLocality("Aberdeen", ID_ABERDEEN_RANK2, 50 /* rank */, m2::PointD(2.0, 2.0)); + AddLocality("Aberdeen", ID_ABERDEEN_RANK3, 5 /* rank */, m2::PointD(4.0, 4.0)); + + InitParams("Aberdeen", m2::PointD(10.0, 10.0) /* pivot */, false /* lastTokenIsPrefix */); + + // Expected order is: the closest one (ID_ABERDEEN_CLOSE) first, then the rest sorted by rank. + TEST_EQUAL(GetTopLocalities(1 /* limit */), Ids({ID_ABERDEEN_CLOSE}), ()); + TEST_EQUAL(GetTopLocalities(2 /* limit */), Ids({ID_ABERDEEN_CLOSE, ID_ABERDEEN_RANK1}), ()); + TEST_EQUAL(GetTopLocalities(3 /* limit */), + Ids({ID_ABERDEEN_CLOSE, ID_ABERDEEN_RANK1, ID_ABERDEEN_RANK2}), ()); +}