From 4e148959f52f9303940fc95bf15091143730b388 Mon Sep 17 00:00:00 2001 From: vng Date: Thu, 7 Jan 2016 19:39:19 +0300 Subject: [PATCH] =?UTF-8?q?[search]=20Basic=20cache=20implementation=20if?= =?UTF-8?q?=20CBV=E2=80=99s=20by=20rects.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- geometry/mercator.hpp | 2 +- search/v2/features_filter.cpp | 55 +++++++- search/v2/features_filter.hpp | 55 +++++++- search/v2/geocoder.cpp | 239 +++++++++++++++++++++++----------- search/v2/geocoder.hpp | 25 +++- 5 files changed, 283 insertions(+), 93 deletions(-) diff --git a/geometry/mercator.hpp b/geometry/mercator.hpp index bd570b0119..4fa61ac21f 100644 --- a/geometry/mercator.hpp +++ b/geometry/mercator.hpp @@ -94,7 +94,7 @@ struct MercatorBounds static m2::PointD GetSmPoint(m2::PointD const & pt, double lonMetresR, double latMetresR); - static double GetCellID2PointAbsEpsilon() { return 1.0E-4; } + static double constexpr GetCellID2PointAbsEpsilon() { return 1.0E-4; } inline static m2::PointD FromLatLon(double lat, double lon) { diff --git a/search/v2/features_filter.cpp b/search/v2/features_filter.cpp index 0a31de1669..ea8b672e8a 100644 --- a/search/v2/features_filter.cpp +++ b/search/v2/features_filter.cpp @@ -1,13 +1,15 @@ #include "search/v2/features_filter.hpp" +#include "coding/compressed_bit_vector.hpp" + namespace search { namespace v2 { -FeaturesFilter::FeaturesFilter() : m_threshold(0) {} +FeaturesFilter::FeaturesFilter() : m_filter(nullptr), m_threshold(0) {} -FeaturesFilter::FeaturesFilter(unique_ptr filter, uint32_t threshold) - : m_filter(move(filter)), m_threshold(threshold) +FeaturesFilter::FeaturesFilter(coding::CompressedBitVector const & filter, uint32_t threshold) + : m_filter(&filter), m_threshold(threshold) { } @@ -23,5 +25,52 @@ unique_ptr FeaturesFilter::Filter( return make_unique(); return coding::CompressedBitVector::Intersect(*m_filter, cbv); } + +void CBVPtr::Free() +{ + if (m_isOwner) + delete m_ptr; + + m_ptr = nullptr; + m_isOwner = false; + m_isFull = false; +} + +void CBVPtr::Union(coding::CompressedBitVector const * p) +{ + if (!p || m_isFull) + return; + + if (!m_ptr) + { + m_ptr = p; + m_isFull = false; + } + else + Set(coding::CompressedBitVector::Union(*m_ptr, *p).release(), true); +} + +void CBVPtr::Intersect(coding::CompressedBitVector const * p) +{ + if (!p) + { + Free(); + return; + } + + if (m_ptr) + Set(coding::CompressedBitVector::Intersect(*m_ptr, *p).release(), true); + else if (m_isFull) + { + m_ptr = p; + m_isFull = false; + } +} + +bool CBVPtr::IsEmpty() const +{ + return !m_isFull && coding::CompressedBitVector::IsEmpty(m_ptr); +} + } // namespace v2 } // namespace search diff --git a/search/v2/features_filter.hpp b/search/v2/features_filter.hpp index 77b14e061a..5aaba3b762 100644 --- a/search/v2/features_filter.hpp +++ b/search/v2/features_filter.hpp @@ -1,9 +1,14 @@ #pragma once -#include "coding/compressed_bit_vector.hpp" +#include "base/macros.hpp" #include "std/unique_ptr.hpp" +namespace coding +{ +class CompressedBitVector; +} + namespace search { namespace v2 @@ -16,17 +21,59 @@ class FeaturesFilter public: FeaturesFilter(); - FeaturesFilter(unique_ptr filter, uint32_t threshold); + FeaturesFilter(coding::CompressedBitVector const & filter, uint32_t threshold); + + inline void SetFilter(coding::CompressedBitVector const * filter) { m_filter = filter; } - inline void SetFilter(unique_ptr filter) { m_filter = move(filter); } inline void SetThreshold(uint32_t threshold) { m_threshold = threshold; } bool NeedToFilter(coding::CompressedBitVector const & features) const; unique_ptr Filter(coding::CompressedBitVector const & cbv) const; private: - unique_ptr m_filter; + // Non-owning ptr. + coding::CompressedBitVector const * m_filter; uint32_t m_threshold; }; + + +/// CompressedBitVector pointer class that incapsulates +/// binary operators logic and takes ownership if needed. +class CBVPtr +{ + DISALLOW_COPY_AND_MOVE(CBVPtr); + + coding::CompressedBitVector const * m_ptr = nullptr; + bool m_isOwner = false; + bool m_isFull = false; + + void Free(); + +public: + CBVPtr() = default; + ~CBVPtr() { Free(); } + + inline void SetFull() + { + Free(); + m_isFull = true; + } + + inline void Set(coding::CompressedBitVector const * p, bool isOwner = false) + { + Free(); + + m_ptr = p; + m_isOwner = p && isOwner; + } + + inline coding::CompressedBitVector const * Get() const { return m_ptr; } + + bool IsEmpty() const; + + void Union(coding::CompressedBitVector const * p); + void Intersect(coding::CompressedBitVector const * p); +}; + } // namespace v2 } // namespace search diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp index 41d33659f5..023cb3606f 100644 --- a/search/v2/geocoder.cpp +++ b/search/v2/geocoder.cpp @@ -10,6 +10,7 @@ #include "indexer/feature_impl.hpp" #include "indexer/index.hpp" #include "indexer/mwm_set.hpp" +#include "indexer/rank_table.hpp" #include "coding/multilang_utf8_string.hpp" @@ -44,6 +45,10 @@ namespace v2 { namespace { + +size_t const kMaxLocalitiesCount = 5; +double constexpr kComparePoints = MercatorBounds::GetCellID2PointAbsEpsilon(); + void JoinQueryTokens(SearchQueryParams const & params, size_t curToken, size_t endToken, string const & sep, string & res) { @@ -181,20 +186,20 @@ void Geocoder::Go(vector & results) { m_matcher.reset(); m_context.reset(); - m_features.clear(); + m_addressFeatures.clear(); m_streets = nullptr; }); m_matcher.reset(new FeaturesLayerMatcher(m_index, *m_context, *this /* cancellable */)); // Creates a cache of posting lists for each token. - m_features.resize(m_numTokens); + m_addressFeatures.resize(m_numTokens); for (size_t i = 0; i < m_numTokens; ++i) { PrepareRetrievalParams(i, i + 1); - m_features[i] = Retrieval::RetrieveAddressFeatures( + m_addressFeatures[i] = Retrieval::RetrieveAddressFeatures( m_context->m_value, *this /* cancellable */, m_retrievalParams); - ASSERT(m_features[i], ()); + ASSERT(m_addressFeatures[i], ()); } m_streets = LoadStreets(*m_context); @@ -213,7 +218,8 @@ void Geocoder::Go(vector & results) void Geocoder::ClearCaches() { - m_features.clear(); + m_geometryFeatures.clear(); + m_addressFeatures.clear(); m_matcher.reset(); m_streetsCache.clear(); } @@ -242,35 +248,114 @@ void Geocoder::FillLocalitiesTable(MwmContext const & context) { m_localities.clear(); - auto addLocality = [&](size_t curToken, size_t endToken, uint32_t featureId) + // 1. Get cbv for every single token and prefix. + vector> tokensCBV; + for (size_t i = 0; i < m_numTokens; ++i) { - FeatureType ft; - context.m_vector.GetByIndex(featureId, ft); - if (m_model.GetSearchType(ft) != SearchModel::SEARCH_TYPE_CITY) - return; + PrepareRetrievalParams(i, i + 1); + tokensCBV.push_back(Retrieval::RetrieveAddressFeatures( + context.m_value, static_cast(*this), m_retrievalParams)); + } - m2::PointD const center = feature::GetCenter(ft, FeatureType::WORST_GEOMETRY); - double const radiusM = ftypes::GetRadiusByPopulation(ft.GetPopulation()); + // 2. Get all locality candidates with all the token ranges. + vector preLocalities; - Locality locality; - locality.m_featureId = featureId; - locality.m_startToken = curToken; - locality.m_endToken = endToken; - locality.m_rect = MercatorBounds::RectByCenterXYAndSizeInMeters(center, radiusM); - m_localities[make_pair(curToken, endToken)].push_back(locality); + for (size_t i = 0; i < m_numTokens; ++i) + { + CBVPtr intersection; + intersection.Set(tokensCBV[i].get(), false); + if (intersection.IsEmpty()) + continue; + + for (size_t j = i + 1; j <= m_numTokens; ++j) + { + coding::CompressedBitVectorEnumerator::ForEach(*intersection.Get(), + [&](uint32_t featureId) + { + Locality l; + l.m_featureId = featureId; + l.m_startToken = i; + l.m_endToken = j; + preLocalities.push_back(l); + }); + + if (j < m_numTokens) + { + intersection.Intersect(tokensCBV[j].get()); + if (intersection.IsEmpty()) + break; + } + } + } + + auto const tokensCountFn = [&](Locality const & l) + { + // Important! Don't take into account matched prefix for locality comparison. + size_t d = l.m_endToken - l.m_startToken; + ASSERT_GREATER(d, 0, ()); + if (l.m_endToken == m_numTokens && !m_params.m_prefixTokens.empty()) + --d; + return d; }; - for (size_t curToken = 0; curToken < m_numTokens; ++curToken) + // 3. Unique preLocalities with featureId but leave the longest range if equal. + sort(preLocalities.begin(), preLocalities.end(), + [&](Locality const & l1, Locality const & l2) + { + if (l1.m_featureId != l2.m_featureId) + return l1.m_featureId < l2.m_featureId; + return tokensCountFn(l1) > tokensCountFn(l2); + }); + + preLocalities.erase(unique(preLocalities.begin(), preLocalities.end(), + [](Locality const & l1, Locality const & l2) + { + return l1.m_featureId == l2.m_featureId; + }), preLocalities.end()); + + // 4. Leave most popular localities. + // Use 2*kMaxLocalitiesCount because there can be countries, states, ... + if (preLocalities.size() > 2*kMaxLocalitiesCount) { - for (size_t endToken = curToken + 1; endToken <= m_numTokens; ++endToken) + auto rankTable = search::RankTable::Load(context.m_value.m_cont); + + sort(preLocalities.begin(), preLocalities.end(), + [&] (Locality const & l1, Locality const & l2) + { + auto const d1 = tokensCountFn(l1); + auto const d2 = tokensCountFn(l2); + if (d1 != d2) + return d1 > d2; + return rankTable->Get(l1.m_featureId) > rankTable->Get(l2.m_featureId); + }); + preLocalities.resize(2*kMaxLocalitiesCount); + } + + // 5. Fill result container. + size_t count = 0; + for (auto & l : preLocalities) + { + FeatureType ft; + context.m_vector.GetByIndex(l.m_featureId, ft); + + if (count < kMaxLocalitiesCount && + ft.GetFeatureType() == feature::GEOM_POINT && + m_model.GetSearchType(ft) == SearchModel::SEARCH_TYPE_CITY) { - PrepareRetrievalParams(curToken, endToken); - auto localities = Retrieval::RetrieveAddressFeatures( - context.m_value, static_cast(*this), m_retrievalParams); - if (coding::CompressedBitVector::IsEmpty(localities)) - break; - coding::CompressedBitVectorEnumerator::ForEach(*localities, - bind(addLocality, curToken, endToken, _1)); + ++count; + l.m_rect = MercatorBounds::RectByCenterXYAndSizeInMeters( + ft.GetCenter(), ftypes::GetRadiusByPopulation(ft.GetPopulation())); + + m_localities[make_pair(l.m_startToken, l.m_endToken)].push_back(l); + } + else + { + /// @todo Process not only cities but states and countries. + /// We can limit the MWM's scope for search. + + // Push to results only full tokens match. + if (l.m_endToken - l.m_startToken == m_numTokens) + m_results->emplace_back(m_worldId, l.m_featureId); } } } @@ -317,27 +402,21 @@ void Geocoder::DoGeocodingWithLocalities() // Unites features from all localities and uses the resulting bit // vector as a filter for features retrieved during geocoding. - unique_ptr allFeatures; + CBVPtr allFeatures; for (auto const & locality : p.second) { - m2::RectD rect = countryBounds; - if (!rect.Intersect(locality.m_rect)) - continue; - auto features = Retrieval::RetrieveGeometryFeatures( - m_context->m_value, static_cast(*this), rect, m_params.m_scale); - if (!features) + m2::RectD rect = locality.m_rect; + if (!rect.Intersect(countryBounds)) continue; - if (!allFeatures) - allFeatures = move(features); - else - allFeatures = coding::CompressedBitVector::Union(*allFeatures, *features); + allFeatures.Union(RetrieveGeometryFeatures(*m_context, rect, locality.m_featureId)); } - if (coding::CompressedBitVector::IsEmpty(allFeatures)) + if (allFeatures.IsEmpty()) continue; - m_filter.SetFilter(move(allFeatures)); + m_filter.SetFilter(allFeatures.Get()); + MY_SCOPE_GUARD(resetFilter, [&]() { m_filter.SetFilter(nullptr); }); // Filter will be applied for all non-empty bit vectors. m_filter.SetThreshold(0); @@ -357,53 +436,32 @@ void Geocoder::DoGeocodingWithoutLocalities() // 50km radius around position. double constexpr kMaxPositionRadiusM = 50.0 * 1000; - double constexpr kEps = 1.0e-5; - - m2::RectD const & viewport = m_params.m_viewport; + m2::RectD viewport = m_params.m_viewport; m2::PointD const & position = m_params.m_position; + CBVPtr allFeatures; + // Extracts features in viewport. - unique_ptr viewportFeatures; { // Limits viewport by kMaxViewportRadiusM. m2::RectD const viewportLimit = MercatorBounds::RectByCenterXYAndSizeInMeters(viewport.Center(), kMaxViewportRadiusM); - m2::RectD rect = viewport; - rect.Intersect(viewportLimit); - if (!rect.IsEmptyInterior()) - { - viewportFeatures = Retrieval::RetrieveGeometryFeatures( - m_context->m_value, static_cast(*this), rect, m_params.m_scale); - } + VERIFY(viewport.Intersect(viewportLimit), ()); + + allFeatures.Union(RetrieveGeometryFeatures(*m_context, viewport, VIEWPORT_ID)); } // Extracts features around user position. - unique_ptr positionFeatures; - if (!position.EqualDxDy(viewport.Center(), kEps)) + if (!position.EqualDxDy(viewport.Center(), kComparePoints)) { m2::RectD const rect = MercatorBounds::RectByCenterXYAndSizeInMeters(position, kMaxPositionRadiusM); - positionFeatures = Retrieval::RetrieveGeometryFeatures( - m_context->m_value, static_cast(*this), rect, m_params.m_scale); + + allFeatures.Union(RetrieveGeometryFeatures(*m_context, rect, POSITION_ID)); } - if (coding::CompressedBitVector::IsEmpty(viewportFeatures) && - coding::CompressedBitVector::IsEmpty(positionFeatures)) - { - m_filter.SetFilter(nullptr); - } - else if (coding::CompressedBitVector::IsEmpty(viewportFeatures)) - { - m_filter.SetFilter(move(positionFeatures)); - } - else if (coding::CompressedBitVector::IsEmpty(positionFeatures)) - { - m_filter.SetFilter(move(viewportFeatures)); - } - else - { - m_filter.SetFilter(coding::CompressedBitVector::Union(*viewportFeatures, *positionFeatures)); - } + m_filter.SetFilter(allFeatures.Get()); + MY_SCOPE_GUARD(resetFilter, [&]() { m_filter.SetFilter(nullptr); }); // Filter will be applied only for large bit vectors. m_filter.SetThreshold(m_params.m_maxNumResults); @@ -434,13 +492,13 @@ void Geocoder::GreedilyMatchStreets() continue; if (startToken == curToken || coding::CompressedBitVector::IsEmpty(allFeatures)) { - buffer = coding::CompressedBitVector::Intersect(*m_streets, *m_features[curToken]); + buffer = coding::CompressedBitVector::Intersect(*m_streets, *m_addressFeatures[curToken]); if (m_filter.NeedToFilter(*buffer)) buffer = m_filter.Filter(*buffer); } else { - buffer = coding::CompressedBitVector::Intersect(*allFeatures, *m_features[curToken]); + buffer = coding::CompressedBitVector::Intersect(*allFeatures, *m_addressFeatures[curToken]); } if (coding::CompressedBitVector::IsEmpty(buffer)) break; @@ -510,12 +568,12 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) unique_ptr intersection; coding::CompressedBitVector * features = nullptr; - // Try to consume first n tokens starting at |curToken|. + // Try to consume [curToken, m_numTokens) tokens range. for (size_t n = 1; curToken + n <= m_numTokens && !m_usedTokens[curToken + n - 1]; ++n) { // At this point |intersection| is the intersection of - // m_features[curToken], m_features[curToken + 1], ..., - // m_features[curToken + n - 2], iff n > 2. + // m_addressFeatures[curToken], m_addressFeatures[curToken + 1], ..., + // m_addressFeatures[curToken + n - 2], iff n > 2. BailIfCancelled(); @@ -530,7 +588,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) if (n == 1) { - features = m_features[curToken].get(); + features = m_addressFeatures[curToken].get(); if (m_filter.NeedToFilter(*features)) { intersection = m_filter.Filter(*features); @@ -540,7 +598,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) else { intersection = - coding::CompressedBitVector::Intersect(*features, *m_features[curToken + n - 1]); + coding::CompressedBitVector::Intersect(*features, *m_addressFeatures[curToken + n - 1]); features = intersection.get(); } ASSERT(features, ()); @@ -688,5 +746,28 @@ coding::CompressedBitVector const * Geocoder::LoadStreets(MwmContext & context) m_streetsCache[mwmId] = move(streetsList[0]); return result; } + +coding::CompressedBitVector const * Geocoder::RetrieveGeometryFeatures( + MwmContext const & context, m2::RectD const & rect, int id) +{ + /// @todo + /// - Implement more smart strategy according to id. + /// - Move all rect limits here + + auto & featuresV = m_geometryFeatures[context.m_id]; + for (auto const & v : featuresV) + { + if (v.m_rect.IsRectInside(rect)) + return v.m_cbv.get(); + } + + auto features = Retrieval::RetrieveGeometryFeatures( + context.m_value, static_cast(*this), rect, m_params.m_scale); + + auto const * result = features.get(); + featuresV.push_back({ m2::Inflate(rect, kComparePoints, kComparePoints), move(features), id }); + return result; +} + } // namespace v2 } // namespace search diff --git a/search/v2/geocoder.hpp b/search/v2/geocoder.hpp index b17c7e57bb..14c38352db 100644 --- a/search/v2/geocoder.hpp +++ b/search/v2/geocoder.hpp @@ -83,11 +83,9 @@ public: private: struct Locality { - Locality() : m_featureId(0), m_startToken(0), m_endToken(0) {} - - uint32_t m_featureId; - size_t m_startToken; - size_t m_endToken; + uint32_t m_featureId = 0; + size_t m_startToken = 0; + size_t m_endToken = 0; m2::RectD m_rect; }; @@ -141,6 +139,12 @@ private: coding::CompressedBitVector const * LoadStreets(MwmContext & context); + enum { VIEWPORT_ID = -1, POSITION_ID = -2 }; + /// A caching wrapper around Retrieval::RetrieveGeometryFeatures. + /// param[in] Optional query id. Use VIEWPORT_ID, POSITION_ID or feature index for locality. + coding::CompressedBitVector const * RetrieveGeometryFeatures( + MwmContext const & context, m2::RectD const & rect, int id); + Index & m_index; // Geocoder params. @@ -164,10 +168,19 @@ private: // Map from [curToken, endToken) to matching localities list. map, vector> m_localities; + // Cache of geometry features. + struct FeaturesInRect + { + m2::RectD m_rect; + unique_ptr m_cbv; + int m_id; + }; + map> m_geometryFeatures; + // Cache of posting lists for each token in the query. TODO (@y, // @m, @vng): consider to update this cache lazily, as user inputs // tokens one-by-one. - vector> m_features; + vector> m_addressFeatures; // Cache of street ids in mwms. map> m_streetsCache;