From 2f51f39aa326a0e431bfcb0817787843558ecac8 Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Tue, 15 Dec 2015 19:55:29 +0300 Subject: [PATCH] [search] Implemented FeaturesFilter and cache for houses. Optimized house numbers matching. --- coding/compressed_bit_vector.cpp | 3 +- search/retrieval.cpp | 43 +++--- search/retrieval.hpp | 8 +- search/search.pro | 4 + search/search_query.cpp | 6 +- search/search_query.hpp | 5 +- search/v2/features_filter.cpp | 71 +++++++++ search/v2/features_filter.hpp | 69 +++++++++ search/v2/features_layer.cpp | 5 +- search/v2/features_layer.hpp | 8 +- search/v2/features_layer_matcher.cpp | 42 ++++++ search/v2/features_layer_matcher.hpp | 106 ++++++++------ search/v2/features_layer_path_finder.cpp | 32 +++-- search/v2/features_layer_path_finder.hpp | 9 +- search/v2/geocoder.cpp | 175 ++++++++++++++++------- search/v2/geocoder.hpp | 51 +++++-- search/v2/rank_table_cache.cpp | 31 ++++ search/v2/rank_table_cache.hpp | 35 +++++ search/v2/search_model.cpp | 8 -- search/v2/search_model.hpp | 2 - search/v2/search_query_v2.cpp | 12 +- search/v2/search_query_v2.hpp | 1 + 22 files changed, 556 insertions(+), 170 deletions(-) create mode 100644 search/v2/features_filter.cpp create mode 100644 search/v2/features_filter.hpp create mode 100644 search/v2/rank_table_cache.cpp create mode 100644 search/v2/rank_table_cache.hpp diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp index 496fd63e64..9d9513f11f 100644 --- a/coding/compressed_bit_vector.cpp +++ b/coding/compressed_bit_vector.cpp @@ -259,8 +259,7 @@ uint64_t SparseCBV::PopCount() const { return m_positions.size(); } bool SparseCBV::GetBit(uint64_t pos) const { - auto const it = lower_bound(m_positions.begin(), m_positions.end(), pos); - return it != m_positions.end() && *it == pos; + return binary_search(m_positions.begin(), m_positions.end(), pos); } CompressedBitVector::StorageStrategy SparseCBV::GetStorageStrategy() const diff --git a/search/retrieval.cpp b/search/retrieval.cpp index ad94837e8a..aed9116f2a 100644 --- a/search/retrieval.cpp +++ b/search/retrieval.cpp @@ -57,11 +57,10 @@ void CoverRect(m2::RectD const & rect, int scale, covering::IntervalsT & result) // features matching to |params|. template unique_ptr RetrieveAddressFeaturesImpl( - MwmValue * value, my::Cancellable const & cancellable, SearchQueryParams const & params) + MwmValue & value, my::Cancellable const & cancellable, SearchQueryParams const & params) { - ASSERT(value, ()); - serial::CodingParams codingParams(trie::GetCodingParams(value->GetHeader().GetDefCodingParams())); - ModelReaderPtr searchReader = value->m_cont.GetReader(SEARCH_INDEX_FILE_TAG); + serial::CodingParams codingParams(trie::GetCodingParams(value.GetHeader().GetDefCodingParams())); + ModelReaderPtr searchReader = value.m_cont.GetReader(SEARCH_INDEX_FILE_TAG); auto emptyFilter = [](uint32_t /* featureId */) { @@ -87,13 +86,10 @@ unique_ptr RetrieveAddressFeaturesImpl( // Retrieves from the geometry index corresponding to handle all // features from |coverage|. -unique_ptr RetrieveGeometryFeatures( - MwmSet::MwmHandle const & handle, my::Cancellable const & cancellable, - covering::IntervalsT const & coverage, int scale) +unique_ptr RetrieveGeometryFeaturesImpl( + MwmValue & value, my::Cancellable const & cancellable, covering::IntervalsT const & coverage, + int scale) { - auto * value = handle.GetValue(); - ASSERT(value, ()); - // TODO (@y, @m): remove this code as soon as geometry index will // have native support for bit vectors. vector features; @@ -104,7 +100,7 @@ unique_ptr RetrieveGeometryFeatures( features.push_back(featureId); }; - ScaleIndex index(value->m_cont.GetReader(INDEX_FILE_TAG), value->m_factory); + ScaleIndex index(value.m_cont.GetReader(INDEX_FILE_TAG), value.m_factory); for (auto const & interval : coverage) index.ForEachInIntervalAndScale(collector, interval.first, interval.second, scale); return SortFeaturesAndBuildCBV(move(features)); @@ -235,8 +231,8 @@ public: { covering::IntervalsT coverage; CoverRect(currViewport, m_coverageScale, coverage); - geometryFeatures = - RetrieveGeometryFeatures(m_handle, cancellable, coverage, m_coverageScale); + geometryFeatures = RetrieveGeometryFeaturesImpl(*m_handle.GetValue(), cancellable, + coverage, m_coverageScale); for (auto const & interval : coverage) m_visited.Add(interval); } @@ -269,8 +265,8 @@ public: for (auto const & interval : coverage) m_visited.SubtractFrom(interval, reducedCoverage); - geometryFeatures = - RetrieveGeometryFeatures(m_handle, cancellable, reducedCoverage, m_coverageScale); + geometryFeatures = RetrieveGeometryFeaturesImpl(*m_handle.GetValue(), cancellable, + reducedCoverage, m_coverageScale); for (auto const & interval : reducedCoverage) m_visited.Add(interval); @@ -369,11 +365,9 @@ Retrieval::Retrieval() : m_index(nullptr), m_featuresReported(0) {} // static unique_ptr Retrieval::RetrieveAddressFeatures( - MwmValue * value, my::Cancellable const & cancellable, SearchQueryParams const & params) + MwmValue & value, my::Cancellable const & cancellable, SearchQueryParams const & params) { - ASSERT(value, ()); - - MwmTraits mwmTraits(value->GetMwmVersion().format); + MwmTraits mwmTraits(value.GetMwmVersion().format); if (mwmTraits.GetSearchIndexFormat() == MwmTraits::SearchIndexFormat::FeaturesWithRankAndCenter) @@ -390,6 +384,15 @@ unique_ptr Retrieval::RetrieveAddressFeatures( return unique_ptr(); } +// static +unique_ptr Retrieval::RetrieveGeometryFeatures( + MwmValue & value, my::Cancellable const & cancellable, m2::RectD const & rect, int scale) +{ + covering::IntervalsT coverage; + CoverRect(rect, scale, coverage); + return RetrieveGeometryFeaturesImpl(value, cancellable, coverage, scale); +} + void Retrieval::Init(Index & index, vector> const & infos, m2::RectD const & viewport, SearchQueryParams const & params, Limits const & limits) @@ -520,7 +523,7 @@ bool Retrieval::InitBucketStrategy(Bucket & bucket, double scale) try { - addressFeatures = RetrieveAddressFeatures(bucket.m_handle.GetValue(), + addressFeatures = RetrieveAddressFeatures(*bucket.m_handle.GetValue(), *this /* cancellable */, m_params); } catch (CancelException &) diff --git a/search/retrieval.hpp b/search/retrieval.hpp index fea915d8fd..f837f38f72 100644 --- a/search/retrieval.hpp +++ b/search/retrieval.hpp @@ -106,8 +106,12 @@ public: // Retrieves from the search index corresponding to |value| all // features matching to |params|. - static unique_ptr RetrieveAddressFeatures( - MwmValue * value, my::Cancellable const & cancellable, SearchQueryParams const & params); + WARN_UNUSED_RESULT static unique_ptr RetrieveAddressFeatures( + MwmValue & value, my::Cancellable const & cancellable, SearchQueryParams const & params); + + // Retrieves from the geometry index corresponding to |value| all features belonging to |rect|. + WARN_UNUSED_RESULT static unique_ptr RetrieveGeometryFeatures( + MwmValue & value, my::Cancellable const & cancellable, m2::RectD const & rect, int scale); // Initializes retrieval process, sets up internal state, takes all // necessary system resources. diff --git a/search/search.pro b/search/search.pro index 179229d3f3..efb91d68cc 100644 --- a/search/search.pro +++ b/search/search.pro @@ -44,12 +44,14 @@ HEADERS += \ search_string_utils.hpp \ search_trie.hpp \ suggest.hpp \ + v2/features_filter.hpp \ v2/features_layer.hpp \ v2/features_layer_matcher.hpp \ v2/features_layer_path_finder.hpp \ v2/geocoder.hpp \ v2/house_numbers_matcher.hpp \ v2/house_to_street_table.hpp \ + v2/rank_table_cache.hpp \ v2/search_model.hpp \ v2/search_query_v2.hpp \ v2/street_vicinity_loader.hpp \ @@ -79,12 +81,14 @@ SOURCES += \ search_query.cpp \ search_query_params.cpp \ search_string_utils.cpp \ + v2/features_filter.cpp \ v2/features_layer.cpp \ v2/features_layer_matcher.cpp \ v2/features_layer_path_finder.cpp \ v2/geocoder.cpp \ v2/house_numbers_matcher.cpp \ v2/house_to_street_table.cpp \ + v2/rank_table_cache.cpp \ v2/search_model.cpp \ v2/search_query_v2.cpp \ v2/street_vicinity_loader.cpp \ diff --git a/search/search_query.cpp b/search/search_query.cpp index d25a393b7f..93f2b421af 100644 --- a/search/search_query.cpp +++ b/search/search_query.cpp @@ -61,9 +61,6 @@ namespace using TCompareFunction1 = function; using TCompareFunction2 = function; -// Maximum result candidates count for each viewport/criteria. -size_t const kPreResultsCount = 200; - TCompareFunction1 const g_arrCompare1[] = { &impl::PreResult1::LessPriority, &impl::PreResult1::LessRank, }; @@ -210,6 +207,9 @@ RankTable const * Query::RetrievalCallback::LoadTable(MwmSet::MwmId const & id) void Query::RetrievalCallback::UnloadTable(MwmSet::MwmId const & id) { m_rankTables.erase(id); } +// static +size_t const Query::kPreResultsCount; + Query::Query(Index & index, CategoriesHolder const & categories, vector const & suggests, storage::CountryInfoGetter const & infoGetter) : m_index(index) diff --git a/search/search_query.hpp b/search/search_query.hpp index ccfa87262f..477a09d9b1 100644 --- a/search/search_query.hpp +++ b/search/search_query.hpp @@ -65,6 +65,9 @@ namespace impl class Query : public my::Cancellable { public: + // Maximum result candidates count for each viewport/criteria. + static size_t const kPreResultsCount = 200; + Query(Index & index, CategoriesHolder const & categories, vector const & suggests, storage::CountryInfoGetter const & infoGetter); @@ -105,7 +108,7 @@ public: // Get scale level to make geometry index query for current viewport. virtual int GetQueryIndexScale(m2::RectD const & viewport) const; - void ClearCaches(); + virtual void ClearCaches(); struct CancelException {}; diff --git a/search/v2/features_filter.cpp b/search/v2/features_filter.cpp new file mode 100644 index 0000000000..934a98ea5a --- /dev/null +++ b/search/v2/features_filter.cpp @@ -0,0 +1,71 @@ +#include "search/v2/features_filter.hpp" + +#include "search/dummy_rank_table.hpp" +#include "search/retrieval.hpp" + +#include "indexer/index.hpp" +#include "indexer/scales.hpp" + +namespace search +{ +namespace v2 +{ +FeaturesFilter::FeaturesFilter(my::Cancellable const & cancellable) + : m_maxNumResults(0) + , m_scale(scales::GetUpperScale()) + , m_cacheIsValid(false) + , m_value(nullptr) + , m_cancellable(cancellable) +{ +} + +void FeaturesFilter::SetValue(MwmValue * value, MwmSet::MwmId const & id) +{ + if (m_value == value && m_id == id) + return; + m_value = value; + m_id = id; + m_cacheIsValid = false; +} + +void FeaturesFilter::SetViewport(m2::RectD const & viewport) +{ + if (viewport == m_viewport) + return; + m_viewport = viewport; + m_cacheIsValid = false; +} + +void FeaturesFilter::SetMaxNumResults(size_t maxNumResults) { m_maxNumResults = maxNumResults; } + +void FeaturesFilter::SetScale(int scale) +{ + if (m_scale == scale) + return; + m_scale = scale; + m_cacheIsValid = false; +} + +bool FeaturesFilter::NeedToFilter(vector const & features) const +{ + return features.size() > m_maxNumResults; +} + +void FeaturesFilter::UpdateCache() +{ + if (m_cacheIsValid) + return; + + if (!m_value) + { + m_featuresCache.reset(); + } + else + { + m_featuresCache = + Retrieval::RetrieveGeometryFeatures(*m_value, m_cancellable, m_viewport, m_scale); + } + m_cacheIsValid = true; +} +} // namespace v2 +} // namespace search diff --git a/search/v2/features_filter.hpp b/search/v2/features_filter.hpp new file mode 100644 index 0000000000..231c785396 --- /dev/null +++ b/search/v2/features_filter.hpp @@ -0,0 +1,69 @@ +#pragma once + +#include "indexer/mwm_set.hpp" + +#include "coding/compressed_bit_vector.hpp" + +#include "geometry/rect2d.hpp" + +#include "base/cancellable.hpp" + +#include "std/algorithm.hpp" +#include "std/unique_ptr.hpp" +#include "std/utility.hpp" +#include "std/vector.hpp" + +class MwmValue; + +namespace search +{ +namespace v2 +{ +class FeaturesFilter +{ +public: + FeaturesFilter(my::Cancellable const & cancellable); + + void SetValue(MwmValue * value, MwmSet::MwmId const & id); + void SetViewport(m2::RectD const & viewport); + void SetMaxNumResults(size_t maxNumResults); + void SetScale(int scale); + + bool NeedToFilter(vector const & features) const; + + template + void Filter(vector const & features, TFn && fn) + { + using TRankAndFeature = pair; + using TComparer = std::greater; + + UpdateCache(); + + if (!m_featuresCache || m_featuresCache->PopCount() == 0) + return; + ASSERT(m_featuresCache.get(), ()); + + // Emit all features from the viewport. + for (uint32_t feature : features) + { + if (m_featuresCache->GetBit(feature)) + fn(feature); + } + } + +private: + void UpdateCache(); + + m2::RectD m_viewport; + size_t m_maxNumResults; + int m_scale; + + unique_ptr m_featuresCache; + bool m_cacheIsValid; + + MwmValue * m_value; + MwmSet::MwmId m_id; + my::Cancellable const & m_cancellable; +}; +} // namespace v2 +} // namespace search diff --git a/search/v2/features_layer.cpp b/search/v2/features_layer.cpp index 4a7dbcb776..bc8914450f 100644 --- a/search/v2/features_layer.cpp +++ b/search/v2/features_layer.cpp @@ -12,7 +12,7 @@ FeaturesLayer::FeaturesLayer() { Clear(); } void FeaturesLayer::Clear() { - m_sortedFeatures.clear(); + m_sortedFeatures = nullptr; m_subQuery.clear(); m_startToken = 0; m_endToken = 0; @@ -22,7 +22,8 @@ void FeaturesLayer::Clear() string DebugPrint(FeaturesLayer const & layer) { ostringstream os; - os << "FeaturesLayer [ size of m_sortedFeatures: " << layer.m_sortedFeatures.size() + os << "FeaturesLayer [ size of m_sortedFeatures: " + << (layer.m_sortedFeatures ? layer.m_sortedFeatures->size() : 0) << ", m_subQuery: " << layer.m_subQuery << ", m_startToken: " << layer.m_startToken << ", m_endToken: " << layer.m_endToken << ", m_type: " << DebugPrint(layer.m_type) << " ]"; return os.str(); diff --git a/search/v2/features_layer.hpp b/search/v2/features_layer.hpp index 72fd22d57d..fe267a8219 100644 --- a/search/v2/features_layer.hpp +++ b/search/v2/features_layer.hpp @@ -4,8 +4,6 @@ #include "std/vector.hpp" -#include "base/macros.hpp" - namespace search { namespace v2 @@ -16,19 +14,17 @@ namespace v2 struct FeaturesLayer { FeaturesLayer(); - FeaturesLayer(FeaturesLayer && layer) = default; void Clear(); - vector m_sortedFeatures; + // Non-owning ptr to a sorted vector of features. + vector const * m_sortedFeatures; string m_subQuery; size_t m_startToken; size_t m_endToken; SearchModel::SearchType m_type; - - DISALLOW_COPY(FeaturesLayer); }; string DebugPrint(FeaturesLayer const & layer); diff --git a/search/v2/features_layer_matcher.cpp b/search/v2/features_layer_matcher.cpp index 20d80e408c..a9ecb3163b 100644 --- a/search/v2/features_layer_matcher.cpp +++ b/search/v2/features_layer_matcher.cpp @@ -22,5 +22,47 @@ FeaturesLayerMatcher::FeaturesLayerMatcher(Index & index, MwmSet::MwmId const & { ASSERT(m_houseToStreetTable.get(), ("Can't load HouseToStreetTable")); } + +uint32_t FeaturesLayerMatcher::GetMatchingStreet(uint32_t houseId, FeatureType & houseFeature) +{ + auto const it = m_matchingStreetsCache.find(houseId); + if (it != m_matchingStreetsCache.cend()) + return it->second; + + auto const & streets = GetNearbyStreets(houseId, houseFeature); + uint32_t const streetIndex = m_houseToStreetTable->Get(houseId); + + uint32_t streetId = kInvalidId; + if (streetIndex < streets.size() && streets[streetIndex].m_id.m_mwmId == m_mwmId) + streetId = streets[streetIndex].m_id.m_index; + m_matchingStreetsCache[houseId] = streetId; + return streetId; +} + +vector const & FeaturesLayerMatcher::GetNearbyStreets(uint32_t featureId) +{ + auto const it = m_nearbyStreetsCache.find(featureId); + if (it != m_nearbyStreetsCache.cend()) + return it->second; + + FeatureType feature; + m_featuresVector.GetByIndex(featureId, feature); + + auto & streets = m_nearbyStreetsCache[featureId]; + m_reverseGeocoder.GetNearbyStreets(feature, streets); + return streets; +} + +vector const & FeaturesLayerMatcher::GetNearbyStreets( + uint32_t featureId, FeatureType & feature) +{ + auto const it = m_nearbyStreetsCache.find(featureId); + if (it != m_nearbyStreetsCache.cend()) + return it->second; + + auto & streets = m_nearbyStreetsCache[featureId]; + m_reverseGeocoder.GetNearbyStreets(feature, streets); + return streets; +} } // namespace v2 } // namespace search diff --git a/search/v2/features_layer_matcher.hpp b/search/v2/features_layer_matcher.hpp index 2b625b0508..3a7deca9b9 100644 --- a/search/v2/features_layer_matcher.hpp +++ b/search/v2/features_layer_matcher.hpp @@ -10,6 +10,7 @@ #include "indexer/feature.hpp" #include "indexer/feature_algo.hpp" +#include "indexer/feature_impl.hpp" #include "indexer/features_vector.hpp" #include "indexer/ftypes_matcher.hpp" #include "indexer/mwm_set.hpp" @@ -19,11 +20,14 @@ #include "geometry/rect2d.hpp" #include "base/cancellable.hpp" +#include "base/logging.hpp" #include "base/macros.hpp" #include "base/stl_helpers.hpp" #include "std/algorithm.hpp" #include "std/bind.hpp" +#include "std/limits.hpp" +#include "std/unordered_map.hpp" #include "std/vector.hpp" class Index; @@ -51,26 +55,27 @@ namespace v2 class FeaturesLayerMatcher { public: + static uint32_t const kInvalidId = numeric_limits::max(); + FeaturesLayerMatcher(Index & index, MwmSet::MwmId const & mwmId, MwmValue & value, FeaturesVector const & featuresVector, my::Cancellable const & cancellable); template - void Match(FeaturesLayer const & child, vector const & sortedParentFeatures, - SearchModel::SearchType parentType, TFn && fn) + void Match(FeaturesLayer const & child, FeaturesLayer const & parent, TFn && fn) { - if (child.m_type >= parentType) + if (child.m_type >= parent.m_type) return; - if (parentType == SearchModel::SEARCH_TYPE_STREET) + if (parent.m_type == SearchModel::SEARCH_TYPE_STREET) { if (child.m_type == SearchModel::SEARCH_TYPE_POI) - MatchPOIsWithStreets(child, sortedParentFeatures, parentType, forward(fn)); + MatchPOIsWithStreets(child, parent, forward(fn)); else if (child.m_type == SearchModel::SEARCH_TYPE_BUILDING) - MatchBuildingsWithStreets(child, sortedParentFeatures, parentType, forward(fn)); + MatchBuildingsWithStreets(child, parent, forward(fn)); return; } vector childCenters; - for (uint32_t featureId : child.m_sortedFeatures) + for (uint32_t featureId : *child.m_sortedFeatures) { FeatureType ft; m_featuresVector.GetByIndex(featureId, ft); @@ -79,48 +84,41 @@ public: BailIfCancelled(m_cancellable); - vector parentRects; - for (uint32_t featureId : sortedParentFeatures) - { - FeatureType feature; - m_featuresVector.GetByIndex(featureId, feature); - m2::PointD center = feature::GetCenter(feature, FeatureType::WORST_GEOMETRY); - double radius = ftypes::GetRadiusByPopulation(feature.GetPopulation()); - parentRects.push_back(MercatorBounds::RectByCenterXYAndSizeInMeters(center, radius)); - } - - for (size_t j = 0; j < sortedParentFeatures.size(); ++j) + for (size_t j = 0; j < parent.m_sortedFeatures->size(); ++j) { BailIfCancelled(m_cancellable); - for (size_t i = 0; i < child.m_sortedFeatures.size(); ++i) + FeatureType ft; + m_featuresVector.GetByIndex((*parent.m_sortedFeatures)[j], ft); + m2::PointD const center = feature::GetCenter(ft, FeatureType::WORST_GEOMETRY); + double const radius = ftypes::GetRadiusByPopulation(ft.GetPopulation()); + m2::RectD const rect = MercatorBounds::RectByCenterXYAndSizeInMeters(center, radius); + + for (size_t i = 0; i < child.m_sortedFeatures->size(); ++i) { - if (parentRects[j].IsPointInside(childCenters[i])) - fn(child.m_sortedFeatures[i], sortedParentFeatures[j]); + if (rect.IsPointInside(childCenters[i])) + fn((*child.m_sortedFeatures)[i], (*parent.m_sortedFeatures)[j]); } } } private: template - void MatchPOIsWithStreets(FeaturesLayer const & child, - vector const & sortedParentFeatures, - SearchModel::SearchType parentType, TFn && fn) + void MatchPOIsWithStreets(FeaturesLayer const & child, FeaturesLayer const & parent, TFn && fn) { ASSERT_EQUAL(child.m_type, SearchModel::SEARCH_TYPE_POI, ()); - ASSERT_EQUAL(parentType, SearchModel::SEARCH_TYPE_STREET, ()); + ASSERT_EQUAL(parent.m_type, SearchModel::SEARCH_TYPE_STREET, ()); - for (uint32_t streetId : sortedParentFeatures) + for (uint32_t streetId : *parent.m_sortedFeatures) { BailIfCancelled(m_cancellable); - m_loader.ForEachInVicinity(streetId, child.m_sortedFeatures, bind(fn, _1, streetId)); + m_loader.ForEachInVicinity(streetId, *child.m_sortedFeatures, bind(fn, _1, streetId)); } } template - void MatchBuildingsWithStreets(FeaturesLayer const & child, - vector const & sortedParentFeatures, - SearchModel::SearchType parentType, TFn && fn) + void MatchBuildingsWithStreets(FeaturesLayer const & child, FeaturesLayer const & parent, + TFn && fn) { // child.m_sortedFeatures contains only buildings matched by name, // not by house number. So, we need to add to @@ -130,10 +128,12 @@ private: auto const & checker = ftypes::IsBuildingChecker::Instance(); ASSERT_EQUAL(child.m_type, SearchModel::SEARCH_TYPE_BUILDING, ()); - ASSERT_EQUAL(parentType, SearchModel::SEARCH_TYPE_STREET, ()); + ASSERT_EQUAL(parent.m_type, SearchModel::SEARCH_TYPE_STREET, ()); vector queryTokens; NormalizeHouseNumber(child.m_subQuery, queryTokens); + bool const queryLooksLikeHouseNumber = + feature::IsHouseNumber(child.m_subQuery) && !queryTokens.empty(); uint32_t numFilterInvocations = 0; auto filter = [&](uint32_t id, FeatureType & feature) -> bool @@ -144,34 +144,58 @@ private: if (!checker(feature)) return false; - if (binary_search(child.m_sortedFeatures.begin(), child.m_sortedFeatures.end(), id)) + if (binary_search(child.m_sortedFeatures->begin(), child.m_sortedFeatures->end(), id)) return true; + + // HouseNumbersMatch() calls are expensive, so following code + // tries to reduce number of calls. The most important + // optimization: as first tokens from the house-number part of + // the query and feature's house numbers must be numbers, their + // first symbols must be the same. + string const houseNumber = feature.GetHouseNumber(); + if (!queryLooksLikeHouseNumber || !feature::IsHouseNumber(houseNumber)) + return false; + if (queryTokens[0][0] != houseNumber[0]) + return false; return HouseNumbersMatch(feature.GetHouseNumber(), queryTokens); }; auto addEdge = [&](uint32_t houseId, FeatureType & houseFeature, uint32_t streetId) { - vector streets; - m_reverseGeocoder.GetNearbyStreets(houseFeature, streets); - uint32_t streetIndex = m_houseToStreetTable->Get(houseId); - - if (streetIndex < streets.size() && streets[streetIndex].m_id.m_mwmId == m_mwmId && - streets[streetIndex].m_id.m_index == streetId) - { + if (GetMatchingStreet(houseId, houseFeature) == streetId) fn(houseId, streetId); - } }; - for (uint32_t streetId : sortedParentFeatures) + for (uint32_t streetId : *parent.m_sortedFeatures) { BailIfCancelled(m_cancellable); m_loader.FilterFeaturesInVicinity(streetId, filter, bind(addEdge, _1, _2, streetId)); } } + // Returns id of a street feature corresponding to a |houseId|, or + // kInvalidId if there're not such street. + uint32_t GetMatchingStreet(uint32_t houseId, FeatureType & houseFeature); + + vector const & GetNearbyStreets(uint32_t featureId); + + vector const & GetNearbyStreets(uint32_t featureId, + FeatureType & feature); + MwmSet::MwmId m_mwmId; ReverseGeocoder m_reverseGeocoder; + + // Cache of streets in a feature's vicinity. All lists in the cache + // are ordered by a distance. + unordered_map> m_nearbyStreetsCache; + + // Cache of correct streets for buildings. Current search algorithm + // supports only one street for a building, whereas buildings can be + // located on multiple streets. + unordered_map m_matchingStreetsCache; + unique_ptr m_houseToStreetTable; + FeaturesVector const & m_featuresVector; StreetVicinityLoader m_loader; my::Cancellable const & m_cancellable; diff --git a/search/v2/features_layer_path_finder.cpp b/search/v2/features_layer_path_finder.cpp index 9bdf26cddd..a2a0d4f1ae 100644 --- a/search/v2/features_layer_path_finder.cpp +++ b/search/v2/features_layer_path_finder.cpp @@ -2,6 +2,7 @@ #include "search/cancel_exception.hpp" #include "search/v2/features_layer_matcher.hpp" +#include "search/v2/features_filter.hpp" #include "indexer/features_vector.hpp" @@ -16,18 +17,16 @@ FeaturesLayerPathFinder::FeaturesLayerPathFinder(my::Cancellable const & cancell { } -void FeaturesLayerPathFinder::BuildGraph(FeaturesLayerMatcher & matcher, +void FeaturesLayerPathFinder::BuildGraph(FeaturesLayerMatcher & matcher, FeaturesFilter & filter, vector const & layers, vector & reachable) { if (layers.empty()) return; - FeaturesLayer child; + reachable = *(layers.back()->m_sortedFeatures); - reachable = layers.back()->m_sortedFeatures; - - vector tmpBuffer; + vector buffer; // The order matters here, as we need to intersect BUILDINGs with // STREETs first, and then POIs with BUILDINGs. @@ -35,16 +34,29 @@ void FeaturesLayerPathFinder::BuildGraph(FeaturesLayerMatcher & matcher, { BailIfCancelled(m_cancellable); - tmpBuffer.clear(); + if (reachable.empty()) + break; + + if (filter.NeedToFilter(reachable)) + { + buffer.clear(); + filter.Filter(reachable, MakeBackInsertFunctor(buffer)); + reachable.swap(buffer); + my::SortUnique(reachable); + } + + buffer.clear(); auto addEdge = [&](uint32_t childFeature, uint32_t /* parentFeature */) { - tmpBuffer.push_back(childFeature); + buffer.push_back(childFeature); }; - matcher.Match(*layers[i - 1], reachable, layers[i]->m_type, addEdge); + FeaturesLayer parent(*layers[i]); + parent.m_sortedFeatures = &reachable; + matcher.Match(*layers[i - 1], parent, addEdge); - my::SortUnique(tmpBuffer); - reachable.swap(tmpBuffer); + reachable.swap(buffer); + my::SortUnique(reachable); } } } // namespace v2 diff --git a/search/v2/features_layer_path_finder.hpp b/search/v2/features_layer_path_finder.hpp index 719025e3f7..5e2f0a1b58 100644 --- a/search/v2/features_layer_path_finder.hpp +++ b/search/v2/features_layer_path_finder.hpp @@ -16,6 +16,7 @@ namespace search { namespace v2 { +class FeaturesFilter; class FeaturesLayerMatcher; // This class is able to find all paths through a layered graph, with @@ -33,22 +34,22 @@ public: FeaturesLayerPathFinder(my::Cancellable const & cancellable); template - void ForEachReachableVertex(FeaturesLayerMatcher & matcher, + void ForEachReachableVertex(FeaturesLayerMatcher & matcher, FeaturesFilter & filter, vector const & layers, TFn && fn) { if (layers.empty()) return; vector reachable; - BuildGraph(matcher, layers, reachable); + BuildGraph(matcher, filter, layers, reachable); for (uint32_t featureId : reachable) fn(featureId); } private: - void BuildGraph(FeaturesLayerMatcher & matcher, vector const & layers, - vector & reachable); + void BuildGraph(FeaturesLayerMatcher & matcher, FeaturesFilter & filter, + vector const & layers, vector & reachable); my::Cancellable const & m_cancellable; }; diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp index 6d0aea951f..7fadfed16a 100644 --- a/search/v2/geocoder.cpp +++ b/search/v2/geocoder.cpp @@ -19,6 +19,7 @@ #include "base/macros.hpp" #include "base/scope_guard.hpp" #include "base/stl_add.hpp" +#include "base/stl_helpers.hpp" #include "std/algorithm.hpp" #include "std/iterator.hpp" @@ -51,11 +52,44 @@ void JoinQueryTokens(SearchQueryParams const & params, size_t curToken, size_t e } } // namespace +// Geocoder::Partition +Geocoder::Partition::Partition() : m_size(0) {} + +void Geocoder::Partition::FromFeatures(unique_ptr features, + Index::FeaturesLoaderGuard & loader, + SearchModel const & model) +{ + for (auto & cluster : m_clusters) + cluster.clear(); + + auto clusterize = [&](uint64_t featureId) + { + FeatureType feature; + loader.GetFeatureByIndex(featureId, feature); + feature.ParseTypes(); + SearchModel::SearchType searchType = model.GetSearchType(feature); + if (searchType != SearchModel::SEARCH_TYPE_COUNT) + m_clusters[searchType].push_back(featureId); + }; + + if (features) + coding::CompressedBitVectorEnumerator::ForEach(*features, clusterize); + + m_size = 0; + for (auto const & cluster : m_clusters) + m_size += cluster.size(); +} + +// Geocoder::Params -------------------------------------------------------------------------------- +Geocoder::Params::Params() : m_maxNumResults(0) {} + +// Geocoder::Geocoder ------------------------------------------------------------------------------ Geocoder::Geocoder(Index & index) : m_index(index) , m_numTokens(0) , m_model(SearchModel::Instance()) , m_value(nullptr) + , m_filter(static_cast(*this)) , m_finder(static_cast(*this)) , m_results(nullptr) { @@ -63,11 +97,15 @@ Geocoder::Geocoder(Index & index) Geocoder::~Geocoder() {} -void Geocoder::SetSearchQueryParams(SearchQueryParams const & params) +void Geocoder::SetParams(Params const & params) { m_params = params; m_retrievalParams = params; + m_filter.SetViewport(m_params.m_viewport); + m_filter.SetMaxNumResults(m_params.m_maxNumResults); + m_filter.SetScale(m_params.m_scale); + m_numTokens = m_params.m_tokens.size(); if (!m_params.m_prefixTokens.empty()) ++m_numTokens; @@ -99,16 +137,26 @@ void Geocoder::Go(vector & results) m_mwmId = handle.GetId(); MY_SCOPE_GUARD(cleanup, [&]() - { - m_matcher.reset(); - m_loader.reset(); - m_cache.clear(); - }); + { + m_matcher.reset(); + m_loader.reset(); + m_partitions.clear(); + }); - m_cache.clear(); + m_partitions.clear(); m_loader.reset(new Index::FeaturesLoaderGuard(m_index, m_mwmId)); m_matcher.reset(new FeaturesLayerMatcher( m_index, m_mwmId, *m_value, m_loader->GetFeaturesVector(), *this /* cancellable */)); + m_filter.SetValue(m_value, m_mwmId); + + m_partitions.resize(m_numTokens); + for (size_t i = 0; i < m_numTokens; ++i) + { + PrepareRetrievalParams(i, i + 1); + m_partitions[i].FromFeatures(Retrieval::RetrieveAddressFeatures( + *m_value, *this /* cancellable */, m_retrievalParams), + *m_loader, m_model); + } DoGeocoding(0 /* curToken */); } @@ -118,7 +166,13 @@ void Geocoder::Go(vector & results) } } -void Geocoder::PrepareParams(size_t curToken, size_t endToken) +void Geocoder::ClearCaches() +{ + m_partitions.clear(); + m_matcher.reset(); +} + +void Geocoder::PrepareRetrievalParams(size_t curToken, size_t endToken) { ASSERT_LESS(curToken, endToken, ()); ASSERT_LESS_OR_EQUAL(endToken, m_numTokens, ()); @@ -158,7 +212,6 @@ void Geocoder::DoGeocoding(size_t curToken) { BailIfCancelled(static_cast(*this)); - PrepareParams(curToken, curToken + n); { auto & layer = m_layers.back(); layer.Clear(); @@ -168,26 +221,15 @@ void Geocoder::DoGeocoding(size_t curToken) layer.m_subQuery); } - // TODO (@y, @m): as |n| increases, good optimization is to update - // |features| incrementally, from [curToken, curToken + n) to - // [curToken, curToken + n + 1). - auto features = RetrieveAddressFeatures(curToken, curToken + n); - - vector clusters[SearchModel::SEARCH_TYPE_COUNT]; - auto clusterize = [&](uint64_t featureId) - { - FeatureType feature; - m_loader->GetFeatureByIndex(featureId, feature); - feature.ParseTypes(); - SearchModel::SearchType searchType = m_model.GetSearchType(feature); - if (searchType != SearchModel::SEARCH_TYPE_COUNT) - clusters[searchType].push_back(featureId); - }; - - if (features) - coding::CompressedBitVectorEnumerator::ForEach(*features, clusterize); + BailIfCancelled(static_cast(*this)); bool const looksLikeHouseNumber = feature::IsHouseNumber(m_layers.back().m_subQuery); + auto const & partition = m_partitions[curToken + n - 1]; + if (partition.m_size == 0 && !looksLikeHouseNumber) + break; + + vector clusters[SearchModel::SEARCH_TYPE_COUNT]; + vector buffer; for (size_t i = 0; i != SearchModel::SEARCH_TYPE_COUNT; ++i) { @@ -195,12 +237,37 @@ void Geocoder::DoGeocoding(size_t curToken) // DoGeocoding(). This may lead to use-after-free. auto & layer = m_layers.back(); + // Following code intersects posting lists for tokens [curToken, + // curToken + n). This can be done incrementally, as we have + // |clusters| to store intersections. + if (n == 1) + { + layer.m_sortedFeatures = &partition.m_clusters[i]; + } + else if (n == 2) + { + clusters[i].clear(); + auto const & first = m_partitions[curToken].m_clusters[i]; + auto const & second = m_partitions[curToken + 1].m_clusters[i]; + set_intersection(first.begin(), first.end(), second.begin(), second.end(), + back_inserter(clusters[i])); + layer.m_sortedFeatures = &clusters[i]; + } + else + { + buffer.clear(); + set_intersection(clusters[i].begin(), clusters[i].end(), partition.m_clusters[i].begin(), + partition.m_clusters[i].end(), back_inserter(buffer)); + clusters[i].swap(buffer); + layer.m_sortedFeatures = &clusters[i]; + } + if (i == SearchModel::SEARCH_TYPE_BUILDING) { - if (clusters[i].empty() && !looksLikeHouseNumber) + if (layer.m_sortedFeatures->empty() && !looksLikeHouseNumber) continue; } - else if (clusters[i].empty()) + else if (layer.m_sortedFeatures->empty()) { continue; } @@ -213,8 +280,6 @@ void Geocoder::DoGeocoding(size_t curToken) continue; } - layer.m_sortedFeatures.swap(clusters[i]); - ASSERT(is_sorted(layer.m_sortedFeatures.begin(), layer.m_sortedFeatures.end()), ()); layer.m_type = static_cast(i); if (IsLayerSequenceSane()) DoGeocoding(curToken + n); @@ -222,34 +287,41 @@ void Geocoder::DoGeocoding(size_t curToken) } } -coding::CompressedBitVector * Geocoder::RetrieveAddressFeatures(size_t curToken, size_t endToken) -{ - uint64_t const key = (static_cast(curToken) << 32) | static_cast(endToken); - if (m_cache.find(key) == m_cache.end()) - { - m_cache[key] = - Retrieval::RetrieveAddressFeatures(m_value, *this /* cancellable */, m_retrievalParams); - } - return m_cache[key].get(); -} - bool Geocoder::IsLayerSequenceSane() const { ASSERT(!m_layers.empty(), ()); static_assert(SearchModel::SEARCH_TYPE_COUNT <= 32, "Select a wider type to represent search types mask."); uint32_t mask = 0; - for (auto const & layer : m_layers) + size_t buildingIndex = m_layers.size(); + size_t streetIndex = m_layers.size(); + + // Following loop returns false iff there're two different layers + // of the same search type. + for (size_t i = 0; i < m_layers.size(); ++i) { + auto const & layer = m_layers[i]; ASSERT_NOT_EQUAL(layer.m_type, SearchModel::SEARCH_TYPE_COUNT, ()); // TODO (@y): probably it's worth to check belongs-to-locality here. - uint32_t bit = 1U << layer.m_type; if (mask & bit) return false; mask |= bit; + + if (layer.m_type == SearchModel::SEARCH_TYPE_BUILDING) + buildingIndex = i; + if (layer.m_type == SearchModel::SEARCH_TYPE_STREET) + streetIndex = i; + + // Checks that building and street layers are neighbours. + if (buildingIndex != m_layers.size() && streetIndex != m_layers.size() && + buildingIndex != streetIndex + 1 && streetIndex != buildingIndex + 1) + { + return false; + } } + return true; } @@ -257,22 +329,17 @@ void Geocoder::FindPaths() { ASSERT(!m_layers.empty(), ()); - auto const compareByType = [](FeaturesLayer const * lhs, FeaturesLayer const * rhs) - { - return lhs->m_type < rhs->m_type; - }; - // Layers ordered by a search type. vector sortedLayers; sortedLayers.reserve(m_layers.size()); for (auto & layer : m_layers) sortedLayers.push_back(&layer); - sort(sortedLayers.begin(), sortedLayers.end(), compareByType); + sort(sortedLayers.begin(), sortedLayers.end(), my::CompareBy(&FeaturesLayer::m_type)); - m_finder.ForEachReachableVertex(*m_matcher, sortedLayers, [this](uint32_t featureId) - { - m_results->emplace_back(m_mwmId, featureId); - }); + m_finder.ForEachReachableVertex(*m_matcher, m_filter, sortedLayers, [this](uint32_t featureId) + { + m_results->emplace_back(m_mwmId, featureId); + }); } } // namespace v2 } // namespace search diff --git a/search/v2/geocoder.hpp b/search/v2/geocoder.hpp index d9cd5f9d3b..502cb29f9b 100644 --- a/search/v2/geocoder.hpp +++ b/search/v2/geocoder.hpp @@ -1,6 +1,7 @@ #pragma once #include "search/search_query_params.hpp" +#include "search/v2/features_filter.hpp" #include "search/v2/features_layer.hpp" #include "search/v2/features_layer_path_finder.hpp" #include "search/v2/search_model.hpp" @@ -14,6 +15,7 @@ #include "base/buffer_vector.hpp" #include "base/cancellable.hpp" +#include "base/macros.hpp" #include "base/string_utils.hpp" #include "std/set.hpp" @@ -31,8 +33,6 @@ class CompressedBitVector; namespace search { -class RankTable; - namespace v2 { class FeaturesLayerMatcher; @@ -56,32 +56,52 @@ class SearchModel; class Geocoder : public my::Cancellable { public: + struct Params : public SearchQueryParams + { + Params(); + + m2::RectD m_viewport; + size_t m_maxNumResults; + }; + Geocoder(Index & index); ~Geocoder() override; // Sets search query params. - void SetSearchQueryParams(SearchQueryParams const & params); + void SetParams(Params const & params); // Starts geocoding, retrieved features will be appended to // |results|. void Go(vector & results); + void ClearCaches(); + private: + struct Partition + { + Partition(); + + Partition(Partition &&) = default; + + void FromFeatures(unique_ptr features, + Index::FeaturesLoaderGuard & loader, SearchModel const & model); + + vector m_clusters[SearchModel::SEARCH_TYPE_COUNT]; + size_t m_size; + + DISALLOW_COPY(Partition); + }; + // Fills |m_retrievalParams| with [curToken, endToken) subsequence // of search query tokens. - void PrepareParams(size_t curToken, size_t endToken); + void PrepareRetrievalParams(size_t curToken, size_t endToken); // Tries to find all paths in a search tree, where each edge is // marked with some substring of the query tokens. These paths are // called "layer sequence" and current path is stored in |m_layers|. void DoGeocoding(size_t curToken); - // Returns CBV of features corresponding to [curToken, endToken) - // subsequence of search query tokens. This method caches results of - // previous requests. - coding::CompressedBitVector * RetrieveAddressFeatures(size_t curToken, size_t endToken); - // Returns true if current path in the search tree (see comment for // DoGeocoding()) looks sane. This method is used as a fast // pre-check to cut off unnecessary work. @@ -93,8 +113,8 @@ private: Index & m_index; - // Initial search query params. - SearchQueryParams m_params; + // Geocoder params. + Params m_params; // Total number of search query tokens. size_t m_numTokens; @@ -112,8 +132,10 @@ private: // Id of a current mwm. MwmSet::MwmId m_mwmId; - // Cache of posting list of features. - unordered_map> m_cache; + // Cache of posting lists for each token in the query. TODO (@y, + // @m, @vng): consider to update this cache lazily, as user inputs + // tokens one-by-one. + vector m_partitions; // Features loader. unique_ptr m_loader; @@ -121,6 +143,9 @@ private: // Features matcher for layers intersection. unique_ptr m_matcher; + // Features filter for interpretations. + FeaturesFilter m_filter; + // Path finder for interpretations. FeaturesLayerPathFinder m_finder; diff --git a/search/v2/rank_table_cache.cpp b/search/v2/rank_table_cache.cpp new file mode 100644 index 0000000000..78827d829a --- /dev/null +++ b/search/v2/rank_table_cache.cpp @@ -0,0 +1,31 @@ +#include "search/v2/rank_table_cache.hpp" + +#include "search/dummy_rank_table.hpp" + +#include "indexer/index.hpp" +#include "indexer/rank_table.hpp" + +namespace search +{ +namespace v2 +{ +RankTableCache::RankTableCache() {} + +RankTableCache::~RankTableCache() {} + +RankTable const & RankTableCache::Get(MwmValue & value, MwmSet::MwmId const & mwmId) +{ + auto const it = m_ranks.find(mwmId); + if (it != m_ranks.end()) + return *it->second; + auto table = RankTable::Load(value.m_cont); + if (!table) + table.reset(new DummyRankTable()); + auto const * result = table.get(); + m_ranks[mwmId] = move(table); + return *result; +} + +void RankTableCache::Clear() { m_ranks.clear(); } +} // namespace v2 +} // namespace search diff --git a/search/v2/rank_table_cache.hpp b/search/v2/rank_table_cache.hpp new file mode 100644 index 0000000000..f52d55ed00 --- /dev/null +++ b/search/v2/rank_table_cache.hpp @@ -0,0 +1,35 @@ +#pragma once + +#include "indexer/mwm_set.hpp" + +#include "std/map.hpp" +#include "std/unique_ptr.hpp" + +#include "base/macros.hpp" + +class MwmValue; + +namespace search +{ +class RankTable; + +namespace v2 +{ +class RankTableCache +{ +public: + RankTableCache(); + + ~RankTableCache(); + + RankTable const & Get(MwmValue & value, MwmSet::MwmId const & mwmId); + + void Clear(); + +private: + map> m_ranks; + + DISALLOW_COPY_AND_MOVE(RankTableCache); +}; +} // namespace v2 +} // namespace search diff --git a/search/v2/search_model.cpp b/search/v2/search_model.cpp index 55529829a1..2b67bc6046 100644 --- a/search/v2/search_model.cpp +++ b/search/v2/search_model.cpp @@ -37,10 +37,6 @@ SearchModel::SearchType SearchModel::GetSearchType(FeatureType const & feature) { case NONE: return SEARCH_TYPE_COUNT; - case COUNTRY: - return SEARCH_TYPE_COUNTRY; - case STATE: - return SEARCH_TYPE_STATE; case CITY: case TOWN: case VILLAGE: @@ -65,10 +61,6 @@ string DebugPrint(SearchModel::SearchType type) return "STREET"; case SearchModel::SEARCH_TYPE_CITY: return "CITY"; - case SearchModel::SEARCH_TYPE_STATE: - return "STATE"; - case SearchModel::SEARCH_TYPE_COUNTRY: - return "COUNTRY"; case SearchModel::SEARCH_TYPE_COUNT: return "COUNT"; } diff --git a/search/v2/search_model.hpp b/search/v2/search_model.hpp index 4321215fd6..d015e48296 100644 --- a/search/v2/search_model.hpp +++ b/search/v2/search_model.hpp @@ -28,8 +28,6 @@ public: SEARCH_TYPE_BUILDING, SEARCH_TYPE_STREET, SEARCH_TYPE_CITY, - SEARCH_TYPE_STATE, - SEARCH_TYPE_COUNTRY, SEARCH_TYPE_COUNT }; diff --git a/search/v2/search_query_v2.cpp b/search/v2/search_query_v2.cpp index 0fbbb4d6b2..3ff06a8e29 100644 --- a/search/v2/search_query_v2.cpp +++ b/search/v2/search_query_v2.cpp @@ -37,9 +37,11 @@ void SearchQueryV2::Search(Results & res, size_t resCount) if (m_tokens.empty()) SuggestStrings(res); - SearchQueryParams params; + Geocoder::Params params; InitParams(false /* localitySearch */, params); - m_geocoder.SetSearchQueryParams(params); + params.m_viewport = m_viewport[CURRENT_V]; + params.m_maxNumResults = max(resCount, kPreResultsCount); + m_geocoder.SetParams(params); vector results; m_geocoder.Go(results); @@ -50,6 +52,12 @@ void SearchQueryV2::Search(Results & res, size_t resCount) void SearchQueryV2::SearchViewportPoints(Results & res) { NOTIMPLEMENTED(); } +void SearchQueryV2::ClearCaches() +{ + Query::ClearCaches(); + m_geocoder.ClearCaches(); +} + void SearchQueryV2::AddPreResults1(vector & results) { // Group all features by MwmId and add them as PreResult1. diff --git a/search/v2/search_query_v2.hpp b/search/v2/search_query_v2.hpp index 785e12a1bf..83233a44b7 100644 --- a/search/v2/search_query_v2.hpp +++ b/search/v2/search_query_v2.hpp @@ -20,6 +20,7 @@ public: // Query overrides: void Search(Results & res, size_t resCount) override; void SearchViewportPoints(Results & res) override; + void ClearCaches() override; protected: // Adds a bunch of features as PreResult1.