[search] Implemented FeaturesFilter and cache for houses. Optimized house numbers matching.

This commit is contained in:
Yuri Gorshenin 2015-12-15 19:55:29 +03:00 committed by Sergey Yershov
parent fab41db456
commit 2f51f39aa3
22 changed files with 556 additions and 170 deletions

View file

@ -259,8 +259,7 @@ uint64_t SparseCBV::PopCount() const { return m_positions.size(); }
bool SparseCBV::GetBit(uint64_t pos) const
{
auto const it = lower_bound(m_positions.begin(), m_positions.end(), pos);
return it != m_positions.end() && *it == pos;
return binary_search(m_positions.begin(), m_positions.end(), pos);
}
CompressedBitVector::StorageStrategy SparseCBV::GetStorageStrategy() const

View file

@ -57,11 +57,10 @@ void CoverRect(m2::RectD const & rect, int scale, covering::IntervalsT & result)
// features matching to |params|.
template <typename TValue>
unique_ptr<coding::CompressedBitVector> RetrieveAddressFeaturesImpl(
MwmValue * value, my::Cancellable const & cancellable, SearchQueryParams const & params)
MwmValue & value, my::Cancellable const & cancellable, SearchQueryParams const & params)
{
ASSERT(value, ());
serial::CodingParams codingParams(trie::GetCodingParams(value->GetHeader().GetDefCodingParams()));
ModelReaderPtr searchReader = value->m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
serial::CodingParams codingParams(trie::GetCodingParams(value.GetHeader().GetDefCodingParams()));
ModelReaderPtr searchReader = value.m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
auto emptyFilter = [](uint32_t /* featureId */)
{
@ -87,13 +86,10 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeaturesImpl(
// Retrieves from the geometry index corresponding to |value| all
// features from |coverage|.
unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures(
MwmSet::MwmHandle const & handle, my::Cancellable const & cancellable,
covering::IntervalsT const & coverage, int scale)
unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeaturesImpl(
MwmValue & value, my::Cancellable const & cancellable, covering::IntervalsT const & coverage,
int scale)
{
auto * value = handle.GetValue<MwmValue>();
ASSERT(value, ());
// TODO (@y, @m): remove this code as soon as the geometry index
// has native support for bit vectors.
vector<uint64_t> features;
@ -104,7 +100,7 @@ unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures(
features.push_back(featureId);
};
ScaleIndex<ModelReaderPtr> index(value->m_cont.GetReader(INDEX_FILE_TAG), value->m_factory);
ScaleIndex<ModelReaderPtr> index(value.m_cont.GetReader(INDEX_FILE_TAG), value.m_factory);
for (auto const & interval : coverage)
index.ForEachInIntervalAndScale(collector, interval.first, interval.second, scale);
return SortFeaturesAndBuildCBV(move(features));
@ -235,8 +231,8 @@ public:
{
covering::IntervalsT coverage;
CoverRect(currViewport, m_coverageScale, coverage);
geometryFeatures =
RetrieveGeometryFeatures(m_handle, cancellable, coverage, m_coverageScale);
geometryFeatures = RetrieveGeometryFeaturesImpl(*m_handle.GetValue<MwmValue>(), cancellable,
coverage, m_coverageScale);
for (auto const & interval : coverage)
m_visited.Add(interval);
}
@ -269,8 +265,8 @@ public:
for (auto const & interval : coverage)
m_visited.SubtractFrom(interval, reducedCoverage);
geometryFeatures =
RetrieveGeometryFeatures(m_handle, cancellable, reducedCoverage, m_coverageScale);
geometryFeatures = RetrieveGeometryFeaturesImpl(*m_handle.GetValue<MwmValue>(), cancellable,
reducedCoverage, m_coverageScale);
for (auto const & interval : reducedCoverage)
m_visited.Add(interval);
@ -369,11 +365,9 @@ Retrieval::Retrieval() : m_index(nullptr), m_featuresReported(0) {}
// static
unique_ptr<coding::CompressedBitVector> Retrieval::RetrieveAddressFeatures(
MwmValue * value, my::Cancellable const & cancellable, SearchQueryParams const & params)
MwmValue & value, my::Cancellable const & cancellable, SearchQueryParams const & params)
{
ASSERT(value, ());
MwmTraits mwmTraits(value->GetMwmVersion().format);
MwmTraits mwmTraits(value.GetMwmVersion().format);
if (mwmTraits.GetSearchIndexFormat() ==
MwmTraits::SearchIndexFormat::FeaturesWithRankAndCenter)
@ -390,6 +384,15 @@ unique_ptr<coding::CompressedBitVector> Retrieval::RetrieveAddressFeatures(
return unique_ptr<coding::CompressedBitVector>();
}
// static
unique_ptr<coding::CompressedBitVector> Retrieval::RetrieveGeometryFeatures(
    MwmValue & value, my::Cancellable const & cancellable, m2::RectD const & rect, int scale)
{
  // Translate |rect| into a set of index intervals for the given
  // |scale| first, then let the implementation walk the geometry
  // index of |value| over these intervals.
  covering::IntervalsT intervals;
  CoverRect(rect, scale, intervals);

  auto features = RetrieveGeometryFeaturesImpl(value, cancellable, intervals, scale);
  return features;
}
void Retrieval::Init(Index & index, vector<shared_ptr<MwmInfo>> const & infos,
m2::RectD const & viewport, SearchQueryParams const & params,
Limits const & limits)
@ -520,7 +523,7 @@ bool Retrieval::InitBucketStrategy(Bucket & bucket, double scale)
try
{
addressFeatures = RetrieveAddressFeatures(bucket.m_handle.GetValue<MwmValue>(),
addressFeatures = RetrieveAddressFeatures(*bucket.m_handle.GetValue<MwmValue>(),
*this /* cancellable */, m_params);
}
catch (CancelException &)

View file

@ -106,8 +106,12 @@ public:
// Retrieves from the search index corresponding to |value| all
// features matching to |params|.
static unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(
MwmValue * value, my::Cancellable const & cancellable, SearchQueryParams const & params);
WARN_UNUSED_RESULT static unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(
MwmValue & value, my::Cancellable const & cancellable, SearchQueryParams const & params);
// Retrieves from the geometry index corresponding to |value| all features belonging to |rect|.
WARN_UNUSED_RESULT static unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures(
MwmValue & value, my::Cancellable const & cancellable, m2::RectD const & rect, int scale);
// Initializes retrieval process, sets up internal state, takes all
// necessary system resources.

View file

@ -44,12 +44,14 @@ HEADERS += \
search_string_utils.hpp \
search_trie.hpp \
suggest.hpp \
v2/features_filter.hpp \
v2/features_layer.hpp \
v2/features_layer_matcher.hpp \
v2/features_layer_path_finder.hpp \
v2/geocoder.hpp \
v2/house_numbers_matcher.hpp \
v2/house_to_street_table.hpp \
v2/rank_table_cache.hpp \
v2/search_model.hpp \
v2/search_query_v2.hpp \
v2/street_vicinity_loader.hpp \
@ -79,12 +81,14 @@ SOURCES += \
search_query.cpp \
search_query_params.cpp \
search_string_utils.cpp \
v2/features_filter.cpp \
v2/features_layer.cpp \
v2/features_layer_matcher.cpp \
v2/features_layer_path_finder.cpp \
v2/geocoder.cpp \
v2/house_numbers_matcher.cpp \
v2/house_to_street_table.cpp \
v2/rank_table_cache.cpp \
v2/search_model.cpp \
v2/search_query_v2.cpp \
v2/street_vicinity_loader.cpp \

View file

@ -61,9 +61,6 @@ namespace
using TCompareFunction1 = function<bool(impl::PreResult1 const &, impl::PreResult1 const &)>;
using TCompareFunction2 = function<bool(impl::PreResult2 const &, impl::PreResult2 const &)>;
// Maximum result candidates count for each viewport/criteria.
size_t const kPreResultsCount = 200;
TCompareFunction1 const g_arrCompare1[] = {
&impl::PreResult1::LessPriority, &impl::PreResult1::LessRank,
};
@ -210,6 +207,9 @@ RankTable const * Query::RetrievalCallback::LoadTable(MwmSet::MwmId const & id)
void Query::RetrievalCallback::UnloadTable(MwmSet::MwmId const & id) { m_rankTables.erase(id); }
// static
size_t const Query::kPreResultsCount;
Query::Query(Index & index, CategoriesHolder const & categories, vector<Suggest> const & suggests,
storage::CountryInfoGetter const & infoGetter)
: m_index(index)

View file

@ -65,6 +65,9 @@ namespace impl
class Query : public my::Cancellable
{
public:
// Maximum result candidates count for each viewport/criteria.
static size_t const kPreResultsCount = 200;
Query(Index & index, CategoriesHolder const & categories, vector<Suggest> const & suggests,
storage::CountryInfoGetter const & infoGetter);
@ -105,7 +108,7 @@ public:
// Get scale level to make geometry index query for current viewport.
virtual int GetQueryIndexScale(m2::RectD const & viewport) const;
void ClearCaches();
virtual void ClearCaches();
struct CancelException {};

View file

@ -0,0 +1,71 @@
#include "search/v2/features_filter.hpp"
#include "search/dummy_rank_table.hpp"
#include "search/retrieval.hpp"
#include "indexer/index.hpp"
#include "indexer/scales.hpp"
namespace search
{
namespace v2
{
// Builds a filter without an mwm attached: the cache is marked as
// invalid, the limit on the number of results is zero and the scale
// is the upper scale. Only a reference to |cancellable| is stored.
FeaturesFilter::FeaturesFilter(my::Cancellable const & cancellable)
  : m_maxNumResults(0), m_scale(scales::GetUpperScale()), m_cacheIsValid(false)
  , m_value(nullptr), m_cancellable(cancellable)
{
}
// Attaches the filter to the mwm described by |value| and |id|. The
// cached features are invalidated only when the mwm really changes.
void FeaturesFilter::SetValue(MwmValue * value, MwmSet::MwmId const & id)
{
  bool const sameMwm = (m_value == value) && (m_id == id);
  if (!sameMwm)
  {
    m_value = value;
    m_id = id;
    m_cacheIsValid = false;
  }
}
// Remembers a new viewport. The cached features are invalidated only
// when |viewport| differs from the current one.
void FeaturesFilter::SetViewport(m2::RectD const & viewport)
{
  bool const unchanged = (viewport == m_viewport);
  if (!unchanged)
  {
    m_viewport = viewport;
    m_cacheIsValid = false;
  }
}
// Sets the threshold used by NeedToFilter(). Does not touch the cache.
void FeaturesFilter::SetMaxNumResults(size_t maxNumResults)
{
  m_maxNumResults = maxNumResults;
}
// Remembers the scale passed to the geometry retrieval. The cached
// features are invalidated only when |scale| differs from the current one.
void FeaturesFilter::SetScale(int scale)
{
  if (m_scale != scale)
  {
    m_scale = scale;
    m_cacheIsValid = false;
  }
}
// Returns true when |features| holds more elements than the limit
// set by SetMaxNumResults().
bool FeaturesFilter::NeedToFilter(vector<uint32_t> const & features) const
{
  size_t const numFeatures = features.size();
  return m_maxNumResults < numFeatures;
}
// Rebuilds the cache of features if it was invalidated by one of the
// setters. Without an attached mwm the cache is simply dropped,
// otherwise it is refilled from the geometry index for the current
// viewport and scale.
void FeaturesFilter::UpdateCache()
{
  if (m_cacheIsValid)
    return;

  if (m_value)
  {
    m_featuresCache =
        Retrieval::RetrieveGeometryFeatures(*m_value, m_cancellable, m_viewport, m_scale);
  }
  else
  {
    m_featuresCache.reset();
  }

  // Reached only when the retrieval above completed.
  m_cacheIsValid = true;
}
} // namespace v2
} // namespace search

View file

@ -0,0 +1,69 @@
#pragma once
#include "indexer/mwm_set.hpp"
#include "coding/compressed_bit_vector.hpp"
#include "geometry/rect2d.hpp"
#include "base/cancellable.hpp"
#include "std/algorithm.hpp"
#include "std/unique_ptr.hpp"
#include "std/utility.hpp"
#include "std/vector.hpp"
class MwmValue;
namespace search
{
namespace v2
{
// Restricts a set of features to those lying inside a viewport.
//
// The features intersecting the viewport are retrieved lazily (see
// UpdateCache()) and are kept until the mwm, the viewport or the
// scale are changed through the corresponding setters.
//
// The filter stores a reference to |cancellable| and a non-owning
// pointer to the MwmValue passed to SetValue().
class FeaturesFilter
{
public:
  FeaturesFilter(my::Cancellable const & cancellable);

  // Sets the mwm the filtered features belong to. Invalidates the
  // cache when |value| or |id| differ from the current ones.
  void SetValue(MwmValue * value, MwmSet::MwmId const & id);

  // Sets the viewport. Invalidates the cache when it changes.
  void SetViewport(m2::RectD const & viewport);

  // Sets the threshold used by NeedToFilter().
  void SetMaxNumResults(size_t maxNumResults);

  // Sets the scale used for the retrieval of features from the
  // viewport. Invalidates the cache when it changes.
  void SetScale(int scale);

  // Returns true when |features| contains more elements than the
  // threshold set by SetMaxNumResults().
  bool NeedToFilter(vector<uint32_t> const & features) const;

  // Calls |fn| for each feature from |features| that is present in
  // the cached set of viewport features, preserving the order of
  // |features|. Nothing is emitted when there is no cached set (for
  // example, no mwm is attached) or when the cached set is empty.
  template <typename TFn>
  void Filter(vector<uint32_t> const & features, TFn && fn)
  {
    UpdateCache();

    if (!m_featuresCache || m_featuresCache->PopCount() == 0)
      return;

    // Emit all features from the viewport.
    for (uint32_t feature : features)
    {
      if (m_featuresCache->GetBit(feature))
        fn(feature);
    }
  }

private:
  // Refreshes |m_featuresCache| if it is marked as invalid.
  void UpdateCache();

  m2::RectD m_viewport;
  size_t m_maxNumResults;
  int m_scale;

  // Features from |m_viewport|; may be null.
  unique_ptr<coding::CompressedBitVector> m_featuresCache;
  bool m_cacheIsValid;

  // Non-owning pointer to the current mwm value; may be null.
  MwmValue * m_value;
  MwmSet::MwmId m_id;
  my::Cancellable const & m_cancellable;
};
} // namespace v2
} // namespace search

View file

@ -12,7 +12,7 @@ FeaturesLayer::FeaturesLayer() { Clear(); }
void FeaturesLayer::Clear()
{
m_sortedFeatures.clear();
m_sortedFeatures = nullptr;
m_subQuery.clear();
m_startToken = 0;
m_endToken = 0;
@ -22,7 +22,8 @@ void FeaturesLayer::Clear()
string DebugPrint(FeaturesLayer const & layer)
{
ostringstream os;
os << "FeaturesLayer [ size of m_sortedFeatures: " << layer.m_sortedFeatures.size()
os << "FeaturesLayer [ size of m_sortedFeatures: "
<< (layer.m_sortedFeatures ? layer.m_sortedFeatures->size() : 0)
<< ", m_subQuery: " << layer.m_subQuery << ", m_startToken: " << layer.m_startToken
<< ", m_endToken: " << layer.m_endToken << ", m_type: " << DebugPrint(layer.m_type) << " ]";
return os.str();

View file

@ -4,8 +4,6 @@
#include "std/vector.hpp"
#include "base/macros.hpp"
namespace search
{
namespace v2
@ -16,19 +14,17 @@ namespace v2
struct FeaturesLayer
{
FeaturesLayer();
FeaturesLayer(FeaturesLayer && layer) = default;
void Clear();
vector<uint32_t> m_sortedFeatures;
// Non-owning ptr to a sorted vector of features.
vector<uint32_t> const * m_sortedFeatures;
string m_subQuery;
size_t m_startToken;
size_t m_endToken;
SearchModel::SearchType m_type;
DISALLOW_COPY(FeaturesLayer);
};
string DebugPrint(FeaturesLayer const & layer);

View file

@ -22,5 +22,47 @@ FeaturesLayerMatcher::FeaturesLayerMatcher(Index & index, MwmSet::MwmId const &
{
ASSERT(m_houseToStreetTable.get(), ("Can't load HouseToStreetTable"));
}
// Looks up (and memoizes) the street a house is attached to. The
// street is picked from the list of nearby streets by the index
// stored in the house-to-street table; kInvalidId is remembered and
// returned when that index is out of range or the street belongs to
// another mwm.
uint32_t FeaturesLayerMatcher::GetMatchingStreet(uint32_t houseId, FeatureType & houseFeature)
{
  auto const cached = m_matchingStreetsCache.find(houseId);
  if (cached != m_matchingStreetsCache.cend())
    return cached->second;

  auto const & nearby = GetNearbyStreets(houseId, houseFeature);
  uint32_t const index = m_houseToStreetTable->Get(houseId);

  uint32_t result = kInvalidId;
  bool const found = index < nearby.size() && nearby[index].m_id.m_mwmId == m_mwmId;
  if (found)
    result = nearby[index].m_id.m_index;

  m_matchingStreetsCache[houseId] = result;
  return result;
}
// Returns the cached list of streets near the feature |featureId|.
// On a cache miss the feature is loaded from the features vector and
// the list is computed by the two-argument overload.
vector<ReverseGeocoder::Street> const & FeaturesLayerMatcher::GetNearbyStreets(uint32_t featureId)
{
  auto const cached = m_nearbyStreetsCache.find(featureId);
  if (cached != m_nearbyStreetsCache.cend())
    return cached->second;

  FeatureType loaded;
  m_featuresVector.GetByIndex(featureId, loaded);
  return GetNearbyStreets(featureId, loaded);
}
// Returns the cached list of streets near the feature |featureId|.
// On a cache miss the list is computed by the reverse geocoder from
// the already loaded |feature| and stored in the cache.
vector<ReverseGeocoder::Street> const & FeaturesLayerMatcher::GetNearbyStreets(
    uint32_t featureId, FeatureType & feature)
{
  auto const cached = m_nearbyStreetsCache.find(featureId);
  if (cached == m_nearbyStreetsCache.cend())
  {
    auto & result = m_nearbyStreetsCache[featureId];
    m_reverseGeocoder.GetNearbyStreets(feature, result);
    return result;
  }

  return cached->second;
}
} // namespace v2
} // namespace search

View file

@ -10,6 +10,7 @@
#include "indexer/feature.hpp"
#include "indexer/feature_algo.hpp"
#include "indexer/feature_impl.hpp"
#include "indexer/features_vector.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "indexer/mwm_set.hpp"
@ -19,11 +20,14 @@
#include "geometry/rect2d.hpp"
#include "base/cancellable.hpp"
#include "base/logging.hpp"
#include "base/macros.hpp"
#include "base/stl_helpers.hpp"
#include "std/algorithm.hpp"
#include "std/bind.hpp"
#include "std/limits.hpp"
#include "std/unordered_map.hpp"
#include "std/vector.hpp"
class Index;
@ -51,26 +55,27 @@ namespace v2
class FeaturesLayerMatcher
{
public:
static uint32_t const kInvalidId = numeric_limits<uint32_t>::max();
FeaturesLayerMatcher(Index & index, MwmSet::MwmId const & mwmId, MwmValue & value,
FeaturesVector const & featuresVector, my::Cancellable const & cancellable);
template <typename TFn>
void Match(FeaturesLayer const & child, vector<uint32_t> const & sortedParentFeatures,
SearchModel::SearchType parentType, TFn && fn)
void Match(FeaturesLayer const & child, FeaturesLayer const & parent, TFn && fn)
{
if (child.m_type >= parentType)
if (child.m_type >= parent.m_type)
return;
if (parentType == SearchModel::SEARCH_TYPE_STREET)
if (parent.m_type == SearchModel::SEARCH_TYPE_STREET)
{
if (child.m_type == SearchModel::SEARCH_TYPE_POI)
MatchPOIsWithStreets(child, sortedParentFeatures, parentType, forward<TFn>(fn));
MatchPOIsWithStreets(child, parent, forward<TFn>(fn));
else if (child.m_type == SearchModel::SEARCH_TYPE_BUILDING)
MatchBuildingsWithStreets(child, sortedParentFeatures, parentType, forward<TFn>(fn));
MatchBuildingsWithStreets(child, parent, forward<TFn>(fn));
return;
}
vector<m2::PointD> childCenters;
for (uint32_t featureId : child.m_sortedFeatures)
for (uint32_t featureId : *child.m_sortedFeatures)
{
FeatureType ft;
m_featuresVector.GetByIndex(featureId, ft);
@ -79,48 +84,41 @@ public:
BailIfCancelled(m_cancellable);
vector<m2::RectD> parentRects;
for (uint32_t featureId : sortedParentFeatures)
{
FeatureType feature;
m_featuresVector.GetByIndex(featureId, feature);
m2::PointD center = feature::GetCenter(feature, FeatureType::WORST_GEOMETRY);
double radius = ftypes::GetRadiusByPopulation(feature.GetPopulation());
parentRects.push_back(MercatorBounds::RectByCenterXYAndSizeInMeters(center, radius));
}
for (size_t j = 0; j < sortedParentFeatures.size(); ++j)
for (size_t j = 0; j < parent.m_sortedFeatures->size(); ++j)
{
BailIfCancelled(m_cancellable);
for (size_t i = 0; i < child.m_sortedFeatures.size(); ++i)
FeatureType ft;
m_featuresVector.GetByIndex((*parent.m_sortedFeatures)[j], ft);
m2::PointD const center = feature::GetCenter(ft, FeatureType::WORST_GEOMETRY);
double const radius = ftypes::GetRadiusByPopulation(ft.GetPopulation());
m2::RectD const rect = MercatorBounds::RectByCenterXYAndSizeInMeters(center, radius);
for (size_t i = 0; i < child.m_sortedFeatures->size(); ++i)
{
if (parentRects[j].IsPointInside(childCenters[i]))
fn(child.m_sortedFeatures[i], sortedParentFeatures[j]);
if (rect.IsPointInside(childCenters[i]))
fn((*child.m_sortedFeatures)[i], (*parent.m_sortedFeatures)[j]);
}
}
}
private:
template <typename TFn>
void MatchPOIsWithStreets(FeaturesLayer const & child,
vector<uint32_t> const & sortedParentFeatures,
SearchModel::SearchType parentType, TFn && fn)
void MatchPOIsWithStreets(FeaturesLayer const & child, FeaturesLayer const & parent, TFn && fn)
{
ASSERT_EQUAL(child.m_type, SearchModel::SEARCH_TYPE_POI, ());
ASSERT_EQUAL(parentType, SearchModel::SEARCH_TYPE_STREET, ());
ASSERT_EQUAL(parent.m_type, SearchModel::SEARCH_TYPE_STREET, ());
for (uint32_t streetId : sortedParentFeatures)
for (uint32_t streetId : *parent.m_sortedFeatures)
{
BailIfCancelled(m_cancellable);
m_loader.ForEachInVicinity(streetId, child.m_sortedFeatures, bind(fn, _1, streetId));
m_loader.ForEachInVicinity(streetId, *child.m_sortedFeatures, bind(fn, _1, streetId));
}
}
template <typename TFn>
void MatchBuildingsWithStreets(FeaturesLayer const & child,
vector<uint32_t> const & sortedParentFeatures,
SearchModel::SearchType parentType, TFn && fn)
void MatchBuildingsWithStreets(FeaturesLayer const & child, FeaturesLayer const & parent,
TFn && fn)
{
// child.m_sortedFeatures contains only buildings matched by name,
// not by house number. So, we need to add to
@ -130,10 +128,12 @@ private:
auto const & checker = ftypes::IsBuildingChecker::Instance();
ASSERT_EQUAL(child.m_type, SearchModel::SEARCH_TYPE_BUILDING, ());
ASSERT_EQUAL(parentType, SearchModel::SEARCH_TYPE_STREET, ());
ASSERT_EQUAL(parent.m_type, SearchModel::SEARCH_TYPE_STREET, ());
vector<string> queryTokens;
NormalizeHouseNumber(child.m_subQuery, queryTokens);
bool const queryLooksLikeHouseNumber =
feature::IsHouseNumber(child.m_subQuery) && !queryTokens.empty();
uint32_t numFilterInvocations = 0;
auto filter = [&](uint32_t id, FeatureType & feature) -> bool
@ -144,34 +144,58 @@ private:
if (!checker(feature))
return false;
if (binary_search(child.m_sortedFeatures.begin(), child.m_sortedFeatures.end(), id))
if (binary_search(child.m_sortedFeatures->begin(), child.m_sortedFeatures->end(), id))
return true;
// HouseNumbersMatch() calls are expensive, so following code
// tries to reduce number of calls. The most important
// optimization: as first tokens from the house-number part of
// the query and feature's house numbers must be numbers, their
// first symbols must be the same.
string const houseNumber = feature.GetHouseNumber();
if (!queryLooksLikeHouseNumber || !feature::IsHouseNumber(houseNumber))
return false;
if (queryTokens[0][0] != houseNumber[0])
return false;
return HouseNumbersMatch(feature.GetHouseNumber(), queryTokens);
};
auto addEdge = [&](uint32_t houseId, FeatureType & houseFeature, uint32_t streetId)
{
vector<ReverseGeocoder::Street> streets;
m_reverseGeocoder.GetNearbyStreets(houseFeature, streets);
uint32_t streetIndex = m_houseToStreetTable->Get(houseId);
if (streetIndex < streets.size() && streets[streetIndex].m_id.m_mwmId == m_mwmId &&
streets[streetIndex].m_id.m_index == streetId)
{
if (GetMatchingStreet(houseId, houseFeature) == streetId)
fn(houseId, streetId);
}
};
for (uint32_t streetId : sortedParentFeatures)
for (uint32_t streetId : *parent.m_sortedFeatures)
{
BailIfCancelled(m_cancellable);
m_loader.FilterFeaturesInVicinity(streetId, filter, bind(addEdge, _1, _2, streetId));
}
}
// Returns id of a street feature corresponding to a |houseId|, or
// kInvalidId if there is no such street.
uint32_t GetMatchingStreet(uint32_t houseId, FeatureType & houseFeature);
vector<ReverseGeocoder::Street> const & GetNearbyStreets(uint32_t featureId);
vector<ReverseGeocoder::Street> const & GetNearbyStreets(uint32_t featureId,
FeatureType & feature);
MwmSet::MwmId m_mwmId;
ReverseGeocoder m_reverseGeocoder;
// Cache of streets in a feature's vicinity. All lists in the cache
// are ordered by a distance.
unordered_map<uint32_t, vector<ReverseGeocoder::Street>> m_nearbyStreetsCache;
// Cache of correct streets for buildings. Current search algorithm
// supports only one street for a building, whereas buildings can be
// located on multiple streets.
unordered_map<uint32_t, uint32_t> m_matchingStreetsCache;
unique_ptr<HouseToStreetTable> m_houseToStreetTable;
FeaturesVector const & m_featuresVector;
StreetVicinityLoader m_loader;
my::Cancellable const & m_cancellable;

View file

@ -2,6 +2,7 @@
#include "search/cancel_exception.hpp"
#include "search/v2/features_layer_matcher.hpp"
#include "search/v2/features_filter.hpp"
#include "indexer/features_vector.hpp"
@ -16,18 +17,16 @@ FeaturesLayerPathFinder::FeaturesLayerPathFinder(my::Cancellable const & cancell
{
}
void FeaturesLayerPathFinder::BuildGraph(FeaturesLayerMatcher & matcher,
void FeaturesLayerPathFinder::BuildGraph(FeaturesLayerMatcher & matcher, FeaturesFilter & filter,
vector<FeaturesLayer const *> const & layers,
vector<uint32_t> & reachable)
{
if (layers.empty())
return;
FeaturesLayer child;
reachable = *(layers.back()->m_sortedFeatures);
reachable = layers.back()->m_sortedFeatures;
vector<uint32_t> tmpBuffer;
vector<uint32_t> buffer;
// The order matters here, as we need to intersect BUILDINGs with
// STREETs first, and then POIs with BUILDINGs.
@ -35,16 +34,29 @@ void FeaturesLayerPathFinder::BuildGraph(FeaturesLayerMatcher & matcher,
{
BailIfCancelled(m_cancellable);
tmpBuffer.clear();
if (reachable.empty())
break;
if (filter.NeedToFilter(reachable))
{
buffer.clear();
filter.Filter(reachable, MakeBackInsertFunctor(buffer));
reachable.swap(buffer);
my::SortUnique(reachable);
}
buffer.clear();
auto addEdge = [&](uint32_t childFeature, uint32_t /* parentFeature */)
{
tmpBuffer.push_back(childFeature);
buffer.push_back(childFeature);
};
matcher.Match(*layers[i - 1], reachable, layers[i]->m_type, addEdge);
FeaturesLayer parent(*layers[i]);
parent.m_sortedFeatures = &reachable;
matcher.Match(*layers[i - 1], parent, addEdge);
my::SortUnique(tmpBuffer);
reachable.swap(tmpBuffer);
reachable.swap(buffer);
my::SortUnique(reachable);
}
}
} // namespace v2

View file

@ -16,6 +16,7 @@ namespace search
{
namespace v2
{
class FeaturesFilter;
class FeaturesLayerMatcher;
// This class is able to find all paths through a layered graph, with
@ -33,22 +34,22 @@ public:
FeaturesLayerPathFinder(my::Cancellable const & cancellable);
template <typename TFn>
void ForEachReachableVertex(FeaturesLayerMatcher & matcher,
void ForEachReachableVertex(FeaturesLayerMatcher & matcher, FeaturesFilter & filter,
vector<FeaturesLayer const *> const & layers, TFn && fn)
{
if (layers.empty())
return;
vector<uint32_t> reachable;
BuildGraph(matcher, layers, reachable);
BuildGraph(matcher, filter, layers, reachable);
for (uint32_t featureId : reachable)
fn(featureId);
}
private:
void BuildGraph(FeaturesLayerMatcher & matcher, vector<FeaturesLayer const *> const & layers,
vector<uint32_t> & reachable);
void BuildGraph(FeaturesLayerMatcher & matcher, FeaturesFilter & filter,
vector<FeaturesLayer const *> const & layers, vector<uint32_t> & reachable);
my::Cancellable const & m_cancellable;
};

View file

@ -19,6 +19,7 @@
#include "base/macros.hpp"
#include "base/scope_guard.hpp"
#include "base/stl_add.hpp"
#include "base/stl_helpers.hpp"
#include "std/algorithm.hpp"
#include "std/iterator.hpp"
@ -51,11 +52,44 @@ void JoinQueryTokens(SearchQueryParams const & params, size_t curToken, size_t e
}
} // namespace
// Geocoder::Partition
// Creates a partition with zero total size.
Geocoder::Partition::Partition()
  : m_size(0)
{
}
// Rebuilds the partition from |features|: every feature is loaded
// via |loader|, classified by |model| and appended to the cluster of
// its search type. Features whose type is SEARCH_TYPE_COUNT are
// skipped. |m_size| is set to the total number of stored features.
// A null |features| leaves all clusters empty.
void Geocoder::Partition::FromFeatures(unique_ptr<coding::CompressedBitVector> features,
                                       Index::FeaturesLoaderGuard & loader,
                                       SearchModel const & model)
{
  // Forget the result of a previous call.
  for (auto & cluster : m_clusters)
    cluster.clear();

  auto addToCluster = [&](uint64_t featureId)
  {
    FeatureType ft;
    loader.GetFeatureByIndex(featureId, ft);
    ft.ParseTypes();

    SearchModel::SearchType const type = model.GetSearchType(ft);
    if (type == SearchModel::SEARCH_TYPE_COUNT)
      return;
    m_clusters[type].push_back(featureId);
  };

  if (features)
    coding::CompressedBitVectorEnumerator::ForEach(*features, addToCluster);

  size_t total = 0;
  for (auto const & cluster : m_clusters)
    total += cluster.size();
  m_size = total;
}
// Geocoder::Params --------------------------------------------------------------------------------
Geocoder::Params::Params()
  : m_maxNumResults(0)
{
}
// Geocoder::Geocoder ------------------------------------------------------------------------------
Geocoder::Geocoder(Index & index)
: m_index(index)
, m_numTokens(0)
, m_model(SearchModel::Instance())
, m_value(nullptr)
, m_filter(static_cast<my::Cancellable const &>(*this))
, m_finder(static_cast<my::Cancellable const &>(*this))
, m_results(nullptr)
{
@ -63,11 +97,15 @@ Geocoder::Geocoder(Index & index)
Geocoder::~Geocoder() {}
void Geocoder::SetSearchQueryParams(SearchQueryParams const & params)
void Geocoder::SetParams(Params const & params)
{
m_params = params;
m_retrievalParams = params;
m_filter.SetViewport(m_params.m_viewport);
m_filter.SetMaxNumResults(m_params.m_maxNumResults);
m_filter.SetScale(m_params.m_scale);
m_numTokens = m_params.m_tokens.size();
if (!m_params.m_prefixTokens.empty())
++m_numTokens;
@ -99,16 +137,26 @@ void Geocoder::Go(vector<FeatureID> & results)
m_mwmId = handle.GetId();
MY_SCOPE_GUARD(cleanup, [&]()
{
m_matcher.reset();
m_loader.reset();
m_cache.clear();
});
{
m_matcher.reset();
m_loader.reset();
m_partitions.clear();
});
m_cache.clear();
m_partitions.clear();
m_loader.reset(new Index::FeaturesLoaderGuard(m_index, m_mwmId));
m_matcher.reset(new FeaturesLayerMatcher(
m_index, m_mwmId, *m_value, m_loader->GetFeaturesVector(), *this /* cancellable */));
m_filter.SetValue(m_value, m_mwmId);
m_partitions.resize(m_numTokens);
for (size_t i = 0; i < m_numTokens; ++i)
{
PrepareRetrievalParams(i, i + 1);
m_partitions[i].FromFeatures(Retrieval::RetrieveAddressFeatures(
*m_value, *this /* cancellable */, m_retrievalParams),
*m_loader, m_model);
}
DoGeocoding(0 /* curToken */);
}
@ -118,7 +166,13 @@ void Geocoder::Go(vector<FeatureID> & results)
}
}
void Geocoder::PrepareParams(size_t curToken, size_t endToken)
// Drops the state cached between geocoding runs: the per-token
// partitions and the current features layer matcher.
void Geocoder::ClearCaches()
{
// Removes all partitions built by Go().
m_partitions.clear();
// Releases the matcher created by Go().
m_matcher.reset();
}
void Geocoder::PrepareRetrievalParams(size_t curToken, size_t endToken)
{
ASSERT_LESS(curToken, endToken, ());
ASSERT_LESS_OR_EQUAL(endToken, m_numTokens, ());
@ -158,7 +212,6 @@ void Geocoder::DoGeocoding(size_t curToken)
{
BailIfCancelled(static_cast<my::Cancellable const &>(*this));
PrepareParams(curToken, curToken + n);
{
auto & layer = m_layers.back();
layer.Clear();
@ -168,26 +221,15 @@ void Geocoder::DoGeocoding(size_t curToken)
layer.m_subQuery);
}
// TODO (@y, @m): as |n| increases, good optimization is to update
// |features| incrementally, from [curToken, curToken + n) to
// [curToken, curToken + n + 1).
auto features = RetrieveAddressFeatures(curToken, curToken + n);
vector<uint32_t> clusters[SearchModel::SEARCH_TYPE_COUNT];
auto clusterize = [&](uint64_t featureId)
{
FeatureType feature;
m_loader->GetFeatureByIndex(featureId, feature);
feature.ParseTypes();
SearchModel::SearchType searchType = m_model.GetSearchType(feature);
if (searchType != SearchModel::SEARCH_TYPE_COUNT)
clusters[searchType].push_back(featureId);
};
if (features)
coding::CompressedBitVectorEnumerator::ForEach(*features, clusterize);
BailIfCancelled(static_cast<my::Cancellable const &>(*this));
bool const looksLikeHouseNumber = feature::IsHouseNumber(m_layers.back().m_subQuery);
auto const & partition = m_partitions[curToken + n - 1];
if (partition.m_size == 0 && !looksLikeHouseNumber)
break;
vector<uint32_t> clusters[SearchModel::SEARCH_TYPE_COUNT];
vector<uint32_t> buffer;
for (size_t i = 0; i != SearchModel::SEARCH_TYPE_COUNT; ++i)
{
@ -195,12 +237,37 @@ void Geocoder::DoGeocoding(size_t curToken)
// DoGeocoding(). This may lead to use-after-free.
auto & layer = m_layers.back();
// Following code intersects posting lists for tokens [curToken,
// curToken + n). This can be done incrementally, as we have
// |clusters| to store intersections.
if (n == 1)
{
layer.m_sortedFeatures = &partition.m_clusters[i];
}
else if (n == 2)
{
clusters[i].clear();
auto const & first = m_partitions[curToken].m_clusters[i];
auto const & second = m_partitions[curToken + 1].m_clusters[i];
set_intersection(first.begin(), first.end(), second.begin(), second.end(),
back_inserter(clusters[i]));
layer.m_sortedFeatures = &clusters[i];
}
else
{
buffer.clear();
set_intersection(clusters[i].begin(), clusters[i].end(), partition.m_clusters[i].begin(),
partition.m_clusters[i].end(), back_inserter(buffer));
clusters[i].swap(buffer);
layer.m_sortedFeatures = &clusters[i];
}
if (i == SearchModel::SEARCH_TYPE_BUILDING)
{
if (clusters[i].empty() && !looksLikeHouseNumber)
if (layer.m_sortedFeatures->empty() && !looksLikeHouseNumber)
continue;
}
else if (clusters[i].empty())
else if (layer.m_sortedFeatures->empty())
{
continue;
}
@ -213,8 +280,6 @@ void Geocoder::DoGeocoding(size_t curToken)
continue;
}
layer.m_sortedFeatures.swap(clusters[i]);
ASSERT(is_sorted(layer.m_sortedFeatures.begin(), layer.m_sortedFeatures.end()), ());
layer.m_type = static_cast<SearchModel::SearchType>(i);
if (IsLayerSequenceSane())
DoGeocoding(curToken + n);
@ -222,34 +287,41 @@ void Geocoder::DoGeocoding(size_t curToken)
}
}
coding::CompressedBitVector * Geocoder::RetrieveAddressFeatures(size_t curToken, size_t endToken)
{
uint64_t const key = (static_cast<uint64_t>(curToken) << 32) | static_cast<uint64_t>(endToken);
if (m_cache.find(key) == m_cache.end())
{
m_cache[key] =
Retrieval::RetrieveAddressFeatures(m_value, *this /* cancellable */, m_retrievalParams);
}
return m_cache[key].get();
}
bool Geocoder::IsLayerSequenceSane() const
{
ASSERT(!m_layers.empty(), ());
static_assert(SearchModel::SEARCH_TYPE_COUNT <= 32,
"Select a wider type to represent search types mask.");
uint32_t mask = 0;
for (auto const & layer : m_layers)
size_t buildingIndex = m_layers.size();
size_t streetIndex = m_layers.size();
// Following loop returns false iff there're two different layers
// of the same search type.
for (size_t i = 0; i < m_layers.size(); ++i)
{
auto const & layer = m_layers[i];
ASSERT_NOT_EQUAL(layer.m_type, SearchModel::SEARCH_TYPE_COUNT, ());
// TODO (@y): probably it's worth to check belongs-to-locality here.
uint32_t bit = 1U << layer.m_type;
if (mask & bit)
return false;
mask |= bit;
if (layer.m_type == SearchModel::SEARCH_TYPE_BUILDING)
buildingIndex = i;
if (layer.m_type == SearchModel::SEARCH_TYPE_STREET)
streetIndex = i;
// Checks that building and street layers are neighbours.
if (buildingIndex != m_layers.size() && streetIndex != m_layers.size() &&
buildingIndex != streetIndex + 1 && streetIndex != buildingIndex + 1)
{
return false;
}
}
return true;
}
@ -257,22 +329,17 @@ void Geocoder::FindPaths()
{
ASSERT(!m_layers.empty(), ());
auto const compareByType = [](FeaturesLayer const * lhs, FeaturesLayer const * rhs)
{
return lhs->m_type < rhs->m_type;
};
// Layers ordered by a search type.
vector<FeaturesLayer const *> sortedLayers;
sortedLayers.reserve(m_layers.size());
for (auto & layer : m_layers)
sortedLayers.push_back(&layer);
sort(sortedLayers.begin(), sortedLayers.end(), compareByType);
sort(sortedLayers.begin(), sortedLayers.end(), my::CompareBy(&FeaturesLayer::m_type));
m_finder.ForEachReachableVertex(*m_matcher, sortedLayers, [this](uint32_t featureId)
{
m_results->emplace_back(m_mwmId, featureId);
});
m_finder.ForEachReachableVertex(*m_matcher, m_filter, sortedLayers, [this](uint32_t featureId)
{
m_results->emplace_back(m_mwmId, featureId);
});
}
} // namespace v2
} // namespace search

View file

@ -1,6 +1,7 @@
#pragma once
#include "search/search_query_params.hpp"
#include "search/v2/features_filter.hpp"
#include "search/v2/features_layer.hpp"
#include "search/v2/features_layer_path_finder.hpp"
#include "search/v2/search_model.hpp"
@ -14,6 +15,7 @@
#include "base/buffer_vector.hpp"
#include "base/cancellable.hpp"
#include "base/macros.hpp"
#include "base/string_utils.hpp"
#include "std/set.hpp"
@ -31,8 +33,6 @@ class CompressedBitVector;
namespace search
{
class RankTable;
namespace v2
{
class FeaturesLayerMatcher;
@ -56,32 +56,52 @@ class SearchModel;
class Geocoder : public my::Cancellable
{
public:
// Geocoder parameters: the search query params extended with a
// viewport rect and a limit on the number of results.
struct Params : public SearchQueryParams
{
Params();
// Viewport rect; SearchQueryV2::Search() fills it with the current
// viewport.
m2::RectD m_viewport;
// Presumably an upper bound on the number of results to collect
// (the caller derives it from the requested result count) - confirm
// against the geocoder implementation.
size_t m_maxNumResults;
};
Geocoder(Index & index);
~Geocoder() override;
// Sets search query params.
void SetSearchQueryParams(SearchQueryParams const & params);
void SetParams(Params const & params);
// Starts geocoding, retrieved features will be appended to
// |results|.
void Go(vector<FeatureID> & results);
void ClearCaches();
private:
// Feature ids grouped into one cluster per search type. The type
// declares a defaulted move constructor and is marked DISALLOW_COPY.
struct Partition
{
Partition();
Partition(Partition &&) = default;
// Fills this partition from |features|.
// NOTE(review): presumably each feature is loaded via |loader|,
// classified by |model| and put into the cluster of its search
// type - the implementation is not visible here, confirm.
void FromFeatures(unique_ptr<coding::CompressedBitVector> features,
Index::FeaturesLoaderGuard & loader, SearchModel const & model);
// One vector of feature ids per search type.
vector<uint32_t> m_clusters[SearchModel::SEARCH_TYPE_COUNT];
// NOTE(review): presumably the total number of features over all
// clusters - confirm.
size_t m_size;
DISALLOW_COPY(Partition);
};
// Fills |m_retrievalParams| with [curToken, endToken) subsequence
// of search query tokens.
void PrepareParams(size_t curToken, size_t endToken);
void PrepareRetrievalParams(size_t curToken, size_t endToken);
// Tries to find all paths in a search tree, where each edge is
// marked with some substring of the query tokens. These paths are
// called "layer sequence" and current path is stored in |m_layers|.
void DoGeocoding(size_t curToken);
// Returns CBV of features corresponding to [curToken, endToken)
// subsequence of search query tokens. This method caches results of
// previous requests.
coding::CompressedBitVector * RetrieveAddressFeatures(size_t curToken, size_t endToken);
// Returns true if current path in the search tree (see comment for
// DoGeocoding()) looks sane. This method is used as a fast
// pre-check to cut off unnecessary work.
@ -93,8 +113,8 @@ private:
Index & m_index;
// Initial search query params.
SearchQueryParams m_params;
// Geocoder params.
Params m_params;
// Total number of search query tokens.
size_t m_numTokens;
@ -112,8 +132,10 @@ private:
// Id of a current mwm.
MwmSet::MwmId m_mwmId;
// Cache of posting list of features.
unordered_map<uint64_t, unique_ptr<coding::CompressedBitVector>> m_cache;
// Cache of posting lists for each token in the query. TODO (@y,
// @m, @vng): consider updating this cache lazily, as the user
// inputs tokens one-by-one.
vector<Partition> m_partitions;
// Features loader.
unique_ptr<Index::FeaturesLoaderGuard> m_loader;
@ -121,6 +143,9 @@ private:
// Features matcher for layers intersection.
unique_ptr<FeaturesLayerMatcher> m_matcher;
// Features filter for interpretations.
FeaturesFilter m_filter;
// Path finder for interpretations.
FeaturesLayerPathFinder m_finder;

View file

@ -0,0 +1,31 @@
#include "search/v2/rank_table_cache.hpp"
#include "search/dummy_rank_table.hpp"
#include "indexer/index.hpp"
#include "indexer/rank_table.hpp"
namespace search
{
namespace v2
{
// Creates an empty cache.
RankTableCache::RankTableCache() = default;
// Defined out-of-line in this translation unit, which includes the
// full RankTable definition; the header only forward-declares it.
RankTableCache::~RankTableCache() = default;
// Returns the rank table cached for |mwmId|. On a cache miss the
// table is loaded from |value|'s container; when loading yields no
// table, a DummyRankTable is cached and returned instead.
RankTable const & RankTableCache::Get(MwmValue & value, MwmSet::MwmId const & mwmId)
{
  auto const cached = m_ranks.find(mwmId);
  if (cached == m_ranks.end())
  {
    auto loaded = RankTable::Load(value.m_cont);
    if (!loaded)
      loaded.reset(new DummyRankTable());

    // Remember the raw pointer before ownership moves into the cache.
    auto const * fresh = loaded.get();
    m_ranks[mwmId] = move(loaded);
    return *fresh;
  }

  return *cached->second;
}
// Drops all cached rank tables. The tables are owned by the cache, so
// references previously returned by Get() must not be used afterwards.
void RankTableCache::Clear()
{
  m_ranks.clear();
}
} // namespace v2
} // namespace search

View file

@ -0,0 +1,35 @@
#pragma once
#include "indexer/mwm_set.hpp"
#include "std/map.hpp"
#include "std/unique_ptr.hpp"
#include "base/macros.hpp"
class MwmValue;
namespace search
{
class RankTable;
namespace v2
{
// Cache of rank tables keyed by mwm id. The cache owns the tables.
class RankTableCache
{
public:
RankTableCache();
~RankTableCache();
// Returns the rank table for |mwmId|, loading it from |value| on the
// first request. The returned reference points into the cache and is
// invalidated by Clear() and by destruction of the cache.
RankTable const & Get(MwmValue & value, MwmSet::MwmId const & mwmId);
// Removes (and destroys) all cached tables.
void Clear();
private:
// Owned rank tables, one per mwm id.
map<MwmSet::MwmId, unique_ptr<RankTable>> m_ranks;
DISALLOW_COPY_AND_MOVE(RankTableCache);
};
} // namespace v2
} // namespace search

View file

@ -37,10 +37,6 @@ SearchModel::SearchType SearchModel::GetSearchType(FeatureType const & feature)
{
case NONE:
return SEARCH_TYPE_COUNT;
case COUNTRY:
return SEARCH_TYPE_COUNTRY;
case STATE:
return SEARCH_TYPE_STATE;
case CITY:
case TOWN:
case VILLAGE:
@ -65,10 +61,6 @@ string DebugPrint(SearchModel::SearchType type)
return "STREET";
case SearchModel::SEARCH_TYPE_CITY:
return "CITY";
case SearchModel::SEARCH_TYPE_STATE:
return "STATE";
case SearchModel::SEARCH_TYPE_COUNTRY:
return "COUNTRY";
case SearchModel::SEARCH_TYPE_COUNT:
return "COUNT";
}

View file

@ -28,8 +28,6 @@ public:
SEARCH_TYPE_BUILDING,
SEARCH_TYPE_STREET,
SEARCH_TYPE_CITY,
SEARCH_TYPE_STATE,
SEARCH_TYPE_COUNTRY,
SEARCH_TYPE_COUNT
};

View file

@ -37,9 +37,11 @@ void SearchQueryV2::Search(Results & res, size_t resCount)
if (m_tokens.empty())
SuggestStrings(res);
SearchQueryParams params;
Geocoder::Params params;
InitParams(false /* localitySearch */, params);
m_geocoder.SetSearchQueryParams(params);
params.m_viewport = m_viewport[CURRENT_V];
params.m_maxNumResults = max(resCount, kPreResultsCount);
m_geocoder.SetParams(params);
vector<FeatureID> results;
m_geocoder.Go(results);
@ -50,6 +52,12 @@ void SearchQueryV2::Search(Results & res, size_t resCount)
// Viewport points search is not implemented for this query type; the
// call only reports that via NOTIMPLEMENTED().
void SearchQueryV2::SearchViewportPoints(Results & res)
{
  NOTIMPLEMENTED();
}
// Clears the caches handled by Query::ClearCaches() first, then the
// caches held by the geocoder.
void SearchQueryV2::ClearCaches()
{
Query::ClearCaches();
m_geocoder.ClearCaches();
}
void SearchQueryV2::AddPreResults1(vector<FeatureID> & results)
{
// Group all features by MwmId and add them as PreResult1.

View file

@ -20,6 +20,7 @@ public:
// Query overrides:
void Search(Results & res, size_t resCount) override;
void SearchViewportPoints(Results & res) override;
void ClearCaches() override;
protected:
// Adds a bunch of features as PreResult1.