From fd295a51d4455e8cc3e6f2d5579f1e3978d46319 Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Sun, 24 Apr 2016 19:09:37 +0300 Subject: [PATCH] [search] Implemented postcodes support in geocoder. --- search/feature_offset_match.hpp | 2 +- .../search_query_v2_test.cpp | 50 ++++-- search/v2/features_layer_matcher.cpp | 6 + search/v2/features_layer_matcher.hpp | 10 ++ search/v2/features_layer_path_finder.cpp | 4 +- search/v2/geocoder.cpp | 157 +++++++++++++++--- search/v2/geocoder.hpp | 32 +++- search/v2/search_model.hpp | 16 +- 8 files changed, 226 insertions(+), 51 deletions(-) diff --git a/search/feature_offset_match.hpp b/search/feature_offset_match.hpp index c6f52f89e7..7eeecee551 100644 --- a/search/feature_offset_match.hpp +++ b/search/feature_offset_match.hpp @@ -242,7 +242,7 @@ public: toDo(value); } }; -} // impl +} // namespace impl template struct TrieRootPrefix diff --git a/search/search_integration_tests/search_query_v2_test.cpp b/search/search_integration_tests/search_query_v2_test.cpp index f8a728cb7b..2e21979d83 100644 --- a/search/search_integration_tests/search_query_v2_test.cpp +++ b/search/search_integration_tests/search_query_v2_test.cpp @@ -370,8 +370,14 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes) TestStreet street( vector{m2::PointD(-0.5, 0.0), m2::PointD(0, 0), m2::PointD(0.5, 0.0)}, "Первомайская", "ru"); - TestBuilding building(m2::PointD(0.0, 0.00001), "", "28 а", street, "ru"); - building.SetPostcode("141701"); + TestBuilding building28(m2::PointD(0.0, 0.00001), "", "28а", street, "ru"); + building28.SetPostcode("141701"); + + TestBuilding building29(m2::PointD(0.0, -0.00001), "", "29", street, "ru"); + building29.SetPostcode("141701"); + + TestBuilding building30(m2::PointD(0.00001, 0.00001), "", "30", street, "ru"); + building30.SetPostcode("141702"); BuildWorld([&](TestMwmBuilder & builder) { @@ -380,7 +386,9 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes) auto countryId = BuildCountry(countryName, [&](TestMwmBuilder & builder) { builder.Add(street); - builder.Add(building); + builder.Add(building28); + builder.Add(building29); + builder.Add(building30); }); // Tests that postcode is added to the search index. @@ -391,7 +399,7 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes) SearchQueryParams params; params.m_tokens.emplace_back(); - params.m_tokens.back().push_back(PostcodeToString(strings::MakeUniString("141701"))); + params.m_tokens.back().push_back(PostcodeToString(strings::MakeUniString("141702"))); auto * value = handle.GetValue(); auto features = v2::RetrievePostcodeFeatures(countryId, *value, cancellable, TokenSlice(params, 0, params.m_tokens.size())); @@ -405,21 +413,31 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes) FeatureType ft; loader.GetFeatureByIndex(index, ft); - auto rule = ExactMatch(countryId, building); + auto rule = ExactMatch(countryId, building30); TEST(rule->Matches(ft), ()); } - { - TRules rules{ExactMatch(countryId, building)}; - TEST(ResultsMatch("Долгопрудный первомайская 28а", "ru" /* locale */, rules), ()); - } - // TODO (@y): uncomment this test and add more tests when postcodes - // search will be implemented. - // - // { - // TRules rules{ExactMatch(countryId, building)}; - // TEST(ResultsMatch("Долгопрудный первомайская 28а, 141701", "ru" /* locale */, rules), ()); - // } + { + TRules rules{ExactMatch(countryId, building28)}; + TEST(ResultsMatch("Долгопрудный первомайская 28а", "ru", rules), ()); + } + { + TRules rules{ExactMatch(countryId, building28)}; + TEST(ResultsMatch("Долгопрудный первомайская 28а, 141701", "ru", rules), ()); + } + { + TRules rules{ExactMatch(countryId, building28), ExactMatch(countryId, building29)}; + TEST(ResultsMatch("Долгопрудный первомайская 141701", "ru", rules), ()); + + } + { + TRules rules{ExactMatch(countryId, building28), ExactMatch(countryId, building29)}; + TEST(ResultsMatch("Долгопрудный 141701", "ru", rules), ()); + } + { + TRules rules{ExactMatch(countryId, building30)}; + TEST(ResultsMatch("Долгопрудный 141702", "ru", rules), ()); + } } } // namespace } // namespace search diff --git a/search/v2/features_layer_matcher.cpp b/search/v2/features_layer_matcher.cpp index 0a3955ad9d..e3134e838f 100644 --- a/search/v2/features_layer_matcher.cpp +++ b/search/v2/features_layer_matcher.cpp @@ -18,6 +18,7 @@ int constexpr kMaxApproxStreetDistanceM = 100; FeaturesLayerMatcher::FeaturesLayerMatcher(Index & index, my::Cancellable const & cancellable) : m_context(nullptr) + , m_postcodes(nullptr) , m_reverseGeocoder(index) , m_nearbyStreetsCache("FeatureToNearbyStreets") , m_matchingStreetsCache("BuildingToStreet") @@ -36,6 +37,11 @@ void FeaturesLayerMatcher::SetContext(MwmContext * context) m_loader.SetContext(context); } +void FeaturesLayerMatcher::SetPostcodes(coding::CompressedBitVector const * postcodes) +{ + m_postcodes = postcodes; +} + void FeaturesLayerMatcher::OnQueryFinished() { m_nearbyStreetsCache.ClearIfNeeded(); diff --git a/search/v2/features_layer_matcher.hpp b/search/v2/features_layer_matcher.hpp index 210f337eff..39e505a7ce 100644 --- a/search/v2/features_layer_matcher.hpp +++ b/search/v2/features_layer_matcher.hpp @@ -19,6 +19,8 @@ #include "geometry/point2d.hpp" #include "geometry/rect2d.hpp" +#include "coding/compressed_bit_vector.hpp" + #include "base/cancellable.hpp" #include "base/logging.hpp" #include "base/macros.hpp" @@ -61,6 +63,7 @@ public: FeaturesLayerMatcher(Index & index, my::Cancellable const & cancellable); void SetContext(MwmContext * context); + void SetPostcodes(coding::CompressedBitVector const * postcodes); template void Match(FeaturesLayer const & child, FeaturesLayer const & parent, TFn && fn) @@ -164,6 +167,8 @@ private: MercatorBounds::RectByCenterXYAndSizeInMeters(poiCenters[i], kBuildingRadiusMeters), [&](FeatureType & ft) { + if (m_postcodes && !m_postcodes->GetBit(ft.GetID().m_index)) + return; if (HouseNumbersMatch(strings::MakeUniString(ft.GetHouseNumber()), queryParses)) { double const distanceM = MercatorBounds::DistanceOnEarth(feature::GetCenter(ft), poiCenters[i]); @@ -248,6 +253,9 @@ private: if (binary_search(buildings.begin(), buildings.end(), id)) return true; + if (m_postcodes && !m_postcodes->GetBit(id)) + return false; + // HouseNumbersMatch() calls are expensive, so following code // tries to reduce the number of calls. The most important // optimization: as first tokens from the house-number part of @@ -344,6 +352,8 @@ private: MwmContext * m_context; + coding::CompressedBitVector const * m_postcodes; + ReverseGeocoder m_reverseGeocoder; // Cache of streets in a feature's vicinity. All lists in the cache diff --git a/search/v2/features_layer_path_finder.cpp b/search/v2/features_layer_path_finder.cpp index c05ba6c8d7..0f5ca4e91b 100644 --- a/search/v2/features_layer_path_finder.cpp +++ b/search/v2/features_layer_path_finder.cpp @@ -109,7 +109,7 @@ void FeaturesLayerPathFinder::FindReachableVerticesTopDown( vector reachable = *(layers.back()->m_sortedFeatures); vector buffer; - TParentGraph parent(layers.size() - 1); + TParentGraph parent; auto addEdge = [&](uint32_t childFeature, uint32_t parentFeature) { @@ -156,7 +156,7 @@ void FeaturesLayerPathFinder::FindReachableVerticesBottomUp( vector reachable = *(layers.front()->m_sortedFeatures); vector buffer; - TParentGraph parent(layers.size() - 1); + TParentGraph parent; auto addEdge = [&](uint32_t childFeature, uint32_t parentFeature) { diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp index fa9edda019..39d1179214 100644 --- a/search/v2/geocoder.cpp +++ b/search/v2/geocoder.cpp @@ -7,6 +7,8 @@ #include "search/v2/features_filter.hpp" #include "search/v2/features_layer_matcher.hpp" #include "search/v2/locality_scorer.hpp" +#include "search/v2/postcodes_matcher.hpp" +#include "search/v2/token_slice.hpp" #include "indexer/classificator.hpp" #include "indexer/feature_decl.hpp" @@ -84,6 +86,10 @@ struct ScopedMarkTokens { ASSERT_LESS_OR_EQUAL(m_from, m_to, ()); ASSERT_LESS_OR_EQUAL(m_to, m_usedTokens.size(), ()); +#if defined(DEBUG) + for (size_t i = m_from; i != m_to; ++i) + ASSERT(!m_usedTokens[i], (i)); +#endif fill(m_usedTokens.begin() + m_from, m_usedTokens.begin() + m_to, true /* used */); } @@ -628,6 +634,7 @@ void Geocoder::ClearCaches() m_matchersCache.clear(); m_streetsCache.clear(); m_villages.reset(); + m_postcodes.Clear(); } void Geocoder::PrepareRetrievalParams(size_t curToken, size_t endToken) @@ -663,6 +670,19 @@ void Geocoder::PrepareAddressFeatures() } } +void Geocoder::InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken, + FeaturesLayer & layer) +{ + layer.Clear(); + layer.m_type = type; + layer.m_startToken = startToken; + layer.m_endToken = endToken; + + JoinQueryTokens(m_params, layer.m_startToken, layer.m_endToken, kUniSpace /* sep */, + layer.m_subQuery); + layer.m_lastTokenIsPrefix = (layer.m_endToken > m_params.m_tokens.size()); +} + void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter, size_t const maxNumLocalities, vector & preLocalities) @@ -979,12 +999,67 @@ void Geocoder::LimitedSearch(FeaturesFilter const & filter) m_filter = &filter; MY_SCOPE_GUARD(resetFilter, [&]() { m_filter = nullptr; }); - // TODO (@y): implement postcodes matching here. - - // The order is rather important. Match streets first, then all other stuff. - GreedilyMatchStreets(); - MatchPOIsAndBuildings(0 /* curToken */); MatchUnclassified(0 /* curToken */); + + auto search = [this]() + { + GreedilyMatchStreets(); + MatchPOIsAndBuildings(0 /* curToken */); + }; + + WithPostcodes(search); + search(); +} + +template +void Geocoder::WithPostcodes(TFn && fn) +{ + size_t const maxPostcodeTokens = GetMaxNumTokensInPostcode(); + + for (size_t startToken = 0; startToken != m_numTokens; ++startToken) + { + if (m_usedTokens[startToken]) + continue; + + size_t endToken = startToken; + for (; endToken < m_numTokens && endToken - startToken < maxPostcodeTokens && + !m_usedTokens[endToken]; + ++endToken) + { + TokenSlice slice(m_params, startToken, endToken + 1); + if (!LooksLikePostcode(slice)) + break; + } + + if (startToken == endToken) + continue; + + PrepareRetrievalParams(startToken, endToken); + for (auto & tokens : m_retrievalParams.m_tokens) + { + tokens.resize(1); + tokens[0] = PostcodeToString(tokens[0]); + } + if (!m_retrievalParams.m_prefixTokens.empty()) + { + m_retrievalParams.m_prefixTokens.resize(1); + m_retrievalParams.m_prefixTokens[0] = PostcodeToString(m_retrievalParams.m_prefixTokens[0]); + } + auto postcodes = RetrievePostcodeFeatures( + *m_context, TokenSlice(m_retrievalParams, 0, endToken - startToken)); + if (!coding::CompressedBitVector::IsEmpty(postcodes)) + { + ScopedMarkTokens mark(m_usedTokens, startToken, endToken); + + m_postcodes.m_startToken = startToken; + m_postcodes.m_endToken = endToken; + m_postcodes.m_features = move(postcodes); + + fn(); + + m_postcodes.Clear(); + } + } } void Geocoder::GreedilyMatchStreets() @@ -1054,13 +1129,7 @@ void Geocoder::CreateStreetsLayerAndMatchLowerLayers( MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); auto & layer = m_layers.back(); - layer.Clear(); - layer.m_type = SearchModel::SEARCH_TYPE_STREET; - layer.m_startToken = startToken; - layer.m_endToken = endToken; - JoinQueryTokens(m_params, layer.m_startToken, layer.m_endToken, kUniSpace /* sep */, - layer.m_subQuery); - layer.m_lastTokenIsPrefix = (layer.m_endToken > m_params.m_tokens.size()); + InitLayer(SearchModel::SEARCH_TYPE_STREET, startToken, endToken, layer); vector sortedFeatures; sortedFeatures.reserve(features->PopCount()); @@ -1080,8 +1149,44 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) { // All tokens were consumed, find paths through layers, emit // features. - FindPaths(); - return; + if (m_postcodes.IsEmpty()) + return FindPaths(); + + // When there are no layers but user entered a postcode, we have + // to emit all features matching to the postcode. + if (m_layers.size() == 0) + { + CBVPtr filtered; + if (m_filter->NeedToFilter(*m_postcodes.m_features)) + filtered.Set(m_filter->Filter(*m_postcodes.m_features)); + else + filtered.Set(m_postcodes.m_features.get(), false /* isOwner */); + filtered.ForEach([&](uint32_t id) + { + EmitResult(m_context->GetId(), id, GetSearchTypeInGeocoding(id), + m_postcodes.m_startToken, m_postcodes.m_endToken); + }); + return; + } + + if (!(m_layers.size() == 1 && m_layers[0].m_type == SearchModel::SEARCH_TYPE_STREET)) + return FindPaths(); + + // If there're only one street layer but user also entered a + // postcode, we need to emit all features matching to postcode on + // the given street. + m_layers.emplace_back(); + MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); + + auto & layer = m_layers.back(); + InitLayer(SearchModel::SEARCH_TYPE_BUILDING, m_postcodes.m_startToken, m_postcodes.m_endToken, + layer); + + vector features; + coding::CompressedBitVectorEnumerator::ForEach(*m_postcodes.m_features, + MakeBackInsertFunctor(features)); + layer.m_sortedFeatures = &features; + return FindPaths(); } m_layers.emplace_back(); @@ -1089,7 +1194,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) // Clusters of features by search type. Each cluster is a sorted // list of ids. - size_t const kNumClusters = SearchModel::SEARCH_TYPE_STREET; + size_t const kNumClusters = SearchModel::SEARCH_TYPE_BUILDING + 1; vector clusters[kNumClusters]; // Appends |featureId| to the end of the corresponding cluster, if @@ -1101,8 +1206,11 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) // All SEARCH_TYPE_CITY features were filtered in // MatchCities(). All SEARCH_TYPE_STREET features were // filtered in GreedilyMatchStreets(). - if (searchType < SearchModel::SEARCH_TYPE_STREET) - clusters[searchType].push_back(featureId); + if (searchType < kNumClusters) + { + if (m_postcodes.IsEmpty() || m_postcodes.m_features->GetBit(featureId)) + clusters[searchType].push_back(featureId); + } }; CBVPtr features; @@ -1119,12 +1227,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken) { auto & layer = m_layers.back(); - layer.Clear(); - layer.m_startToken = curToken; - layer.m_endToken = curToken + n; - JoinQueryTokens(m_params, layer.m_startToken, layer.m_endToken, kUniSpace /* sep */, - layer.m_subQuery); - layer.m_lastTokenIsPrefix = (layer.m_endToken > m_params.m_tokens.size()); + InitLayer(layer.m_type, curToken, curToken + n, layer); } features.Intersect(m_addressFeatures[curToken + n - 1].get()); @@ -1262,6 +1365,7 @@ void Geocoder::FindPaths() auto const & innermostLayer = *sortedLayers.front(); + m_matcher->SetPostcodes(m_postcodes.m_features.get()); m_finder.ForEachReachableVertex(*m_matcher, sortedLayers, [this, &innermostLayer](IntersectionResult const & result) { @@ -1439,6 +1543,13 @@ unique_ptr Geocoder::LoadVillages(MwmContext & cont return LoadCategories(context, GetVillageCategories()); } +unique_ptr Geocoder::RetrievePostcodeFeatures( + MwmContext const & context, TokenSlice const & slice) +{ + return ::search::v2::RetrievePostcodeFeatures(context.GetId(), context.m_value, + static_cast(*this), slice); +} + coding::CompressedBitVector const * Geocoder::RetrieveGeometryFeatures(MwmContext const & context, m2::RectD const & rect, RectId id) diff --git a/search/v2/geocoder.hpp b/search/v2/geocoder.hpp index c273aad162..355b25c5fe 100644 --- a/search/v2/geocoder.hpp +++ b/search/v2/geocoder.hpp @@ -53,6 +53,7 @@ namespace v2 class FeaturesFilter; class FeaturesLayerMatcher; class SearchModel; +class TokenSlice; // This class is used to retrieve all features corresponding to a // search query. Search query is represented as a sequence of tokens @@ -165,6 +166,22 @@ private: RECT_ID_COUNT }; + struct Postcodes + { + void Clear() + { + m_startToken = 0; + m_endToken = 0; + m_features.reset(); + } + + inline bool IsEmpty() const { return coding::CompressedBitVector::IsEmpty(m_features); } + + size_t m_startToken = 0; + size_t m_endToken = 0; + unique_ptr m_features; + }; + void GoImpl(vector> & infos, bool inViewport); template @@ -180,6 +197,9 @@ private: // for each token and saves it to m_addressFeatures. void PrepareAddressFeatures(); + void InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken, + FeaturesLayer & layer); + void FillLocalityCandidates(coding::CompressedBitVector const * filter, size_t const maxNumLocalities, vector & preLocalities); @@ -215,6 +235,9 @@ private: // incorporated into |filter|. void LimitedSearch(FeaturesFilter const & filter); + template + void WithPostcodes(TFn && fn); + // Tries to match some adjacent tokens in the query as streets and // then performs geocoding in street vicinities. void GreedilyMatchStreets(); @@ -257,7 +280,11 @@ private: unique_ptr LoadVillages(MwmContext & context); - /// A caching wrapper around Retrieval::RetrieveGeometryFeatures. + // A wrapper around RetrievePostcodeFeatures. + unique_ptr RetrievePostcodeFeatures(MwmContext const & context, + TokenSlice const & slice); + + // A caching wrapper around Retrieval::RetrieveGeometryFeatures. coding::CompressedBitVector const * RetrieveGeometryFeatures(MwmContext const & context, m2::RectD const & rect, RectId id); @@ -329,6 +356,9 @@ private: // Village features in the mwm that is currently being processed. unique_ptr m_villages; + // Postcodes features in the mwm that is currently being processed. + Postcodes m_postcodes; + // This vector is used to indicate what tokens were matched by // locality and can't be re-used during the geocoding process. vector m_usedTokens; diff --git a/search/v2/search_model.hpp b/search/v2/search_model.hpp index 8f51001754..5b81678944 100644 --- a/search/v2/search_model.hpp +++ b/search/v2/search_model.hpp @@ -32,20 +32,20 @@ public: { // Low-level features such as amenities, offices, shops, buildings // without house number, etc. - SEARCH_TYPE_POI = 0, + SEARCH_TYPE_POI, // All features with set house number. - SEARCH_TYPE_BUILDING = 1, + SEARCH_TYPE_BUILDING, - SEARCH_TYPE_STREET = 2, + SEARCH_TYPE_STREET, // All low-level features except POI, BUILDING and STREET. - SEARCH_TYPE_UNCLASSIFIED = 3, + SEARCH_TYPE_UNCLASSIFIED, - SEARCH_TYPE_VILLAGE = 4, - SEARCH_TYPE_CITY = 5, - SEARCH_TYPE_STATE = 6, // US or Canadian states - SEARCH_TYPE_COUNTRY = 7, + SEARCH_TYPE_VILLAGE, + SEARCH_TYPE_CITY, + SEARCH_TYPE_STATE, // US or Canadian states + SEARCH_TYPE_COUNTRY, SEARCH_TYPE_COUNT };