From 6fec36a795682dfd1f1aa058b33054d81a10634b Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Tue, 7 Feb 2017 17:29:22 +0300 Subject: [PATCH] [search] All token categories are exposed to PreRankingInfo. --- search/CMakeLists.txt | 16 +- search/geocoder.cpp | 150 ++++++++++--------- search/geocoder.hpp | 73 +-------- search/geocoder_context.cpp | 8 +- search/geocoder_context.hpp | 21 ++- search/geocoder_locality.cpp | 29 ++++ search/geocoder_locality.hpp | 79 ++++++++++ search/locality.cpp | 124 --------------- search/locality.hpp | 51 ------- search/locality_scorer.cpp | 7 +- search/locality_scorer.hpp | 11 +- search/pre_ranking_info.cpp | 9 +- search/pre_ranking_info.hpp | 6 +- search/processor.cpp | 2 - search/region.cpp | 35 ----- search/region.hpp | 22 --- search/search.pro | 6 +- search/search_tests/locality_scorer_test.cpp | 4 +- 18 files changed, 234 insertions(+), 419 deletions(-) create mode 100644 search/geocoder_locality.cpp create mode 100644 search/geocoder_locality.hpp delete mode 100644 search/locality.cpp delete mode 100644 search/locality.hpp delete mode 100644 search/region.cpp delete mode 100644 search/region.hpp diff --git a/search/CMakeLists.txt b/search/CMakeLists.txt index c41b230c83..5b2c22322f 100644 --- a/search/CMakeLists.txt +++ b/search/CMakeLists.txt @@ -27,16 +27,18 @@ set( feature_offset_match.hpp features_filter.cpp features_filter.hpp + features_layer.cpp + features_layer.hpp features_layer_matcher.cpp features_layer_matcher.hpp features_layer_path_finder.cpp features_layer_path_finder.hpp - features_layer.cpp - features_layer.hpp - geocoder_context.cpp - geocoder_context.hpp geocoder.cpp geocoder.hpp + geocoder_context.cpp + geocoder_context.hpp + geocoder_locality.cpp + geocoder_locality.hpp geometry_cache.cpp geometry_cache.hpp geometry_utils.cpp @@ -68,8 +70,6 @@ set( locality_finder.hpp locality_scorer.cpp locality_scorer.hpp - locality.cpp - locality.hpp mode.cpp mode.hpp model.cpp @@ -84,9 +84,9 @@ set( pre_ranker.hpp pre_ranking_info.cpp pre_ranking_info.hpp - processor_factory.hpp processor.cpp processor.hpp + processor_factory.hpp projection_on_street.cpp projection_on_street.hpp query_params.cpp @@ -101,8 +101,6 @@ set( ranking_info.hpp ranking_utils.cpp ranking_utils.hpp - region.cpp - region.hpp result.cpp result.hpp retrieval.cpp diff --git a/search/geocoder.cpp b/search/geocoder.cpp index 4585d44258..7042fa19b6 100644 --- a/search/geocoder.cpp +++ b/search/geocoder.cpp @@ -349,7 +349,6 @@ Geocoder::Geocoder(Index const & index, storage::CountryInfoGetter const & infoG , m_filter(nullptr) , m_matcher(nullptr) , m_finder(m_cancellable) - , m_lastMatchedRegion(nullptr) , m_preRanker(preRanker) { } @@ -529,8 +528,7 @@ void Geocoder::GoImpl(vector> & infos, bool inViewport) }); - m_lastMatchedRegion = nullptr; - MatchRegions(ctx, REGION_TYPE_COUNTRY); + MatchRegions(ctx, Region::TYPE_COUNTRY); if (index < numIntersectingMaps || m_preRanker.NumSentResults() == 0) MatchAroundPivot(ctx); @@ -614,7 +612,7 @@ void Geocoder::FillLocalitiesTable(BaseContext const & ctx) if (!m_context->GetFeature(l.m_featureId, ft)) continue; - auto addRegionMaps = [&](size_t & count, size_t maxCount, RegionType type) + auto addRegionMaps = [&](size_t & count, size_t maxCount, Region::Type type) { if (count < maxCount && ft.GetFeatureType() == feature::GEOM_POINT) { @@ -666,12 +664,12 @@ void Geocoder::FillLocalitiesTable(BaseContext const & ctx) } case SearchModel::SEARCH_TYPE_STATE: { - addRegionMaps(numStates, kMaxNumStates, REGION_TYPE_STATE); + addRegionMaps(numStates, kMaxNumStates, Region::TYPE_STATE); break; } case SearchModel::SEARCH_TYPE_COUNTRY: { - addRegionMaps(numCountries, kMaxNumCountries, REGION_TYPE_COUNTRY); + addRegionMaps(numCountries, kMaxNumCountries, Region::TYPE_COUNTRY); break; } default: break; @@ -736,21 +734,21 @@ void Geocoder::ForEachCountry(vector> const & infos, TFn && } } -void Geocoder::MatchRegions(BaseContext & ctx, RegionType type) +void Geocoder::MatchRegions(BaseContext & ctx, Region::Type type) { switch (type) { - case REGION_TYPE_STATE: + case Region::TYPE_STATE: // Tries to skip state matching and go to cities matching. // Then, performs states matching. MatchCities(ctx); break; - case REGION_TYPE_COUNTRY: + case Region::TYPE_COUNTRY: // Tries to skip country matching and go to states matching. // Then, performs countries matching. - MatchRegions(ctx, REGION_TYPE_STATE); + MatchRegions(ctx, Region::TYPE_STATE); break; - case REGION_TYPE_COUNT: ASSERT(false, ("Invalid region type.")); return; + case Region::TYPE_COUNT: ASSERT(false, ("Invalid region type.")); return; } auto const & regions = m_regions[type]; @@ -777,8 +775,8 @@ void Geocoder::MatchRegions(BaseContext & ctx, RegionType type) // mwm that is currently being processed belongs to region. if (isWorld) { - matches = m_lastMatchedRegion == nullptr || - m_infoGetter.IsBelongToRegions(region.m_center, m_lastMatchedRegion->m_ids); + matches = ctx.m_regions.empty() || + m_infoGetter.IsBelongToRegions(region.m_center, ctx.m_regions.back()->m_ids); } else { @@ -788,6 +786,9 @@ void Geocoder::MatchRegions(BaseContext & ctx, RegionType type) if (!matches) continue; + ctx.m_regions.push_back(®ion); + MY_SCOPE_GUARD(cleanup, [&ctx]() { ctx.m_regions.pop_back(); }); + ScopedMarkTokens mark(ctx.m_usedTokens, tokenRange); if (ctx.AllTokensUsed()) { @@ -796,16 +797,11 @@ void Geocoder::MatchRegions(BaseContext & ctx, RegionType type) continue; } - m_lastMatchedRegion = ®ion; - MY_SCOPE_GUARD(cleanup, [this]() - { - m_lastMatchedRegion = nullptr; - }); switch (type) { - case REGION_TYPE_STATE: MatchCities(ctx); break; - case REGION_TYPE_COUNTRY: MatchRegions(ctx, REGION_TYPE_STATE); break; - case REGION_TYPE_COUNT: ASSERT(false, ("Invalid region type.")); break; + case Region::TYPE_STATE: MatchCities(ctx); break; + case Region::TYPE_COUNTRY: MatchRegions(ctx, Region::TYPE_STATE); break; + case Region::TYPE_COUNT: ASSERT(false, ("Invalid region type.")); break; } } } @@ -813,6 +809,8 @@ void Geocoder::MatchRegions(BaseContext & ctx, RegionType type) void Geocoder::MatchCities(BaseContext & ctx) { + ASSERT(!ctx.m_city, ()); + // Localities are ordered my (m_startToken, m_endToken) pairs. for (auto const & p : m_cities) { @@ -824,13 +822,16 @@ void Geocoder::MatchCities(BaseContext & ctx) { BailIfCancelled(); - if (m_lastMatchedRegion && - !m_infoGetter.IsBelongToRegions(city.m_rect.Center(), m_lastMatchedRegion->m_ids)) + if (!ctx.m_regions.empty() && + !m_infoGetter.IsBelongToRegions(city.m_rect.Center(), ctx.m_regions.back()->m_ids)) { continue; } ScopedMarkTokens mark(ctx.m_usedTokens, tokenRange); + ctx.m_city = &city; + MY_SCOPE_GUARD(cleanup, [&ctx]() { ctx.m_city = nullptr; }); + if (ctx.AllTokensUsed()) { // City matches to search query, we need to emit it as is. @@ -934,12 +935,13 @@ void Geocoder::GreedilyMatchStreets(BaseContext & ctx) void Geocoder::CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx, StreetsMatcher::Prediction const & prediction) { - ASSERT(m_layers.empty(), ()); + auto & layers = ctx.m_layers; + ASSERT(layers.empty(), ()); - m_layers.emplace_back(); - MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); + layers.emplace_back(); + MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &layers)); - auto & layer = m_layers.back(); + auto & layer = layers.back(); InitLayer(SearchModel::SEARCH_TYPE_STREET, prediction.m_tokenRange, layer); vector sortedFeatures; @@ -955,6 +957,8 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) { BailIfCancelled(); + auto & layers = ctx.m_layers; + curToken = ctx.SkipUsedTokens(curToken); if (curToken == ctx.m_numTokens) { @@ -965,7 +969,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) // When there are no layers but user entered a postcode, we have // to emit all features matching to the postcode. - if (m_layers.size() == 0) + if (layers.size() == 0) { CBV filtered = m_postcodes.m_features; if (m_filter->NeedToFilter(m_postcodes.m_features)) @@ -978,7 +982,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) return; } - if (!(m_layers.size() == 1 && m_layers[0].m_type == SearchModel::SEARCH_TYPE_STREET)) + if (!(layers.size() == 1 && layers[0].m_type == SearchModel::SEARCH_TYPE_STREET)) return FindPaths(ctx); // If there're only one street layer but user also entered a @@ -989,21 +993,21 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) // GreedilyMatchStreets() doesn't (and shouldn't) perform // postcodes matching. { - for (auto const & id : *m_layers.back().m_sortedFeatures) + for (auto const & id : *layers.back().m_sortedFeatures) { if (!m_postcodes.m_features.HasBit(id)) continue; EmitResult(ctx, m_context->GetId(), id, SearchModel::SEARCH_TYPE_STREET, - m_layers.back().m_tokenRange); + layers.back().m_tokenRange); } } // Following code creates a fake layer with buildings and // intersects it with the streets layer. - m_layers.emplace_back(); - MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); + layers.emplace_back(); + MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &layers)); - auto & layer = m_layers.back(); + auto & layer = layers.back(); InitLayer(SearchModel::SEARCH_TYPE_BUILDING, m_postcodes.m_tokenRange, layer); vector features; @@ -1012,8 +1016,8 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) return FindPaths(ctx); } - m_layers.emplace_back(); - MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); + layers.emplace_back(); + MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &layers)); // Clusters of features by search type. Each cluster is a sorted // list of ids. @@ -1051,7 +1055,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) BailIfCancelled(); { - auto & layer = m_layers.back(); + auto & layer = layers.back(); InitLayer(layer.m_type, TokenRange(curToken, curToken + n), layer); } @@ -1062,7 +1066,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) filtered = m_filter->Filter(features); bool const looksLikeHouseNumber = house_numbers::LooksLikeHouseNumber( - m_layers.back().m_subQuery, m_layers.back().m_lastTokenIsPrefix); + layers.back().m_subQuery, layers.back().m_lastTokenIsPrefix); if (filtered.IsEmpty() && !looksLikeHouseNumber) break; @@ -1107,7 +1111,7 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) { // ATTENTION: DO NOT USE layer after recursive calls to // MatchPOIsAndBuildings(). This may lead to use-after-free. - auto & layer = m_layers.back(); + auto & layer = layers.back(); layer.m_sortedFeatures = &clusters[i]; if (i == SearchModel::SEARCH_TYPE_BUILDING) @@ -1121,26 +1125,26 @@ void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken) } layer.m_type = static_cast(i); - if (IsLayerSequenceSane()) + if (IsLayerSequenceSane(layers)) MatchPOIsAndBuildings(ctx, curToken + n); } } } -bool Geocoder::IsLayerSequenceSane() const +bool Geocoder::IsLayerSequenceSane(vector const & layers) const { - ASSERT(!m_layers.empty(), ()); + ASSERT(!layers.empty(), ()); static_assert(SearchModel::SEARCH_TYPE_COUNT <= 32, "Select a wider type to represent search types mask."); uint32_t mask = 0; - size_t buildingIndex = m_layers.size(); - size_t streetIndex = m_layers.size(); + size_t buildingIndex = layers.size(); + size_t streetIndex = layers.size(); // Following loop returns false iff there're two different layers // of the same search type. - for (size_t i = 0; i < m_layers.size(); ++i) + for (size_t i = 0; i < layers.size(); ++i) { - auto const & layer = m_layers[i]; + auto const & layer = layers[i]; ASSERT_NOT_EQUAL(layer.m_type, SearchModel::SEARCH_TYPE_COUNT, ()); // TODO (@y): probably it's worth to check belongs-to-locality here. @@ -1155,14 +1159,14 @@ bool Geocoder::IsLayerSequenceSane() const streetIndex = i; } - bool const hasBuildings = buildingIndex != m_layers.size(); - bool const hasStreets = streetIndex != m_layers.size(); + bool const hasBuildings = buildingIndex != layers.size(); + bool const hasStreets = streetIndex != layers.size(); // Checks that building and street layers are neighbours. if (hasBuildings && hasStreets) { - auto const & buildings = m_layers[buildingIndex]; - auto const & streets = m_layers[streetIndex]; + auto const & buildings = layers[buildingIndex]; + auto const & streets = layers[streetIndex]; if (!buildings.m_tokenRange.AdjacentTo(streets.m_tokenRange)) return false; } @@ -1172,13 +1176,15 @@ bool Geocoder::IsLayerSequenceSane() const void Geocoder::FindPaths(BaseContext const & ctx) { - if (m_layers.empty()) + auto const & layers = ctx.m_layers; + + if (layers.empty()) return; // Layers ordered by search type. vector sortedLayers; - sortedLayers.reserve(m_layers.size()); - for (auto & layer : m_layers) + sortedLayers.reserve(layers.size()); + for (auto & layer : layers) sortedLayers.push_back(&layer); sort(sortedLayers.begin(), sortedLayers.end(), my::LessBy(&FeaturesLayer::m_type)); @@ -1212,19 +1218,27 @@ void Geocoder::EmitResult(BaseContext const & ctx, MwmSet::MwmId const & mwmId, // TODO (@y, @m): need to skip zero rank features that are too // distant from the pivot when there're enough results close to the // pivot. - m_preRanker.Emplace(id, PreRankingInfo(type, tokenRange)); + PreRankingInfo info(type, tokenRange); + for (auto const & layer : ctx.m_layers) + info.m_tokenRange[layer.m_type] = layer.m_tokenRange; + + for (auto const * region : ctx.m_regions) + { + auto const regionType = Region::ToSearchType(region->m_type); + ASSERT(regionType != SearchModel::SEARCH_TYPE_COUNT, ()); + info.m_tokenRange[regionType] = region->m_tokenRange; + } + + if (ctx.m_city) + info.m_tokenRange[SearchModel::SEARCH_TYPE_CITY] = ctx.m_city->m_tokenRange; + + m_preRanker.Emplace(id, info); } void Geocoder::EmitResult(BaseContext const & ctx, Region const & region, TokenRange const & tokenRange) { - SearchModel::SearchType type; - switch (region.m_type) - { - case REGION_TYPE_STATE: type = SearchModel::SEARCH_TYPE_STATE; break; - case REGION_TYPE_COUNTRY: type = SearchModel::SEARCH_TYPE_COUNTRY; break; - case REGION_TYPE_COUNT: type = SearchModel::SEARCH_TYPE_COUNT; break; - } + auto const type = Region::ToSearchType(region.m_type); EmitResult(ctx, region.m_countryId, region.m_featureId, type, tokenRange); } @@ -1235,7 +1249,7 @@ void Geocoder::EmitResult(BaseContext const & ctx, City const & city, TokenRange void Geocoder::MatchUnclassified(BaseContext & ctx, size_t curToken) { - ASSERT(m_layers.empty(), ()); + ASSERT(ctx.m_layers.empty(), ()); // We need to match all unused tokens to UNCLASSIFIED features, // therefore unused tokens must be adjacent to each other. For @@ -1310,16 +1324,4 @@ bool Geocoder::GetSearchTypeInGeocoding(BaseContext const & ctx, uint32_t featur return false; } - -string DebugPrint(Geocoder::Locality const & locality) -{ - ostringstream os; - os << "Locality [ "; - os << "m_countryId=" << DebugPrint(locality.m_countryId) << ", "; - os << "m_featureId=" << locality.m_featureId << ", "; - os << "m_tokenRange=" << DebugPrint(locality.m_tokenRange) << ", "; - os << "m_prob=" << locality.m_prob; - os << " ]"; - return os.str(); -} } // namespace search diff --git a/search/geocoder.hpp b/search/geocoder.hpp index f1eb9bcb08..b489ad237f 100644 --- a/search/geocoder.hpp +++ b/search/geocoder.hpp @@ -7,6 +7,7 @@ #include "search/features_layer.hpp" #include "search/features_layer_path_finder.hpp" #include "search/geocoder_context.hpp" +#include "search/geocoder_locality.hpp" #include "search/geometry_cache.hpp" #include "search/hotels_filter.hpp" #include "search/mode.hpp" @@ -86,64 +87,6 @@ public: shared_ptr m_hotelsFilter; }; - enum RegionType - { - REGION_TYPE_STATE, - REGION_TYPE_COUNTRY, - REGION_TYPE_COUNT - }; - - struct Locality - { - Locality() = default; - - Locality(MwmSet::MwmId const & countryId, uint32_t featureId, TokenRange const & tokenRange, - double prob) - : m_countryId(countryId) - , m_featureId(featureId) - , m_tokenRange(tokenRange) - , m_prob(prob) - { - } - - MwmSet::MwmId m_countryId; - uint32_t m_featureId = 0; - TokenRange m_tokenRange; - - // Measures our belief in the fact that tokens in the range - // [m_startToken, m_endToken) indeed specify a locality. Currently - // it is set only for villages. - double m_prob = 0.0; - }; - - // This struct represents a country or US- or Canadian- state. It - // is used to filter maps before search. - struct Region : public Locality - { - Region(Locality const & l, RegionType type) : Locality(l), m_center(0, 0), m_type(type) {} - - storage::CountryInfoGetter::TRegionIdSet m_ids; - string m_defaultName; - m2::PointD m_center; - RegionType m_type; - }; - - // This struct represents a city or a village. It is used to filter features - // during search. - // todo(@m) It works well as is, but consider a new naming scheme - // when counties etc. are added. E.g., Region for countries and - // states and Locality for smaller settlements. - struct City : public Locality - { - City(Locality const & l, SearchModel::SearchType type) : Locality(l), m_type(type) {} - - m2::RectD m_rect; - SearchModel::SearchType m_type; -#if defined(DEBUG) - string m_defaultName; -#endif - }; - Geocoder(Index const & index, storage::CountryInfoGetter const & infoGetter, PreRanker & preRanker, VillagesCache & villagesCache, my::Cancellable const & cancellable); @@ -210,7 +153,7 @@ private: // Tries to find all countries and states in a search query and then // performs matching of cities in found maps. - void MatchRegions(BaseContext & ctx, RegionType type); + void MatchRegions(BaseContext & ctx, Region::Type type); // Tries to find all cities in a search query and then performs // matching of streets in found cities. @@ -245,7 +188,7 @@ private: // Returns true if current path in the search tree (see comment for // MatchPOIsAndBuildings()) looks sane. This method is used as a fast // pre-check to cut off unnecessary work. - bool IsLayerSequenceSane() const; + bool IsLayerSequenceSane(vector const & layers) const; // Finds all paths through layers and emits reachable features from // the lowest layer. @@ -302,7 +245,7 @@ private: // m_cities stores both big cities that are visible at World.mwm // and small villages and hamlets that are not. LocalitiesCache m_cities; - LocalitiesCache m_regions[REGION_TYPE_COUNT]; + LocalitiesCache m_regions[Region::TYPE_COUNT]; // Caches of features in rects. These caches are separated from // TLocalitiesCache because the latter are quite lightweight and not @@ -327,14 +270,6 @@ private: vector> m_tokenRequests; SearchTrieRequest> m_prefixTokenRequest; - // Pointer to the most nested region filled during geocoding. - Region const * m_lastMatchedRegion; - - // Stack of layers filled during geocoding. - vector m_layers; - PreRanker & m_preRanker; }; - -string DebugPrint(Geocoder::Locality const & locality); } // namespace search diff --git a/search/geocoder_context.cpp b/search/geocoder_context.cpp index c2fa74c33a..088d91eb5c 100644 --- a/search/geocoder_context.cpp +++ b/search/geocoder_context.cpp @@ -5,7 +5,7 @@ #include "base/assert.hpp" #include "base/stl_add.hpp" -#include "std/algorithm.hpp" +#include namespace search { @@ -18,14 +18,14 @@ size_t BaseContext::SkipUsedTokens(size_t curToken) const bool BaseContext::AllTokensUsed() const { - return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor()); + return std::all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor()); } bool BaseContext::HasUsedTokensInRange(TokenRange const & range) const { ASSERT(range.IsValid(), (range)); - return any_of(m_usedTokens.begin() + range.Begin(), m_usedTokens.begin() + range.End(), - IdFunctor()); + return std::any_of(m_usedTokens.begin() + range.Begin(), m_usedTokens.begin() + range.End(), + IdFunctor()); } size_t BaseContext::NumUnusedTokenGroups() const diff --git a/search/geocoder_context.hpp b/search/geocoder_context.hpp index 7dec28bcc2..d00f7e9f65 100644 --- a/search/geocoder_context.hpp +++ b/search/geocoder_context.hpp @@ -1,10 +1,13 @@ #pragma once #include "search/cbv.hpp" +#include "search/features_layer.hpp" +#include "search/geocoder_locality.hpp" #include "search/hotels_filter.hpp" -#include "std/unique_ptr.hpp" -#include "std/vector.hpp" +#include +#include +#include namespace search { @@ -28,17 +31,25 @@ struct BaseContext // List of bit-vectors of features, where i-th element of the list // corresponds to the i-th token in the search query. - vector m_features; + std::vector m_features; CBV m_villages; CBV m_streets; + // Stack of layers filled during geocoding. + std::vector m_layers; + + // Stack of regions filled during geocoding. + std::vector m_regions; + + City const * m_city = nullptr; + // This vector is used to indicate what tokens were already matched // and can't be re-used during the geocoding process. - vector m_usedTokens; + std::vector m_usedTokens; // Number of tokens in the query. size_t m_numTokens = 0; - unique_ptr m_hotelsFilter; + std::unique_ptr m_hotelsFilter; }; } // namespace search diff --git a/search/geocoder_locality.cpp b/search/geocoder_locality.cpp new file mode 100644 index 0000000000..4d80fbd20e --- /dev/null +++ b/search/geocoder_locality.cpp @@ -0,0 +1,29 @@ +#include "search/geocoder_locality.hpp" + +#include + +namespace search +{ +// static +SearchModel::SearchType Region::ToSearchType(Type type) +{ + switch (type) + { + case Region::TYPE_STATE: return SearchModel::SEARCH_TYPE_STATE; + case Region::TYPE_COUNTRY: return SearchModel::SEARCH_TYPE_COUNTRY; + case Region::TYPE_COUNT: return SearchModel::SEARCH_TYPE_COUNT; + } +} + +std::string DebugPrint(Locality const & locality) +{ + std::ostringstream os; + os << "Locality [ "; + os << "m_countryId=" << DebugPrint(locality.m_countryId) << ", "; + os << "m_featureId=" << locality.m_featureId << ", "; + os << "m_tokenRange=" << DebugPrint(locality.m_tokenRange) << ", "; + os << "m_prob=" << locality.m_prob; + os << " ]"; + return os.str(); +} +} // namespace search diff --git a/search/geocoder_locality.hpp b/search/geocoder_locality.hpp new file mode 100644 index 0000000000..6ae2361616 --- /dev/null +++ b/search/geocoder_locality.hpp @@ -0,0 +1,79 @@ +#pragma once + +#include "search/model.hpp" +#include "search/token_range.hpp" + +#include "indexer/mwm_set.hpp" + +#include "storage/country_info_getter.hpp" + +#include "geometry/rect2d.hpp" + +#include +#include + +namespace search +{ +struct Locality +{ + Locality() = default; + + Locality(MwmSet::MwmId const & countryId, uint32_t featureId, TokenRange const & tokenRange, + double prob) + : m_countryId(countryId), m_featureId(featureId), m_tokenRange(tokenRange), m_prob(prob) + { + } + + MwmSet::MwmId m_countryId; + uint32_t m_featureId = 0; + TokenRange m_tokenRange; + + // Measures our belief in the fact that tokens in the range + // [m_startToken, m_endToken) indeed specify a locality. Currently + // it is set only for villages. + double m_prob = 0.0; +}; + +// This struct represents a country or US- or Canadian- state. It +// is used to filter maps before search. +struct Region : public Locality +{ + enum Type + { + TYPE_STATE, + TYPE_COUNTRY, + TYPE_COUNT + }; + + Region(Locality const & locality, Type type) : Locality(locality), m_center(0, 0), m_type(type) {} + + static SearchModel::SearchType ToSearchType(Type type); + + storage::CountryInfoGetter::TRegionIdSet m_ids; + std::string m_defaultName; + m2::PointD m_center; + Type m_type; +}; + +// This struct represents a city or a village. It is used to filter features +// during search. +// todo(@m) It works well as is, but consider a new naming scheme +// when counties etc. are added. E.g., Region for countries and +// states and Locality for smaller settlements. +struct City : public Locality +{ + City(Locality const & locality, SearchModel::SearchType type) : Locality(locality), m_type(type) + { + } + + m2::RectD m_rect; + SearchModel::SearchType m_type; + +#if defined(DEBUG) + std::string m_defaultName; +#endif +}; + + +std::string DebugPrint(Locality const & locality); +} // namespace search diff --git a/search/locality.cpp b/search/locality.cpp deleted file mode 100644 index 2b9a12df66..0000000000 --- a/search/locality.cpp +++ /dev/null @@ -1,124 +0,0 @@ -#include "locality.hpp" - -#include "indexer/search_delimiters.hpp" -#include "indexer/search_string_utils.hpp" - -#include "base/assert.hpp" - -#include "std/algorithm.hpp" -#include "std/limits.hpp" - - -namespace search -{ -Locality::Locality() - : m_type(ftypes::NONE) - , m_featureId(numeric_limits::max()) - , m_rank(numeric_limits::max()) - , m_radius(0) -{ -} - -Locality::Locality(ftypes::Type type, uint32_t featureId, m2::PointD const & center, uint8_t rank) - : m_type(type), m_featureId(featureId), m_center(center), m_rank(rank), m_radius(0) -{ -} - -bool Locality::IsValid() const -{ - if (m_type == ftypes::NONE) - return false; - ASSERT(!m_matchedTokens.empty(), ()); - return true; -} - -bool Locality::IsSuitable(TTokensArray const & tokens, TToken const & prefix) const -{ - bool const isMatched = IsFullNameMatched(); - - // Do filtering of possible localities. - using namespace ftypes; - - switch (m_type) - { - case COUNTRY: - // USA has synonyms: "US" or "USA" - return (isMatched || (m_enName == "usa" && GetSynonymTokenLength(tokens, prefix) <= 3) || - (m_enName == "uk" && GetSynonymTokenLength(tokens, prefix) == 2)); - - case STATE: // we process USA, Canada states only for now - // USA states has 2-symbol synonyms - return (isMatched || GetSynonymTokenLength(tokens, prefix) == 2); - case CITY: - // need full name match for cities - return isMatched; - case NONE: - case TOWN: - case VILLAGE: - case LOCALITY_COUNT: - ASSERT(false, ("Unsupported type:", m_type)); - return false; - } -} - -void Locality::Swap(Locality & rhs) -{ - m_name.swap(rhs.m_name); - m_enName.swap(rhs.m_enName); - m_matchedTokens.swap(rhs.m_matchedTokens); - - swap(m_type, rhs.m_type); - swap(m_featureId, rhs.m_featureId); - swap(m_center, rhs.m_center); - swap(m_rank, rhs.m_rank); - swap(m_radius, rhs.m_radius); -} - -bool Locality::operator<(Locality const & rhs) const -{ - if (m_type != rhs.m_type) - return (m_type < rhs.m_type); - - if (m_matchedTokens.size() != rhs.m_matchedTokens.size()) - return (m_matchedTokens.size() < rhs.m_matchedTokens.size()); - - return m_rank < rhs.m_rank; -} - -bool Locality::IsFullNameMatched() const -{ - size_t count = 0; - SplitUniString(NormalizeAndSimplifyString(m_name), [&count](strings::UniString const &) - { - ++count; - }, - search::Delimiters()); - return count <= m_matchedTokens.size(); -} - -size_t Locality::GetSynonymTokenLength(TTokensArray const & tokens, TToken const & prefix) const -{ - // check only one token as a synonym - if (m_matchedTokens.size() == 1) - { - size_t const index = m_matchedTokens[0]; - if (index < tokens.size()) - return tokens[index].size(); - ASSERT_EQUAL(index, tokens.size(), ()); - ASSERT(!prefix.empty(), ()); - return prefix.size(); - } - - return size_t(-1); -} - -string DebugPrint(Locality const & l) -{ - stringstream ss; - ss << "{ Locality: " - << "Name = " + l.m_name << "; Name English = " << l.m_enName - << "; Rank = " << static_cast(l.m_rank) - << "; Matched: " << l.m_matchedTokens.size() << " }"; - return ss.str(); -} -} // namespace search diff --git a/search/locality.hpp b/search/locality.hpp deleted file mode 100644 index 0bc5e5fa81..0000000000 --- a/search/locality.hpp +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once -#include "indexer/ftypes_matcher.hpp" - -#include "geometry/point2d.hpp" - -#include "base/buffer_vector.hpp" -#include "base/string_utils.hpp" - -#include "std/string.hpp" -#include "std/vector.hpp" - -namespace search -{ -struct Locality -{ - using TToken = strings::UniString; - using TTokensArray = buffer_vector; - - // Native and English names of locality. - string m_name; - string m_enName; - - // Indexes of matched tokens for locality. - vector m_matchedTokens; - - ftypes::Type m_type; - uint32_t m_featureId; - m2::PointD m_center; - uint8_t m_rank; - double m_radius; - - Locality(); - - Locality(ftypes::Type type, uint32_t featureId, m2::PointD const & center, uint8_t rank); - - bool IsValid() const; - - bool IsSuitable(TTokensArray const & tokens, TToken const & prefix) const; - - void Swap(Locality & rhs); - - bool operator<(Locality const & rhs) const; - -private: - bool IsFullNameMatched() const; - - size_t GetSynonymTokenLength(TTokensArray const & tokens, TToken const & prefix) const; -}; - -string DebugPrint(Locality const & l); -} // namespace search diff --git a/search/locality_scorer.cpp b/search/locality_scorer.cpp index 7f9996c343..45136622de 100644 --- a/search/locality_scorer.cpp +++ b/search/locality_scorer.cpp @@ -26,7 +26,7 @@ LocalityScorer::ExLocality::ExLocality() : m_numTokens(0), m_rank(0), m_nameScor { } -LocalityScorer::ExLocality::ExLocality(Geocoder::Locality const & locality) +LocalityScorer::ExLocality::ExLocality(Locality const & locality) : m_locality(locality) , m_numTokens(locality.m_tokenRange.Size()) , m_rank(0) @@ -42,7 +42,7 @@ LocalityScorer::LocalityScorer(QueryParams const & params, Delegate const & dele void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseContext const & ctx, CBV const & filter, size_t limit, - std::vector & localities) + std::vector & localities) { CHECK_EQUAL(ctx.m_numTokens, m_params.GetNumTokens(), ()); @@ -83,8 +83,7 @@ void LocalityScorer::GetTopLocalities(MwmSet::MwmId const & countryId, BaseConte LeaveTopLocalities(limit, localities); } -void LocalityScorer::LeaveTopLocalities(size_t limit, - std::vector & localities) const +void LocalityScorer::LeaveTopLocalities(size_t limit, std::vector & localities) const { std::vector ls; ls.reserve(localities.size()); diff --git a/search/locality_scorer.hpp b/search/locality_scorer.hpp index 38a34b6f80..6919b3dc06 100644 --- a/search/locality_scorer.hpp +++ b/search/locality_scorer.hpp @@ -1,6 +1,6 @@ #pragma once -#include "search/geocoder.hpp" +#include "search/geocoder_locality.hpp" #include "search/ranking_utils.hpp" #include @@ -32,18 +32,17 @@ public: // Leaves at most |limit| elements of |localities|, ordered by their // features. void GetTopLocalities(MwmSet::MwmId const & countryId, BaseContext const & ctx, - CBV const & filter, size_t limit, - std::vector & localities); + CBV const & filter, size_t limit, std::vector & localities); private: struct ExLocality { ExLocality(); - explicit ExLocality(Geocoder::Locality const & locality); + explicit ExLocality(Locality const & locality); inline uint32_t GetId() const { return m_locality.m_featureId; } - Geocoder::Locality m_locality; + Locality m_locality; size_t m_numTokens; uint8_t m_rank; NameScore m_nameScore; @@ -53,7 +52,7 @@ private: // Leaves at most |limit| elements of |localities|, ordered by some // combination of ranks and number of matched tokens. - void LeaveTopLocalities(size_t limit, std::vector & localities) const; + void LeaveTopLocalities(size_t limit, std::vector & localities) const; void RemoveDuplicates(std::vector & ls) const; void LeaveTopByRankAndProb(size_t limit, std::vector & ls) const; diff --git a/search/pre_ranking_info.cpp b/search/pre_ranking_info.cpp index 2e4237635d..255e96a23a 100644 --- a/search/pre_ranking_info.cpp +++ b/search/pre_ranking_info.cpp @@ -1,12 +1,12 @@ #include "search/ranking_info.hpp" -#include "std/sstream.hpp" +#include namespace search { -string DebugPrint(PreRankingInfo const & info) +std::string DebugPrint(PreRankingInfo const & info) { - ostringstream os; + std::ostringstream os; os << "PreRankingInfo ["; os << "m_distanceToPivot:" << info.m_distanceToPivot << ","; for (size_t i = 0; i < static_cast(SearchModel::SEARCH_TYPE_COUNT); ++i) @@ -17,10 +17,9 @@ string DebugPrint(PreRankingInfo const & info) auto const type = static_cast(i); os << "m_tokenRange[" << DebugPrint(type) << "]:" << DebugPrint(info.m_tokenRange[i]) << ","; } - os << "m_rank:" << info.m_rank << ","; + os << "m_rank:" << static_cast(info.m_rank) << ","; os << "m_searchType:" << info.m_searchType; os << "]"; return os.str(); } - } // namespace search diff --git a/search/pre_ranking_info.hpp b/search/pre_ranking_info.hpp index a8b0d2899c..a251cd9951 100644 --- a/search/pre_ranking_info.hpp +++ b/search/pre_ranking_info.hpp @@ -7,7 +7,8 @@ #include "base/assert.hpp" -#include "std/cstdint.hpp" +#include +#include namespace search { @@ -45,6 +46,5 @@ struct PreRankingInfo SearchModel::SearchType m_searchType = SearchModel::SEARCH_TYPE_COUNT; }; -string DebugPrint(PreRankingInfo const & info); - +std::string DebugPrint(PreRankingInfo const & info); } // namespace search diff --git a/search/processor.cpp b/search/processor.cpp index ab6caeb387..b05441b7fb 100644 --- a/search/processor.cpp +++ b/search/processor.cpp @@ -5,12 +5,10 @@ #include "search/geometry_utils.hpp" #include "search/intermediate_result.hpp" #include "search/latlon_match.hpp" -#include "search/locality.hpp" #include "search/pre_ranking_info.hpp" #include "search/query_params.hpp" #include "search/ranking_info.hpp" #include "search/ranking_utils.hpp" -#include "search/region.hpp" #include "search/search_index_values.hpp" #include "search/utils.hpp" diff --git a/search/region.cpp b/search/region.cpp deleted file mode 100644 index 4c7f4291fe..0000000000 --- a/search/region.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include "search/region.hpp" - -#include "base/assert.hpp" - -namespace search -{ -bool Region::operator<(Region const & rhs) const -{ - return (m_matchedTokens.size() < rhs.m_matchedTokens.size()); -} - -bool Region::IsValid() const -{ - if (m_ids.empty()) - return false; - ASSERT(!m_matchedTokens.empty(), ()); - ASSERT(!m_enName.empty(), ()); - return true; -} - -void Region::Swap(Region & rhs) -{ - m_ids.swap(rhs.m_ids); - m_matchedTokens.swap(rhs.m_matchedTokens); - m_enName.swap(rhs.m_enName); -} - -string DebugPrint(Region const & r) -{ - string res("Region: "); - res += "Name English: " + r.m_enName; - res += "; Matched: " + ::DebugPrint(r.m_matchedTokens.size()); - return res; -} -} // namespace search diff --git a/search/region.hpp b/search/region.hpp deleted file mode 100644 index 5e830292c7..0000000000 --- a/search/region.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include "std/string.hpp" -#include "std/vector.hpp" - -namespace search -{ -struct Region -{ - vector m_ids; - vector m_matchedTokens; - string m_enName; - - bool IsValid() const; - - void Swap(Region & rhs); - - bool operator<(Region const & rhs) const; -}; - -string DebugPrint(Region const & r); -} // namespace search diff --git a/search/search.pro b/search/search.pro index 813afb9428..abb050f3c0 100644 --- a/search/search.pro +++ b/search/search.pro @@ -30,6 +30,7 @@ HEADERS += \ features_layer_path_finder.hpp \ geocoder.hpp \ geocoder_context.hpp \ + geocoder_locality.hpp \ geometry_cache.hpp \ geometry_utils.hpp \ hotels_classifier.hpp \ @@ -44,7 +45,6 @@ HEADERS += \ keyword_matcher.hpp \ latlon_match.hpp \ lazy_centers_table.hpp \ - locality.hpp \ locality_finder.hpp \ locality_scorer.hpp \ mode.hpp \ @@ -63,7 +63,6 @@ HEADERS += \ ranker.hpp \ ranking_info.hpp \ ranking_utils.hpp \ - region.hpp \ result.hpp \ retrieval.hpp \ reverse_geocoder.hpp \ @@ -97,6 +96,7 @@ SOURCES += \ features_layer_path_finder.cpp \ geocoder.cpp \ geocoder_context.cpp \ + geocoder_locality.cpp \ geometry_cache.cpp \ geometry_utils.cpp \ hotels_classifier.cpp \ @@ -110,7 +110,6 @@ SOURCES += \ keyword_matcher.cpp \ latlon_match.cpp \ lazy_centers_table.cpp \ - locality.cpp \ locality_finder.cpp \ locality_scorer.cpp \ mode.cpp \ @@ -128,7 +127,6 @@ SOURCES += \ ranker.cpp \ ranking_info.cpp \ ranking_utils.cpp \ - region.cpp \ result.cpp \ retrieval.cpp \ reverse_geocoder.cpp \ diff --git a/search/search_tests/locality_scorer_test.cpp b/search/search_tests/locality_scorer_test.cpp index 1da8bda800..8580ca7e0c 100644 --- a/search/search_tests/locality_scorer_test.cpp +++ b/search/search_tests/locality_scorer_test.cpp @@ -96,7 +96,7 @@ public: filter.SetFull(); m_scorer.GetTopLocalities(MwmSet::MwmId(), ctx, filter, limit, m_localities); - sort(m_localities.begin(), m_localities.end(), my::LessBy(&Geocoder::Locality::m_featureId)); + sort(m_localities.begin(), m_localities.end(), my::LessBy(&Locality::m_featureId)); } // LocalityScorer::Delegate overrides: @@ -111,7 +111,7 @@ public: protected: QueryParams m_params; - vector m_localities; + vector m_localities; unordered_map> m_names; LocalityScorer m_scorer;