diff --git a/indexer/ftypes_matcher.cpp b/indexer/ftypes_matcher.cpp index 4a990d15e0..355438edad 100644 --- a/indexer/ftypes_matcher.cpp +++ b/indexer/ftypes_matcher.cpp @@ -115,8 +115,8 @@ IsRailwayStationChecker const & IsRailwayStationChecker::Instance() IsStreetChecker::IsStreetChecker() { // TODO (@y, @m, @vng): this list must be up-to-date with - // data/categories.txt, so, it worth to generate or parse it from that - // file. + // data/categories.txt, so, it's worth it to generate or parse it + // from that file. Classificator const & c = classif(); char const * arr[][2] = {{"highway", "living_street"}, {"highway", "footway"}, @@ -147,6 +147,24 @@ IsStreetChecker const & IsStreetChecker::Instance() return inst; } +IsVillageChecker::IsVillageChecker() +{ + // TODO (@y, @m, @vng): this list must be up-to-date with + // data/categories.txt, so, it's worth it to generate or parse it + // from that file. + Classificator const & c = classif(); + char const * arr[][2] = {{"place", "village"}, {"place", "hamlet"}}; + + for (auto const & p : arr) + m_types.push_back(c.GetTypeByPath({p[0], p[1]})); +} + +IsVillageChecker const & IsVillageChecker::Instance() +{ + static const IsVillageChecker inst; + return inst; +} + IsOneWayChecker::IsOneWayChecker() { Classificator const & c = classif(); diff --git a/indexer/ftypes_matcher.hpp b/indexer/ftypes_matcher.hpp index 4cb7af9eed..400ef14499 100644 --- a/indexer/ftypes_matcher.hpp +++ b/indexer/ftypes_matcher.hpp @@ -84,6 +84,20 @@ public: static IsStreetChecker const & Instance(); }; +class IsVillageChecker : public BaseChecker +{ +public: + IsVillageChecker(); + + template + void ForEachType(TFn && fn) const + { + for_each(m_types.cbegin(), m_types.cend(), forward(fn)); + } + + static IsVillageChecker const & Instance(); +}; + class IsOneWayChecker : public BaseChecker { IsOneWayChecker(); diff --git a/search/search_quality_tests/queries.txt b/search/search_quality_tests/queries.txt index 65c271f285..3d3fc3dbc2 100644 --- a/search/search_quality_tests/queries.txt +++ b/search/search_quality_tests/queries.txt @@ -38,3 +38,4 @@ tehama 4th street фрезерная 1, 2/1, стр. 1 фрезерная 1, д. 2/1, стр. 1 фрезерная 1, д. 2/1, стр. 10 +хиславичи улица толстого 19 diff --git a/search/search_quality_tests/search_quality_tests.cpp b/search/search_quality_tests/search_quality_tests.cpp index ff26c77eb9..48efbdebf6 100644 --- a/search/search_quality_tests/search_quality_tests.cpp +++ b/search/search_quality_tests/search_quality_tests.cpp @@ -227,7 +227,8 @@ int main(int argc, char * argv[]) vector responseTimes(queries.size()); for (size_t i = 0; i < queries.size(); ++i) { - string const & query = queries[i]; + // todo(@m) Add a bool flag to search with prefixes? + string const & query = queries[i] + " "; my::Timer timer; // todo(@m) Viewport and position should belong to the query info. TestSearchRequest request(engine, query, FLAGS_locale, search::SearchParams::ALL, viewport); diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp index 9753e8aa28..3504e07924 100644 --- a/search/v2/geocoder.cpp +++ b/search/v2/geocoder.cpp @@ -10,10 +10,13 @@ #include "indexer/classificator.hpp" #include "indexer/feature_decl.hpp" #include "indexer/feature_impl.hpp" +#include "indexer/ftypes_matcher.hpp" #include "indexer/index.hpp" #include "indexer/mwm_set.hpp" #include "indexer/rank_table.hpp" +#include "storage/country_info_getter.hpp" + #include "coding/multilang_utf8_string.hpp" #include "platform/preferred_languages.hpp" @@ -51,6 +54,7 @@ namespace { size_t constexpr kMaxNumCities = 5; size_t constexpr kMaxNumStates = 5; +size_t constexpr kMaxNumVillages = 5; size_t constexpr kMaxNumCountries = 5; size_t constexpr kMaxNumLocalities = kMaxNumCities + kMaxNumStates + kMaxNumCountries; @@ -124,6 +128,11 @@ public: return binary_search(m_categories.cbegin(), m_categories.cend(), category); } + vector const & GetCategories() const + { + return m_categories; + } + private: StreetCategories() { @@ -195,6 +204,22 @@ void GetEnglishName(FeatureType const & ft, string & name) } } +// todo(@m) Refactor at least here, or even at indexer/ftypes_matcher.hpp. +vector GetVillageCategories() +{ + vector categories; + + auto const & classificator = classif(); + auto addCategory = [&](uint32_t type) + { + uint32_t const index = classificator.GetIndexForType(type); + categories.push_back(FeatureTypeToString(index)); + }; + ftypes::IsVillageChecker::Instance().ForEachType(addCategory); + + return categories; +} + bool HasSearchIndex(MwmValue const & value) { return value.m_cont.IsExist(SEARCH_INDEX_FILE_TAG); } bool HasGeometryIndex(MwmValue & value) { return value.m_cont.IsExist(INDEX_FILE_TAG); } @@ -288,6 +313,21 @@ TIt OrderCountries(Geocoder::Params const & params, TIt begin, TIt end) return stable_partition(begin, end, intersects); } +// Performs pairwise union of adjacent bit vectors +// until at most one bit vector is left. +void UniteCBVs(vector> & cbvs) +{ + while (cbvs.size() > 1) + { + size_t i = 0; + size_t j = 0; + for (; j + 1 < cbvs.size(); j += 2) + cbvs[i++] = coding::CompressedBitVector::Union(*cbvs[j], *cbvs[j + 1]); + for (; j < cbvs.size(); ++j) + cbvs[i++] = move(cbvs[j]); + cbvs.resize(i); + } +} } // namespace // Geocoder::Params -------------------------------------------------------------------------------- @@ -301,6 +341,7 @@ Geocoder::Geocoder(Index & index, storage::CountryInfoGetter const & infoGetter) , m_model(SearchModel::Instance()) , m_streets(nullptr) , m_matcher(nullptr) + , m_villages(nullptr) , m_finder(static_cast(*this)) , m_results(nullptr) { @@ -411,8 +452,13 @@ void Geocoder::GoImpl(vector> & infos, bool inViewport) // All MwmIds are unique during the application lifetime, so // it's ok to save MwmId. m_worldId = handle.GetId(); + m_context = make_unique(move(handle)); if (HasSearchIndex(value)) - FillLocalitiesTable(move(handle)); + { + PrepareAddressFeatures(); + FillLocalitiesTable(); + } + m_context.reset(); } } @@ -446,6 +492,7 @@ void Geocoder::GoImpl(vector> & infos, bool inViewport) m_context.reset(); m_addressFeatures.clear(); m_streets = nullptr; + m_villages = nullptr; }); auto it = m_matchersCache.find(m_context->m_id); @@ -465,24 +512,18 @@ void Geocoder::GoImpl(vector> & infos, bool inViewport) m_params.m_viewport, m_params.m_scale); } - // Creates a cache of posting lists for each token. - m_addressFeatures.resize(m_numTokens); - for (size_t i = 0; i < m_numTokens; ++i) + PrepareAddressFeatures(); + + if (viewportCBV) { - PrepareRetrievalParams(i, i + 1); - - m_addressFeatures[i] = Retrieval::RetrieveAddressFeatures( - m_context->m_id, m_context->m_value, cancellable, m_retrievalParams); - ASSERT(m_addressFeatures[i], ()); - - if (viewportCBV) - { - m_addressFeatures[i] = - coding::CompressedBitVector::Intersect(*m_addressFeatures[i], *viewportCBV); - } + for (size_t i = 0; i < m_numTokens; ++i) + m_addressFeatures[i] = coding::CompressedBitVector::Intersect(*m_addressFeatures[i], *viewportCBV); } m_streets = LoadStreets(*m_context); + m_villages = LoadVillages(*m_context); + + FillVillageLocalities(); m_usedTokens.assign(m_numTokens, false); MatchRegions(REGION_TYPE_COUNTRY); @@ -505,6 +546,7 @@ void Geocoder::ClearCaches() m_addressFeatures.clear(); m_matchersCache.clear(); m_streetsCache.clear(); + m_villagesCache.clear(); } void Geocoder::PrepareRetrievalParams(size_t curToken, size_t endToken) @@ -527,43 +569,50 @@ void Geocoder::PrepareRetrievalParams(size_t curToken, size_t endToken) } } -void Geocoder::FillLocalitiesTable(MwmContext const & context) +void Geocoder::PrepareAddressFeatures() { - // 1. Get cbv for every single token and prefix. - vector> tokensCBV; + m_addressFeatures.resize(m_numTokens); for (size_t i = 0; i < m_numTokens; ++i) { PrepareRetrievalParams(i, i + 1); - tokensCBV.push_back(Retrieval::RetrieveAddressFeatures( - context.m_id, context.m_value, static_cast(*this), - m_retrievalParams)); + m_addressFeatures[i] = Retrieval::RetrieveAddressFeatures( + m_context->m_id, m_context->m_value, static_cast(*this), m_retrievalParams); + ASSERT(m_addressFeatures[i], ()); } +} - // 2. Get all locality candidates for the continuous token ranges. - vector preLocalities; +void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter, + size_t const maxNumLocalities, + vector & preLocalities) +{ + preLocalities.clear(); - for (size_t i = 0; i < m_numTokens; ++i) + for (size_t startToken = 0; startToken < m_numTokens; ++startToken) { CBVPtr intersection; - intersection.Set(tokensCBV[i].get(), false /*isOwner*/); + intersection.SetFull(); + if (filter) + intersection.Intersect(filter); + intersection.Intersect(m_addressFeatures[startToken].get()); if (intersection.IsEmpty()) continue; - for (size_t j = i + 1; j <= m_numTokens; ++j) + for (size_t endToken = startToken + 1; endToken <= m_numTokens; ++endToken) { coding::CompressedBitVectorEnumerator::ForEach(*intersection.Get(), [&](uint32_t featureId) { Locality l; + l.m_countryId = m_context->m_id; l.m_featureId = featureId; - l.m_startToken = i; - l.m_endToken = j; + l.m_startToken = startToken; + l.m_endToken = endToken; preLocalities.push_back(l); }); - if (j < m_numTokens) + if (endToken < m_numTokens) { - intersection.Intersect(tokensCBV[j].get()); + intersection.Intersect(m_addressFeatures[endToken].get()); if (intersection.IsEmpty()) break; } @@ -580,7 +629,7 @@ void Geocoder::FillLocalitiesTable(MwmContext const & context) return d; }; - // 3. Unique preLocalities with featureId but leave the longest range if equal. + // Unique preLocalities with featureId but leave the longest range if equal. sort(preLocalities.begin(), preLocalities.end(), [&](Locality const & l1, Locality const & l2) { @@ -596,10 +645,10 @@ void Geocoder::FillLocalitiesTable(MwmContext const & context) }), preLocalities.end()); - LazyRankTable rankTable(context.m_value); + LazyRankTable rankTable(m_context->m_value); - // 4. Leave most popular localities. - if (preLocalities.size() > kMaxNumLocalities) + // Leave the most popular localities. + if (preLocalities.size() > maxNumLocalities) { /// @todo Calculate match costs according to the exact locality name /// (for 'york' query "york city" is better than "new york"). @@ -613,17 +662,22 @@ void Geocoder::FillLocalitiesTable(MwmContext const & context) return d1 > d2; return rankTable.Get(l1.m_featureId) > rankTable.Get(l2.m_featureId); }); - preLocalities.resize(kMaxNumLocalities); + preLocalities.resize(maxNumLocalities); } +} + +void Geocoder::FillLocalitiesTable() +{ + vector preLocalities; + FillLocalityCandidates(nullptr, kMaxNumLocalities, preLocalities); - // 5. Fill result container. size_t numCities = 0; size_t numStates = 0; size_t numCountries = 0; for (auto & l : preLocalities) { FeatureType ft; - context.m_vector.GetByIndex(l.m_featureId, ft); + m_context->m_vector.GetByIndex(l.m_featureId, ft); switch (m_model.GetSearchType(ft)) { @@ -642,7 +696,7 @@ void Geocoder::FillLocalitiesTable(MwmContext const & context) LOG(LDEBUG, ("City =", name)); #endif - m_cities[make_pair(l.m_startToken, l.m_endToken)].push_back(city); + m_cities[{l.m_startToken, l.m_endToken}].push_back(city); } break; } @@ -696,6 +750,44 @@ void Geocoder::FillLocalitiesTable(MwmContext const & context) } } +void Geocoder::FillVillageLocalities() +{ + vector preLocalities; + FillLocalityCandidates(m_villages, kMaxNumVillages, preLocalities); + + size_t numVillages = 0; + + for (auto & l : preLocalities) + { + FeatureType ft; + m_context->m_vector.GetByIndex(l.m_featureId, ft); + + switch (m_model.GetSearchType(ft)) + { + case SearchModel::SEARCH_TYPE_CITY: + { + if (numVillages < kMaxNumVillages && ft.GetFeatureType() == feature::GEOM_POINT) + { + ++numVillages; + City village = l; + village.m_rect = MercatorBounds::RectByCenterXYAndSizeInMeters( + ft.GetCenter(), ftypes::GetRadiusByPopulation(ft.GetPopulation())); + +#if defined(DEBUG) + string name; + ft.GetName(StringUtf8Multilang::DEFAULT_CODE, name); + LOG(LDEBUG, ("Village =", name)); +#endif + + m_cities[{l.m_startToken, l.m_endToken}].push_back(village); + } + break; + } + default: break; + } + } +} + template void Geocoder::ForEachCountry(vector> const & infos, TFn && fn) { @@ -803,7 +895,7 @@ void Geocoder::MatchCities() { // Localities match to search query. for (auto const & city : p.second) - m_results->emplace_back(m_worldId, city.m_featureId); + m_results->emplace_back(city.m_countryId, city.m_featureId); continue; } @@ -1082,7 +1174,7 @@ void Geocoder::FindPaths() if (m_layers.empty()) return; - // Layers ordered by a search type. + // Layers ordered by search type. vector sortedLayers; sortedLayers.reserve(m_layers.size()); for (auto & layer : m_layers) @@ -1099,6 +1191,33 @@ void Geocoder::FindPaths() }); } +unique_ptr Geocoder::LoadCategories( + MwmContext & context, vector const & categories) +{ + ASSERT(context.m_handle.IsAlive() && HasSearchIndex(context.m_value), ()); + + m_retrievalParams.m_tokens.resize(1); + m_retrievalParams.m_tokens[0].resize(1); + m_retrievalParams.m_prefixTokens.clear(); + + vector> cbvs; + + for_each(categories.begin(), categories.end(), [&](strings::UniString const & category) + { + m_retrievalParams.m_tokens[0][0] = category; + auto cbv = Retrieval::RetrieveAddressFeatures( + context.m_id, context.m_value, static_cast(*this), m_retrievalParams); + if (!coding::CompressedBitVector::IsEmpty(cbv)) + cbvs.push_back(move(cbv)); + }); + + UniteCBVs(cbvs); + if (cbvs.empty()) + cbvs.push_back(make_unique()); + + return move(cbvs[0]); +} + coding::CompressedBitVector const * Geocoder::LoadStreets(MwmContext & context) { if (!context.m_handle.IsAlive() || !HasSearchIndex(context.m_value)) @@ -1109,40 +1228,27 @@ coding::CompressedBitVector const * Geocoder::LoadStreets(MwmContext & context) if (it != m_streetsCache.cend()) return it->second.get(); - unique_ptr allStreets; + auto streets = LoadCategories(context, StreetCategories::Instance().GetCategories()); - m_retrievalParams.m_tokens.resize(1); - m_retrievalParams.m_tokens[0].resize(1); - m_retrievalParams.m_prefixTokens.clear(); + auto const * result = streets.get(); + m_streetsCache[mwmId] = move(streets); + return result; +} - vector> streetsList; - StreetCategories::Instance().ForEach([&](strings::UniString const & category) - { - m_retrievalParams.m_tokens[0][0] = category; - auto streets = Retrieval::RetrieveAddressFeatures( - context.m_id, context.m_value, *this /* cancellable */, - m_retrievalParams); - if (!coding::CompressedBitVector::IsEmpty(streets)) - streetsList.push_back(move(streets)); - }); +coding::CompressedBitVector const * Geocoder::LoadVillages(MwmContext & context) +{ + if (!context.m_handle.IsAlive() || !HasSearchIndex(context.m_value)) + return nullptr; - // Following code performs pairwise union of adjacent bit vectors - // until at most one bit vector is left. - while (streetsList.size() > 1) - { - size_t i = 0; - size_t j = 0; - for (; j + 1 < streetsList.size(); j += 2) - streetsList[i++] = coding::CompressedBitVector::Union(*streetsList[j], *streetsList[j + 1]); - for (; j < streetsList.size(); ++j) - streetsList[i++] = move(streetsList[j]); - streetsList.resize(i); - } + auto mwmId = context.m_handle.GetId(); + auto const it = m_villagesCache.find(mwmId); + if (it != m_villagesCache.cend()) + return it->second.get(); - if (streetsList.empty()) - streetsList.push_back(make_unique()); - auto const * result = streetsList[0].get(); - m_streetsCache[mwmId] = move(streetsList[0]); + auto villages = LoadCategories(context, GetVillageCategories()); + + auto const * result = villages.get(); + m_villagesCache[mwmId] = move(villages); return result; } diff --git a/search/v2/geocoder.hpp b/search/v2/geocoder.hpp index 0c1ac607ac..35113750fb 100644 --- a/search/v2/geocoder.hpp +++ b/search/v2/geocoder.hpp @@ -60,8 +60,8 @@ class SearchModel; // from the highest layer (BUILDING is located on STREET, STREET is // located inside CITY, CITY is located inside STATE, etc.). Final // part is to find all paths through this layered graph and report all -// features from the lowest layer, that are reachable from the highest -// layer. +// features from the lowest layer, that are reachable from the +// highest layer. class Geocoder : public my::Cancellable { public: @@ -101,6 +101,7 @@ private: struct Locality { + MwmSet::MwmId m_countryId; uint32_t m_featureId = 0; size_t m_startToken = 0; size_t m_endToken = 0; @@ -137,7 +138,16 @@ private: // of search query tokens. void PrepareRetrievalParams(size_t curToken, size_t endToken); - void FillLocalitiesTable(MwmContext const & context); + // Creates a cache of posting lists corresponding to features in m_context + // for each token and saves it to m_addressFeatures. + void PrepareAddressFeatures(); + + void FillLocalityCandidates(coding::CompressedBitVector const * filter, + size_t const maxNumLocalities, vector & preLocalities); + + void FillLocalitiesTable(); + + void FillVillageLocalities(); template void ForEachCountry(vector> const & infos, TFn && fn); @@ -154,22 +164,18 @@ private: // Tries to find all cities in a search query and then performs // matching of streets in found cities. - // - // *NOTE* those cities will be looked for in World.mwm, so, for now, - // villages won't be found on this stage. TODO (@y, @m, @vng): try - // to add villages to World.mwm. void MatchCities(); // Tries to do geocoding without localities, ie. find POIs, - // BUILDINGs and STREETs without knowledge about country, state or - // city. If during the geocoding too many features are retrieved, - // viewport is used to throw away excess features. + // BUILDINGs and STREETs without knowledge about country, state, + // city or village. If during the geocoding too many features are + // retrieved, viewport is used to throw away excess features. void MatchViewportAndPosition(); void LimitedSearch(coding::CompressedBitVector const * filter, size_t filterThreshold); // Tries to match some adjacent tokens in the query as streets and - // then performs geocoding in streets vicinities. + // then performs geocoding in street vicinities. void GreedilyMatchStreets(); // Tries to find all paths in a search tree, where each edge is @@ -178,7 +184,7 @@ private: void MatchPOIsAndBuildings(size_t curToken); // Returns true if current path in the search tree (see comment for - // DoGeocoding()) looks sane. This method is used as a fast + // MatchPOIsAndBuildings()) looks sane. This method is used as a fast // pre-check to cut off unnecessary work. bool IsLayerSequenceSane() const; @@ -186,8 +192,13 @@ private: // the lowest layer. void FindPaths(); + unique_ptr LoadCategories( + MwmContext & context, vector const & categories); + coding::CompressedBitVector const * LoadStreets(MwmContext & context); + coding::CompressedBitVector const * LoadVillages(MwmContext & context); + /// A caching wrapper around Retrieval::RetrieveGeometryFeatures. /// param[in] Optional query id. Use VIEWPORT_ID, POSITION_ID or feature index for locality. coding::CompressedBitVector const * RetrieveGeometryFeatures( @@ -219,6 +230,8 @@ private: // Context of the currently processed mwm. unique_ptr m_context; + // m_cities stores both big cities that are visible at World.mwm + // and small villages and hamlets that are not. TLocalitiesCache m_cities; TLocalitiesCache m_regions[REGION_TYPE_COUNT]; @@ -239,9 +252,15 @@ private: // Cache of street ids in mwms. map> m_streetsCache; - // Streets in a currenly processed mwm. + // Cache of village ids in mwms. + map> m_villagesCache; + + // Street features in the mwm that is currently being processed. coding::CompressedBitVector const * m_streets; + // Village features in the mwm that is currently being processed. + coding::CompressedBitVector const * m_villages; + // This vector is used to indicate what tokens were matched by // locality and can't be re-used during the geocoding process. vector m_usedTokens;