diff --git a/generator/search_index_builder.cpp b/generator/search_index_builder.cpp index 9ba6112884..e4e32ed20b 100644 --- a/generator/search_index_builder.cpp +++ b/generator/search_index_builder.cpp @@ -261,9 +261,11 @@ public: void operator() (FeatureType const & f, uint32_t index) const { + using namespace search; + feature::TypesHolder types(f); - static search::TypesSkipper skipIndex; + static TypesSkipper skipIndex; skipIndex.SkipTypes(types); if (types.Empty()) @@ -284,10 +286,10 @@ public: // See OSM TagInfo or Wiki about modern postcodes format. The average number of tokens is less // than two. buffer_vector tokens; - SplitUniString(search::NormalizeAndSimplifyString(postcode), MakeBackInsertFunctor(tokens), - search::Delimiters()); + SplitUniString(NormalizeAndSimplifyString(postcode), MakeBackInsertFunctor(tokens), + Delimiters()); for (auto const & token : tokens) - inserter.AddToken(search::kCategoriesLang, search::PostcodeToString(token)); + inserter.AddToken(kPostcodesLang, PostcodeToString(token)); } // Skip types for features without names. @@ -303,7 +305,7 @@ public: // add names of categories of the feature for (uint32_t t : categoryTypes) - inserter.AddToken(search::kCategoriesLang, search::FeatureTypeToString(c.GetIndexForType(t))); + inserter.AddToken(kCategoriesLang, FeatureTypeToString(c.GetIndexForType(t))); } }; diff --git a/search/feature_offset_match.hpp b/search/feature_offset_match.hpp index 232bec7b7e..c6f52f89e7 100644 --- a/search/feature_offset_match.hpp +++ b/search/feature_offset_match.hpp @@ -3,6 +3,7 @@ #include "search/search_index_values.hpp" #include "search/search_query.hpp" #include "search/search_query_params.hpp" +#include "search/v2/token_slice.hpp" #include "indexer/trie.hpp" @@ -103,6 +104,25 @@ bool CheckMatchString(strings::UniChar const * rootPrefix, size_t rootPrefixSize return false; } + +template +bool FindLangIndex(trie::Iterator> const & trieRoot, uint8_t lang, uint32_t & langIx) +{ + ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits::max(), ()); + + uint32_t const numLangs = static_cast(trieRoot.m_edge.size()); + for (uint32_t i = 0; i < numLangs; ++i) + { + auto const & edge = trieRoot.m_edge[i].m_label; + ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ()); + if (edge[0] == lang) + { + langIx = i; + return true; + } + } + return false; +} } // namespace template @@ -222,7 +242,7 @@ public: toDo(value); } }; -} // namespace search::impl +} // impl template struct TrieRootPrefix @@ -345,27 +365,23 @@ template bool MatchCategoriesInTrie(SearchQueryParams const & params, trie::Iterator> const & trieRoot, THolder && holder) { - ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits::max(), ()); - uint32_t const numLangs = static_cast(trieRoot.m_edge.size()); - for (uint32_t langIx = 0; langIx < numLangs; ++langIx) - { - auto const & edge = trieRoot.m_edge[langIx].m_label; - ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ()); - if (edge[0] == search::kCategoriesLang) - { - auto const catRoot = trieRoot.GoToEdge(langIx); - MatchTokensInTrie(params.m_tokens, TrieRootPrefix(*catRoot, edge), holder); + uint32_t langIx = 0; + if (!impl::FindLangIndex(trieRoot, search::kCategoriesLang, langIx)) + return false; - // Last token's prefix is used as a complete token here, to - // limit the number of features in the last bucket of a - // holder. Probably, this is a false optimization. - holder.Resize(params.m_tokens.size() + 1); - holder.SwitchTo(params.m_tokens.size()); - MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix(*catRoot, edge), holder); - return true; - } - } - return false; + auto const & edge = trieRoot.m_edge[langIx].m_label; + ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ()); + + auto const catRoot = trieRoot.GoToEdge(langIx); + MatchTokensInTrie(params.m_tokens, TrieRootPrefix(*catRoot, edge), holder); + + // Last token's prefix is used as a complete token here, to limit + // the number of features in the last bucket of a holder. Probably, + // this is a false optimization. + holder.Resize(params.m_tokens.size() + 1); + holder.SwitchTo(params.m_tokens.size()); + MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix(*catRoot, edge), holder); + return true; } // Calls toDo with trie root prefix and language code on each language @@ -427,4 +443,29 @@ void MatchFeaturesInTrie(SearchQueryParams const & params, intersecter.ForEachResult(forward(toDo)); } + +template +void MatchPostcodesInTrie(v2::TokenSlice const & slice, + trie::Iterator> const & trieRoot, + TFilter const & filter, ToDo && toDo) +{ + uint32_t langIx = 0; + if (!impl::FindLangIndex(trieRoot, search::kPostcodesLang, langIx)) + return; + + auto const & edge = trieRoot.m_edge[langIx].m_label; + auto const postcodesRoot = trieRoot.GoToEdge(langIx); + + impl::OffsetIntersecter intersecter(filter); + for (size_t i = 0; i < slice.Size(); ++i) + { + if (slice.IsPrefix(i)) + MatchTokenPrefixInTrie(slice.Get(i), TrieRootPrefix(*postcodesRoot, edge), intersecter); + else + MatchTokenInTrie(slice.Get(i), TrieRootPrefix(*postcodesRoot, edge), intersecter); + intersecter.NextStep(); + } + + intersecter.ForEachResult(forward(toDo)); +} } // namespace search diff --git a/search/retrieval.cpp b/search/retrieval.cpp index 3d4ac39a62..c4376b5996 100644 --- a/search/retrieval.cpp +++ b/search/retrieval.cpp @@ -7,6 +7,7 @@ #include "search_trie.hpp" #include "v2/mwm_context.hpp" +#include "v2/token_slice.hpp" #include "indexer/feature.hpp" #include "indexer/feature_algo.hpp" @@ -29,8 +30,65 @@ using osm::Editor; namespace search { +namespace v2 +{ namespace { +class FeaturesCollector +{ +public: + FeaturesCollector(my::Cancellable const & cancellable, vector & features) + : m_cancellable(cancellable), m_features(features), m_counter(0) + { + } + + template + void operator()(TValue const & value) + { + if ((++m_counter & 0xFF) == 0) + BailIfCancelled(m_cancellable); + m_features.push_back(value.m_featureId); + } + + inline void operator()(uint32_t feature) { m_features.push_back(feature); } + + inline void operator()(uint64_t feature) { m_features.push_back(feature); } + +private: + my::Cancellable const & m_cancellable; + vector & m_features; + uint32_t m_counter; +}; + +class EditedFeaturesHolder +{ +public: + EditedFeaturesHolder(MwmSet::MwmId const & id) + { + Editor & editor = Editor::Instance(); + m_deleted = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Deleted); + m_modified = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Modified); + m_created = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Created); + } + + bool ModifiedOrDeleted(uint32_t featureIndex) const + { + return binary_search(m_deleted.begin(), m_deleted.end(), featureIndex) || + binary_search(m_modified.begin(), m_modified.end(), featureIndex); + } + + template + void ForEachModifiedOrCreated(TFn & fn) + { + for_each(m_modified.begin(), m_modified.end(), fn); + for_each(m_created.begin(), m_created.end(), fn); + } + +private: + vector m_deleted; + vector m_modified; + vector m_created; +}; unique_ptr SortFeaturesAndBuildCBV(vector && features) { @@ -89,6 +147,13 @@ bool MatchFeatureByName(FeatureType const & ft, SearchQueryParams const & params return matched; } +bool MatchFeatureByPostcode(FeatureType const & ft, v2::TokenSlice const & slice) +{ + string const postcode = ft.GetMetadata().Get(feature::Metadata::FMD_POSTCODE); + // TODO(@y): implement this. + return false; +} + // Retrieves from the search index corresponding to |value| all // features matching to |params|. template @@ -96,16 +161,7 @@ unique_ptr RetrieveAddressFeaturesImpl( MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, SearchQueryParams const & params) { - // Exclude from search all deleted/modified features and match all edited/created features separately. - Editor & editor = Editor::Instance(); - - auto const deleted = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Deleted); - auto const modified = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Modified); - auto const filter = [&](uint32_t featureIndex) -> bool - { - return (!binary_search(deleted.begin(), deleted.end(), featureIndex) && - !binary_search(modified.begin(), modified.end(), featureIndex)); - }; + EditedFeaturesHolder holder(id); serial::CodingParams codingParams(trie::GetCodingParams(value.GetHeader().GetDefCodingParams())); ModelReaderPtr searchReader = value.m_cont.GetReader(SEARCH_INDEX_FILE_TAG); @@ -116,17 +172,16 @@ unique_ptr RetrieveAddressFeaturesImpl( // TODO (@y, @m): This code may be optimized in the case where // bit vectors are sorted in the search index. vector features; - uint32_t counter = 0; - auto const collector = [&](TValue const & value) - { - if ((++counter & 0xFF) == 0) - BailIfCancelled(cancellable); - features.push_back(value.m_featureId); - }; + FeaturesCollector collector(cancellable, features); - MatchFeaturesInTrie(params, *trieRoot, filter, collector); + MatchFeaturesInTrie(params, *trieRoot, [&holder](uint32_t featureIndex) + { + return !holder.ModifiedOrDeleted(featureIndex); + }, + collector); // Match all edited/created features separately. + Editor & editor = Editor::Instance(); auto const matcher = [&](uint32_t featureIndex) { FeatureType ft; @@ -136,9 +191,47 @@ unique_ptr RetrieveAddressFeaturesImpl( features.push_back(featureIndex); }; - for_each(modified.begin(), modified.end(), matcher); - auto const created = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Created); - for_each(created.begin(), created.end(), matcher); + holder.ForEachModifiedOrCreated(matcher); + + return SortFeaturesAndBuildCBV(move(features)); +} + +template +unique_ptr RetrievePostcodeFeaturesImpl( + MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, + TokenSlice const & slice) +{ + EditedFeaturesHolder holder(id); + + serial::CodingParams codingParams(trie::GetCodingParams(value.GetHeader().GetDefCodingParams())); + ModelReaderPtr searchReader = value.m_cont.GetReader(SEARCH_INDEX_FILE_TAG); + + auto const trieRoot = trie::ReadTrie, ValueList>( + SubReaderWrapper(searchReader.GetPtr()), SingleValueSerializer(codingParams)); + + // TODO (@y, @m): This code may be optimized in the case where + // bit vectors are sorted in the search index. + vector features; + FeaturesCollector collector(cancellable, features); + + MatchPostcodesInTrie(slice, *trieRoot, [&holder](uint32_t featureIndex) + { + return !holder.ModifiedOrDeleted(featureIndex); + }, + collector); + + // Match all edited/created features separately. + Editor & editor = Editor::Instance(); + auto const matcher = [&](uint32_t featureIndex) + { + FeatureType ft; + VERIFY(editor.GetEditedFeature(id, featureIndex, ft), ()); + // TODO(AlexZ): Should we match by some feature's metafields too? + if (MatchFeatureByPostcode(ft, slice)) + features.push_back(featureIndex); + }; + + holder.ForEachModifiedOrCreated(matcher); return SortFeaturesAndBuildCBV(move(features)); } @@ -149,16 +242,11 @@ unique_ptr RetrieveGeometryFeaturesImpl( v2::MwmContext const & context, my::Cancellable const & cancellable, covering::IntervalsT const & coverage, int scale) { - uint32_t counter = 0; vector features; - context.ForEachIndex(coverage, scale, [&](uint64_t featureId) - { - if ((++counter & 0xFF) == 0) - BailIfCancelled(cancellable); - features.push_back(featureId); - }); + FeaturesCollector collector(cancellable, features); + context.ForEachIndex(coverage, scale, collector); return SortFeaturesAndBuildCBV(move(features)); } @@ -172,6 +260,16 @@ struct RetrieveAddressFeaturesAdaptor } }; +template +struct RetrievePostcodeFeaturesAdaptor +{ + template + unique_ptr operator()(TArgs &&... args) + { + return RetrievePostcodeFeaturesImpl(forward(args)...); + } +}; + template