From 28fcf5da2fe5595c7db67093472366d92c62b93b Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Fri, 13 Nov 2015 17:16:14 +0300 Subject: [PATCH] [search] Added support for both old and new sdx formats. --- indexer/search_trie.hpp | 2 - platform/mwm_version.hpp | 3 +- search/feature_offset_match.hpp | 90 +++++++++++---------- search/retrieval.cpp | 65 +++++++++++---- search/search_query.cpp | 135 +++++++++++++++++++++++--------- search/search_query.hpp | 1 - 6 files changed, 200 insertions(+), 96 deletions(-) diff --git a/indexer/search_trie.hpp b/indexer/search_trie.hpp index 1327a5fbd9..ea63c822a1 100644 --- a/indexer/search_trie.hpp +++ b/indexer/search_trie.hpp @@ -15,8 +15,6 @@ static const uint8_t kPointCodingBits = 20; namespace trie { -using DefaultIterator = trie::Iterator>; - inline serial::CodingParams GetCodingParams(serial::CodingParams const & orig) { return serial::CodingParams(search::kPointCodingBits, diff --git a/platform/mwm_version.hpp b/platform/mwm_version.hpp index 0fcd02623f..56bada4861 100644 --- a/platform/mwm_version.hpp +++ b/platform/mwm_version.hpp @@ -18,7 +18,8 @@ enum Format v4, // April 2015 (distinguish и and й in search index) v5, // July 2015 (feature id is the index in vector now). v6, // October 2015 (offsets vector is in mwm now). - lastFormat = v6 + v7, // November 2015 (supply different search index formats). + lastFormat = v7 }; struct MwmVersion diff --git a/search/feature_offset_match.hpp b/search/feature_offset_match.hpp index e9c04165c2..eec689e1a2 100644 --- a/search/feature_offset_match.hpp +++ b/search/feature_offset_match.hpp @@ -30,8 +30,9 @@ size_t CalcEqualLength(TSrcIter b, TSrcIter e, TCompIter bC, TCompIter eC) return count; } -inline shared_ptr MoveTrieIteratorToString( - trie::DefaultIterator const & trieRoot, strings::UniString const & queryS, +template +inline shared_ptr>> MoveTrieIteratorToString( + trie::Iterator> const & trieRoot, strings::UniString const & queryS, size_t & symbolsMatched, bool & bFullEdgeMatched) { symbolsMatched = 0; @@ -92,9 +93,10 @@ namespace } } -template -void FullMatchInTrie(trie::DefaultIterator const & trieRoot, strings::UniChar const * rootPrefix, - size_t rootPrefixSize, strings::UniString s, F & f) +template +void FullMatchInTrie(trie::Iterator> const & trieRoot, + strings::UniChar const * rootPrefix, size_t rootPrefixSize, + strings::UniString s, TF & f) { if (!CheckMatchString(rootPrefix, rootPrefixSize, s)) return; @@ -117,14 +119,17 @@ void FullMatchInTrie(trie::DefaultIterator const & trieRoot, strings::UniChar co it->m_valueList.ForEach(f); } -template -void PrefixMatchInTrie(trie::DefaultIterator const & trieRoot, strings::UniChar const * rootPrefix, - size_t rootPrefixSize, strings::UniString s, F & f) +template +void PrefixMatchInTrie(trie::Iterator> const & trieRoot, + strings::UniChar const * rootPrefix, size_t rootPrefixSize, + strings::UniString s, TF & f) { if (!CheckMatchString(rootPrefix, rootPrefixSize, s)) return; - using TQueue = vector>; + using TIterator = trie::Iterator>; + + using TQueue = vector>; TQueue trieQueue; { size_t symbolsMatched = 0; @@ -207,14 +212,15 @@ public: }; } // namespace search::impl +template struct TrieRootPrefix { - trie::DefaultIterator const & m_root; + using TIterator = trie::Iterator>; + TIterator const & m_root; strings::UniChar const * m_prefix; size_t m_prefixSize; - TrieRootPrefix(trie::DefaultIterator const & root, - trie::DefaultIterator::Edge::TEdgeLabel const & edge) + TrieRootPrefix(TIterator const & root, typename TIterator::Edge::TEdgeLabel const & edge) : m_root(root) { if (edge.size() == 1) @@ -266,9 +272,9 @@ private: // Calls toDo for each feature corresponding to at least one synonym. // *NOTE* toDo may be called several times for the same feature. -template +template void MatchTokenInTrie(SearchQueryParams::TSynonymsVector const & syns, - TrieRootPrefix const & trieRoot, ToDo && toDo) + TrieRootPrefix const & trieRoot, ToDo && toDo) { for (auto const & syn : syns) { @@ -280,9 +286,9 @@ void MatchTokenInTrie(SearchQueryParams::TSynonymsVector const & syns, // Calls toDo for each feature whose tokens contains at least one // synonym as a prefix. // *NOTE* toDo may be called serveral times for the same feature. -template +template void MatchTokenPrefixInTrie(SearchQueryParams::TSynonymsVector const & syns, - TrieRootPrefix const & trieRoot, ToDo && toDo) + TrieRootPrefix const & trieRoot, ToDo && toDo) { for (auto const & syn : syns) { @@ -293,9 +299,9 @@ void MatchTokenPrefixInTrie(SearchQueryParams::TSynonymsVector const & syns, // Fills holder with features whose names correspond to tokens list up to synonyms. // *NOTE* the same feature may be put in the same holder's slot several times. -template +template void MatchTokensInTrie(vector const & tokens, - TrieRootPrefix const & trieRoot, THolder && holder) + TrieRootPrefix const & trieRoot, THolder && holder) { holder.Resize(tokens.size()); for (size_t i = 0; i < tokens.size(); ++i) @@ -308,10 +314,10 @@ void MatchTokensInTrie(vector const & tokens // Fills holder with features whose names correspond to tokens list up to synonyms, // also, last holder's slot will be filled with features corresponding to prefixTokens. // *NOTE* the same feature may be put in the same holder's slot several times. -template +template void MatchTokensAndPrefixInTrie(vector const & tokens, SearchQueryParams::TSynonymsVector const & prefixTokens, - TrieRootPrefix const & trieRoot, THolder && holder) + TrieRootPrefix const & trieRoot, THolder && holder) { MatchTokensInTrie(tokens, trieRoot, holder); @@ -323,9 +329,9 @@ void MatchTokensAndPrefixInTrie(vector const // Fills holder with categories whose description matches to at least one // token from a search query. // *NOTE* query prefix will be treated as a complete token in the function. -template -bool MatchCategoriesInTrie(SearchQueryParams const & params, trie::DefaultIterator const & trieRoot, - THolder && holder) +template +bool MatchCategoriesInTrie(SearchQueryParams const & params, + trie::Iterator> const & trieRoot, THolder && holder) { ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits::max(), ()); uint32_t const numLangs = static_cast(trieRoot.m_edge.size()); @@ -336,14 +342,14 @@ bool MatchCategoriesInTrie(SearchQueryParams const & params, trie::DefaultIterat if (edge[0] == search::kCategoriesLang) { auto const catRoot = trieRoot.GoToEdge(langIx); - MatchTokensInTrie(params.m_tokens, TrieRootPrefix(*catRoot, edge), holder); + MatchTokensInTrie(params.m_tokens, TrieRootPrefix(*catRoot, edge), holder); // Last token's prefix is used as a complete token here, to // limit the number of features in the last bucket of a // holder. Probably, this is a false optimization. holder.Resize(params.m_tokens.size() + 1); holder.SwitchTo(params.m_tokens.size()); - MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix(*catRoot, edge), holder); + MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix(*catRoot, edge), holder); return true; } } @@ -352,9 +358,9 @@ bool MatchCategoriesInTrie(SearchQueryParams const & params, trie::DefaultIterat // Calls toDo with trie root prefix and language code on each language // allowed by params. -template -void ForEachLangPrefix(SearchQueryParams const & params, trie::DefaultIterator const & trieRoot, - ToDo && toDo) +template +void ForEachLangPrefix(SearchQueryParams const & params, + trie::Iterator> const & trieRoot, ToDo && toDo) { ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits::max(), ()); uint32_t const numLangs = static_cast(trieRoot.m_edge.size()); @@ -366,7 +372,7 @@ void ForEachLangPrefix(SearchQueryParams const & params, trie::DefaultIterator c if (edge[0] < search::kCategoriesLang && params.IsLangExist(lang)) { auto const langRoot = trieRoot.GoToEdge(langIx); - TrieRootPrefix langPrefix(*langRoot, edge); + TrieRootPrefix langPrefix(*langRoot, edge); toDo(langPrefix, lang); } } @@ -374,21 +380,23 @@ void ForEachLangPrefix(SearchQueryParams const & params, trie::DefaultIterator c // Calls toDo for each feature whose description contains *ALL* tokens from a search query. // Each feature will be passed to toDo only once. -template -void MatchFeaturesInTrie(SearchQueryParams const & params, trie::DefaultIterator const & trieRoot, - TFilter const & filter, ToDo && toDo) +template +void MatchFeaturesInTrie(SearchQueryParams const & params, + trie::Iterator> const & trieRoot, TFilter const & filter, + ToDo && toDo) { - using TValue = trie::DefaultIterator::TValue; + using TIterator = trie::Iterator>; + TrieValuesHolder categoriesHolder(filter); bool const categoriesMatched = MatchCategoriesInTrie(params, trieRoot, categoriesHolder); impl::OffsetIntersecter intersecter(filter); for (size_t i = 0; i < params.m_tokens.size(); ++i) { - ForEachLangPrefix(params, trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang) - { - MatchTokenInTrie(params.m_tokens[i], langRoot, intersecter); - }); + ForEachLangPrefix(params, trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang) + { + MatchTokenInTrie(params.m_tokens[i], langRoot, intersecter); + }); if (categoriesMatched) categoriesHolder.ForEachValue(i, intersecter); intersecter.NextStep(); @@ -396,10 +404,10 @@ void MatchFeaturesInTrie(SearchQueryParams const & params, trie::DefaultIterator if (!params.m_prefixTokens.empty()) { - ForEachLangPrefix(params, trieRoot, [&](TrieRootPrefix & langRoot, int8_t /* lang */) - { - MatchTokenPrefixInTrie(params.m_prefixTokens, langRoot, intersecter); - }); + ForEachLangPrefix(params, trieRoot, [&](TrieRootPrefix & langRoot, int8_t /* lang */) + { + MatchTokenPrefixInTrie(params.m_prefixTokens, langRoot, intersecter); + }); if (categoriesMatched) categoriesHolder.ForEachValue(params.m_tokens.size(), intersecter); intersecter.NextStep(); diff --git a/search/retrieval.cpp b/search/retrieval.cpp index c6676e97ae..cccb8a5817 100644 --- a/search/retrieval.cpp +++ b/search/retrieval.cpp @@ -9,6 +9,8 @@ #include "indexer/scales.hpp" #include "indexer/search_trie.hpp" +#include "platform/mwm_version.hpp" + #include "coding/compressed_bit_vector.hpp" #include "coding/reader_wrapper.hpp" @@ -63,32 +65,67 @@ unique_ptr RetrieveAddressFeatures(MwmSet::MwmHandl my::Cancellable const & cancellable, SearchQueryParams const & params) { - using TValue = FeatureIndexValue; - auto * value = handle.GetValue(); ASSERT(value, ()); serial::CodingParams codingParams(trie::GetCodingParams(value->GetHeader().GetDefCodingParams())); ModelReaderPtr searchReader = value->m_cont.GetReader(SEARCH_INDEX_FILE_TAG); - auto const trieRoot = trie::ReadTrie, ValueList>( - SubReaderWrapper(searchReader.GetPtr()), SingleValueSerializer(codingParams)); auto emptyFilter = [](uint32_t /* featureId */) { return true; }; - // TODO (@y, @m): remove this code as soon as search index will have - // native support for bit vectors. - vector features; - auto collector = [&](TValue const & value) + version::MwmVersion version; + if (!version::ReadVersion(value->m_cont, version)) { - if (cancellable.IsCancelled()) - throw CancelException(); - features.push_back(value.m_featureId); - }; + LOG(LERROR, ("Unreadable mwm version.")); + return unique_ptr(); + } + if (version.format < version::v7) + { + using TValue = FeatureWithRankAndCenter; - MatchFeaturesInTrie(params, *trieRoot, emptyFilter, collector); - return SortFeaturesAndBuildCBV(move(features)); + auto const trieRoot = trie::ReadTrie, ValueList>( + SubReaderWrapper(searchReader.GetPtr()), + SingleValueSerializer(codingParams)); + + vector features; + auto collector = [&](TValue const & value) + { + if (cancellable.IsCancelled()) + throw CancelException(); + features.push_back(value.m_featureId); + }; + + MatchFeaturesInTrie(params, *trieRoot, emptyFilter, collector); + return SortFeaturesAndBuildCBV(move(features)); + } + else if (version.format == version::v7) + { + using TValue = FeatureIndexValue; + + auto const trieRoot = trie::ReadTrie, ValueList>( + SubReaderWrapper(searchReader.GetPtr()), + SingleValueSerializer(codingParams)); + + // TODO (@y, @m): remove this code as soon as search index will have + // native support for bit vectors. + vector features; + auto collector = [&](TValue const & value) + { + if (cancellable.IsCancelled()) + throw CancelException(); + features.push_back(value.m_featureId); + }; + + MatchFeaturesInTrie(params, *trieRoot, emptyFilter, collector); + return SortFeaturesAndBuildCBV(move(features)); + } + else + { + LOG(LERROR, ("Unsupported mwm version:", version.format)); + return unique_ptr(); + } } // Retrieves from the geometry index corresponding to handle all diff --git a/search/search_query.cpp b/search/search_query.cpp index 8b7289a55d..6109c2c9fd 100644 --- a/search/search_query.cpp +++ b/search/search_query.cpp @@ -23,8 +23,10 @@ #include "indexer/index.hpp" #include "indexer/scales.hpp" #include "indexer/search_delimiters.hpp" +#include "indexer/search_index_values.hpp" #include "indexer/search_string_utils.hpp" +#include "platform/mwm_version.hpp" #include "platform/preferred_languages.hpp" #include "coding/compressed_bit_vector.hpp" @@ -1495,7 +1497,11 @@ public: void SwitchTo(size_t ind) { m_index = ind; } - void operator()(Query::TTrieValue const & v) + void operator()(FeatureWithRankAndCenter const & value) { operator()(value.m_featureId); } + + void operator()(FeatureIndexValue const & value) { operator()(value.m_featureId); } + + void operator()(uint32_t const featureId) { if (m_query.IsCancelled()) throw Query::CancelException(); @@ -1503,7 +1509,7 @@ public: // find locality in current results for (size_t i = 0; i < 3; ++i) { - auto it = find_if(m_localities[i].begin(), m_localities[i].end(), EqualID(v.m_featureId)); + auto it = find_if(m_localities[i].begin(), m_localities[i].end(), EqualID(featureId)); if (it != m_localities[i].end()) { it->m_matchedTokens.push_back(m_index); @@ -1513,7 +1519,7 @@ public: // Load feature. FeatureType f; - m_vector.GetByIndex(v.m_featureId, f); + m_vector.GetByIndex(featureId, f); using namespace ftypes; @@ -1528,14 +1534,14 @@ public: uint8_t rank = 0; if (m_table.get()) { - ASSERT_LESS(v.m_featureId, m_table->Size(), ()); - rank = m_table->Get(v.m_featureId); + ASSERT_LESS(featureId, m_table->Size(), ()); + rank = m_table->Get(featureId); } else { LOG(LWARNING, ("Can't get ranks table for locality search.")); } - m_localities[type].emplace_back(type, v.m_featureId, center, rank); + m_localities[type].emplace_back(type, featureId, center, rank); Locality & loc = m_localities[type].back(); loc.m_radius = GetRadiusByPopulation(GetPopulation(f)); @@ -1609,44 +1615,99 @@ void Query::SearchLocality(MwmValue const * pMwm, Locality & res1, Region & res2 auto codingParams = trie::GetCodingParams(pMwm->GetHeader().GetDefCodingParams()); - ModelReaderPtr searchReader = pMwm->m_cont.GetReader(SEARCH_INDEX_FILE_TAG); - - using TValue = FeatureIndexValue; - auto const trieRoot = trie::ReadTrie, ValueList>( - SubReaderWrapper(searchReader.GetPtr()), SingleValueSerializer(codingParams)); - - ForEachLangPrefix(params, *trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang) + auto versionFormat = pMwm->GetHeader().GetFormat(); + if (versionFormat < version::v7) { - impl::DoFindLocality doFind(*this, pMwm, lang); - MatchTokensInTrie(params.m_tokens, langRoot, doFind); + using TValue = FeatureWithRankAndCenter; - // Last token's prefix is used as a complete token here, to limit number of results. - doFind.Resize(params.m_tokens.size() + 1); - doFind.SwitchTo(params.m_tokens.size()); - MatchTokenInTrie(params.m_prefixTokens, langRoot, doFind); - doFind.SortLocalities(); + ModelReaderPtr searchReader = pMwm->m_cont.GetReader(SEARCH_INDEX_FILE_TAG); - // Get regions from STATE and COUNTRY localities - vector regions; - doFind.GetRegions(regions); + auto const trieRoot = trie::ReadTrie, ValueList>( + SubReaderWrapper(searchReader.GetPtr()), + SingleValueSerializer(codingParams)); - // Get best CITY locality. - Locality loc; - doFind.GetBestCity(loc, regions); - if (res1 < loc) + ForEachLangPrefix(params, *trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang) + { + impl::DoFindLocality doFind(*this, pMwm, lang); + MatchTokensInTrie(params.m_tokens, langRoot, doFind); + + // Last token's prefix is used as a complete token here, to limit number of + // results. + doFind.Resize(params.m_tokens.size() + 1); + doFind.SwitchTo(params.m_tokens.size()); + MatchTokenInTrie(params.m_prefixTokens, langRoot, doFind); + doFind.SortLocalities(); + + // Get regions from STATE and COUNTRY localities + vector regions; + doFind.GetRegions(regions); + + // Get best CITY locality. + Locality loc; + doFind.GetBestCity(loc, regions); + if (res1 < loc) + { + LOG(LDEBUG, ("Better location ", loc, " for language ", lang)); + res1.Swap(loc); + } + + // Get best region. + if (!regions.empty()) + { + sort(regions.begin(), regions.end()); + if (res2 < regions.back()) + res2.Swap(regions.back()); + } + }); + } + else if (versionFormat == version::v7) + { + using TValue = FeatureIndexValue; + + ModelReaderPtr searchReader = pMwm->m_cont.GetReader(SEARCH_INDEX_FILE_TAG); + + auto const trieRoot = trie::ReadTrie, ValueList>( + SubReaderWrapper(searchReader.GetPtr()), + SingleValueSerializer(codingParams)); + + ForEachLangPrefix(params, *trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang) { - LOG(LDEBUG, ("Better location ", loc, " for language ", lang)); - res1.Swap(loc); - } + impl::DoFindLocality doFind(*this, pMwm, lang); + MatchTokensInTrie(params.m_tokens, langRoot, doFind); - // Get best region. - if (!regions.empty()) - { - sort(regions.begin(), regions.end()); - if (res2 < regions.back()) + // Last token's prefix is used as a complete token here, to limit number of + // results. + doFind.Resize(params.m_tokens.size() + 1); + doFind.SwitchTo(params.m_tokens.size()); + MatchTokenInTrie(params.m_prefixTokens, langRoot, doFind); + doFind.SortLocalities(); + + // Get regions from STATE and COUNTRY localities + vector regions; + doFind.GetRegions(regions); + + // Get best CITY locality. + Locality loc; + doFind.GetBestCity(loc, regions); + if (res1 < loc) + { + LOG(LDEBUG, ("Better location", loc, " for language", lang)); + res1.Swap(loc); + } + + // Get best region. + if (!regions.empty()) + { + sort(regions.begin(), regions.end()); + if (res2 < regions.back()) res2.Swap(regions.back()); - } - }); + } + }); + } + else + { + LOG(LERROR, ("Unsupported mwm version:", versionFormat)); + } } void Query::SearchFeatures() diff --git a/search/search_query.hpp b/search/search_query.hpp index b91575d162..59ebb70269 100644 --- a/search/search_query.hpp +++ b/search/search_query.hpp @@ -110,7 +110,6 @@ public: /// @name This stuff is public for implementation classes in search_query.cpp /// Do not use it in client code. //@{ - using TTrieValue = FeatureIndexValue; void InitParams(bool localitySearch, SearchQueryParams & params);