[search] Added support for both old and new sdx formats.

This commit is contained in:
Maxim Pimenov 2015-11-13 17:16:14 +03:00 committed by Sergey Yershov
parent 1f6a141d4b
commit 28fcf5da2f
6 changed files with 200 additions and 96 deletions

View file

@ -15,8 +15,6 @@ static const uint8_t kPointCodingBits = 20;
namespace trie
{
using DefaultIterator = trie::Iterator<ValueList<FeatureIndexValue>>;
inline serial::CodingParams GetCodingParams(serial::CodingParams const & orig)
{
return serial::CodingParams(search::kPointCodingBits,

View file

@ -18,7 +18,8 @@ enum Format
v4, // April 2015 (distinguish и and й in search index)
v5, // July 2015 (feature id is the index in vector now).
v6, // October 2015 (offsets vector is in mwm now).
lastFormat = v6
v7, // November 2015 (supply different search index formats).
lastFormat = v7
};
struct MwmVersion

View file

@ -30,8 +30,9 @@ size_t CalcEqualLength(TSrcIter b, TSrcIter e, TCompIter bC, TCompIter eC)
return count;
}
inline shared_ptr<trie::DefaultIterator> MoveTrieIteratorToString(
trie::DefaultIterator const & trieRoot, strings::UniString const & queryS,
template <typename TValue>
inline shared_ptr<trie::Iterator<ValueList<TValue>>> MoveTrieIteratorToString(
trie::Iterator<ValueList<TValue>> const & trieRoot, strings::UniString const & queryS,
size_t & symbolsMatched, bool & bFullEdgeMatched)
{
symbolsMatched = 0;
@ -92,9 +93,10 @@ namespace
}
}
template <typename F>
void FullMatchInTrie(trie::DefaultIterator const & trieRoot, strings::UniChar const * rootPrefix,
size_t rootPrefixSize, strings::UniString s, F & f)
template <typename TValue, typename TF>
void FullMatchInTrie(trie::Iterator<ValueList<TValue>> const & trieRoot,
strings::UniChar const * rootPrefix, size_t rootPrefixSize,
strings::UniString s, TF & f)
{
if (!CheckMatchString(rootPrefix, rootPrefixSize, s))
return;
@ -117,14 +119,17 @@ void FullMatchInTrie(trie::DefaultIterator const & trieRoot, strings::UniChar co
it->m_valueList.ForEach(f);
}
template <typename F>
void PrefixMatchInTrie(trie::DefaultIterator const & trieRoot, strings::UniChar const * rootPrefix,
size_t rootPrefixSize, strings::UniString s, F & f)
template <typename TValue, typename TF>
void PrefixMatchInTrie(trie::Iterator<ValueList<TValue>> const & trieRoot,
strings::UniChar const * rootPrefix, size_t rootPrefixSize,
strings::UniString s, TF & f)
{
if (!CheckMatchString(rootPrefix, rootPrefixSize, s))
return;
using TQueue = vector<shared_ptr<trie::DefaultIterator>>;
using TIterator = trie::Iterator<ValueList<TValue>>;
using TQueue = vector<shared_ptr<TIterator>>;
TQueue trieQueue;
{
size_t symbolsMatched = 0;
@ -207,14 +212,15 @@ public:
};
} // namespace search::impl
template <typename TValue>
struct TrieRootPrefix
{
trie::DefaultIterator const & m_root;
using TIterator = trie::Iterator<ValueList<TValue>>;
TIterator const & m_root;
strings::UniChar const * m_prefix;
size_t m_prefixSize;
TrieRootPrefix(trie::DefaultIterator const & root,
trie::DefaultIterator::Edge::TEdgeLabel const & edge)
TrieRootPrefix(TIterator const & root, typename TIterator::Edge::TEdgeLabel const & edge)
: m_root(root)
{
if (edge.size() == 1)
@ -266,9 +272,9 @@ private:
// Calls toDo for each feature corresponding to at least one synonym.
// *NOTE* toDo may be called several times for the same feature.
template <typename ToDo>
template <typename TValue, typename ToDo>
void MatchTokenInTrie(SearchQueryParams::TSynonymsVector const & syns,
TrieRootPrefix const & trieRoot, ToDo && toDo)
TrieRootPrefix<TValue> const & trieRoot, ToDo && toDo)
{
for (auto const & syn : syns)
{
@ -280,9 +286,9 @@ void MatchTokenInTrie(SearchQueryParams::TSynonymsVector const & syns,
// Calls toDo for each feature whose tokens contains at least one
// synonym as a prefix.
// *NOTE* toDo may be called several times for the same feature.
template <typename ToDo>
template <typename TValue, typename ToDo>
void MatchTokenPrefixInTrie(SearchQueryParams::TSynonymsVector const & syns,
TrieRootPrefix const & trieRoot, ToDo && toDo)
TrieRootPrefix<TValue> const & trieRoot, ToDo && toDo)
{
for (auto const & syn : syns)
{
@ -293,9 +299,9 @@ void MatchTokenPrefixInTrie(SearchQueryParams::TSynonymsVector const & syns,
// Fills holder with features whose names correspond to tokens list up to synonyms.
// *NOTE* the same feature may be put in the same holder's slot several times.
template <typename THolder>
template <typename TValue, typename THolder>
void MatchTokensInTrie(vector<SearchQueryParams::TSynonymsVector> const & tokens,
TrieRootPrefix const & trieRoot, THolder && holder)
TrieRootPrefix<TValue> const & trieRoot, THolder && holder)
{
holder.Resize(tokens.size());
for (size_t i = 0; i < tokens.size(); ++i)
@ -308,10 +314,10 @@ void MatchTokensInTrie(vector<SearchQueryParams::TSynonymsVector> const & tokens
// Fills holder with features whose names correspond to tokens list up to synonyms,
// also, last holder's slot will be filled with features corresponding to prefixTokens.
// *NOTE* the same feature may be put in the same holder's slot several times.
template <typename THolder>
template <typename TValue, typename THolder>
void MatchTokensAndPrefixInTrie(vector<SearchQueryParams::TSynonymsVector> const & tokens,
SearchQueryParams::TSynonymsVector const & prefixTokens,
TrieRootPrefix const & trieRoot, THolder && holder)
TrieRootPrefix<TValue> const & trieRoot, THolder && holder)
{
MatchTokensInTrie(tokens, trieRoot, holder);
@ -323,9 +329,9 @@ void MatchTokensAndPrefixInTrie(vector<SearchQueryParams::TSynonymsVector> const
// Fills holder with categories whose description matches at least one
// token from a search query.
// *NOTE* query prefix will be treated as a complete token in the function.
template <typename THolder>
bool MatchCategoriesInTrie(SearchQueryParams const & params, trie::DefaultIterator const & trieRoot,
THolder && holder)
template <typename TValue, typename THolder>
bool MatchCategoriesInTrie(SearchQueryParams const & params,
trie::Iterator<ValueList<TValue>> const & trieRoot, THolder && holder)
{
ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits<uint32_t>::max(), ());
uint32_t const numLangs = static_cast<uint32_t>(trieRoot.m_edge.size());
@ -336,14 +342,14 @@ bool MatchCategoriesInTrie(SearchQueryParams const & params, trie::DefaultIterat
if (edge[0] == search::kCategoriesLang)
{
auto const catRoot = trieRoot.GoToEdge(langIx);
MatchTokensInTrie(params.m_tokens, TrieRootPrefix(*catRoot, edge), holder);
MatchTokensInTrie(params.m_tokens, TrieRootPrefix<TValue>(*catRoot, edge), holder);
// Last token's prefix is used as a complete token here, to
// limit the number of features in the last bucket of a
// holder. Probably, this is a false optimization.
holder.Resize(params.m_tokens.size() + 1);
holder.SwitchTo(params.m_tokens.size());
MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix(*catRoot, edge), holder);
MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix<TValue>(*catRoot, edge), holder);
return true;
}
}
@ -352,9 +358,9 @@ bool MatchCategoriesInTrie(SearchQueryParams const & params, trie::DefaultIterat
// Calls toDo with trie root prefix and language code on each language
// allowed by params.
template <typename ToDo>
void ForEachLangPrefix(SearchQueryParams const & params, trie::DefaultIterator const & trieRoot,
ToDo && toDo)
template <typename TValue, typename ToDo>
void ForEachLangPrefix(SearchQueryParams const & params,
trie::Iterator<ValueList<TValue>> const & trieRoot, ToDo && toDo)
{
ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits<uint32_t>::max(), ());
uint32_t const numLangs = static_cast<uint32_t>(trieRoot.m_edge.size());
@ -366,7 +372,7 @@ void ForEachLangPrefix(SearchQueryParams const & params, trie::DefaultIterator c
if (edge[0] < search::kCategoriesLang && params.IsLangExist(lang))
{
auto const langRoot = trieRoot.GoToEdge(langIx);
TrieRootPrefix langPrefix(*langRoot, edge);
TrieRootPrefix<TValue> langPrefix(*langRoot, edge);
toDo(langPrefix, lang);
}
}
@ -374,21 +380,23 @@ void ForEachLangPrefix(SearchQueryParams const & params, trie::DefaultIterator c
// Calls toDo for each feature whose description contains *ALL* tokens from a search query.
// Each feature will be passed to toDo only once.
template <typename TFilter, typename ToDo>
void MatchFeaturesInTrie(SearchQueryParams const & params, trie::DefaultIterator const & trieRoot,
TFilter const & filter, ToDo && toDo)
template <typename TValue, typename TFilter, typename ToDo>
void MatchFeaturesInTrie(SearchQueryParams const & params,
trie::Iterator<ValueList<TValue>> const & trieRoot, TFilter const & filter,
ToDo && toDo)
{
using TValue = trie::DefaultIterator::TValue;
using TIterator = trie::Iterator<ValueList<TValue>>;
TrieValuesHolder<TFilter, TValue> categoriesHolder(filter);
bool const categoriesMatched = MatchCategoriesInTrie(params, trieRoot, categoriesHolder);
impl::OffsetIntersecter<TFilter, TValue> intersecter(filter);
for (size_t i = 0; i < params.m_tokens.size(); ++i)
{
ForEachLangPrefix(params, trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang)
{
MatchTokenInTrie(params.m_tokens[i], langRoot, intersecter);
});
ForEachLangPrefix(params, trieRoot, [&](TrieRootPrefix<TValue> & langRoot, int8_t lang)
{
MatchTokenInTrie(params.m_tokens[i], langRoot, intersecter);
});
if (categoriesMatched)
categoriesHolder.ForEachValue(i, intersecter);
intersecter.NextStep();
@ -396,10 +404,10 @@ void MatchFeaturesInTrie(SearchQueryParams const & params, trie::DefaultIterator
if (!params.m_prefixTokens.empty())
{
ForEachLangPrefix(params, trieRoot, [&](TrieRootPrefix & langRoot, int8_t /* lang */)
{
MatchTokenPrefixInTrie(params.m_prefixTokens, langRoot, intersecter);
});
ForEachLangPrefix(params, trieRoot, [&](TrieRootPrefix<TValue> & langRoot, int8_t /* lang */)
{
MatchTokenPrefixInTrie(params.m_prefixTokens, langRoot, intersecter);
});
if (categoriesMatched)
categoriesHolder.ForEachValue(params.m_tokens.size(), intersecter);
intersecter.NextStep();

View file

@ -9,6 +9,8 @@
#include "indexer/scales.hpp"
#include "indexer/search_trie.hpp"
#include "platform/mwm_version.hpp"
#include "coding/compressed_bit_vector.hpp"
#include "coding/reader_wrapper.hpp"
@ -63,32 +65,67 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(MwmSet::MwmHandl
my::Cancellable const & cancellable,
SearchQueryParams const & params)
{
using TValue = FeatureIndexValue;
auto * value = handle.GetValue<MwmValue>();
ASSERT(value, ());
serial::CodingParams codingParams(trie::GetCodingParams(value->GetHeader().GetDefCodingParams()));
ModelReaderPtr searchReader = value->m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<TValue>>(
SubReaderWrapper<Reader>(searchReader.GetPtr()), SingleValueSerializer<TValue>(codingParams));
auto emptyFilter = [](uint32_t /* featureId */)
{
return true;
};
// TODO (@y, @m): remove this code as soon as the search index has
// native support for bit vectors.
vector<uint64_t> features;
auto collector = [&](TValue const & value)
version::MwmVersion version;
if (!version::ReadVersion(value->m_cont, version))
{
if (cancellable.IsCancelled())
throw CancelException();
features.push_back(value.m_featureId);
};
LOG(LERROR, ("Unreadable mwm version."));
return unique_ptr<coding::CompressedBitVector>();
}
if (version.format < version::v7)
{
using TValue = FeatureWithRankAndCenter;
MatchFeaturesInTrie(params, *trieRoot, emptyFilter, collector);
return SortFeaturesAndBuildCBV(move(features));
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<TValue>>(
SubReaderWrapper<Reader>(searchReader.GetPtr()),
SingleValueSerializer<TValue>(codingParams));
vector<uint64_t> features;
auto collector = [&](TValue const & value)
{
if (cancellable.IsCancelled())
throw CancelException();
features.push_back(value.m_featureId);
};
MatchFeaturesInTrie(params, *trieRoot, emptyFilter, collector);
return SortFeaturesAndBuildCBV(move(features));
}
else if (version.format == version::v7)
{
using TValue = FeatureIndexValue;
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<TValue>>(
SubReaderWrapper<Reader>(searchReader.GetPtr()),
SingleValueSerializer<TValue>(codingParams));
// TODO (@y, @m): remove this code as soon as the search index has
// native support for bit vectors.
vector<uint64_t> features;
auto collector = [&](TValue const & value)
{
if (cancellable.IsCancelled())
throw CancelException();
features.push_back(value.m_featureId);
};
MatchFeaturesInTrie(params, *trieRoot, emptyFilter, collector);
return SortFeaturesAndBuildCBV(move(features));
}
else
{
LOG(LERROR, ("Unsupported mwm version:", version.format));
return unique_ptr<coding::CompressedBitVector>();
}
}
// Retrieves from the geometry index corresponding to handle all

View file

@ -23,8 +23,10 @@
#include "indexer/index.hpp"
#include "indexer/scales.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_index_values.hpp"
#include "indexer/search_string_utils.hpp"
#include "platform/mwm_version.hpp"
#include "platform/preferred_languages.hpp"
#include "coding/compressed_bit_vector.hpp"
@ -1495,7 +1497,11 @@ public:
void SwitchTo(size_t ind) { m_index = ind; }
void operator()(Query::TTrieValue const & v)
void operator()(FeatureWithRankAndCenter const & value) { operator()(value.m_featureId); }
void operator()(FeatureIndexValue const & value) { operator()(value.m_featureId); }
void operator()(uint32_t const featureId)
{
if (m_query.IsCancelled())
throw Query::CancelException();
@ -1503,7 +1509,7 @@ public:
// find locality in current results
for (size_t i = 0; i < 3; ++i)
{
auto it = find_if(m_localities[i].begin(), m_localities[i].end(), EqualID(v.m_featureId));
auto it = find_if(m_localities[i].begin(), m_localities[i].end(), EqualID(featureId));
if (it != m_localities[i].end())
{
it->m_matchedTokens.push_back(m_index);
@ -1513,7 +1519,7 @@ public:
// Load feature.
FeatureType f;
m_vector.GetByIndex(v.m_featureId, f);
m_vector.GetByIndex(featureId, f);
using namespace ftypes;
@ -1528,14 +1534,14 @@ public:
uint8_t rank = 0;
if (m_table.get())
{
ASSERT_LESS(v.m_featureId, m_table->Size(), ());
rank = m_table->Get(v.m_featureId);
ASSERT_LESS(featureId, m_table->Size(), ());
rank = m_table->Get(featureId);
}
else
{
LOG(LWARNING, ("Can't get ranks table for locality search."));
}
m_localities[type].emplace_back(type, v.m_featureId, center, rank);
m_localities[type].emplace_back(type, featureId, center, rank);
Locality & loc = m_localities[type].back();
loc.m_radius = GetRadiusByPopulation(GetPopulation(f));
@ -1609,44 +1615,99 @@ void Query::SearchLocality(MwmValue const * pMwm, Locality & res1, Region & res2
auto codingParams = trie::GetCodingParams(pMwm->GetHeader().GetDefCodingParams());
ModelReaderPtr searchReader = pMwm->m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
using TValue = FeatureIndexValue;
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<TValue>>(
SubReaderWrapper<Reader>(searchReader.GetPtr()), SingleValueSerializer<TValue>(codingParams));
ForEachLangPrefix(params, *trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang)
auto versionFormat = pMwm->GetHeader().GetFormat();
if (versionFormat < version::v7)
{
impl::DoFindLocality doFind(*this, pMwm, lang);
MatchTokensInTrie(params.m_tokens, langRoot, doFind);
using TValue = FeatureWithRankAndCenter;
// Last token's prefix is used as a complete token here, to limit number of results.
doFind.Resize(params.m_tokens.size() + 1);
doFind.SwitchTo(params.m_tokens.size());
MatchTokenInTrie(params.m_prefixTokens, langRoot, doFind);
doFind.SortLocalities();
ModelReaderPtr searchReader = pMwm->m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
// Get regions from STATE and COUNTRY localities
vector<Region> regions;
doFind.GetRegions(regions);
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<TValue>>(
SubReaderWrapper<Reader>(searchReader.GetPtr()),
SingleValueSerializer<TValue>(codingParams));
// Get best CITY locality.
Locality loc;
doFind.GetBestCity(loc, regions);
if (res1 < loc)
ForEachLangPrefix(params, *trieRoot, [&](TrieRootPrefix<TValue> & langRoot, int8_t lang)
{
impl::DoFindLocality doFind(*this, pMwm, lang);
MatchTokensInTrie(params.m_tokens, langRoot, doFind);
// Last token's prefix is used as a complete token here, to limit number of
// results.
doFind.Resize(params.m_tokens.size() + 1);
doFind.SwitchTo(params.m_tokens.size());
MatchTokenInTrie(params.m_prefixTokens, langRoot, doFind);
doFind.SortLocalities();
// Get regions from STATE and COUNTRY localities
vector<Region> regions;
doFind.GetRegions(regions);
// Get best CITY locality.
Locality loc;
doFind.GetBestCity(loc, regions);
if (res1 < loc)
{
LOG(LDEBUG, ("Better location ", loc, " for language ", lang));
res1.Swap(loc);
}
// Get best region.
if (!regions.empty())
{
sort(regions.begin(), regions.end());
if (res2 < regions.back())
res2.Swap(regions.back());
}
});
}
else if (versionFormat == version::v7)
{
using TValue = FeatureIndexValue;
ModelReaderPtr searchReader = pMwm->m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<TValue>>(
SubReaderWrapper<Reader>(searchReader.GetPtr()),
SingleValueSerializer<TValue>(codingParams));
ForEachLangPrefix(params, *trieRoot, [&](TrieRootPrefix<TValue> & langRoot, int8_t lang)
{
LOG(LDEBUG, ("Better location ", loc, " for language ", lang));
res1.Swap(loc);
}
impl::DoFindLocality doFind(*this, pMwm, lang);
MatchTokensInTrie(params.m_tokens, langRoot, doFind);
// Get best region.
if (!regions.empty())
{
sort(regions.begin(), regions.end());
if (res2 < regions.back())
// Last token's prefix is used as a complete token here, to limit number of
// results.
doFind.Resize(params.m_tokens.size() + 1);
doFind.SwitchTo(params.m_tokens.size());
MatchTokenInTrie(params.m_prefixTokens, langRoot, doFind);
doFind.SortLocalities();
// Get regions from STATE and COUNTRY localities
vector<Region> regions;
doFind.GetRegions(regions);
// Get best CITY locality.
Locality loc;
doFind.GetBestCity(loc, regions);
if (res1 < loc)
{
LOG(LDEBUG, ("Better location", loc, " for language", lang));
res1.Swap(loc);
}
// Get best region.
if (!regions.empty())
{
sort(regions.begin(), regions.end());
if (res2 < regions.back())
res2.Swap(regions.back());
}
});
}
});
}
else
{
LOG(LERROR, ("Unsupported mwm version:", versionFormat));
}
}
void Query::SearchFeatures()

View file

@ -110,7 +110,6 @@ public:
/// @name This stuff is public for implementation classes in search_query.cpp
/// Do not use it in client code.
//@{
using TTrieValue = FeatureIndexValue;
void InitParams(bool localitySearch, SearchQueryParams & params);