forked from organicmaps/organicmaps
Switched the search index to use compressed bit vectors.
This commit is contained in:
parent
7e2abfba0b
commit
2ef3f59f92
14 changed files with 116 additions and 48 deletions
|
@ -277,7 +277,7 @@ UNIT_TEST(TrieBuilder_Build)
|
|||
trie::ReadTrie<MemReader, ValueList<uint32_t>>(memReader, serial::CodingParams());
|
||||
vector<KeyValuePair> res;
|
||||
KeyValuePairBackInserter f;
|
||||
trie::ForEachRef(*root, f, vector<trie::TrieChar>());
|
||||
trie::ForEachRefWithValues(*root, f, vector<trie::TrieChar>());
|
||||
sort(f.m_v.begin(), f.m_v.end());
|
||||
TEST_EQUAL(v, f.m_v, ());
|
||||
}
|
||||
|
|
|
@ -312,6 +312,27 @@ unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitGroups(
|
|||
return make_unique<SparseCBV>(setBits);
|
||||
}
|
||||
|
||||
// static
|
||||
// Copies |cbv| by dispatching on cbv.GetStorageStrategy():
//   - Dense:  rebuilds the vector from a copy of its bit groups (FromBitGroups).
//   - Sparse: rebuilds the vector from its stored positions (FromBitPositions).
// Returns an empty unique_ptr when the strategy matches neither case.
unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromCBV(CompressedBitVector const & cbv)
|
||||
{
|
||||
auto strat = cbv.GetStorageStrategy();
|
||||
switch (strat)
|
||||
{
|
||||
case CompressedBitVector::StorageStrategy::Dense:
|
||||
{
|
||||
// Unchecked downcast: relies on the strategy tag matching the dynamic type of |cbv|.
DenseCBV const & dense = static_cast<DenseCBV const &>(cbv);
|
||||
// Local copy of the groups, because FromBitGroups takes its argument by rvalue reference.
auto bitGroups = dense.m_bitGroups;
|
||||
return CompressedBitVectorBuilder::FromBitGroups(move(bitGroups));
|
||||
}
|
||||
case CompressedBitVector::StorageStrategy::Sparse:
|
||||
{
|
||||
// Unchecked downcast: relies on the strategy tag matching the dynamic type of |cbv|.
SparseCBV const & sparse = static_cast<SparseCBV const &>(cbv);
|
||||
return CompressedBitVectorBuilder::FromBitPositions(sparse.m_positions);
|
||||
}
|
||||
}
|
||||
// Reached only when |strat| is neither Dense nor Sparse.
return unique_ptr<CompressedBitVector>();
|
||||
}
|
||||
|
||||
string DebugPrint(CompressedBitVector::StorageStrategy strat)
|
||||
{
|
||||
switch (strat)
|
||||
|
|
|
@ -2,8 +2,11 @@
|
|||
#include "coding/reader.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
#include "std/algorithm.hpp"
|
||||
#include "std/unique_ptr.hpp"
|
||||
#include "std/utility.hpp"
|
||||
#include "std/vector.hpp"
|
||||
|
||||
namespace coding
|
||||
|
@ -150,6 +153,9 @@ public:
|
|||
// by concatenating the elements of bitGroups.
|
||||
static unique_ptr<CompressedBitVector> FromBitGroups(vector<uint64_t> && bitGroups);
|
||||
|
||||
// Copies a CBV.
|
||||
static unique_ptr<CompressedBitVector> FromCBV(CompressedBitVector const & cbv);
|
||||
|
||||
// Reads a bit vector from reader which must contain a valid
|
||||
// bit vector representation (see CompressedBitVector::Serialize for the format).
|
||||
template <typename TReader>
|
||||
|
|
|
@ -24,6 +24,8 @@ class Iterator
|
|||
//dbg::ObjectTracker m_tracker;
|
||||
|
||||
public:
|
||||
using TValue = typename TValueList::TValue;
|
||||
|
||||
struct Edge
|
||||
{
|
||||
typedef buffer_vector<TrieChar, 8> EdgeStrT;
|
||||
|
@ -70,9 +72,9 @@ struct FixedSizeValueReader
|
|||
template <typename TValueList, typename TF, typename TString>
|
||||
void ForEachRef(Iterator<TValueList> const & iter, TF && f, TString const & s)
|
||||
{
|
||||
iter.m_valueList.ForEach([&f, &s](typename TValueList::TValue value)
|
||||
iter.m_valueList.ForEach([&f, &s](typename TValueList::TValue const & /* value */)
|
||||
{
|
||||
f(s, value);
|
||||
f(s);
|
||||
});
|
||||
for (size_t i = 0; i < iter.m_edge.size(); ++i)
|
||||
{
|
||||
|
@ -83,4 +85,19 @@ void ForEachRef(Iterator<TValueList> const & iter, TF && f, TString const & s)
|
|||
}
|
||||
}
|
||||
|
||||
// Recursively walks the trie starting at |iter|.
// For the current node, calls f(s, value) for every value in iter.m_valueList;
// then, for each outgoing edge in index order, appends the edge string to a
// copy of |s| and recurses into the node returned by iter.GoToEdge(i).
// |s| is the key accumulated on the path to |iter|.
template <typename TValueList, typename TF, typename TString>
|
||||
void ForEachRefWithValues(Iterator<TValueList> const & iter, TF && f, TString const & s)
|
||||
{
|
||||
// Values stored at this node are reported before any child is visited.
iter.m_valueList.ForEach([&f, &s](typename TValueList::TValue const & value)
|
||||
{
|
||||
f(s, value);
|
||||
});
|
||||
for (size_t i = 0; i < iter.m_edge.size(); ++i)
|
||||
{
|
||||
// Key for the child: the current key followed by the i-th edge's characters.
TString s1(s);
|
||||
s1.insert(s1.end(), iter.m_edge[i].m_str.begin(), iter.m_edge[i].m_str.end());
|
||||
auto it = iter.GoToEdge(i);
|
||||
ForEachRefWithValues(*it, f, s1);
|
||||
}
|
||||
}
|
||||
} // namespace trie
|
||||
|
|
|
@ -17,13 +17,13 @@
|
|||
// pre-order alphabetically reversed (parent, last child, first child).
|
||||
|
||||
// Leaf node format:
|
||||
// [value] ... [value]
|
||||
// [valueList]
|
||||
|
||||
// Internal node format:
|
||||
// [1: header]: [2: min(valueCount, 3)] [6: min(childCount, 63)]
|
||||
// [vu valueCount]: if valueCount in header == 3
|
||||
// [vu childCount]: if childCount in header == 63
|
||||
// [value] ... [value]
|
||||
// [valueList]
|
||||
// [childInfo] ... [childInfo]
|
||||
|
||||
// Child info format:
|
||||
|
|
|
@ -94,7 +94,7 @@ private:
|
|||
if (childCount == 63)
|
||||
childCount = ReadVarUint<uint32_t>(src);
|
||||
|
||||
// [value] ... [value]
|
||||
// [valueList]
|
||||
m_valueList.Deserialize(src, valueCount);
|
||||
|
||||
// [childInfo] ... [childInfo]
|
||||
|
|
|
@ -164,7 +164,7 @@ namespace feature
|
|||
|
||||
SearchTokensCollector() : m_currentS(), m_currentCount(0) {}
|
||||
|
||||
void operator()(strings::UniString const & s, FeatureWithRankAndCenter const &)
|
||||
void operator()(strings::UniString const & s)
|
||||
{
|
||||
if (m_currentS == s)
|
||||
{
|
||||
|
@ -200,7 +200,7 @@ namespace feature
|
|||
feature::DataHeader header(container);
|
||||
serial::CodingParams codingParams(trie::GetCodingParams(header.GetDefCodingParams()));
|
||||
|
||||
auto const pTrieRoot = trie::ReadTrie<ModelReaderPtr, ValueList<FeatureWithRankAndCenter>>(
|
||||
auto const pTrieRoot = trie::ReadTrie<ModelReaderPtr, ValueList<FeatureIndexValue>>(
|
||||
container.GetReader(SEARCH_INDEX_FILE_TAG), codingParams);
|
||||
|
||||
SearchTokensCollector f;
|
||||
|
|
|
@ -177,11 +177,15 @@ struct ValueBuilder<FeatureWithRankAndCenter>
|
|||
// Specialization of ValueBuilder for FeatureIndexValue.
// MakeValue ignores the feature and its types and only stores |index| in |value|.
// NOTE(review): this span is a rendered diff without +/- markers; the m_value and
// m_featureId assignments below look like the pre- and post-change versions of
// the same line — confirm which one is live in the real file.
template <>
|
||||
struct ValueBuilder<FeatureIndexValue>
|
||||
{
|
||||
// Keeps a copy of the coding params; MakeValue below does not read it.
ValueBuilder(serial::CodingParams const & cp) : m_cp(cp) {}
|
||||

||||
// Writes |index| into |value|; the first two parameters are unused.
void MakeValue(FeatureType const & /* f */, feature::TypesHolder const & /* types */,
|
||||
uint32_t index, FeatureIndexValue & value) const
|
||||
{
|
||||
value.m_value = index;
|
||||
value.m_featureId = index;
|
||||
}
|
||||

||||
serial::CodingParams m_cp;
|
||||
};
|
||||
|
||||
template <typename TStringsFile>
|
||||
|
@ -258,22 +262,23 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
// Iterates over every feature in |container| and hands each one to a
// FeatureInserter constructed with the synonyms holder, |stringsFile|,
// |categoriesHolder|, the header's scale range and a ValueBuilder.
// NOTE(review): this span is a rendered diff without +/- markers; each
// FeatureWithRankAndCenter line is immediately followed by its TValue
// counterpart, which looks like the old/new pair of the same line — confirm
// against the real file.
template <typename TValue>
|
||||
void AddFeatureNameIndexPairs(FilesContainerR const & container,
|
||||
CategoriesHolder & categoriesHolder,
|
||||
StringsFile<FeatureWithRankAndCenter> & stringsFile)
|
||||
StringsFile<TValue> & stringsFile)
|
||||
{
|
||||
FeaturesVectorTest features(container);
|
||||
feature::DataHeader const & header = features.GetHeader();
|
||||

||||
// Coding params are derived from the header defaults via trie::GetCodingParams.
serial::CodingParams codingParams(trie::GetCodingParams(header.GetDefCodingParams()));
|
||||

||||
ValueBuilder<FeatureWithRankAndCenter> valueBuilder(codingParams);
|
||||
ValueBuilder<TValue> valueBuilder(codingParams);
|
||||

||||
// Synonyms are loaded only for world-type data; otherwise the pointer stays
// null and synonyms.get() below passes nullptr to the inserter.
unique_ptr<SynonymsHolder> synonyms;
|
||||
if (header.GetType() == feature::DataHeader::world)
|
||||
synonyms.reset(new SynonymsHolder(GetPlatform().WritablePathForFile(SYNONYMS_FILE)));
|
||||

||||
features.GetVector().ForEach(FeatureInserter<StringsFile<FeatureWithRankAndCenter>>(
|
||||
features.GetVector().ForEach(FeatureInserter<StringsFile<TValue>>(
|
||||
synonyms.get(), stringsFile, categoriesHolder, header.GetScaleRange(), valueBuilder));
|
||||
}
|
||||
} // namespace
|
||||
|
@ -326,12 +331,14 @@ bool BuildSearchIndexFromDatFile(string const & datFile, bool forceRebuild)
|
|||
void BuildSearchIndex(FilesContainerR & container, Writer & indexWriter,
|
||||
string const & stringsFilePath)
|
||||
{
|
||||
using TValue = FeatureIndexValue;
|
||||
|
||||
Platform & platform = GetPlatform();
|
||||
|
||||
LOG(LINFO, ("Start building search index for", container.GetFileName()));
|
||||
my::Timer timer;
|
||||
|
||||
StringsFile<FeatureWithRankAndCenter> stringsFile(stringsFilePath);
|
||||
StringsFile<TValue> stringsFile(stringsFilePath);
|
||||
|
||||
CategoriesHolder categoriesHolder(platform.GetReader(SEARCH_CATEGORIES_FILE_NAME));
|
||||
|
||||
|
@ -341,9 +348,8 @@ void BuildSearchIndex(FilesContainerR & container, Writer & indexWriter,
|
|||
LOG(LINFO, ("End sorting strings:", timer.ElapsedSeconds()));
|
||||
|
||||
stringsFile.OpenForRead();
|
||||
trie::Build<Writer, typename StringsFile<FeatureWithRankAndCenter>::IteratorT,
|
||||
ValueList<FeatureWithRankAndCenter>>(indexWriter, stringsFile.Begin(),
|
||||
stringsFile.End());
|
||||
trie::Build<Writer, typename StringsFile<TValue>::IteratorT, ValueList<TValue>>(
|
||||
indexWriter, stringsFile.Begin(), stringsFile.End());
|
||||
|
||||
LOG(LINFO, ("End building search index, elapsed seconds:", timer.ElapsedSeconds()));
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@ static const uint8_t kPointCodingBits = 20;
|
|||
|
||||
namespace trie
|
||||
{
|
||||
using DefaultIterator = trie::Iterator<ValueList<FeatureWithRankAndCenter>>;
|
||||
using DefaultIterator = trie::Iterator<ValueList<FeatureIndexValue>>;
|
||||
|
||||
inline serial::CodingParams GetCodingParams(serial::CodingParams const & orig)
|
||||
{
|
||||
|
|
|
@ -23,31 +23,42 @@
|
|||
/// A wrapper around feature index.
|
||||
// Wraps a single 64-bit feature id. The m_featureId-based members provide:
// construction from an id, per-value Serialize/Deserialize, raw byte access
// through data()/size(), ordering and equality by id, and Swap.
// NOTE(review): this struct is shown as a rendered diff without +/- markers, so
// m_value-based members (Write/Read/swap, m_value) appear next to their
// m_featureId-based counterparts (Serialize/Deserialize/Swap, m_featureId).
// The m_value lines look like the pre-change versions — confirm against the
// real header before relying on either set.
struct FeatureIndexValue
|
||||
{
|
||||
FeatureIndexValue() : m_value(0) {}
|
||||
FeatureIndexValue() : m_featureId(0) {}
|
||||

||||
// Not marked explicit, so a uint64_t converts implicitly to FeatureIndexValue.
FeatureIndexValue(uint64_t featureId) : m_featureId(featureId) {}
|
||||

||||
// The serialization and deserialization are needed for StringsFile.
|
||||
// Use ValueList for group serialization in CBVs.
|
||||
template <typename TWriter>
|
||||
void Write(TWriter & writer) const
|
||||
void Serialize(TWriter & writer) const
|
||||
{
|
||||
WriteToSink(writer, m_value);
|
||||
WriteToSink(writer, m_featureId);
|
||||
}
|
||||

||||
template <typename TReader>
|
||||
void Read(TReader & reader)
|
||||
void Deserialize(TReader & reader)
|
||||
{
|
||||
m_value = ReadPrimitiveFromSource<uint64_t>(reader);
|
||||
// Wraps the reader in a ReaderSource and delegates to DeserializeFromSource.
ReaderSource<TReader> src(reader);
|
||||
DeserializeFromSource(src);
|
||||
}
|
||||

||||
inline void const * data() const { return &m_value; }
|
||||
// Reads one uint64_t from |src| into the id.
template <typename TSource>
|
||||
void DeserializeFromSource(TSource & src)
|
||||
{
|
||||
m_featureId = ReadPrimitiveFromSource<uint64_t>(src);
|
||||
}
|
||||

||||
inline size_t size() const { return sizeof(m_value); }
|
||||
// data()/size() expose the id as a raw byte range.
inline void const * data() const { return &m_featureId; }
|
||||

||||
bool operator<(FeatureIndexValue const & value) const { return m_value < value.m_value; }
|
||||
inline size_t size() const { return sizeof(m_featureId); }
|
||||

||||
bool operator==(FeatureIndexValue const & value) const { return m_value == value.m_value; }
|
||||
bool operator<(FeatureIndexValue const & o) const { return m_featureId < o.m_featureId; }
|
||||

||||
void swap(FeatureIndexValue & value) { ::swap(m_value, value.m_value); }
|
||||
bool operator==(FeatureIndexValue const & o) const { return m_featureId == o.m_featureId; }
|
||||

||||
uint64_t m_value;
|
||||
void Swap(FeatureIndexValue & o) { ::swap(m_featureId, o.m_featureId); }
|
||||

||||
uint64_t m_featureId;
|
||||
};
|
||||
|
||||
struct FeatureWithRankAndCenter
|
||||
|
@ -117,12 +128,20 @@ public:
|
|||
|
||||
// Default constructor: m_cbv starts out as an empty (null) unique_ptr.
ValueList() : m_cbv(unique_ptr<coding::CompressedBitVector>()) {}
|
||||
|
||||
// Copy constructor: copies the coding params and, when |o| owns a bit vector,
// builds a separate copy of it through CompressedBitVectorBuilder::FromCBV.
// When |o| has no bit vector, m_cbv is set to an empty unique_ptr.
ValueList(ValueList<FeatureIndexValue> const & o) : m_codingParams(o.m_codingParams)
|
||||
{
|
||||
if (o.m_cbv)
|
||||
m_cbv = coding::CompressedBitVectorBuilder::FromCBV(*o.m_cbv);
|
||||
else
|
||||
m_cbv = unique_ptr<coding::CompressedBitVector>();
|
||||
}
|
||||
|
||||
// Replaces m_cbv with a bit vector built by FromBitPositions from the ids
// stored in |values| (one position per element, in the order given).
// NOTE(review): this span is a rendered diff without +/- markers; the
// offsets/m_value statements and the ids/m_featureId statements look like the
// old and new versions of the same body — confirm which one is live.
void Init(vector<FeatureIndexValue> const & values)
|
||||
{
|
||||
vector<uint64_t> offsets(values.size());
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
offsets[i] = values[i].m_value;
|
||||
m_cbv = coding::CompressedBitVectorBuilder::FromBitPositions(offsets);
|
||||
vector<uint64_t> ids(values.size());
|
||||
for (size_t i = 0; i < ids.size(); ++i)
|
||||
ids[i] = values[i].m_featureId;
|
||||
m_cbv = coding::CompressedBitVectorBuilder::FromBitPositions(ids);
|
||||
}
|
||||
|
||||
// This method returns the number of values in the current instance of
|
||||
|
@ -161,7 +180,10 @@ public:
|
|||
// Enumerates m_cbv through CompressedBitVectorEnumerator::ForEach and invokes
// |f| for each reported bit position.
// NOTE(review): this span is a rendered diff without +/- markers; the direct
// forward<TF>(f) call and the lambda that wraps each position in TValue look
// like the old and new versions of the same statement — confirm which is live.
// NOTE(review): m_cbv is dereferenced without a null check, while the
// constructors shown in this file can leave it empty — confirm callers only
// reach this after m_cbv has been set.
template <typename TF>
|
||||
void ForEach(TF && f) const
|
||||
{
|
||||
coding::CompressedBitVectorEnumerator::ForEach(*m_cbv, forward<TF>(f));
|
||||
coding::CompressedBitVectorEnumerator::ForEach(*m_cbv, [&](uint64_t const bitPosition)
|
||||
{
|
||||
// The callback receives a TValue constructed from the bit position.
f(TValue(bitPosition));
|
||||
});
|
||||
}
|
||||
|
||||
// Stores a copy of |codingParams| in m_codingParams.
void SetCodingParams(serial::CodingParams const & codingParams) { m_codingParams = codingParams; }
|
||||
|
|
|
@ -113,7 +113,6 @@ void FullMatchInTrie(trie::DefaultIterator const & trieRoot, strings::UniChar co
|
|||
|
||||
ASSERT_EQUAL ( symbolsMatched, s.size(), () );
|
||||
|
||||
LOG(LINFO, ("foreach`ing", it->m_valueList.Size()));
|
||||
it->m_valueList.ForEach(f);
|
||||
}
|
||||
|
||||
|
@ -152,11 +151,9 @@ void PrefixMatchInTrie(trie::DefaultIterator const & trieRoot, strings::UniChar
|
|||
}
|
||||
}
|
||||
|
||||
template <class TFilter>
|
||||
template <typename TFilter, typename TValue>
|
||||
class OffsetIntersecter
|
||||
{
|
||||
using TValue = FeatureWithRankAndCenter;
|
||||
|
||||
struct HashFn
|
||||
{
|
||||
size_t operator()(TValue const & v) const { return v.m_featureId; }
|
||||
|
@ -232,7 +229,7 @@ struct TrieRootPrefix
|
|||
}
|
||||
};
|
||||
|
||||
template <class TFilter>
|
||||
template <typename TFilter, typename TValue>
|
||||
class TrieValuesHolder
|
||||
{
|
||||
public:
|
||||
|
@ -246,7 +243,7 @@ public:
|
|||
m_index = index;
|
||||
}
|
||||
|
||||
void operator()(Query::TTrieValue const & v)
|
||||
void operator()(TValue const & v)
|
||||
{
|
||||
if (m_filter(v.m_featureId))
|
||||
m_holder[m_index].push_back(v);
|
||||
|
@ -261,7 +258,7 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
vector<vector<Query::TTrieValue>> m_holder;
|
||||
vector<vector<TValue>> m_holder;
|
||||
size_t m_index;
|
||||
TFilter const & m_filter;
|
||||
};
|
||||
|
@ -380,10 +377,11 @@ template <typename TFilter, typename ToDo>
|
|||
void MatchFeaturesInTrie(SearchQueryParams const & params, trie::DefaultIterator const & trieRoot,
|
||||
TFilter const & filter, ToDo && toDo)
|
||||
{
|
||||
using TValue = trie::DefaultIterator::TValue;
|
||||
TrieValuesHolder<TFilter> categoriesHolder(filter);
|
||||
bool const categoriesMatched = MatchCategoriesInTrie(params, trieRoot, categoriesHolder);
|
||||
|
||||
impl::OffsetIntersecter<TFilter> intersecter(filter);
|
||||
impl::OffsetIntersecter<TFilter, TValue> intersecter(filter);
|
||||
for (size_t i = 0; i < params.m_tokens.size(); ++i)
|
||||
{
|
||||
ForEachLangPrefix(params, trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang)
|
||||
|
|
|
@ -67,9 +67,8 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(MwmSet::MwmHandl
|
|||
ASSERT(value, ());
|
||||
serial::CodingParams codingParams(trie::GetCodingParams(value->GetHeader().GetDefCodingParams()));
|
||||
ModelReaderPtr searchReader = value->m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
|
||||
auto const trieRoot =
|
||||
trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<FeatureWithRankAndCenter>>(
|
||||
SubReaderWrapper<Reader>(searchReader.GetPtr()), codingParams);
|
||||
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<FeatureIndexValue>>(
|
||||
SubReaderWrapper<Reader>(searchReader.GetPtr()), codingParams);
|
||||
|
||||
auto emptyFilter = [](uint32_t /* featureId */)
|
||||
{
|
||||
|
|
|
@ -1611,9 +1611,8 @@ void Query::SearchLocality(MwmValue const * pMwm, Locality & res1, Region & res2
|
|||
|
||||
ModelReaderPtr searchReader = pMwm->m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
|
||||
|
||||
auto const trieRoot =
|
||||
trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<FeatureWithRankAndCenter>>(
|
||||
SubReaderWrapper<Reader>(searchReader.GetPtr()), cp);
|
||||
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<FeatureIndexValue>>(
|
||||
SubReaderWrapper<Reader>(searchReader.GetPtr()), cp);
|
||||
|
||||
ForEachLangPrefix(params, *trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang)
|
||||
{
|
||||
|
|
|
@ -110,7 +110,7 @@ public:
|
|||
/// @name This stuff is public for implementation classes in search_query.cpp
|
||||
/// Do not use it in client code.
|
||||
//@{
|
||||
using TTrieValue = FeatureWithRankAndCenter;
|
||||
using TTrieValue = FeatureIndexValue;
|
||||
|
||||
void InitParams(bool localitySearch, SearchQueryParams & params);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue