Refactored the serialization code.

This commit is contained in:
Maxim Pimenov 2015-10-21 14:48:15 +03:00 committed by Sergey Yershov
parent 4e33a1f23c
commit cea61ad807
16 changed files with 246 additions and 226 deletions

View file

@ -6,7 +6,7 @@ TEMPLATE = app
ROOT_DIR = ../..
DEPENDENCIES = coding base indexer minizip tomcrypt succinct
DEPENDENCIES = coding base minizip tomcrypt succinct
include($$ROOT_DIR/common.pri)

View file

@ -6,9 +6,6 @@
#include "coding/trie_reader.hpp"
#include "coding/write_to_sink.hpp"
#include "indexer/coding_params.hpp"
#include "indexer/string_file_values.hpp"
#include "base/logging.hpp"
#include "std/algorithm.hpp"
@ -109,21 +106,44 @@ struct MaxValueCalc
}
};
class CharValueList
// The ValueList and SingleValueSerializer classes are similar to
// those in indexer/string_file_values.hpp, but that file is
// not included so that coding_tests does not depend on indexer.
// Serializer for a single char value.
class SingleValueSerializerChar
{
public:
  // Writes |v| to |writer| by forwarding both to WriteToSink.
  // |v| is taken by const reference because serialization only reads
  // the value; this lets callers pass const objects and temporaries.
  template <typename TWriter>
  void Serialize(TWriter & writer, char const & v) const
  {
    WriteToSink(writer, v);
  }
};
// Serializer for a single uint32_t value.
class SingleValueSerializerUint32
{
public:
  // Writes |v| to |writer| by forwarding both to WriteToSink.
  // |v| is taken by const reference because serialization only reads
  // the value; this lets callers pass const objects and temporaries.
  template <typename TWriter>
  void Serialize(TWriter & writer, uint32_t const & v) const
  {
    WriteToSink(writer, v);
  }
};
class ValueListChar
{
public:
using TValue = char;
void Init(vector<TValue> const &) {}
CharValueList(const string & s) : m_string(s) {}
ValueListChar(const string & s) : m_string(s) {}
size_t Size() const { return m_string.size(); }
bool IsEmpty() const { return m_string.empty(); }
template <typename TSink>
void Serialize(TSink & sink) const
template <typename TSink, typename TSerializer>
void Serialize(TSink & sink, TSerializer const & /* serializer */) const
{
sink.Write(m_string.data(), m_string.size());
}
@ -132,16 +152,13 @@ private:
string m_string;
};
} // namespace
template <>
class ValueList<uint32_t>
class ValueListUint32
{
public:
using TValue = uint32_t;
using TSerializer = SingleValueSerializerUint32;
ValueList() = default;
ValueList(serial::CodingParams const & codingParams) : m_codingParams(codingParams) {}
ValueListUint32() = default;
void Init(vector<TValue> const & values) { m_values = values; }
@ -150,14 +167,14 @@ public:
bool IsEmpty() const { return m_values.empty(); }
template <typename TSink>
void Serialize(TSink & sink) const
void Serialize(TSink & sink, TSerializer const & /* serializer */) const
{
for (auto const & value : m_values)
WriteToSink(sink, value);
}
template <typename TSource>
void Deserialize(TSource & src, uint32_t valueCount)
void Deserialize(TSource & src, uint32_t valueCount, TSerializer const & /* serializer */)
{
m_values.resize(valueCount);
for (size_t i = 0; i < valueCount; ++i)
@ -165,7 +182,7 @@ public:
}
template <typename TSource>
void Deserialize(TSource & src)
void Deserialize(TSource & src, TSerializer const & /* serializer */)
{
m_values.clear();
while (src.Size() > 0)
@ -179,12 +196,10 @@ public:
f(value);
}
void SetCodingParams(serial::CodingParams const & codingParams) { m_codingParams = codingParams; }
private:
vector<TValue> m_values;
serial::CodingParams m_codingParams;
};
} // namespace
#define ZENC bits::ZigZagEncode
#define MKSC(x) static_cast<signed char>(x)
@ -200,8 +215,9 @@ UNIT_TEST(TrieBuilder_WriteNode_Smoke)
"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"),
ChildNodeInfo(true, 5, "a")};
CharValueList valueList("123");
trie::WriteNode(sink, 0, valueList, &children[0], &children[0] + ARRAY_SIZE(children));
ValueListChar valueList("123");
trie::WriteNode(sink, SingleValueSerializerChar(), 0, valueList, &children[0],
&children[0] + ARRAY_SIZE(children));
uint8_t const expected [] =
{
BOOST_BINARY(11000101), // Header: [0b11] [0b000101]
@ -266,13 +282,13 @@ UNIT_TEST(TrieBuilder_Build)
vector<uint8_t> buf;
PushBackByteSink<vector<uint8_t>> sink(buf);
SingleValueSerializerUint32 serializer;
trie::Build<PushBackByteSink<vector<uint8_t>>, typename vector<KeyValuePair>::iterator,
ValueList<uint32_t>>(sink, v.begin(), v.end());
ValueListUint32>(sink, serializer, v.begin(), v.end());
reverse(buf.begin(), buf.end());
MemReader memReader = MemReader(&buf[0], buf.size());
auto const root =
trie::ReadTrie<MemReader, ValueList<uint32_t>>(memReader, serial::CodingParams());
auto const root = trie::ReadTrie<MemReader, ValueListUint32>(memReader, serializer);
vector<KeyValuePair> res;
KeyValuePairBackInserter f;
trie::ForEachRefWithValues(*root, f, vector<trie::TrieChar>());

View file

@ -319,17 +319,17 @@ unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromCBV(CompressedBi
auto strat = cbv.GetStorageStrategy();
switch (strat)
{
case CompressedBitVector::StorageStrategy::Dense:
{
DenseCBV const & dense = static_cast<DenseCBV const &>(cbv);
auto bitGroups = dense.m_bitGroups;
return CompressedBitVectorBuilder::FromBitGroups(move(bitGroups));
}
case CompressedBitVector::StorageStrategy::Sparse:
{
SparseCBV const & sparse = static_cast<SparseCBV const &>(cbv);
return CompressedBitVectorBuilder::FromBitPositions(sparse.m_positions);
}
case CompressedBitVector::StorageStrategy::Dense:
{
DenseCBV const & dense = static_cast<DenseCBV const &>(cbv);
auto bitGroups = dense.m_bitGroups;
return CompressedBitVectorBuilder::FromBitGroups(move(bitGroups));
}
case CompressedBitVector::StorageStrategy::Sparse:
{
SparseCBV const & sparse = static_cast<SparseCBV const &>(cbv);
return CompressedBitVectorBuilder::FromBitPositions(sparse.m_positions);
}
}
return unique_ptr<CompressedBitVector>();
}

View file

@ -68,6 +68,7 @@ string DebugPrint(CompressedBitVector::StorageStrategy strat);
class DenseCBV : public CompressedBitVector
{
public:
friend class CompressedBitVectorBuilder;
static uint64_t const kBlockSize = 64;
DenseCBV() = default;
@ -111,6 +112,7 @@ private:
class SparseCBV : public CompressedBitVector
{
public:
friend class CompressedBitVectorBuilder;
using TIterator = vector<uint64_t>::const_iterator;
SparseCBV(vector<uint64_t> const & setBits);

View file

@ -40,16 +40,16 @@
namespace trie
{
template <typename TSink, typename TChildIter, typename TValueList>
void WriteNode(TSink & sink, TrieChar baseChar, TValueList const & valueList,
TChildIter const begChild, TChildIter const endChild, bool isRoot = false)
template <typename TSink, typename TChildIter, typename TValueList, typename TSerializer>
void WriteNode(TSink & sink, TSerializer const & serializer, TrieChar baseChar,
TValueList const & valueList, TChildIter const begChild, TChildIter const endChild,
bool isRoot = false)
{
uint32_t const valueCount = valueList.Size();
if (begChild == endChild && !isRoot)
{
// Leaf node.
WriteVarUint(sink, valueCount);
valueList.Serialize(sink);
valueList.Serialize(sink, serializer);
return;
}
uint32_t const childCount = endChild - begChild;
@ -59,7 +59,7 @@ void WriteNode(TSink & sink, TrieChar baseChar, TValueList const & valueList,
WriteVarUint(sink, valueCount);
if (childCount >= 63)
WriteVarUint(sink, childCount);
valueList.Serialize(sink);
valueList.Serialize(sink, serializer);
for (TChildIter it = begChild; it != endChild; /*++it*/)
{
uint8_t header = (it->IsLeaf() ? 128 : 0);
@ -156,22 +156,22 @@ struct NodeInfo
}
};
template <typename TSink, typename TValueList>
void WriteNodeReverse(TSink & sink, TrieChar baseChar, NodeInfo<TValueList> & node,
bool isRoot = false)
template <typename TSink, typename TValueList, typename TSerializer>
void WriteNodeReverse(TSink & sink, TSerializer const & serializer, TrieChar baseChar,
NodeInfo<TValueList> & node, bool isRoot = false)
{
using TOutStorage = buffer_vector<uint8_t, 64>;
TOutStorage out;
PushBackByteSink<TOutStorage> outSink(out);
node.FinalizeValueList();
WriteNode(outSink, baseChar, node.m_valueList, node.m_children.rbegin(), node.m_children.rend(),
isRoot);
WriteNode(outSink, serializer, baseChar, node.m_valueList, node.m_children.rbegin(),
node.m_children.rend(), isRoot);
reverse(out.begin(), out.end());
sink.Write(out.data(), out.size());
}
template <typename TSink, class TNodes>
void PopNodes(TSink & sink, TNodes & nodes, int nodesToPop)
template <typename TSink, typename TNodes, typename TSerializer>
void PopNodes(TSink & sink, TSerializer const & serializer, TNodes & nodes, int nodesToPop)
{
using TNodeInfo = typename TNodes::value_type;
ASSERT_GREATER(nodes.size(), nodesToPop, ());
@ -190,7 +190,7 @@ void PopNodes(TSink & sink, TNodes & nodes, int nodesToPop)
}
else
{
WriteNodeReverse(sink, node.m_char, node);
WriteNodeReverse(sink, serializer, node.m_char, node);
prevNode.m_children.emplace_back(
node.m_children.empty(), static_cast<uint32_t>(sink.Pos() - node.m_begPos), node.m_char);
}
@ -214,8 +214,8 @@ void AppendValue(TNodeInfo & node, TValue const & value)
node.m_temporaryValueList.push_back(value);
}
template <typename TSink, typename TIter, typename TValueList>
void Build(TSink & sink, TIter const beg, TIter const end)
template <typename TSink, typename TIter, typename TValueList, typename TSerializer>
void Build(TSink & sink, TSerializer const & serializer, TIter const beg, TIter const end)
{
using TTrieString = buffer_vector<TrieChar, 32>;
using TNodeInfo = NodeInfo<TValueList>;
@ -241,7 +241,7 @@ void Build(TSink & sink, TIter const beg, TIter const end)
while (nCommon < min(key.size(), prevKey.size()) && prevKey[nCommon] == key[nCommon])
++nCommon;
PopNodes(sink, nodes, nodes.size() - nCommon - 1); // Root is also a common node.
PopNodes(sink, serializer, nodes, nodes.size() - nCommon - 1); // Root is also a common node.
uint64_t const pos = sink.Pos();
for (size_t i = nCommon; i < key.size(); ++i)
@ -253,10 +253,10 @@ void Build(TSink & sink, TIter const beg, TIter const end)
}
// Pop all the nodes from the stack.
PopNodes(sink, nodes, nodes.size() - 1);
PopNodes(sink, serializer, nodes, nodes.size() - 1);
// Write the root.
WriteNodeReverse(sink, DEFAULT_CHAR /* baseChar */, nodes.back(), true /* isRoot */);
WriteNodeReverse(sink, serializer, DEFAULT_CHAR /* baseChar */, nodes.back(), true /* isRoot */);
}
} // namespace trie

View file

@ -3,36 +3,32 @@
#include "coding/reader.hpp"
#include "coding/varint.hpp"
#include "indexer/coding_params.hpp"
#include "indexer/string_file_values.hpp"
#include "base/assert.hpp"
#include "base/bits.hpp"
#include "base/macros.hpp"
namespace trie
{
template <class TValueList>
template <class TValueList, typename TSerializer>
class LeafIterator0 : public Iterator<TValueList>
{
public:
using TValue = typename TValueList::TValue;
using Iterator<TValueList>::m_valueList;
template <class TReader>
LeafIterator0(TReader const & reader, serial::CodingParams const & codingParams)
LeafIterator0(TReader const & reader, TSerializer const & serializer)
{
ReaderSource<TReader> src(reader);
uint32_t valueCount = ReadVarUint<uint32_t>(src);
m_valueList.SetCodingParams(codingParams);
m_valueList.Deserialize(src, valueCount);
// todo(@mpimenov) There used to be an assert here
// that src is completely exhausted by this time.
if (src.Size() > 0)
m_valueList.Deserialize(src, 1 /* valueCount */, serializer);
ASSERT_EQUAL(src.Size(), 0, ());
}
// trie::Iterator overrides:
unique_ptr<Iterator<TValueList>> Clone() const override
{
return make_unique<LeafIterator0<TValueList>>(*this);
return make_unique<LeafIterator0<TValueList, TSerializer>>(*this);
}
unique_ptr<Iterator<TValueList>> GoToEdge(size_t i) const override
@ -43,24 +39,24 @@ public:
}
};
template <class TReader, class TValueList>
template <typename TReader, typename TValueList, typename TSerializer>
class Iterator0 : public Iterator<TValueList>
{
public:
using TValue = typename TValueList::TValue;
using Iterator<TValueList>::m_valueList;
using Iterator<TValueList>::m_edge;
Iterator0(TReader const & reader, TrieChar baseChar, serial::CodingParams const & codingParams)
: m_reader(reader), m_codingParams(codingParams)
Iterator0(TReader const & reader, TrieChar baseChar, TSerializer const & serializer)
: m_reader(reader), m_serializer(serializer)
{
m_valueList.SetCodingParams(m_codingParams);
ParseNode(baseChar);
}
// trie::Iterator overrides:
unique_ptr<Iterator<TValueList>> Clone() const override
{
return make_unique<Iterator0<TReader, TValueList>>(*this);
return make_unique<Iterator0<TReader, TValueList, TSerializer>>(*this);
}
unique_ptr<Iterator<TValueList>> GoToEdge(size_t i) const override
@ -71,12 +67,12 @@ public:
if (m_edgeInfo[i].m_isLeaf)
{
return make_unique<LeafIterator0<TValueList>>(m_reader.SubReader(offset, size),
m_codingParams);
return make_unique<LeafIterator0<TValueList, TSerializer>>(m_reader.SubReader(offset, size),
m_serializer);
}
return make_unique<Iterator0<TReader, TValueList>>(
m_reader.SubReader(offset, size), this->m_edge[i].m_str.back(), m_codingParams);
return make_unique<Iterator0<TReader, TValueList, TSerializer>>(
m_reader.SubReader(offset, size), this->m_edge[i].m_str.back(), m_serializer);
}
private:
@ -98,7 +94,7 @@ private:
childCount = ReadVarUint<uint32_t>(src);
// [valueList]
m_valueList.Deserialize(src, valueCount);
m_valueList.Deserialize(src, valueCount, m_serializer);
// [childInfo] ... [childInfo]
this->m_edge.resize(childCount);
@ -150,15 +146,14 @@ private:
buffer_vector<EdgeInfo, 9> m_edgeInfo;
TReader m_reader;
serial::CodingParams m_codingParams;
TSerializer m_serializer;
};
// Returns iterator to the root of the trie.
template <class TReader, class TValueList>
unique_ptr<Iterator<TValueList>> ReadTrie(TReader const & reader,
serial::CodingParams const & codingParams)
template <class TReader, class TValueList, class TSerializer>
unique_ptr<Iterator<TValueList>> ReadTrie(TReader const & reader, TSerializer const & serializer)
{
return make_unique<Iterator0<TReader, TValueList>>(reader, DEFAULT_CHAR, codingParams);
return make_unique<Iterator0<TReader, TValueList, TSerializer>>(reader, DEFAULT_CHAR, serializer);
}
} // namespace trie

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -196,12 +196,14 @@ namespace feature
void DumpSearchTokens(string const & fPath)
{
using TValue = FeatureIndexValue;
FilesContainerR container(new FileReader(fPath));
feature::DataHeader header(container);
serial::CodingParams codingParams(trie::GetCodingParams(header.GetDefCodingParams()));
auto const trieRoot = trie::ReadTrie<ModelReaderPtr, ValueList<FeatureIndexValue>>(
container.GetReader(SEARCH_INDEX_FILE_TAG), codingParams);
auto const trieRoot = trie::ReadTrie<ModelReaderPtr, ValueList<TValue>>(
container.GetReader(SEARCH_INDEX_FILE_TAG), SingleValueSerializer<TValue>(codingParams));
SearchTokensCollector f;
trie::ForEachRef(*trieRoot, f, strings::UniString());

View file

@ -157,35 +157,29 @@ struct ValueBuilder;
template <>
struct ValueBuilder<FeatureWithRankAndCenter>
{
ValueBuilder(serial::CodingParams const & cp) : m_cp(cp) {}
ValueBuilder() = default;
void MakeValue(FeatureType const & ft, feature::TypesHolder const & types, uint32_t index,
FeatureWithRankAndCenter & v) const
{
v.SetCodingParams(m_cp);
v.m_featureId = index;
// get BEST geometry rect of feature
v.m_pt = feature::GetCenter(ft);
v.m_rank = feature::GetSearchRank(types, v.m_pt, ft.GetPopulation());
}
serial::CodingParams m_cp;
};
template <>
struct ValueBuilder<FeatureIndexValue>
{
ValueBuilder(serial::CodingParams const & cp) : m_cp(cp) {}
ValueBuilder() = default;
void MakeValue(FeatureType const & /* f */, feature::TypesHolder const & /* types */,
uint32_t index, FeatureIndexValue & value) const
{
value.m_featureId = index;
}
serial::CodingParams m_cp;
};
template <typename TStringsFile>
@ -263,16 +257,13 @@ public:
};
template <typename TValue>
void AddFeatureNameIndexPairs(FilesContainerR const & container,
CategoriesHolder & categoriesHolder,
StringsFile<TValue> & stringsFile)
void AddFeatureNameIndexPairs(FeaturesVectorTest & features, CategoriesHolder & categoriesHolder,
StringsFile<TValue> & stringsFile,
SingleValueSerializer<TValue> const & serializer)
{
FeaturesVectorTest features(container);
feature::DataHeader const & header = features.GetHeader();
serial::CodingParams codingParams(trie::GetCodingParams(header.GetDefCodingParams()));
ValueBuilder<TValue> valueBuilder(codingParams);
ValueBuilder<TValue> valueBuilder;
unique_ptr<SynonymsHolder> synonyms;
if (header.GetType() == feature::DataHeader::world)
@ -338,18 +329,22 @@ void BuildSearchIndex(FilesContainerR & container, Writer & indexWriter,
LOG(LINFO, ("Start building search index for", container.GetFileName()));
my::Timer timer;
StringsFile<TValue> stringsFile(stringsFilePath);
CategoriesHolder categoriesHolder(platform.GetReader(SEARCH_CATEGORIES_FILE_NAME));
AddFeatureNameIndexPairs(container, categoriesHolder, stringsFile);
FeaturesVectorTest features(container);
auto codingParams = trie::GetCodingParams(features.GetHeader().GetDefCodingParams());
SingleValueSerializer<TValue> serializer(codingParams);
StringsFile<TValue> stringsFile(stringsFilePath, serializer);
AddFeatureNameIndexPairs(features, categoriesHolder, stringsFile, serializer);
stringsFile.EndAdding();
LOG(LINFO, ("End sorting strings:", timer.ElapsedSeconds()));
stringsFile.OpenForRead();
trie::Build<Writer, typename StringsFile<TValue>::IteratorT, ValueList<TValue>>(
indexWriter, stringsFile.Begin(), stringsFile.End());
indexWriter, serializer, stringsFile.Begin(), stringsFile.End());
LOG(LINFO, ("End building search index, elapsed seconds:", timer.ElapsedSeconds()));
}

View file

@ -2,13 +2,13 @@
#include "coding/file_writer.hpp"
#include "coding/file_reader.hpp"
#include "coding/read_write_utils.hpp"
#include "base/macros.hpp"
#include "base/mem_trie.hpp"
#include "base/string_utils.hpp"
#include "base/worker_thread.hpp"
#include "coding/read_write_utils.hpp"
#include "std/iterator_facade.hpp"
#include "std/queue.hpp"
#include "std/functional.hpp"
@ -58,29 +58,27 @@ public:
return m_name == name.m_name && m_val == name.m_val;
}
/*
template <class TWriter>
IdT Write(TWriter & writer) const
IdT Write(TWriter & writer, SingleValueSerializer<TValue> const & serializer) const
{
IdT const pos = static_cast<IdT>(writer.Pos());
CHECK_EQUAL(static_cast<uint64_t>(pos), writer.Pos(), ());
rw::Write(writer, m_name);
m_val.Write(writer);
serializer.Serialize(writer, m_val);
return pos;
}
*/
template <class TReader>
void Read(TReader & src)
void Read(TReader & src, SingleValueSerializer<TValue> const & serializer)
{
rw::Read(src, m_name);
m_val.DeserializeFromSource(src);
serializer.DeserializeFromSource(src, m_val);
}
inline void const * value_data() const { return m_val.data(); }
inline size_t value_size() const { return m_val.size(); }
void Swap(TString & r)
{
m_name.swap(r.m_name);
@ -107,8 +105,9 @@ public:
/// to the list.
/// \param strings Vector of strings that should be sorted. Internal data is moved out from
/// strings, so it'll become empty after ctor.
SortAndDumpStringsTask(FileWriter & writer, OffsetsListT & offsets, StringsListT & strings)
: m_writer(writer), m_offsets(offsets)
SortAndDumpStringsTask(FileWriter & writer, OffsetsListT & offsets, StringsListT & strings,
SingleValueSerializer<TValue> const & serializer)
: m_writer(writer), m_offsets(offsets), m_serializer(serializer)
{
strings.swap(m_strings);
}
@ -122,10 +121,10 @@ public:
for (auto const & s : m_strings)
trie.Add(s.GetString(), s.GetValue());
MemWriter<vector<uint8_t>> memWriter(memBuffer);
trie.ForEach([&memWriter](const strings::UniString & s, const ValueT & v)
trie.ForEach([&memWriter, this](const strings::UniString & s, const ValueT & v)
{
rw::Write(memWriter, s);
v.Serialize(memWriter);
m_serializer.Serialize(memWriter, v);
});
}
@ -140,6 +139,7 @@ public:
FileWriter & m_writer;
OffsetsListT & m_offsets;
StringsListT m_strings;
SingleValueSerializer<TValue> m_serializer;
DISALLOW_COPY_AND_MOVE(SortAndDumpStringsTask);
};
@ -165,7 +165,7 @@ public:
void increment();
};
StringsFile(string const & fPath);
StringsFile(string const & fPath, SingleValueSerializer<ValueT> const & serializer);
void EndAdding();
void OpenForRead();
@ -177,21 +177,9 @@ public:
IteratorT End() { return IteratorT(*this, true); }
private:
unique_ptr<FileWriter> m_writer;
unique_ptr<FileReader> m_reader;
void Flush();
bool PushNextValue(size_t i);
StringsListT m_strings;
OffsetsListT m_offsets;
// A worker thread that sorts and writes groups of strings. The
// whole process looks like a pipeline, i.e. main thread accumulates
// strings while worker thread sequentially sorts and stores groups
// of strings on a disk.
my::WorkerThread<SortAndDumpStringsTask> m_workerThread;
struct QValue
{
TString m_string;
@ -199,10 +187,27 @@ private:
QValue(TString const & s, size_t i) : m_string(s), m_index(i) {}
inline bool operator>(QValue const & rhs) const { return !(m_string < rhs.m_string) && !(m_string == rhs.m_string); }
inline bool operator>(QValue const & rhs) const
{
return !(m_string < rhs.m_string) && !(m_string == rhs.m_string);
}
};
priority_queue<QValue, vector<QValue>, greater<QValue>> m_queue;
// A worker thread that sorts and writes groups of strings. The
// whole process looks like a pipeline, i.e. main thread accumulates
// strings while worker thread sequentially sorts and stores groups
// of strings on a disk.
my::WorkerThread<SortAndDumpStringsTask> m_workerThread;
unique_ptr<FileWriter> m_writer;
unique_ptr<FileReader> m_reader;
StringsListT m_strings;
OffsetsListT m_offsets;
SingleValueSerializer<TValue> m_serializer;
};
template <typename ValueT>
@ -242,8 +247,9 @@ void StringsFile<ValueT>::IteratorT::increment()
}
template <typename ValueT>
StringsFile<ValueT>::StringsFile(string const & fPath)
: m_workerThread(1 /* maxTasks */)
StringsFile<ValueT>::StringsFile(string const & fPath,
SingleValueSerializer<ValueT> const & serializer)
: m_workerThread(1 /* maxTasks */), m_serializer(serializer)
{
m_writer.reset(new FileWriter(fPath));
}
@ -252,7 +258,7 @@ template <typename ValueT>
void StringsFile<ValueT>::Flush()
{
shared_ptr<SortAndDumpStringsTask> task(
new SortAndDumpStringsTask(*m_writer, m_offsets, m_strings));
new SortAndDumpStringsTask(*m_writer, m_offsets, m_strings, m_serializer));
m_workerThread.Push(task);
}
@ -269,7 +275,7 @@ bool StringsFile<ValueT>::PushNextValue(size_t i)
// read string
TString s;
s.Read(src);
s.Read(src, m_serializer);
// update offset
m_offsets[i].first = src.Pos();

View file

@ -20,38 +20,13 @@
/// compressed search index construction, they allow to avoid
/// redundant serialization-deserialization or sorting.
/// A wrapper around feature index.
// A wrapper around feature index.
struct FeatureIndexValue
{
FeatureIndexValue() : m_featureId(0) {}
FeatureIndexValue(uint64_t featureId) : m_featureId(featureId) {}
// The serialization and deserialization is needed for StringsFile.
// Use ValueList for group serialization in CBVs.
template <typename TWriter>
void Serialize(TWriter & writer) const
{
WriteToSink(writer, m_featureId);
}
template <typename TReader>
void Deserialize(TReader & reader)
{
ReaderSource<TReader> src(reader);
DeserializeFromSource(src);
}
template <typename TSource>
void DeserializeFromSource(TSource & src)
{
m_featureId = ReadPrimitiveFromSource<uint64_t>(src);
}
inline void const * data() const { return &m_featureId; }
inline size_t size() const { return sizeof(m_featureId); }
bool operator<(FeatureIndexValue const & o) const { return m_featureId < o.m_featureId; }
bool operator==(FeatureIndexValue const & o) const { return m_featureId == o.m_featureId; }
@ -63,40 +38,13 @@ struct FeatureIndexValue
struct FeatureWithRankAndCenter
{
FeatureWithRankAndCenter()
: m_pt(m2::PointD()), m_featureId(0), m_rank(0), m_codingParams(serial::CodingParams())
{
}
FeatureWithRankAndCenter() : m_pt(m2::PointD()), m_featureId(0), m_rank(0) {}
FeatureWithRankAndCenter(m2::PointD pt, uint32_t featureId, uint8_t rank,
serial::CodingParams codingParams)
: m_pt(pt), m_featureId(featureId), m_rank(rank), m_codingParams(codingParams)
FeatureWithRankAndCenter(m2::PointD pt, uint32_t featureId, uint8_t rank)
: m_pt(pt), m_featureId(featureId), m_rank(rank)
{
}
template <typename TWriter>
void Serialize(TWriter & writer) const
{
serial::SavePoint(writer, m_pt, m_codingParams);
WriteToSink(writer, m_featureId);
WriteToSink(writer, m_rank);
}
template <typename TReader>
void Deserialize(TReader & reader)
{
ReaderSource<TReader> src(reader);
DeserializeFromSource(src);
}
template <typename TSource>
void DeserializeFromSource(TSource & src)
{
m_pt = serial::LoadPoint(src, m_codingParams);
m_featureId = ReadPrimitiveFromSource<uint32_t>(src);
m_rank = ReadPrimitiveFromSource<uint8_t>(src);
}
bool operator<(FeatureWithRankAndCenter const & o) const { return m_featureId < o.m_featureId; }
bool operator==(FeatureWithRankAndCenter const & o) const { return m_featureId == o.m_featureId; }
@ -108,14 +56,82 @@ struct FeatureWithRankAndCenter
swap(m_rank, o.m_rank);
}
void SetCodingParams(serial::CodingParams const & codingParams) { m_codingParams = codingParams; }
m2::PointD m_pt; // Center point of the feature.
uint32_t m_featureId; // Feature identifier.
uint8_t m_rank; // Rank of the feature.
};
template <typename TValue>
class SingleValueSerializer;
template <>
class SingleValueSerializer<FeatureWithRankAndCenter>
{
public:
using TValue = FeatureWithRankAndCenter;
SingleValueSerializer(serial::CodingParams const & codingParams) : m_codingParams(codingParams) {}
template <typename TWriter>
void Serialize(TWriter & writer, TValue const & v) const
{
serial::SavePoint(writer, v.m_pt, m_codingParams);
WriteToSink(writer, v.m_featureId);
WriteToSink(writer, v.m_rank);
}
template <typename TReader>
void Deserialize(TReader & reader, TValue & v) const
{
ReaderSource<TReader> src(reader);
DeserializeFromSource(src, v);
}
template <typename TSource>
void DeserializeFromSource(TSource & src, TValue & v) const
{
v.m_pt = serial::LoadPoint(src, m_codingParams);
v.m_featureId = ReadPrimitiveFromSource<uint32_t>(src);
v.m_rank = ReadPrimitiveFromSource<uint8_t>(src);
}
private:
serial::CodingParams m_codingParams;
};
template <>
class SingleValueSerializer<FeatureIndexValue>
{
public:
using TValue = FeatureIndexValue;
SingleValueSerializer() = default;
// todo(@mpimenov). Remove.
SingleValueSerializer(serial::CodingParams const & /* codingParams */) {}
// The serialization and deserialization is needed for StringsFile.
// Use ValueList for group serialization in CBVs.
template <typename TWriter>
void Serialize(TWriter & writer, TValue const & v) const
{
WriteToSink(writer, v.m_featureId);
}
template <typename TReader>
void Deserialize(TReader & reader, TValue & v) const
{
ReaderSource<TReader> src(reader);
DeserializeFromSource(src, v);
}
template <typename TSource>
void DeserializeFromSource(TSource & src, TValue & v) const
{
v.m_featureId = ReadPrimitiveFromSource<uint64_t>(src);
}
};
// This template is used to accumulate, serialize and deserialize
// a group of values of the same type.
template <typename TValue>
@ -129,12 +145,9 @@ class ValueList<FeatureIndexValue>
public:
using TValue = FeatureIndexValue;
ValueList()
: m_cbv(unique_ptr<coding::CompressedBitVector>()), m_codingParams(serial::CodingParams())
{
}
ValueList() : m_cbv(unique_ptr<coding::CompressedBitVector>()) {}
ValueList(ValueList<FeatureIndexValue> const & o) : m_codingParams(o.m_codingParams)
ValueList(ValueList<FeatureIndexValue> const & o)
{
if (o.m_cbv)
m_cbv = coding::CompressedBitVectorBuilder::FromCBV(*o.m_cbv);
@ -163,15 +176,10 @@ public:
return m_cbv->PopCount() == 0 ? 0 : 1;
}
bool IsEmpty() const
{
if (!m_cbv)
return true;
return m_cbv->PopCount() == 0;
}
bool IsEmpty() const { return !m_cbv || m_cbv->PopCount() == 0; }
template <typename TSink>
void Serialize(TSink & sink) const
void Serialize(TSink & sink, SingleValueSerializer<TValue> const & /* serializer */) const
{
if (IsEmpty())
return;
@ -190,7 +198,8 @@ public:
// A better approach is to make Serialize/Deserialize responsible for
// every part of serialization and as such it should not need valueCount.
template <typename TSource>
void Deserialize(TSource & src, uint32_t valueCount)
void Deserialize(TSource & src, uint32_t valueCount,
SingleValueSerializer<TValue> const & /* serializer */)
{
if (valueCount > 0)
m_cbv = coding::CompressedBitVectorBuilder::DeserializeFromSource(src);
@ -201,7 +210,7 @@ public:
template <typename TF>
void ForEach(TF && f) const
{
if (!m_cbv)
if (IsEmpty())
return;
coding::CompressedBitVectorEnumerator::ForEach(*m_cbv, [&f](uint64_t const bitPosition)
{
@ -209,11 +218,8 @@ public:
});
}
void SetCodingParams(serial::CodingParams const & codingParams) { m_codingParams = codingParams; }
private:
unique_ptr<coding::CompressedBitVector> m_cbv;
serial::CodingParams m_codingParams;
};
/// ValueList<FeatureWithRankAndCenter> sequentially serializes
@ -223,10 +229,9 @@ class ValueList<FeatureWithRankAndCenter>
{
public:
using TValue = FeatureWithRankAndCenter;
using TSerializer = SingleValueSerializer<TValue>;
ValueList() : m_codingParams(serial::CodingParams()) {}
ValueList(serial::CodingParams const & codingParams) : m_codingParams(codingParams) {}
ValueList() = default;
void Init(vector<TValue> const & values) { m_values = values; }
@ -235,30 +240,31 @@ public:
bool IsEmpty() const { return m_values.empty(); }
template <typename TSink>
void Serialize(TSink & sink) const
void Serialize(TSink & sink, SingleValueSerializer<TValue> const & serializer) const
{
for (auto const & value : m_values)
value.Serialize(sink);
serializer.Serialize(sink, value);
}
template <typename TSource>
void Deserialize(TSource & src, uint32_t valueCount)
void Deserialize(TSource & src, uint32_t valueCount,
SingleValueSerializer<TValue> const & serializer)
{
m_values.resize(valueCount);
for (size_t i = 0; i < valueCount; ++i)
m_values[i].DeserializeFromSource(src);
serializer.DeserializeFromSource(src, m_values[i]);
}
// When valueCount is not known, Deserialize reads
// until the source is exhausted.
template <typename TSource>
void Deserialize(TSource & src)
void Deserialize(TSource & src, SingleValueSerializer<TValue> const & serializer)
{
m_values.clear();
while (src.Size() > 0)
{
m_values.push_back(TValue());
m_values.back().DeserializeFromSource(src);
serializer.DeserializeFromSource(src, m_values.back());
}
}
@ -269,9 +275,6 @@ public:
f(value);
}
void SetCodingParams(serial::CodingParams const & codingParams) { m_codingParams = codingParams; }
private:
vector<TValue> m_values;
serial::CodingParams m_codingParams;
};

View file

@ -378,7 +378,7 @@ void MatchFeaturesInTrie(SearchQueryParams const & params, trie::DefaultIterator
TFilter const & filter, ToDo && toDo)
{
using TValue = trie::DefaultIterator::TValue;
TrieValuesHolder<TFilter> categoriesHolder(filter);
TrieValuesHolder<TFilter, TValue> categoriesHolder(filter);
bool const categoriesMatched = MatchCategoriesInTrie(params, trieRoot, categoriesHolder);
impl::OffsetIntersecter<TFilter, TValue> intersecter(filter);

View file

@ -67,8 +67,8 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(MwmSet::MwmHandl
ASSERT(value, ());
serial::CodingParams codingParams(trie::GetCodingParams(value->GetHeader().GetDefCodingParams()));
ModelReaderPtr searchReader = value->m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<FeatureIndexValue>>(
SubReaderWrapper<Reader>(searchReader.GetPtr()), codingParams);
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<TValue>>(
SubReaderWrapper<Reader>(searchReader.GetPtr()), SingleValueSerializer<TValue>(codingParams));
auto emptyFilter = [](uint32_t /* featureId */)
{

View file

@ -1607,12 +1607,13 @@ void Query::SearchLocality(MwmValue const * pMwm, Locality & res1, Region & res2
SearchQueryParams params;
InitParams(true /* localitySearch */, params);
serial::CodingParams cp(trie::GetCodingParams(pMwm->GetHeader().GetDefCodingParams()));
auto codingParams = trie::GetCodingParams(pMwm->GetHeader().GetDefCodingParams());
ModelReaderPtr searchReader = pMwm->m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<FeatureIndexValue>>(
SubReaderWrapper<Reader>(searchReader.GetPtr()), cp);
using TValue = FeatureIndexValue;
auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<TValue>>(
SubReaderWrapper<Reader>(searchReader.GetPtr()), SingleValueSerializer<TValue>(codingParams));
ForEachLangPrefix(params, *trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang)
{