[omim] Removed EdgeBuilder.

This commit is contained in:
Maxim Pimenov 2015-09-25 18:28:28 +03:00 committed by Sergey Yershov
parent fd503ac72e
commit 9f0abf24fc
12 changed files with 96 additions and 232 deletions

View file

@ -100,17 +100,17 @@ struct SimpleValueList
vector<uint8_t> m_valueList;
};
// Appends every value stored at the node |root| points to -- indices
// 0 .. root->NumValues() - 1, read through root->GetValue(i) -- to the end of
// |values|. Existing contents of |values| are kept.
// NOTE(review): this listing interleaves two revisions of the signature; the
// three lines that mention trie::EmptyValueReader appear to be the superseded
// form, the following three lines its replacement.
void ReadAllValues(unique_ptr<trie::SuccinctTrieIterator<MemReader, SimpleValueReader,
trie::EmptyValueReader>> const & root,
vector<uint8_t> & values)
void ReadAllValues(
unique_ptr<trie::SuccinctTrieIterator<MemReader, SimpleValueReader>> const & root,
vector<uint8_t> & values)
{
for (size_t i = 0; i < root->NumValues(); ++i)
values.push_back(root->GetValue(i));
}
void CollectInSubtree(unique_ptr<trie::SuccinctTrieIterator<MemReader, SimpleValueReader,
trie::EmptyValueReader>> const & root,
vector<uint8_t> & collectedValues)
void CollectInSubtree(
unique_ptr<trie::SuccinctTrieIterator<MemReader, SimpleValueReader>> const & root,
vector<uint8_t> & collectedValues)
{
ReadAllValues(root, collectedValues);
@ -138,16 +138,14 @@ UNIT_TEST(SuccinctTrie_Serialization_Smoke1)
vector<StringsFileEntryMock> data = {StringsFileEntryMock("abacaba", 1)};
trie::BuildSuccinctTrie<TWriter, vector<StringsFileEntryMock>::iterator, trie::EmptyEdgeBuilder,
EmptyValueList<TWriter>>(memWriter, data.begin(), data.end(),
trie::EmptyEdgeBuilder());
trie::BuildSuccinctTrie<TWriter, vector<StringsFileEntryMock>::iterator, EmptyValueList<TWriter>>(
memWriter, data.begin(), data.end());
MemReader memReader(buf.data(), buf.size());
using TEmptyValue = trie::EmptyValueReader::ValueType;
auto trieRoot =
trie::ReadSuccinctTrie(memReader, trie::EmptyValueReader(), trie::EmptyValueReader());
auto trieRoot = trie::ReadSuccinctTrie(memReader, trie::EmptyValueReader());
TEST(trieRoot, ());
}
@ -160,15 +158,14 @@ UNIT_TEST(SuccinctTrie_Serialization_Smoke2)
vector<StringsFileEntryMock> data = {StringsFileEntryMock("abacaba", 1)};
trie::BuildSuccinctTrie<TWriter, vector<StringsFileEntryMock>::iterator, trie::EmptyEdgeBuilder,
SimpleValueList<TWriter>>(memWriter, data.begin(), data.end(),
trie::EmptyEdgeBuilder());
trie::BuildSuccinctTrie<TWriter, vector<StringsFileEntryMock>::iterator,
SimpleValueList<TWriter>>(memWriter, data.begin(), data.end());
MemReader memReader(buf.data(), buf.size());
using TEmptyValue = trie::EmptyValueReader::ValueType;
auto trieRoot = trie::ReadSuccinctTrie(memReader, SimpleValueReader(), trie::EmptyValueReader());
auto trieRoot = trie::ReadSuccinctTrie(memReader, SimpleValueReader());
TEST(trieRoot, ());
}
@ -184,15 +181,14 @@ UNIT_TEST(SuccinctTrie_Iterator)
StringsFileEntryMock("abc", 5)};
sort(data.begin(), data.end());
trie::BuildSuccinctTrie<TWriter, vector<StringsFileEntryMock>::iterator, trie::EmptyEdgeBuilder,
SimpleValueList<TWriter>>(memWriter, data.begin(), data.end(),
trie::EmptyEdgeBuilder());
trie::BuildSuccinctTrie<TWriter, vector<StringsFileEntryMock>::iterator,
SimpleValueList<TWriter>>(memWriter, data.begin(), data.end());
MemReader memReader(buf.data(), buf.size());
using TEmptyValue = trie::EmptyValueReader::ValueType;
auto trieRoot = trie::ReadSuccinctTrie(memReader, SimpleValueReader(), trie::EmptyValueReader());
auto trieRoot = trie::ReadSuccinctTrie(memReader, SimpleValueReader());
vector<uint8_t> collectedValues;
CollectInSubtree(trieRoot, collectedValues);
@ -214,14 +210,13 @@ UNIT_TEST(SuccinctTrie_MoveToString)
StringsFileEntryMock("aaa", 3), StringsFileEntryMock("aaa", 4)};
sort(data.begin(), data.end());
trie::BuildSuccinctTrie<TWriter, vector<StringsFileEntryMock>::iterator, trie::EmptyEdgeBuilder,
SimpleValueList<TWriter>>(memWriter, data.begin(), data.end(),
trie::EmptyEdgeBuilder());
trie::BuildSuccinctTrie<TWriter, vector<StringsFileEntryMock>::iterator,
SimpleValueList<TWriter>>(memWriter, data.begin(), data.end());
MemReader memReader(buf.data(), buf.size());
using TEmptyValue = trie::EmptyValueReader::ValueType;
auto trieRoot = trie::ReadSuccinctTrie(memReader, SimpleValueReader(), trie::EmptyValueReader());
auto trieRoot = trie::ReadSuccinctTrie(memReader, SimpleValueReader());
{
auto it = trieRoot->GoToString(strings::MakeUniString("a"));

View file

@ -23,9 +23,8 @@ struct ChildNodeInfo
bool m_isLeaf;
uint32_t m_size;
vector<uint32_t> m_edge;
string m_edgeValue;
ChildNodeInfo(bool isLeaf, uint32_t size, char const * edge, char const * edgeValue)
: m_isLeaf(isLeaf), m_size(size), m_edgeValue(edgeValue)
ChildNodeInfo(bool isLeaf, uint32_t size, char const * edge) : m_isLeaf(isLeaf), m_size(size)
{
while (*edge)
m_edge.push_back(*edge++);
@ -35,8 +34,6 @@ struct ChildNodeInfo
bool IsLeaf() const { return m_isLeaf; }
uint32_t const * GetEdge() const { return &m_edge[0]; }
uint32_t GetEdgeSize() const { return m_edge.size(); }
void const * GetEdgeValue() const { return m_edgeValue.data(); }
uint32_t GetEdgeValueSize() const { return m_edgeValue.size(); }
};
struct KeyValuePair
@ -163,15 +160,11 @@ UNIT_TEST(TrieBuilder_WriteNode_Smoke)
{
vector<uint8_t> serial;
PushBackByteSink<vector<uint8_t> > sink(serial);
ChildNodeInfo children[] =
{
ChildNodeInfo(true, 1, "1A", "i1"),
ChildNodeInfo(false, 2, "B", "ii2"),
ChildNodeInfo(false, 3, "zz", ""),
ChildNodeInfo(true, 4,
"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij", "i4"),
ChildNodeInfo(true, 5, "a", "5z")
};
ChildNodeInfo children[] = {
ChildNodeInfo(true, 1, "1A"), ChildNodeInfo(false, 2, "B"), ChildNodeInfo(false, 3, "zz"),
ChildNodeInfo(true, 4,
"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"),
ChildNodeInfo(true, 5, "a")};
CharValueList valueList("123");
trie::WriteNode(sink, 0, valueList, &children[0], &children[0] + ARRAY_SIZE(children));
@ -182,10 +175,8 @@ UNIT_TEST(TrieBuilder_WriteNode_Smoke)
'1', '2', '3', // Values
BOOST_BINARY(10000001), // Child 1: header: [+leaf] [-supershort] [2 symbols]
MKUC(ZENC(MKSC('1'))), MKUC(ZENC(MKSC('A') - MKSC('1'))), // Child 1: edge
'i', '1', // Child 1: intermediate data
1, // Child 1: size
MKUC(64 | ZENC(MKSC('B') - MKSC('1'))), // Child 2: header: [-leaf] [+supershort]
'i', 'i', '2', // Child 2: intermediate data
2, // Child 2: size
BOOST_BINARY(00000001), // Child 3: header: [-leaf] [-supershort] [2 symbols]
MKUC(ZENC(MKSC('z') - MKSC('B'))), 0, // Child 3: edge
@ -199,10 +190,8 @@ UNIT_TEST(TrieBuilder_WriteNode_Smoke)
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2,2,2,2,2,2,2,2,2, // Child 4: edge
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2,2,2,2,2,2,2,2,2, // Child 4: edge
MKUC(ZENC(MKSC('a') - MKSC('j'))), 2,2,2,2,2,2,2,2,2, // Child 4: edge
'i', '4', // Child 4: intermediate data
4, // Child 4: size
MKUC(BOOST_BINARY(11000000) | ZENC(0)), // Child 5: header: [+leaf] [+supershort]
'5', 'z' // Child 5: intermediate data
};
TEST_EQUAL(serial, vector<uint8_t>(&expected[0], &expected[0] + ARRAY_SIZE(expected)), ());
@ -244,29 +233,16 @@ UNIT_TEST(TrieBuilder_Build)
vector<uint8_t> serial;
PushBackByteSink<vector<uint8_t> > sink(serial);
trie::Build<PushBackByteSink<vector<uint8_t>>, typename vector<KeyValuePair>::iterator,
trie::MaxValueEdgeBuilder<MaxValueCalc>, Uint32ValueList>(
sink, v.begin(), v.end(), trie::MaxValueEdgeBuilder<MaxValueCalc>());
Uint32ValueList>(sink, v.begin(), v.end());
reverse(serial.begin(), serial.end());
// LOG(LINFO, (serial.size(), vs));
MemReader memReader = MemReader(&serial[0], serial.size());
using IteratorType = trie::Iterator<trie::FixedSizeValueReader<4>::ValueType,
trie::FixedSizeValueReader<1>::ValueType>;
unique_ptr<IteratorType> const root(trie::ReadTrie(memReader, trie::FixedSizeValueReader<4>(),
trie::FixedSizeValueReader<1>()));
using IteratorType = trie::Iterator<trie::FixedSizeValueReader<4>::ValueType>;
unique_ptr<IteratorType> const root(trie::ReadTrie(memReader, trie::FixedSizeValueReader<4>()));
vector<KeyValuePair> res;
KeyValuePairBackInserter f;
trie::ForEachRef(*root, f, vector<trie::TrieChar>());
sort(f.m_v.begin(), f.m_v.end());
TEST_EQUAL(v, f.m_v, ());
uint32_t expectedMaxEdgeValue = 0;
for (size_t i = 0; i < v.size(); ++i)
if (!v[i].m_key.empty())
expectedMaxEdgeValue = max(expectedMaxEdgeValue, v[i].m_value);
uint32_t maxEdgeValue = 0;
for (uint32_t i = 0; i < root->m_edge.size(); ++i)
maxEdgeValue = max(maxEdgeValue, static_cast<uint32_t>(root->m_edge[i].m_value.m_data[0]));
TEST_EQUAL(maxEdgeValue, expectedMaxEdgeValue, (v, f.m_v));
}
}

View file

@ -27,7 +27,7 @@ namespace trie
// Node is a temporary struct that is used to store the trie in memory
// before it is converted to a succinct representation and put to disk.
// It is rather verbose but hopefully is small enough to fit in memory.
template <class TEdgeBuilder, class TValueList>
template <class TValueList>
struct Node
{
// The left child is reached by 0, the right by 1.
@ -42,11 +42,6 @@ struct Node
// to the key string which leads to this node.
TValueList m_valueList;
// m_edgeBuilder is obsolete and is here only for backward-compatibility
// with an older implementation of the trie.
// todo(@pimenov): Remove it.
TEdgeBuilder m_edgeBuilder;
Node() : l(nullptr), r(nullptr), m_isFinal(false) {}
};
@ -95,13 +90,12 @@ void WriteInLevelOrder(TNode * root, vector<TNode *> & levelOrder)
}
}
template <typename TWriter, typename TIter, typename TEdgeBuilder, typename TValueList>
void BuildSuccinctTrie(TWriter & writer, TIter const beg, TIter const end,
TEdgeBuilder const & edgeBuilder)
template <typename TWriter, typename TIter, typename TValueList>
void BuildSuccinctTrie(TWriter & writer, TIter const beg, TIter const end)
{
using TrieChar = uint32_t;
using TTrieString = buffer_vector<TrieChar, 32>;
using TNode = Node<TEdgeBuilder, TValueList>;
using TNode = Node<TValueList>;
using TEntry = typename TIter::value_type;
TNode * root = new TNode();
@ -144,7 +138,6 @@ void BuildSuccinctTrie(TWriter & writer, TIter const beg, TIter const end,
TNode * cur = AddToTrie(root, bitEncoding, numBits);
cur->m_isFinal = true;
cur->m_valueList.Append(entry.GetValue());
cur->m_edgeBuilder.AddValue(entry.value_data(), entry.value_size());
}
vector<TNode *> levelOrder;

View file

@ -21,16 +21,14 @@ namespace trie
// of the trie: the trie topology and the offsets into the data buffer.
// The topology can then be used to navigate the trie and the offsets
// can be used to extract the values associated with the key strings.
template <class TReader, class TValueReader, class TEdgeValueReader>
template <class TReader, class TValueReader>
class TopologyAndOffsets
{
public:
using TValue = typename TValueReader::ValueType;
using TEdgeValue = typename TEdgeValueReader::ValueType;
TopologyAndOffsets(TReader const & reader, TValueReader const & valueReader,
TEdgeValueReader const & edgeValueReader)
: m_reader(reader), m_valueReader(valueReader), m_edgeValueReader(edgeValueReader)
TopologyAndOffsets(TReader const & reader, TValueReader const & valueReader)
: m_reader(reader), m_valueReader(valueReader)
{
Parse();
}
@ -114,7 +112,6 @@ private:
// todo(@pimenov) Why do we even need an instance? Type name is enough.
TValueReader const & m_valueReader;
TEdgeValueReader const & m_edgeValueReader;
uint32_t m_numNodes;
coding::HuffmanCoder m_huffman;
@ -127,13 +124,12 @@ private:
vector<uint32_t> m_offsetTable;
};
template <class TReader, class TValueReader, class TEdgeValueReader>
template <class TReader, class TValueReader>
class SuccinctTrieIterator
{
public:
using TValue = typename TValueReader::ValueType;
using TEdgeValue = typename TEdgeValueReader::ValueType;
using TCommonData = TopologyAndOffsets<TReader, TValueReader, TEdgeValueReader>;
using TCommonData = TopologyAndOffsets<TReader, TValueReader>;
SuccinctTrieIterator(TReader const & reader, shared_ptr<TCommonData> common,
uint32_t nodeBitPosition)
@ -246,15 +242,14 @@ private:
bool m_valuesRead;
};
// Entry point for reading a succinct trie from |reader|.
// Constructs a TopologyAndOffsets object (its constructor calls Parse()),
// holds it in a shared_ptr, and returns a SuccinctTrieIterator that is given
// that shared_ptr, the reader obtained from common->GetReader(), and bit
// position 1.
// NOTE(review): this listing interleaves two revisions; the lines that mention
// TEdgeValueReader / edgeValueReader appear to be the superseded form.
template <typename TReader, typename TValueReader, typename TEdgeValueReader>
unique_ptr<SuccinctTrieIterator<TReader, TValueReader, TEdgeValueReader>> ReadSuccinctTrie(
TReader const & reader, TValueReader valueReader = TValueReader(),
TEdgeValueReader edgeValueReader = TEdgeValueReader())
template <typename TReader, typename TValueReader>
unique_ptr<SuccinctTrieIterator<TReader, TValueReader>> ReadSuccinctTrie(
TReader const & reader, TValueReader valueReader = TValueReader())
{
using TCommonData = TopologyAndOffsets<TReader, TValueReader, TEdgeValueReader>;
using TIter = SuccinctTrieIterator<TReader, TValueReader, TEdgeValueReader>;
using TCommonData = TopologyAndOffsets<TReader, TValueReader>;
using TIter = SuccinctTrieIterator<TReader, TValueReader>;
shared_ptr<TCommonData> common(new TCommonData(reader, valueReader, edgeValueReader));
shared_ptr<TCommonData> common(new TCommonData(reader, valueReader));
return make_unique<TIter>(common->GetReader(), common, 1 /* bitPosition */);
}

View file

@ -17,7 +17,7 @@ typedef uint32_t TrieChar;
// However 0 is used because the first byte is actually language id.
static uint32_t const DEFAULT_CHAR = 0;
template <typename ValueT, typename EdgeValueT>
template <typename ValueT>
class Iterator
{
//dbg::ObjectTracker m_tracker;
@ -27,7 +27,6 @@ public:
{
typedef buffer_vector<TrieChar, 8> EdgeStrT;
EdgeStrT m_str;
EdgeValueT m_value;
};
buffer_vector<Edge, 8> m_edge;
@ -35,8 +34,8 @@ public:
virtual ~Iterator() {}
virtual Iterator<ValueT, EdgeValueT> * Clone() const = 0;
virtual Iterator<ValueT, EdgeValueT> * GoToEdge(size_t i) const = 0;
virtual Iterator<ValueT> * Clone() const = 0;
virtual Iterator<ValueT> * GoToEdge(size_t i) const = 0;
};
struct EmptyValueReader
@ -67,8 +66,8 @@ struct FixedSizeValueReader
}
};
template <typename ValueT, typename EdgeValueT, typename F, typename StringT>
void ForEachRef(Iterator<ValueT, EdgeValueT> const & iter, F & f, StringT const & s)
template <typename ValueT, typename F, typename StringT>
void ForEachRef(Iterator<ValueT> const & iter, F & f, StringT const & s)
{
for (size_t i = 0; i < iter.m_value.size(); ++i)
f(s, iter.m_value[i]);
@ -76,7 +75,7 @@ void ForEachRef(Iterator<ValueT, EdgeValueT> const & iter, F & f, StringT const
{
StringT s1(s);
s1.insert(s1.end(), iter.m_edge[i].m_str.begin(), iter.m_edge[i].m_str.end());
unique_ptr<Iterator<ValueT, EdgeValueT> > const pIter1(iter.GoToEdge(i));
unique_ptr<Iterator<ValueT>> const pIter1(iter.GoToEdge(i));
ForEachRef(*pIter1, f, s1);
}
}

View file

@ -35,7 +35,6 @@
// [vi edgeChar1 - edgeChar0]
// ...
// [vi edgeCharN - edgeCharN-1]
// [edge value]
// [child size]: if the child is not the last one when reading
namespace trie
@ -93,7 +92,6 @@ void WriteNode(TSink & sink, TrieChar baseChar, TValueList const & valueList,
}
}
baseChar = edge[0];
sink.Write(it->GetEdgeValue(), it->GetEdgeValueSize());
uint32_t const childSize = it->Size();
if (++it != endChild)
@ -106,8 +104,6 @@ struct ChildInfo
bool m_isLeaf;
uint32_t m_size;
buffer_vector<TrieChar, 8> m_edge;
using TEdgeValueStorage = buffer_vector<uint8_t, 8>;
TEdgeValueStorage m_edgeValue;
ChildInfo(bool isLeaf, uint32_t size, TrieChar c) : m_isLeaf(isLeaf), m_size(size), m_edge(1, c)
{
@ -117,29 +113,23 @@ struct ChildInfo
bool IsLeaf() const { return m_isLeaf; }
TrieChar const * GetEdge() const { return m_edge.data(); }
uint32_t GetEdgeSize() const { return m_edge.size(); }
void const * GetEdgeValue() const { return m_edgeValue.data(); }
uint32_t GetEdgeValueSize() const { return m_edgeValue.size(); }
};
// Per-node record that Build() keeps in its |nodes| stack while the trie is
// being written.
// NOTE(review): this listing interleaves two revisions; the lines that mention
// TEdgeBuilder / m_edgeBuilder appear to be the superseded form.
template <class TEdgeBuilder, class TValueList>
template <typename TValueList>
struct NodeInfo
{
// Sink position recorded when the node was created (Build() passes sink.Pos()).
uint64_t m_begPos;
// Trie character of this node; Build() passes DEFAULT_CHAR for the first node
// and key[i] for the others.
TrieChar m_char;
// Child descriptors collected for this node.
vector<ChildInfo> m_children;
// Values attached to this node; Build() fills it with Append(e.GetValue()).
TValueList m_valueList;
TEdgeBuilder m_edgeBuilder;
// Default state: position 0, character 0, no children.
NodeInfo() : m_begPos(0), m_char(0) {}
NodeInfo(uint64_t pos, TrieChar trieChar, TEdgeBuilder const & edgeBuilder)
: m_begPos(pos), m_char(trieChar), m_edgeBuilder(edgeBuilder)
{
}
NodeInfo(uint64_t pos, TrieChar trieChar) : m_begPos(pos), m_char(trieChar) {}
};
template <typename TSink, typename TEdgeBuilder, typename TValueList>
void WriteNodeReverse(TSink & sink, TrieChar baseChar,
NodeInfo<TEdgeBuilder, TValueList> const & node, bool isRoot = false)
template <typename TSink, typename TValueList>
void WriteNodeReverse(TSink & sink, TrieChar baseChar, NodeInfo<TValueList> const & node,
bool isRoot = false)
{
using TOutStorage = buffer_vector<uint8_t, 64>;
TOutStorage out;
@ -175,72 +165,18 @@ void PopNodes(TSink & sink, TNodes & nodes, int nodesToPop)
node.m_char));
}
prevNode.m_edgeBuilder.AddEdge(node.m_edgeBuilder);
PushBackByteSink<ChildInfo::TEdgeValueStorage> sink(prevNode.m_children.back().m_edgeValue);
node.m_edgeBuilder.StoreValue(sink);
nodes.pop_back();
}
}
// Edge builder whose operations are all no-ops: AddValue() and AddEdge()
// ignore their arguments and StoreValue() writes nothing to the sink.
// It is passed to trie::Build / trie::BuildSuccinctTrie by callers that do not
// store any per-edge value.
struct EmptyEdgeBuilder
{
using ValueType = unsigned char;
// Ignores the value bytes.
void AddValue(void const *, uint32_t) {}
// Ignores the child builder.
void AddEdge(EmptyEdgeBuilder &) {}
// Emits zero bytes.
template <typename TSink>
void StoreValue(TSink &) const
{
}
};
// Edge builder that keeps the largest TMaxValueCalc::ValueType it has seen.
// TMaxValueCalc must provide a ValueType and be callable as
// maxCalc(void const * data, uint32_t size), returning a ValueType.
// ValueType must support operator< and copy assignment.
template <typename TMaxValueCalc>
struct MaxValueEdgeBuilder
{
using ValueType = typename TMaxValueCalc::ValueType;
// Functor that turns a raw value buffer into a ValueType.
TMaxValueCalc m_maxCalc;
// Current maximum; value-initialized by the first constructor.
ValueType m_value;
explicit MaxValueEdgeBuilder(TMaxValueCalc const & maxCalc = TMaxValueCalc())
: m_maxCalc(maxCalc), m_value()
{
}
// Member-wise copy of the calculator and the current maximum.
MaxValueEdgeBuilder(MaxValueEdgeBuilder<TMaxValueCalc> const & edgeBuilder)
: m_maxCalc(edgeBuilder.m_maxCalc), m_value(edgeBuilder.m_value)
{
}
// Evaluates m_maxCalc on the buffer [p, p + size) and raises m_value to the
// result if the result is greater.
void AddValue(void const * p, uint32_t size)
{
ValueType value = m_maxCalc(p, size);
if (m_value < value)
m_value = value;
}
// Raises m_value to |edgeBuilder|'s maximum if that one is greater.
// |edgeBuilder| is only read, although it is taken by non-const reference.
void AddEdge(MaxValueEdgeBuilder & edgeBuilder)
{
if (m_value < edgeBuilder.m_value)
m_value = edgeBuilder.m_value;
}
// Writes the sizeof(m_value) in-memory bytes of m_value to |sink|.
template <typename TSink>
void StoreValue(TSink & sink) const
{
sink.Write(&m_value, sizeof(m_value));
}
};
template <typename TSink, typename TIter, typename TEdgeBuilder, typename TValueList>
void Build(TSink & sink, TIter const beg, TIter const end, TEdgeBuilder const & edgeBuilder)
template <typename TSink, typename TIter, typename TValueList>
void Build(TSink & sink, TIter const beg, TIter const end)
{
using TTrieString = buffer_vector<TrieChar, 32>;
using TNodeInfo = NodeInfo<TEdgeBuilder, TValueList>;
using TNodeInfo = NodeInfo<TValueList>;
buffer_vector<TNodeInfo, 32> nodes;
nodes.push_back(TNodeInfo(sink.Pos(), DEFAULT_CHAR, edgeBuilder));
nodes.push_back(TNodeInfo(sink.Pos(), DEFAULT_CHAR));
TTrieString prevKey;
@ -264,11 +200,9 @@ void Build(TSink & sink, TIter const beg, TIter const end, TEdgeBuilder const &
uint64_t const pos = sink.Pos();
for (size_t i = nCommon; i < key.size(); ++i)
nodes.push_back(TNodeInfo(pos, key[i], edgeBuilder));
nodes.push_back(TNodeInfo(pos, key[i]));
nodes.back().m_valueList.Append(e.GetValue());
nodes.back().m_edgeBuilder.AddValue(e.value_data(), e.value_size());
prevKey.swap(key);
prevE.Swap(e);
}

View file

@ -2,18 +2,18 @@
#include "coding/trie.hpp"
#include "coding/reader.hpp"
#include "coding/varint.hpp"
#include "base/assert.hpp"
#include "base/bits.hpp"
#include "base/macros.hpp"
namespace trie
{
template <class ValueReaderT, typename EdgeValueT>
class LeafIterator0 : public Iterator<typename ValueReaderT::ValueType, EdgeValueT>
template <class ValueReaderT>
class LeafIterator0 : public Iterator<typename ValueReaderT::ValueType>
{
public:
typedef typename ValueReaderT::ValueType ValueType;
typedef EdgeValueT EdgeValueType;
template <class ReaderT>
LeafIterator0(ReaderT const & reader, ValueReaderT const & valueReader)
@ -32,12 +32,9 @@ public:
ASSERT_EQUAL(size, src.Pos(), ());
}
Iterator<ValueType, EdgeValueType> * Clone() const
{
return new LeafIterator0<ValueReaderT, EdgeValueT>(*this);
}
Iterator<ValueType> * Clone() const { return new LeafIterator0<ValueReaderT>(*this); }
Iterator<ValueType, EdgeValueType> * GoToEdge(size_t i) const
Iterator<ValueType> * GoToEdge(size_t i) const
{
ASSERT(false, (i));
UNUSED_VALUE(i);
@ -45,45 +42,36 @@ public:
}
};
// Primary template of the base class that Iterator0 derives from.
// It only adds the constant IS_READER_IN_MEMORY, which is 0 here.
// NOTE(review): the only visible use of IS_READER_IN_MEMORY is inside a
// commented-out branch of Iterator0::GoToEdge.
// NOTE(review): this listing interleaves two revisions; the lines that mention
// EdgeValueReaderT appear to be the superseded form.
template <class ReaderT, class ValueReaderT, class EdgeValueReaderT>
class IteratorImplBase :
public Iterator<typename ValueReaderT::ValueType, typename EdgeValueReaderT::ValueType>
template <class ReaderT, class ValueReaderT>
class IteratorImplBase : public Iterator<typename ValueReaderT::ValueType>
{
protected:
enum { IS_READER_IN_MEMORY = 0 };
};
// Partial specialization of IteratorImplBase for SharedMemReader.
// Identical to the primary template except that IS_READER_IN_MEMORY is 1.
// NOTE(review): this listing interleaves two revisions; the lines that mention
// EdgeValueReaderT appear to be the superseded form.
template <class ValueReaderT, class EdgeValueReaderT>
class IteratorImplBase<SharedMemReader, ValueReaderT, EdgeValueReaderT> :
public Iterator<typename ValueReaderT::ValueType, typename EdgeValueReaderT::ValueType>
template <class ValueReaderT>
class IteratorImplBase<SharedMemReader, ValueReaderT>
: public Iterator<typename ValueReaderT::ValueType>
{
protected:
enum { IS_READER_IN_MEMORY = 1 };
};
template <class ReaderT, class ValueReaderT, class EdgeValueReaderT>
class Iterator0 : public IteratorImplBase<ReaderT, ValueReaderT, EdgeValueReaderT>
template <class ReaderT, class ValueReaderT>
class Iterator0 : public IteratorImplBase<ReaderT, ValueReaderT>
{
public:
typedef typename ValueReaderT::ValueType ValueType;
typedef typename EdgeValueReaderT::ValueType EdgeValueType;
Iterator0(ReaderT const & reader,
ValueReaderT const & valueReader,
EdgeValueReaderT const & edgeValueReader,
TrieChar baseChar)
: m_reader(reader), m_valueReader(valueReader), m_edgeValueReader(edgeValueReader)
Iterator0(ReaderT const & reader, ValueReaderT const & valueReader, TrieChar baseChar)
: m_reader(reader), m_valueReader(valueReader)
{
ParseNode(baseChar);
}
Iterator<ValueType, EdgeValueType> * Clone() const
{
return new Iterator0<ReaderT, ValueReaderT, EdgeValueReaderT>(*this);
}
Iterator<ValueType> * Clone() const { return new Iterator0<ReaderT, ValueReaderT>(*this); }
Iterator<ValueType, EdgeValueType> * GoToEdge(size_t i) const
Iterator<ValueType> * GoToEdge(size_t i) const
{
ASSERT_LESS(i, this->m_edge.size(), ());
uint32_t const offset = m_edgeInfo[i].m_offset;
@ -91,29 +79,27 @@ public:
// TODO: Profile to check that MemReader optimization helps?
/*
if (!IteratorImplBase<ReaderT, ValueReaderT, EdgeValueReaderT>::IS_READER_IN_MEMORY &&
if (!IteratorImplBase<ReaderT, ValueReaderT>::IS_READER_IN_MEMORY &&
size < 1024)
{
SharedMemReader memReader(size);
m_reader.Read(offset, memReader.Data(), size);
if (m_edgeInfo[i].m_isLeaf)
return new LeafIterator0<SharedMemReader, ValueReaderT, EdgeValueType>(
return new LeafIterator0<SharedMemReader, ValueReaderT>(
memReader, m_valueReader);
else
return new Iterator0<SharedMemReader, ValueReaderT, EdgeValueReaderT>(
memReader, m_valueReader, m_edgeValueReader,
return new Iterator0<SharedMemReader, ValueReaderT>(
memReader, m_valueReader,
this->m_edge[i].m_str.back());
}
else
*/
{
if (m_edgeInfo[i].m_isLeaf)
return new LeafIterator0<ValueReaderT, EdgeValueType>(
m_reader.SubReader(offset, size), m_valueReader);
return new LeafIterator0<ValueReaderT>(m_reader.SubReader(offset, size), m_valueReader);
else
return new Iterator0<ReaderT, ValueReaderT, EdgeValueReaderT>(
m_reader.SubReader(offset, size), m_valueReader, m_edgeValueReader,
this->m_edge[i].m_str.back());
return new Iterator0<ReaderT, ValueReaderT>(m_reader.SubReader(offset, size), m_valueReader,
this->m_edge[i].m_str.back());
}
}
@ -146,7 +132,7 @@ private:
m_edgeInfo[0].m_offset = 0;
for (uint32_t i = 0; i < childCount; ++i)
{
typename Iterator<ValueType, EdgeValueType>::Edge & e = this->m_edge[i];
typename Iterator<ValueType>::Edge & e = this->m_edge[i];
// [1: header]: [1: isLeaf] [1: isShortEdge] [6: (edgeChar0 - baseChar) or min(edgeLen-1, 63)]
uint8_t const header = ReadPrimitiveFromSource<uint8_t>(src);
@ -167,9 +153,6 @@ private:
e.m_str.push_back(baseChar += ReadVarInt<int32_t>(src));
}
// [edge value]
m_edgeValueReader(src, e.m_value);
// [child size]: if the child is not the last one
m_edgeInfo[i + 1].m_offset = m_edgeInfo[i].m_offset;
if (i != childCount - 1)
@ -194,18 +177,14 @@ private:
ReaderT m_reader;
ValueReaderT m_valueReader;
EdgeValueReaderT m_edgeValueReader;
};
// Returns an iterator to the root of the trie stored in |reader|.
// The iterator is an Iterator0 constructed with DEFAULT_CHAR as its base
// character. It is allocated with new and returned as a raw pointer; the
// visible callers wrap the result in a unique_ptr.
// NOTE(review): this listing interleaves two revisions; the lines that mention
// EdgeValueReaderT / edgeValueReader appear to be the superseded form.
template <class ReaderT, class ValueReaderT, class EdgeValueReaderT>
Iterator<typename ValueReaderT::ValueType, typename EdgeValueReaderT::ValueType> *
ReadTrie(ReaderT const & reader,
ValueReaderT valueReader = ValueReaderT(),
EdgeValueReaderT edgeValueReader = EdgeValueReaderT())
template <class ReaderT, class ValueReaderT>
Iterator<typename ValueReaderT::ValueType> * ReadTrie(ReaderT const & reader,
ValueReaderT valueReader = ValueReaderT())
{
return new Iterator0<ReaderT, ValueReaderT, EdgeValueReaderT>(
reader, valueReader, edgeValueReader, DEFAULT_CHAR);
return new Iterator0<ReaderT, ValueReaderT>(reader, valueReader, DEFAULT_CHAR);
}
} // namespace trie

View file

@ -201,8 +201,7 @@ namespace feature
serial::CodingParams cp(trie::GetCodingParams(header.GetDefCodingParams()));
unique_ptr<trie::DefaultIterator> const pTrieRoot(
trie::ReadTrie(container.GetReader(SEARCH_INDEX_FILE_TAG), trie::ValueReader(cp),
trie::TEdgeValueReader()));
trie::ReadTrie(container.GetReader(SEARCH_INDEX_FILE_TAG), trie::ValueReader(cp)));
SearchTokensCollector f;
trie::ForEachRef(*pTrieRoot, f, strings::UniString());

View file

@ -301,10 +301,9 @@ void BuildSearchIndex(FilesContainerR const & cont, CategoriesHolder const & cat
names.EndAdding();
names.OpenForRead();
trie::Build<Writer, typename StringsFile<SerializedFeatureInfoValue>::IteratorT,
trie::EmptyEdgeBuilder, ValueList<SerializedFeatureInfoValue>>(
writer, names.Begin(), names.End(), trie::EmptyEdgeBuilder());
ValueList<SerializedFeatureInfoValue>>(writer, names.Begin(), names.End());
// at this point all readers of StringsFile should be dead
}
@ -420,9 +419,8 @@ void BuildCompressedSearchIndex(FilesContainerR & container, Writer & indexWrite
LOG(LINFO, ("End sorting strings:", timer.ElapsedSeconds()));
stringsFile.OpenForRead();
trie::Build<Writer, typename StringsFile<FeatureIndexValue>::IteratorT, trie::EmptyEdgeBuilder,
ValueList<FeatureIndexValue>>(indexWriter, stringsFile.Begin(), stringsFile.End(),
trie::EmptyEdgeBuilder());
trie::Build<Writer, typename StringsFile<FeatureIndexValue>::IteratorT,
ValueList<FeatureIndexValue>>(indexWriter, stringsFile.Begin(), stringsFile.End());
LOG(LINFO, ("End building compressed search index, elapsed seconds:", timer.ElapsedSeconds()));
}

View file

@ -48,9 +48,7 @@ public:
}
};
// Iterator type that callers use to hold the result of trie::ReadTrie when it
// is invoked with a trie::ValueReader (see the SEARCH_INDEX_FILE_TAG readers).
// NOTE(review): this listing interleaves two revisions; the TEdgeValueReader
// alias and the two-argument DefaultIterator appear to be the superseded form.
using TEdgeValueReader = EmptyValueReader;
using DefaultIterator =
trie::Iterator<trie::ValueReader::ValueType, trie::TEdgeValueReader::ValueType>;
using DefaultIterator = trie::Iterator<trie::ValueReader::ValueType>;
inline serial::CodingParams GetCodingParams(serial::CodingParams const & orig)
{

View file

@ -49,9 +49,8 @@ void RetrieveAddressFeatures(MwmSet::MwmHandle const & handle, SearchQueryParams
ASSERT(value, ());
serial::CodingParams codingParams(trie::GetCodingParams(value->GetHeader().GetDefCodingParams()));
ModelReaderPtr searchReader = value->m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
unique_ptr<trie::DefaultIterator> const trieRoot(
trie::ReadTrie(SubReaderWrapper<Reader>(searchReader.GetPtr()),
trie::ValueReader(codingParams), trie::TEdgeValueReader()));
unique_ptr<trie::DefaultIterator> const trieRoot(trie::ReadTrie(
SubReaderWrapper<Reader>(searchReader.GetPtr()), trie::ValueReader(codingParams)));
MatchFeaturesInTrie(params, *trieRoot, EmptyFilter(), forward<ToDo>(toDo));
}

View file

@ -1566,8 +1566,7 @@ void Query::SearchLocality(MwmValue const * pMwm, Locality & res1, Region & res2
ModelReaderPtr searchReader = pMwm->m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
unique_ptr<trie::DefaultIterator> const trieRoot(
trie::ReadTrie(SubReaderWrapper<Reader>(searchReader.GetPtr()), trie::ValueReader(cp),
trie::TEdgeValueReader()));
trie::ReadTrie(SubReaderWrapper<Reader>(searchReader.GetPtr()), trie::ValueReader(cp)));
ForEachLangPrefix(params, *trieRoot, [&](TrieRootPrefix & langRoot, int8_t lang)
{