Trie: add support for building edge values.

This commit is contained in:
Yury Melnichek 2011-08-10 10:28:59 +02:00 committed by Alex Zolotarev
parent 786dd9af11
commit a0298e0ea1
2 changed files with 85 additions and 13 deletions

View file

@ -82,6 +82,20 @@ struct KeyValuePairBackInserter
}
};
struct MaxValueCalc
{
typedef uint8_t ValueType;
ValueType operator() (void const * p, uint32_t size) const
{
ASSERT_EQUAL(size, 4, ());
uint32_t value;
memcpy(&value, p, 4);
ASSERT_LESS(value, 256, ());
return static_cast<uint8_t>(value);
}
};
} // unnamed namespace
#define ZENC bits::ZigZagEncode
@ -168,22 +182,27 @@ UNIT_TEST(TrieBuilder_Build)
vector<uint8_t> serial;
PushBackByteSink<vector<uint8_t> > sink(serial);
trie::Build(sink, v.begin(), v.end());
trie::Build(sink, v.begin(), v.end(), trie::builder::MaxValueEdgeBuilder<MaxValueCalc>());
reverse(serial.begin(), serial.end());
// LOG(LINFO, (serial.size(), vs));
MemReader memReader = MemReader(serial.data(), serial.size());
typedef trie::Iterator<
trie::reader::FixedSizeValueReader<4>::ValueType,
trie::reader::EmptyValueReader::ValueType
trie::reader::FixedSizeValueReader<1>::ValueType
> IteratorType;
scoped_ptr<IteratorType> root(trie::reader::ReadTrie(memReader,
trie::reader::FixedSizeValueReader<4>(),
trie::reader::EmptyValueReader()));
trie::reader::FixedSizeValueReader<1>()));
vector<KeyValuePair> res;
KeyValuePairBackInserter f;
trie::ForEachRef(*root, f, vector<trie::TrieChar>());
sort(f.m_v.begin(), f.m_v.end());
TEST_EQUAL(v, f.m_v, ());
int maxEdgeValue = 0;
for (uint32_t i = 0; i < root->m_edge.size(); ++i)
maxEdgeValue = max(maxEdgeValue, static_cast<int>(root->m_edge[i].m_value.m_data[0]));
TEST_EQUAL(maxEdgeValue, max(max(0, i0), max(i1, i2)), ());
}
}

View file

@ -33,7 +33,7 @@
// ...
// [vi edgeCharN - edgeCharN-1]
// [edge value]
// [child size]: if the child is not the first one (last one when reading)
// [child size]: if the child is not the last one when reading
namespace trie
{
@ -87,7 +87,7 @@ void WriteNode(SinkT & sink, TrieChar baseChar, uint32_t const valueCount,
WriteToSink(sink, header);
WriteVarUint(sink, edgeSize - 1);
}
for (size_t i = 0; i < edgeSize; ++i)
for (uint32_t i = 0; i < edgeSize; ++i)
{
WriteVarInt(sink, int32_t(edge[i] - baseChar));
baseChar = edge[i];
@ -121,7 +121,8 @@ struct ChildInfo
bool m_isLeaf;
uint32_t m_size;
buffer_vector<TrieChar, 8> m_edge;
buffer_vector<uint8_t, 8> m_edgeValue;
typedef buffer_vector<uint8_t, 8> EdgeValueStorageType;
EdgeValueStorageType m_edgeValue;
ChildInfo() {}
ChildInfo(bool isLeaf, uint32_t size, TrieChar c) : m_isLeaf(isLeaf), m_size(size), m_edge(1, c)
@ -136,6 +137,7 @@ struct ChildInfo
uint32_t GetEdgeValueSize() const { return m_edgeValue.size(); }
};
template <class EdgeBuilderT>
struct NodeInfo
{
uint64_t m_begPos;
@ -143,19 +145,22 @@ struct NodeInfo
vector<ChildInfo> m_children;
buffer_vector<uint8_t, 32> m_values;
uint32_t m_valueCount;
EdgeBuilderT m_edgeBuilder;
NodeInfo() : m_valueCount(0) {}
NodeInfo(uint64_t pos, TrieChar trieChar) : m_begPos(pos), m_char(trieChar), m_valueCount(0) {}
NodeInfo(uint64_t pos, TrieChar trieChar, EdgeBuilderT const & edgeBuilder)
: m_begPos(pos), m_char(trieChar), m_valueCount(0), m_edgeBuilder(edgeBuilder) {}
};
template <typename SinkT, class NodesT>
void PopNodes(SinkT & sink, NodesT & nodes, int nodesToPop)
{
typedef typename NodesT::value_type NodeInfoType;
ASSERT_GREATER(nodes.size(), nodesToPop, ());
for (; nodesToPop > 0; --nodesToPop)
{
NodeInfo & node = nodes.back();
NodeInfo & prevNode = nodes[nodes.size() - 2];
NodeInfoType & node = nodes.back();
NodeInfoType & prevNode = nodes[nodes.size() - 2];
if (node.m_valueCount == 0 && node.m_children.size() <= 1)
{
@ -175,17 +180,64 @@ void PopNodes(SinkT & sink, NodesT & nodes, int nodesToPop)
node.m_char));
}
prevNode.m_edgeBuilder.AddEdge(node.m_edgeBuilder);
PushBackByteSink<ChildInfo::EdgeValueStorageType> sink(prevNode.m_children.back().m_edgeValue);
node.m_edgeBuilder.StoreValue(sink);
nodes.pop_back();
}
}
struct EmptyEdgeBuilder
{
typedef unsigned char ValueType;
void AddValue(void const *, uint32_t) {}
void AddEdge(EmptyEdgeBuilder &) {}
template <typename SinkT> void StoreValue(SinkT &) const {}
};
template <typename MaxValueCalcT>
struct MaxValueEdgeBuilder
{
typedef typename MaxValueCalcT::ValueType ValueType;
MaxValueCalcT m_maxCalc;
ValueType m_value;
explicit MaxValueEdgeBuilder(MaxValueCalcT const & maxCalc = MaxValueCalcT())
: m_maxCalc(maxCalc), m_value() {}
MaxValueEdgeBuilder(MaxValueEdgeBuilder<MaxValueCalcT> const & edgeBuilder)
: m_maxCalc(edgeBuilder.m_maxCalc), m_value(edgeBuilder.m_value) {}
void AddValue(void const * p, uint32_t size)
{
ValueType value = m_maxCalc(p, size);
if (m_value < value)
m_value = value;
}
void AddEdge(MaxValueEdgeBuilder & edgeBuilder)
{
if (m_value < edgeBuilder.m_value)
m_value = edgeBuilder.m_value;
}
template <typename SinkT> void StoreValue(SinkT & sink) const
{
sink.Write(&m_value, sizeof(m_value));
}
};
} // namespace builder
template <typename SinkT, typename IterT>
void Build(SinkT & sink, IterT const beg, IterT const end)
template <typename SinkT, typename IterT, class EdgeBuilderT>
void Build(SinkT & sink, IterT const beg, IterT const end, EdgeBuilderT const & edgeBuilder)
{
typedef buffer_vector<TrieChar, 32> TrieString;
buffer_vector<builder::NodeInfo, 32> nodes(1, builder::NodeInfo(sink.Pos(), DEFAULT_CHAR));
buffer_vector<builder::NodeInfo<EdgeBuilderT>, 32> nodes;
nodes.push_back(builder::NodeInfo<EdgeBuilderT>(sink.Pos(), DEFAULT_CHAR, edgeBuilder));
TrieString prevKey;
for (IterT it = beg; it != end; ++it)
{
@ -198,10 +250,11 @@ void Build(SinkT & sink, IterT const beg, IterT const end)
builder::PopNodes(sink, nodes, nodes.size() - nCommon - 1); // Root is also a common node.
uint64_t const pos = sink.Pos();
for (int i = nCommon; i < key.size(); ++i)
nodes.push_back(builder::NodeInfo(pos, key[i]));
nodes.push_back(builder::NodeInfo<EdgeBuilderT>(pos, key[i], edgeBuilder));
uint8_t const * const pValue = static_cast<uint8_t const *>(it->GetValueData());
nodes.back().m_values.insert(nodes.back().m_values.end(), pValue, pValue + it->GetValueSize());
nodes.back().m_valueCount += 1;
nodes.back().m_edgeBuilder.AddValue(pValue, it->GetValueSize());
prevKey.swap(key);
}