From 77e91a25e3b0047aa8a20489fc4a76791f77c6ef Mon Sep 17 00:00:00 2001
From: Yury Melnichek
Date: Thu, 28 Jul 2011 16:17:09 +0200
Subject: [PATCH] trie::builder::Build(), not finished.

---
Review notes (patch commentary, not part of the commit message):
 * PopNodes() is still a stub: its loop never decrements nodesToPop and never
   pops from `nodes`, so it cannot terminate for nodesToPop > 0.
 * Build() is added commented out; its last statement calls
   builder::PopNodes(nodes.size()) with one argument, while PopNodes() takes two.
 * PopNodes() appends each visited node's character to reverseEdge exactly once
   per loop iteration.
 * Template parameter/argument lists are missing from this copy of the patch
   (e.g. "template", "vector serial", "static_cast(") and are left as found.

 coding/coding_tests/trie_test.cpp | 11 +---
 coding/trie_builder.hpp           | 88 ++++++++++++++++++++++++++-----
 2 files changed, 75 insertions(+), 24 deletions(-)

diff --git a/coding/coding_tests/trie_test.cpp b/coding/coding_tests/trie_test.cpp
index 9b1d2bf9d9..81db627517 100644
--- a/coding/coding_tests/trie_test.cpp
+++ b/coding/coding_tests/trie_test.cpp
@@ -9,14 +9,6 @@
 namespace
 {
 
-struct ValueWriter
-{
-  template void Write(SinkT & sink, ValueT x)
-  {
-    WriteToSink(sink, x);
-  }
-};
-
 struct ChildNodeInfo
 {
   bool m_isLeaf;
@@ -35,7 +27,6 @@ UNIT_TEST(TrieBuilder_WriteNode_Smoke)
 {
   vector serial;
   PushBackByteSink > sink(serial);
-  string const values = "123";
   ChildNodeInfo children[] =
   {
     {true, 1, "1A"},
@@ -43,7 +34,7 @@ UNIT_TEST(TrieBuilder_WriteNode_Smoke)
     {false, 3, "zz"},
     {true, 4, "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"}
   };
-  trie::builder::WriteNode(sink, ValueWriter(), 0, values.begin(), values.end(),
+  trie::builder::WriteNode(sink, 0, 3, "123", 3,
                            &children[0], &children[0] + ARRAY_SIZE(children));
   unsigned char const expected [] =
   {
diff --git a/coding/trie_builder.hpp b/coding/trie_builder.hpp
index 9a0e6125d9..633eb34d9d 100644
--- a/coding/trie_builder.hpp
+++ b/coding/trie_builder.hpp
@@ -8,19 +8,17 @@ namespace trie
 namespace builder
 {
 
-template
-void WriteLeaf(SinkT & sink, ValueWriter valueWriter, ValueIterT begValue, ValueIterT endValue)
+template
+void WriteNode(SinkT & sink, strings::UniChar baseChar,
+               uint32_t const valueCount, void const * valuesData, uint32_t const valuesSize,
+               ChildIterT const begChild, ChildIterT const endChild)
 {
-  for (ValueIterT it = begValue; it != endValue; ++it)
-    valueWriter.Write(sink, *it);
-}
-
-template
-void WriteNode(SinkT & sink, ValueWriter valueWriter, strings::UniChar baseChar,
-               ValueIterT begValue, ValueIterT endValue,
-               ChildIterT begChild, ChildIterT endChild)
-{
-  uint32_t const valueCount = endValue - begValue;
+  if (begChild == endChild)
+  {
+    // Leaf node.
+    sink.Write(valuesData, valuesSize);
+    return;
+  }
   uint32_t const childCount = endChild - begChild;
   uint8_t const header = static_cast((min(valueCount, 3U) << 6) + min(childCount, 63U));
   sink.Write(&header, 1);
@@ -28,8 +26,7 @@ void WriteNode(SinkT & sink, ValueWriter valueWriter, strings::UniChar baseChar,
     WriteVarUint(sink, valueCount);
   if (childCount >= 63)
     WriteVarUint(sink, childCount);
-  for (ValueIterT it = begValue; it != endValue; ++it)
-    valueWriter.Write(sink, *it);
+  sink.Write(valuesData, valuesSize);
   for (ChildIterT it = begChild; it != endChild; ++it)
   {
     WriteVarUint(sink, it->Size());
@@ -66,6 +63,69 @@ void WriteNode(SinkT & sink, ValueWriter valueWriter, strings::UniChar baseChar,
   }
 }
 
+struct ChildInfo
+{
+  bool m_isLeaf;
+  uint32_t m_size;
+  char const * m_edge;
+  uint32_t Size() const { return m_size; }
+  bool IsLeaf() const { return m_isLeaf; }
+  strings::UniString GetEdge() const { return strings::MakeUniString(m_edge); }
+};
+
+struct NodeInfo
+{
+  NodeInfo(uint64_t pos, strings::UniChar uniChar) : m_begPos(pos), m_char(uniChar) {}
+  uint64_t m_begPos;
+  strings::UniChar m_char;
+  buffer_vector m_children;
+  buffer_vector m_values;
+};
+
+void PopNodes(vector & nodes, int nodesToPop)
+{
+  if (nodesToPop == 0)
+    return;
+  ASSERT_GREATER_OR_EQUAL(nodes.size(), nodesToPop, ());
+  strings::UniString reverseEdge;
+  while (nodesToPop > 0)
+  {
+    reverseEdge.push_back(nodes.back().m_char);
+    if (nodes.back().m_values.empty() && nodes.back().m_children.size() <= 1)
+    {
+      ASSERT_EQUAL(nodes.back().m_children.size(), 1, ());
+      continue;
+    }
+
+  }
+}
 } // namespace builder
 
+
+/*
+template
+void Build(SinkT & sink, IterT const beg, IterT const end)
+{
+  vector nodes;
+  strings::UniString prevKey;
+  for (IterT it = beg; it != end; ++it)
+  {
+    strings::UniString const key = it->Key();
+    CHECK(!key.empty(), ());
+    CHECK_LESS_OR_EQUAL(prevKey, key, ());
+    int nCommon = 0;
+    while (nCommon < min(key.size(),prevKey.size()) && prevKey[nCommon] == key[nCommon])
+      ++nCommon;
+    builder::PopNodes(nodes, nodes.size() - nCommon);
+    uint64_t const pos = sink.Pos();
+    for (int i = nCommon; i < key.size(); ++i)
+      nodes.push_back(NodeInfo(pos, key[i]));
+    uint8_t const * pValue = static_cast(it->ValueData());
+    nodes.back().m_values.insert(nodes.back().m_values.end(), pValue, pValue + it->ValueSize());
+    prevKey.swap(key);
+  }
+  builder::PopNodes(nodes.size());
+}
+*/
+
 } // namespace trie