Remove old mm_* and dd_* code from coding. Now only dd_vector.hpp is used.

This commit is contained in:
Yury Melnichek 2011-09-18 23:16:48 +02:00 committed by Alex Zolotarev
parent 751ac71b07
commit 6db20eaea2
16 changed files with 0 additions and 1400 deletions

View file

@ -1,52 +0,0 @@
#pragma once
#include "varint.hpp"
#include "write_to_sink.hpp"
#include "../base/base.hpp"
#include "../std/iterator.hpp"
// Packs a sequence of boolean-like values into TWord-sized words and writes them to a sink.
//   TWord - word type, uint32_t or uint64_t.
//   TSink - where to write.
//   TIter - iterator to bool.
// bWriteSize - when true, the element count is written first as a varuint.
// size       - element count; size_t(-1) (the default) means "measure distance(beg, end)".
// Bit k of every word holds the k-th flag of that word; a trailing partial word is
// written with its unused high bits left at zero.
template <typename TWord, typename TSink, typename TIter>
void BuildMMBitVector(TSink & sink, TIter beg, TIter end, bool bWriteSize = true, size_t size = -1)
{
  if (size == size_t(-1))
    size = distance(beg, end);
  // The count has to survive a round trip through TWord.
  CHECK(static_cast<TWord>(size) == size, ("Vector is more than word size.", size));
  if (bWriteSize)
    WriteVarUint(sink, size);

  TWord pending = 0;    // Word currently being assembled.
  int usedBits = 0;     // Number of flags already stored in |pending|.
  while (beg != end)
  {
    if (*beg)
      pending |= (TWord(1) << usedBits);
    ++usedBits;
    if (usedBits == 8 * sizeof(TWord))
    {
      // Word is full: flush it and start over.
      WriteToSink(sink, pending);
      pending = 0;
      usedBits = 0;
    }
    ++beg;
  }
  // Flush whatever is left in a partially filled last word.
  if (usedBits != 0)
    WriteToSink(sink, pending);
}
// Writes a rank directory for a sequence of boolean-like values.
//   TSink - where to write.
//   TIter - iterator to bool.
// Output: logChunkSize as a varuint, then uint32_t running totals of set values. A total is
// emitted each time the element index reaches a non-zero multiple of 2^(logChunkSize + 5),
// and the grand total is always emitted last.
// NOTE(review): for an empty range the grand total (0) is still written; confirm that readers
// of this format expect a counter in that case.
// TODO: optimize logChunkSize default value.
template <typename TSink, typename TIter>
void BuildMMBitVector32RankDirectory(TSink & sink, TIter beg, TIter end, uint32_t logChunkSize = 5)
{
  WriteVarUint(sink, logChunkSize);
  // Low-bit mask selecting the position inside a chunk. It is built from an unsigned 1 so a
  // 31-bit shift cannot overflow a signed int, and it is computed once instead of per element.
  uint32_t const inChunkMask = (uint32_t(1) << (logChunkSize + 5)) - 1;
  uint32_t rank1 = 0;
  for (uint32_t i = 0; beg != end; ++beg, ++i)
  {
    // A chunk boundary was just crossed: record the number of set values seen before it.
    if (i != 0 && (i & inChunkMask) == 0)
      WriteToSink(sink, rank1);
    if (*beg)
      ++rank1;
  }
  WriteToSink(sink, rank1);
}

View file

@ -34,27 +34,15 @@ HEADERS += \
internal/file64_api.hpp \
parse_xml.hpp \
varint.hpp \
mm_vector.hpp \
mm_bit_vector.hpp \
mm_base.hpp \
endianness.hpp \
byte_stream.hpp \
var_serial_vector.hpp \
hex.hpp \
mm_compact_trie.hpp \
mm_compact_tree.hpp \
compact_trie_builder.hpp \
compact_tree_builder.hpp \
bit_vector_builder.hpp \
dd_vector.hpp \
dd_bit_vector.hpp \
dd_base.hpp \
writer.hpp \
write_to_sink.hpp \
reader.hpp \
dd_bit_rank_directory.hpp \
dd_compact_tree.hpp \
dd_compact_trie.hpp \
diff.hpp \
diff_patch_common.hpp \
source.hpp \

View file

@ -12,8 +12,6 @@ include($$ROOT_DIR/common.pri)
SOURCES += ../../testing/testingmain.cpp \
endianness_test.cpp \
varint_test.cpp \
mm_bit_vector_test.cpp \
mm_compact_trie_test.cpp \
mem_file_reader_test.cpp \
mem_file_writer_test.cpp \
var_serial_vector_test.cpp \

View file

@ -1,177 +0,0 @@
#include "../../testing/testing.hpp"
#include "../dd_vector.hpp"
#include "../dd_bit_vector.hpp"
#include "../dd_bit_rank_directory.hpp"
#include "../bit_vector_builder.hpp"
#include "../byte_stream.hpp"
#include "../reader.hpp"
#include "../../base/base.hpp"
#include "../../base/macros.hpp"
#include "../../std/cstdlib.hpp"
#include "../../base/start_mem_debug.hpp"
namespace
{
template <typename TWord>
void TestBitVector(unsigned char const * bits, size_t N)
{
typedef PushBackByteSink<vector<char> > SinkType;
vector<char> data;
SinkType sink(data);
BuildMMBitVector<TWord>(sink, &bits[0], &bits[N]);
DDBitVector<DDVector<TWord, MemReader> > bitVectorDD;
MemReader reader(&data[0], data.size());
DDParseInfo<MemReader> info(reader, true);
bitVectorDD.Parse(info);
for (size_t i = 0; i < N; ++i)
TEST_EQUAL(bitVectorDD[i], bits[i] != 0, (i));
}
void TestBitVector32Rank(unsigned char const * bits, size_t N, size_t logChunkSize)
{
typedef PushBackByteSink<vector<char> > SinkType;
vector<char> data;
SinkType sink(data);
BuildMMBitVector<uint32_t>(sink, &bits[0], &bits[N]);
BuildMMBitVector32RankDirectory(sink, &bits[0], &bits[N], logChunkSize);
DDBitRankDirectory<DDBitVector<DDVector<uint32_t, MemReader> > > rankDirectoryDD;
MemReader reader(&data[0], data.size());
{
DDParseInfo<MemReader> info(reader, true);
rankDirectoryDD.Parse(info);
}
uint32_t rank1 = 0;
for (size_t i = 0; i < N; ++i)
{
if (rankDirectoryDD[i])
{
++rank1;
TEST_EQUAL(rankDirectoryDD.BitVector().Select1FromWord(0, rank1), i,
(rank1, N, logChunkSize));
TEST_EQUAL(rankDirectoryDD.Select1(rank1), i, (rank1, N, logChunkSize));
}
TEST_EQUAL(rankDirectoryDD.Rank1(i), rank1, (i, N, logChunkSize));
TEST_EQUAL(rankDirectoryDD.Rank0(i), i + 1 - rank1, (i, N, logChunkSize));
}
}
unsigned char const simpleBits[] = {
1,1,0,1,0,0,0,1,
1,1,0,0,0,0,0,1,
1,1,0,0,1,1,1,0,
0,1,1,1,0,0,1,1,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,1,0,0,1,0,0,
0,0,0,1,0,0,0,1 };
static size_t const simpleBitsSize = ARRAY_SIZE(simpleBits);
}
// Round-trips the fixed simpleBits pattern through a bit vector packed into 8-bit words.
UNIT_TEST(BitVector8Simple)
{
TestBitVector<uint8_t>(simpleBits, simpleBitsSize);
}
// Same pattern, packed into 16-bit words.
UNIT_TEST(BitVector16Simple)
{
TestBitVector<uint16_t>(simpleBits, simpleBitsSize);
}
// Same pattern, packed into 32-bit words.
UNIT_TEST(BitVector32Simple)
{
TestBitVector<uint32_t>(simpleBits, simpleBitsSize);
}
// Same pattern, packed into 64-bit words (the whole pattern fits in exactly one word).
UNIT_TEST(BitVector64Simple)
{
TestBitVector<uint64_t>(simpleBits, simpleBitsSize);
}
// Runs the rank-directory round trip on simpleBits for every chunk-size exponent 1 through 7.
UNIT_TEST(BitVector32RankSimple)
{
  size_t logChunkSize = 1;
  while (logChunkSize <= 7)
  {
    TestBitVector32Rank(simpleBits, simpleBitsSize, logChunkSize);
    ++logChunkSize;
  }
}
// 129 cleared flags with chunk-size exponent 2: the builder's chunk boundary falls at
// 2^(2 + 5) = 128 elements, so the input is exactly one element longer than a chunk.
UNIT_TEST(BitVector32RankChunkSizePlus1All0)
{
  size_t const flagCount = 129;
  vector<unsigned char> bits(flagCount, 0);
  unsigned char const * const first = &bits[0];
  TestBitVector<uint32_t>(first, bits.size());
  TestBitVector32Rank(first, bits.size(), 2);
}
// 129 set flags with chunk-size exponent 2: the builder's chunk boundary falls at
// 2^(2 + 5) = 128 elements, so the input is exactly one element longer than a chunk.
UNIT_TEST(BitVector32RankChunkSizePlus1All1)
{
  size_t const flagCount = 129;
  vector<unsigned char> bits(flagCount, 1);
  unsigned char const * const first = &bits[0];
  TestBitVector<uint32_t>(first, bits.size());
  TestBitVector32Rank(first, bits.size(), 2);
}
// Round-trips an empty bit vector (null data pointer, zero length) for every word width.
UNIT_TEST(BitVector32Empty)
{
  unsigned char const * const noBits = NULL;
  size_t const noLength = 0;
  TestBitVector<uint8_t>(noBits, noLength);
  TestBitVector<uint16_t>(noBits, noLength);
  TestBitVector<uint32_t>(noBits, noLength);
  TestBitVector<uint64_t>(noBits, noLength);
  // TODO: When uncommented, there is an error:
  // malloc: *** error for object 0x12c2e: pointer being freed was not allocated
  // TestBitVector32Rank(NULL, 0, 2);
}
// For every length from 1 to 150 fills a vector with rand()-derived flags and round-trips it
// as a plain bit vector and as a ranked one with chunk-size exponents 1, 2 and 3.
UNIT_TEST(BitVector32RankRandom)
{
  // TODO: When l == 0, there is an error:
  // malloc: *** error for object 0x12c2e: pointer being freed was not allocated
  // *** set a breakpoint in malloc_error_break to debug
  size_t const maxLength = 150;
  for (size_t l = 1; l <= maxLength; ++l)
  {
    vector<unsigned char> bits(l);
    for (vector<unsigned char>::iterator flag = bits.begin(); flag != bits.end(); ++flag)
      *flag = (rand() & 1);

    unsigned char * p = NULL;
    if (!bits.empty())
      p = &bits[0];

    TestBitVector<uint32_t>(p, bits.size());
    for (size_t logChunkSize = 1; logChunkSize <= 3; ++logChunkSize)
      TestBitVector32Rank(p, bits.size(), logChunkSize);
  }
}
// Random round trips for lengths within two elements of each listed power of two, to
// exercise word and chunk boundaries.
// NOTE(review): 512 is absent from the list - confirm whether that is intentional.
UNIT_TEST(BitVector32RankRandomPow2)
{
  uint32_t values[] = {16, 32, 64, 128, 256, 1024, 2048, 4096};
  size_t const valueCount = ARRAY_SIZE(values);
  for (size_t j = 0; j < valueCount; ++j)
  {
    size_t const shortest = values[j] - 2;
    size_t const longest = values[j] + 2;
    for (size_t l = shortest; l <= longest; ++l)
    {
      vector<unsigned char> bits(l);
      for (vector<unsigned char>::iterator flag = bits.begin(); flag != bits.end(); ++flag)
        *flag = (rand() & 1);

      unsigned char * const first = &bits[0];
      TestBitVector<uint32_t>(first, bits.size());
      for (size_t logChunkSize = 1; logChunkSize <= 3; ++logChunkSize)
        TestBitVector32Rank(first, bits.size(), logChunkSize);
    }
  }
}
// Random round trip for a single 9000-element vector, with chunk-size exponents 1, 2 and 3.
UNIT_TEST(BitVector32RankRandomLargeVector)
{
  size_t const l = 9000;
  vector<unsigned char> bits(l);
  for (vector<unsigned char>::iterator flag = bits.begin(); flag != bits.end(); ++flag)
    *flag = (rand() & 1);

  unsigned char * p = &bits[0];
  TestBitVector<uint32_t>(p, bits.size());
  for (size_t logChunkSize = 1; logChunkSize <= 3; ++logChunkSize)
    TestBitVector32Rank(p, bits.size(), logChunkSize);
}
// TODO: Test large bitVector.
// TODO: Test max size bitVector.
// TODO: Test wrong data size for BitVector and BitVector32RankDirectory.
#include "../../base/stop_mem_debug.hpp"

View file

@ -1,97 +0,0 @@
#include "../../testing/testing.hpp"
#include "../byte_stream.hpp"
#include "../compact_trie_builder.hpp"
#include "../dd_compact_trie.hpp"
#include "../reader.hpp"
#include "../../base/base.hpp"
#include "../../base/macros.hpp"
#include "../../std/string.hpp"
#include "../../std/vector.hpp"
//namespace
//{
// Test-only accessor: DDCompactTrie names this class as a friend, which lets the static
// helpers below read the trie's protected members.
class DDCompactTrieTester
{
public:
  // Bit vector underneath the trie's is-parent rank directory.
  template <class TChar, class TBitRankDirectory>
  static typename TBitRankDirectory::BitVectorType const &
  IsParentVector(DDCompactTrie<TChar, TBitRankDirectory> const & trie)
  {
    TBitRankDirectory const & directory = trie.m_IsParent;
    return directory.BitVector();
  }

  // Bit vector underneath the trie's is-first-child rank directory.
  template <class TChar, class TBitRankDirectory>
  static typename TBitRankDirectory::BitVectorType const &
  IsFirstChildVector(DDCompactTrie<TChar, TBitRankDirectory> const & trie)
  {
    TBitRankDirectory const & directory = trie.m_IsFirstChild;
    return directory.BitVector();
  }

  // Value of the trie's m_ParentsWithDataCount member, converted to int.
  template <class TChar, class TBitRankDirectory>
  static int ParentsWithDataCount(
      DDCompactTrie<TChar, TBitRankDirectory> const & trie)
  {
    int const count = trie.m_ParentsWithDataCount;
    return count;
  }
};
//}
// Builds a compact trie from six sorted words, parses it back and compares the per-node
// results (character, is-parent / is-first-child bits, sibling, child, parent, data id)
// with hand-written expectation tables indexed by node id.
UNIT_TEST(CompactTrieSimple)
{
vector<string> words;
words.push_back("hello");
words.push_back("help");
words.push_back("sim");
words.push_back("simple");
words.push_back("world");
words.push_back("z");
typedef PushBackByteSink<vector<char> > SinkType;
vector<char> data;
SinkType sink(data);
BuildMMCompactTrie<char>(sink, words.begin(), words.end());
typedef DDBitVector<DDVector<uint32_t, MemReader> > BitVectorDD;
typedef DDBitRankDirectory<BitVectorDD> RankDirDD;
typedef DDCompactTrie<char, RankDirDD> TrieDD;
TrieDD trie;
MemReader reader(&data[0], data.size());
{
DDParseInfo<MemReader> info(reader, true);
trie.Parse(info);
}
// Expected trie, one row per depth; the number in parentheses is the node id.
// 0 (0)
// 1 h(1) s(2) w(3) z(4)
// 2 e(5) i(6) o(7)
// 3 l(8) m(9) r(10)
// 4 l(11) p(12) p(13) l(14)
// 5 o(15) l(16) d(17)
// 6 e(18)
TEST_EQUAL(trie.NodesWithData(), words.size(), ());
TEST_EQUAL(trie.Root(), 0U, ());
TEST_EQUAL(DDCompactTrieTester::ParentsWithDataCount(trie), 1, ());
// Expectation tables below are indexed by node id; '$' is a placeholder for the root,
// which has no character.
// 0123456789012345678
char const * chars = "$hswzeiolmrlpplolde";
char const * isParent = "1111011111110110100";
char const * isFChild = "0100011111110111111";
char const * nexts = "0111000000010000000";
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
TrieDD::Id fchild[] = { 1, 5, 6, 7,-1, 8, 9,10,11,13,14,15,-1,16,17,-1,18,-1,-1 };
TrieDD::Id parent[] = { -1, 0, 0, 0, 0, 1, 2, 3, 5, 6, 7, 8, 8, 9,10,11,13,14,16 };
TrieDD::Id dataI[] = { -1,-1,-1,-1, 1,-1,-1,-1,-1, 0,-1,-1, 2,-1,-1, 3,-1, 4, 5 };
// NOTE(review): the tables describe 19 nodes (ids 0..18) but the loop stops at 17, so the
// last node is never checked - confirm whether that is deliberate.
for (size_t i = 0; i <= 17; ++i) {
// wcout << "!! " << i << endl;
// The root (id 0) has no character, so Char() is only checked for the other nodes.
if (i != 0) {
TEST_EQUAL(trie.Char(i), chars[i], (i, string(1, trie.Char(i)), string(1, chars[i])));
}
TEST_EQUAL(DDCompactTrieTester::IsParentVector(trie)[i], isParent[i] == '1', (i));
TEST_EQUAL(DDCompactTrieTester::IsFirstChildVector(trie)[i], isFChild[i] == '1', (i));
TEST_EQUAL(trie.NextSibling(i), nexts[i] == '1' ? i+1 : trie.INVALID_ID, (i));
TEST_EQUAL(trie.FirstChild(i), fchild[i], (i));
TEST_EQUAL(trie.Parent(i), parent[i], (i));
TEST_EQUAL(trie.Data(i), dataI[i], (i));
}
}

View file

@ -1,30 +0,0 @@
#pragma once
#include "writer.hpp"
#include "bit_vector_builder.hpp"
#include "../base/base.hpp"
#include "../base/start_mem_debug.hpp"
#include "../std/iterator.hpp"
// Serializes the two bit sequences that describe a compact tree.
// Written in order: the is-parent bit vector with its element count, its rank directory, the
// is-first-child bit vector WITHOUT a count, and its rank directory. The is-parent element
// count is passed as the size of both vectors.
template <typename TSink, typename TIter>
void BuildCompactTree(TSink & sink,
                      TIter const isParentBeg, TIter const isParentEnd,
                      TIter const isFirstChildBeg, TIter const isFirstChildEnd)
{
  size_t const nodeCount = distance(isParentBeg, isParentEnd);
  bool const withSizePrefix = true;

  BuildMMBitVector<uint32_t>(sink, isParentBeg, isParentEnd, withSizePrefix, nodeCount);
  BuildMMBitVector32RankDirectory(sink, isParentBeg, isParentEnd);

  BuildMMBitVector<uint32_t>(sink, isFirstChildBeg, isFirstChildEnd, !withSizePrefix, nodeCount);
  BuildMMBitVector32RankDirectory(sink, isFirstChildBeg, isFirstChildEnd);
}
// Serializes a compact tree followed by a third bit sequence (parentHasData): that vector is
// written with its own element count and then followed by its rank directory.
template <typename TSink, typename TIter>
void BuildCompactTreeWithData(TSink & sink,
                              TIter const isParentBeg, TIter const isParentEnd,
                              TIter const isFirstChildBeg, TIter const isFirstChildEnd,
                              TIter const parentHasDataBeg, TIter const parentHasDataEnd)
{
  // Tree structure first.
  BuildCompactTree(sink, isParentBeg, isParentEnd, isFirstChildBeg, isFirstChildEnd);

  // Then the data-presence bits and their rank directory.
  bool const withSizePrefix = true;
  BuildMMBitVector<uint32_t>(sink, parentHasDataBeg, parentHasDataEnd, withSizePrefix);
  BuildMMBitVector32RankDirectory(sink, parentHasDataBeg, parentHasDataEnd);
}

View file

@ -1,98 +0,0 @@
#pragma once
#include "compact_tree_builder.hpp"
#include "../base/assert.hpp"
#include "../base/base.hpp"
#include "../std/algorithm.hpp"
#include "../std/vector.hpp"
#include "../base/start_mem_debug.hpp"
// Build compact trie given a sorted sequence on strings.
// If writeChars == false, characters will not be written
template <class TChar, class TSink, class TIter>
void BuildMMCompactTrie(TSink & sink, TIter const beg, TIter const end, bool writeChars = true)
{
size_t maxLen = 0;
size_t size = 0;
for (TIter it = beg; it != end; ++it, ++size)
maxLen = max(maxLen, it->size());
vector<vector<bool> > isParent(maxLen + 1);
vector<vector<bool> > isFirstChild(maxLen + 1);
vector<vector<bool> > hasData(maxLen + 1);
vector<vector<TChar> > chars(maxLen);
isParent[0].push_back(true);
isFirstChild[0].push_back(false);
hasData[0].push_back(false);
TIter prev;
size_t word = 0;
for (TIter it = beg; it != end; ++word, prev = it++)
{
CHECK_NOT_EQUAL(it->size(), 0U, ());
size_t commonLen = 0;
bool nextIsExtensionOfPrev = true;
if (it != beg)
{
while (commonLen < it->size() && commonLen < prev->size() &&
(*it)[commonLen] == (*prev)[commonLen])
++commonLen;
// Next string should be strictly greater than previous.
CHECK(commonLen != it->size(),
(commonLen, it->size(), prev->size(), word));
CHECK(commonLen == prev->size() || (*prev)[commonLen] != (*it)[commonLen],
(commonLen, it->size(), prev->size(), word));
nextIsExtensionOfPrev = (commonLen == prev->size());
}
isParent[commonLen].back() = true;
size_t last = it->size() - 1;
for (size_t i = commonLen; i <= last; ++i)
{
isParent[i+1].push_back(i != last);
isFirstChild[i+1].push_back(i != commonLen || nextIsExtensionOfPrev);
hasData[i+1].push_back(i == last);
if (writeChars)
chars[i].push_back((*it)[i]);
}
}
vector<bool> isParentComined, isFirstChildCombined, parentHasDataCombined;
isParentComined.reserve(size);
isFirstChildCombined.reserve(size);
parentHasDataCombined.reserve(size);
for (vector<vector<bool> >::const_iterator i = isParent.begin(); i != isParent.end(); ++i)
isParentComined.insert(isParentComined.end(), i->begin(), i->end());
for (vector<vector<bool> >::const_iterator i = isFirstChild.begin(); i != isFirstChild.end(); ++i)
isFirstChildCombined.insert(isFirstChildCombined.end(), i->begin(), i->end());
for (size_t i = 0; i < isParent.size(); ++i)
{
for (size_t j = 0; j < isParent[i].size(); ++j)
{
if (isParent[i][j])
{
parentHasDataCombined.push_back(hasData[i][j]);
}
}
}
BuildCompactTreeWithData(sink, isParentComined.begin(), isParentComined.end(),
isFirstChildCombined.begin(), isFirstChildCombined.end(),
parentHasDataCombined.begin(), parentHasDataCombined.end());
size_t numChars = 0;
if (writeChars)
{
for (typename vector<vector<TChar> >::const_iterator i = chars.begin(); i != chars.end(); ++i)
{
for (typename vector<TChar>::const_iterator it = i->begin(); it != i->end(); ++it)
{
WriteToSink(sink, TChar(*it));
++numChars;
}
}
}
size_t padding = (4 - (numChars * sizeof(TChar)) % 4) % 4;
for (size_t i = 0; i < padding; ++i)
{
WriteToSink(sink, uint8_t(0));
}
}
#include "../base/stop_mem_debug.hpp"

View file

@ -1,121 +0,0 @@
#pragma once
#include "dd_base.hpp"
#include "dd_vector.hpp"
#include "endianness.hpp"
#include "varint.hpp"
#include "../base/assert.hpp"
#include "../base/bits.hpp"
#include <boost/iterator/transform_iterator.hpp>
#include "../base/start_mem_debug.hpp"
template <class TBitVector, class SizeT = typename TBitVector::size_type> class DDBitRankDirectory
{
public:
typedef TBitVector BitVectorType;
typedef SizeT size_type;
DDBitRankDirectory()
{
}
template <class ReaderT>
void Parse(DDParseInfo<ReaderT> & info)
{
BitVectorType bitVector;
bitVector.Parse(info);
Parse(info, bitVector);
}
template <class ReaderT>
void Parse(DDParseInfo<ReaderT> & info, BitVectorType const & bitVector)
{
Parse(info, bitVector, ReadVarUint<size_type>(info.Source()));
}
template <class ReaderT>
void Parse(DDParseInfo<ReaderT> &info, BitVectorType const & bitVector, size_type logChunkSize)
{
m_BitVector = bitVector;
m_LogChunkSize = logChunkSize;
size_type const sizeChunks = bits::RoundLastBitsUpAndShiftRight(
m_BitVector.size(), bits::LogBitSizeOfType<size_type>::value + m_LogChunkSize);
if (sizeChunks != 0)
{
m_Chunks = ChunkVectorType(info.Source().SubReader(sizeChunks * sizeof(size_type)),
sizeChunks);
if (SwapIfBigEndian(m_Chunks[m_Chunks.size() - 1]) > m_BitVector.size())
MYTHROW(DDParseException, (m_Chunks.size(),
SwapIfBigEndian(m_Chunks[m_Chunks.size() - 1]),
m_BitVector.size()));
}
}
size_type Rank0(size_type x) const
{
ASSERT_LESS(x, m_BitVector.size(), ());
return x + 1 - Rank1(x);
}
size_type Rank1(size_type x) const
{
ASSERT_LESS(x, m_BitVector.size(), ());
size_type const logBitSize = bits::LogBitSizeOfType<size_type>::value;
size_type const iWord = x >> logBitSize;
size_type const iChunk = iWord >> m_LogChunkSize;
return (iChunk == 0 ? 0 : SwapIfBigEndian(m_Chunks[iChunk - 1])) +
m_BitVector.PopCountWords(iChunk << m_LogChunkSize, iWord) +
bits::popcount(
m_BitVector.WordAt(iWord) &
(size_type(-1) >> ((8 * sizeof(size_type) - 1) - (x - (iWord << logBitSize)))));
}
size_type Select1(size_type i) const
{
ASSERT_GREATER(i, 0, ());
ASSERT_LESS_OR_EQUAL(i, m_Chunks[m_Chunks.size() - 1], ());
// TODO: First try approximate lower and upper bound.
size_type iChunk = lower_bound(
boost::make_transform_iterator(m_Chunks.begin(),
&SwapIfBigEndian<size_type>),
boost::make_transform_iterator(m_Chunks.end(),
&SwapIfBigEndian<size_type>),
i) -
boost::make_transform_iterator(m_Chunks.begin(),
&SwapIfBigEndian<size_type>);
ASSERT_LESS(iChunk, m_Chunks.size(), ());
ASSERT_GREATER_OR_EQUAL(SwapIfBigEndian(m_Chunks[iChunk]), i, ());
ASSERT_LESS((iChunk << m_LogChunkSize), m_BitVector.size_words(), (iChunk, m_LogChunkSize));
return (iChunk << (bits::LogBitSizeOfType<size_type>::value + m_LogChunkSize)) +
m_BitVector.Select1FromWord(
iChunk << m_LogChunkSize,
i - (iChunk == 0 ? 0 : SwapIfBigEndian(m_Chunks[iChunk - 1])));
}
size_type size() const
{
return m_BitVector.size();
}
bool empty() const
{
return size() == 0;
}
bool operator[](size_type i) const
{
return m_BitVector[i];
}
TBitVector const & BitVector() const
{
return m_BitVector;
}
private:
TBitVector m_BitVector;
typedef DDVector<size_type, typename TBitVector::VectorType::ReaderType> ChunkVectorType;
ChunkVectorType m_Chunks;
size_type m_LogChunkSize; // Chunk size in size_types.
};
#include "../base/stop_mem_debug.hpp"

View file

@ -1,103 +0,0 @@
#pragma once
#include "dd_base.hpp"
#include "endianness.hpp"
#include "varint.hpp"
#include "../base/assert.hpp"
#include "../base/base.hpp"
#include "../base/bits.hpp"
#include "../std/type_traits.hpp"
#include "../base/start_mem_debug.hpp"
// Read-only bit vector stored as a vector of words (TWordVector). Bit i lives in word
// i / (bits per word) at bit position i % (bits per word); words pass through SwapIfBigEndian
// whenever bit positions matter.
template <
  class TWordVector,
  typename TSize = typename TWordVector::size_type,
  typename TDifference = typename make_signed<TSize>::type
  > class DDBitVector
{
public:
  typedef TWordVector VectorType;
  typedef typename VectorType::value_type WordType;
  typedef TSize size_type;
  typedef TDifference difference_type;

  // An unparsed vector is empty; without this, size() and empty() would read an
  // indeterminate value.
  DDBitVector() : m_Size(0)
  {
  }

  // Reads the bit count as a varuint, then the words.
  template <class ReaderT>
  void Parse(DDParseInfo<ReaderT> & info)
  {
    Parse(info, ReadVarUint<size_type>(info.Source()));
  }

  // Reads only the words; the bit count is supplied by the caller.
  template <class ReaderT>
  void Parse(DDParseInfo<ReaderT> & info, size_type vectorSize)
  {
    m_Size = vectorSize;
    m_Data = VectorType(info.Source().SubReader(size_words() * sizeof(WordType)), size_words());
  }

  // Value of bit i.
  bool operator[](size_type i) const
  {
    ASSERT(i < size(), (i, size()));
    return 0 !=
        (WordAt(i / 8 / sizeof(WordType)) & (WordType(1) << (i & (8 * sizeof(WordType) - 1))));
  }

  // Number of bits.
  size_type size() const
  {
    return m_Size;
  }

  // Number of words needed to hold size() bits (rounded up).
  size_type size_words() const
  {
    return (m_Size + sizeof(WordType) * 8 - 1) / 8 / sizeof(WordType);
  }

  bool empty() const
  {
    return m_Size == 0;
  }

  // i-th word, passed through SwapIfBigEndian.
  WordType WordAt(size_type i) const
  {
    ASSERT(i < size_words(), (i));
    return SwapIfBigEndian(m_Data[i]);
  }

  // Number of set bits in the words [begWord, endWord).
  size_type PopCountWords(size_type begWord, size_type endWord) const
  {
    if (begWord == endWord)
      return 0;
    ASSERT_LESS(begWord, endWord, ());
    ASSERT_LESS(begWord, size_words(), ());
    ASSERT_LESS_OR_EQUAL(endWord, size_words(), ());
    // popcount doesn't depend on byte order.
    size_type result = 0;
    while (begWord != endWord)
      result += bits::popcount(m_Data[begWord++]);
    return result;
  }

  // Bit offset, relative to the start of word iWord, of the i-th set bit found when scanning
  // forward from that word. The caller must make sure at least i set bits follow.
  size_type Select1FromWord(size_type iWord, size_type i) const
  {
    ASSERT(iWord < size_words(), (iWord, size_words(), i));
    size_type const startWord = iWord;
    size_type wordRank = bits::popcount(m_Data[iWord]);
    while (wordRank < i)
    {
      i -= wordRank;
      ++iWord;
      // Running off the end means the requested rank exceeds the set bits available.
      ASSERT(iWord < size_words(), (iWord, size_words(), i));
      wordRank = bits::popcount(m_Data[iWord]);
    }
    return (iWord - startWord) * sizeof(WordType) * 8 +
           bits::select1(SwapIfBigEndian(m_Data[iWord]), i);
  }

private:
  size_type m_Size;
  VectorType m_Data;
};
#include "../base/stop_mem_debug.hpp"

View file

@ -1,128 +0,0 @@
#pragma once
#include "dd_bit_vector.hpp"
#include "dd_bit_rank_directory.hpp"
#include "../base/base.hpp"
#include "../base/start_mem_debug.hpp"
// Tree navigation over two rank directories of equal length: m_IsParent (node has children)
// and m_IsFirstChild (node is the first child of its parent). Nodes are identified by their
// index in those vectors; the root is node 0.
template <class BitRankDirT>
class DDCompactTree
{
public:
  typedef BitRankDirT BitRankDirType;
  // Node id.
  typedef typename BitRankDirT::size_type Id;
  static Id const INVALID_ID = -1;

  DDCompactTree()
  {
  }

  // Id of the root.
  Id Root() const
  {
    return 0;
  }

  // Parent id and INVALID_ID for root.
  Id Parent(Id id) const
  {
    ASSERT(id != INVALID_ID, ());
    if (id)
      return m_IsParent.Select1(m_IsFirstChild.Rank1(id));
    else
      return INVALID_ID;
  }

  // First child id and INVALID_ID for leaf.
  Id FirstChild(Id id) const
  {
    ASSERT(id != INVALID_ID, ());
    if (m_IsParent[id])
      return m_IsFirstChild.Select1(m_IsParent.Rank1(id));
    else
      return INVALID_ID;
  }

  // Next sibling id and INVALID_ID if there is no next sibling.
  Id NextSibling(Id id) const
  {
    ASSERT(id != INVALID_ID, ());
    // The following node is a sibling unless it starts another parent's child list
    // or does not exist at all.
    if (id + 1 == m_IsFirstChild.size() || m_IsFirstChild[id + 1])
      return INVALID_ID;
    else
      return id + 1;
  }

  // Reads both directories. The is-first-child vector carries no size of its own: it is
  // parsed with the size of the is-parent vector. Throws DDParseException if either is empty.
  template <class ReaderT>
  void Parse(DDParseInfo<ReaderT> & info)
  {
    typedef typename BitRankDirT::BitVectorType BitVectorType;

    {
      BitVectorType parentBits;
      parentBits.Parse(info);
      if (parentBits.size() == 0)
        MYTHROW(DDParseException, ());
      m_IsParent.Parse(info, parentBits);
    }

    {
      // TODO: Don't write logRankChunkSize twice.
      // TODO: Allow logRankChunkSize be explicitly specified.
      BitVectorType firstChildBits;
      firstChildBits.Parse(info, m_IsParent.size());
      if (firstChildBits.size() == 0)
        MYTHROW(DDParseException, ());
      m_IsFirstChild.Parse(info, firstChildBits);
    }
  }

protected:
  friend class MMCompactTreeTester;
  BitRankDirT m_IsParent;
  BitRankDirT m_IsFirstChild;
};
// Compact tree whose nodes may carry data. A third rank directory, m_ParentHasData, has one
// bit per parent node (in parent order) telling whether that parent carries data; every
// non-parent node carries data. Data ids of parents come first, then those of leaves.
template <class BitRankDirT>
class DDCompactTreeWithData : public DDCompactTree<BitRankDirT>
{
public:
  typedef DDCompactTree<BitRankDirT> BaseType;
  typedef BitRankDirT BitRankDirType;
  typedef typename BaseType::Id Id;
  static Id const INVALID_ID = BaseType::INVALID_ID;

  // Both counters start at zero; previously m_ParentsWithDataCount was left uninitialized
  // until Parse() ran.
  DDCompactTreeWithData() : m_ParentsWithDataCount(0), m_NodesWithData(0)
  {
  }

  // Number of nodes with data.
  Id NodesWithData() const
  {
    return m_NodesWithData;
  }

  // Id of the data for a given node id and INVALID_ID if node doesn't have any data.
  Id Data(Id id) const
  {
    ASSERT(id != INVALID_ID, ());
    if (BaseType::m_IsParent[id])
    {
      // Index of this node among the parents, then its rank among the parents with data.
      Id const parentIndex = BaseType::m_IsParent.Rank1(id) - 1;
      return m_ParentHasData[parentIndex] ? m_ParentHasData.Rank1(parentIndex) - 1 : INVALID_ID;
    }
    else
    {
      // Leaves are numbered after all parents with data, in node order.
      return m_ParentsWithDataCount + BaseType::m_IsParent.Rank0(id) - 1;
    }
  }

  // Reads the tree structure, then the parent-has-data directory, and derives both counters.
  template <class ReaderT>
  void Parse(DDParseInfo<ReaderT> & info)
  {
    BaseType::Parse(info);
    // TODO: Pass the vector size here.
    m_ParentHasData.Parse(info);
    m_ParentsWithDataCount =
        m_ParentHasData.empty() ? 0 : m_ParentHasData.Rank1(m_ParentHasData.size() - 1);
    m_NodesWithData = m_ParentsWithDataCount +
                      BaseType::m_IsParent.Rank0(BaseType::m_IsParent.size() - 1);
  }

protected:
  BitRankDirType m_ParentHasData;
  uint32_t m_ParentsWithDataCount;
  size_t m_NodesWithData;
};
#include "../base/stop_mem_debug.hpp"

View file

@ -1,70 +0,0 @@
#pragma once
#include "dd_compact_tree.hpp"
#include "dd_vector.hpp"
#include "../base/assert.hpp"
#include "../base/base.hpp"
#include "../std/string.hpp"
#include "../base/start_mem_debug.hpp"
// Forward declaration of the test accessor that DDCompactTrie names as a friend.
// (The previous declaration was misspelled and matched no class.)
class DDCompactTrieTester;

// Compact tree with data whose non-root nodes each carry one character of type TChar.
template <class TChar, class BitRankDirT>
class DDCompactTrie : public DDCompactTreeWithData<BitRankDirT>
{
public:
  typedef DDCompactTreeWithData<BitRankDirT> BaseType;
  typedef typename BaseType::Id Id;
  static Id const INVALID_ID = BaseType::INVALID_ID;

  DDCompactTrie()
  {
  }

  // Character attached to a non-root node.
  TChar Char(Id id) const
  {
    ASSERT(id != 0, ());
    ASSERT(id != INVALID_ID, ());
    return m_Chars[id - 1]; // There is no char for the root.
  }

  // Reads the tree with data, then one character per non-root node.
  template <class ReaderT>
  void Parse(DDParseInfo<ReaderT> & info)
  {
    BaseType::Parse(info);
    m_Chars =
        CharVectorType(info.Source().SubReader((BaseType::m_IsParent.size() - 1) * sizeof(TChar)),
                       BaseType::m_IsParent.size() - 1);
  }

protected:
  friend class DDCompactTrieTester;
  typedef DDVector<TChar, typename BaseType::BitRankDirType::BitVectorType::VectorType::ReaderType>
      CharVectorType;
  CharVectorType m_Chars;
};
// Walks the trie from its root along the characters in [pathBegin, pathEnd).
// At every step the children of the current node are scanned in sibling order for one whose
// Char() equals the next path element. Returns the id of the node reached, the root id for
// an empty path, or TTrie::INVALID_ID as soon as a step has no matching child.
template <class TTrie, typename TIter>
typename TTrie::Id FindNodeByPath(TTrie const & trie, TIter pathBegin, TIter pathEnd)
{
  typedef typename TTrie::Id IdType;

  IdType current = trie.Root();
  for (TIter step = pathBegin; step != pathEnd; ++step)
  {
    // Advance through the sibling list until a match is found or the list ends.
    IdType candidate = trie.FirstChild(current);
    while (candidate != TTrie::INVALID_ID && !(trie.Char(candidate) == *step))
      candidate = trie.NextSibling(candidate);

    if (candidate == TTrie::INVALID_ID)
      return TTrie::INVALID_ID;
    current = candidate;
  }
  return current;
}
#include "../base/stop_mem_debug.hpp"

View file

@ -1,80 +0,0 @@
#pragma once
#include "../base/assert.hpp"
#include "../base/base.hpp"
#include "../base/start_mem_debug.hpp"
// Cursor over a raw memory buffer used while parsing memory-mapped structures.
// Tracks the current position, the number of bytes left, whether errors must be fatal
// (failOnError) and whether parsing has succeeded so far.
class MMParseInfo
{
public:
  // p/size describe the buffer; failOnError selects the first argument given to every
  // CHECK_OR_CALL below.
  MMParseInfo(void const * p, size_t size, bool failOnError) :
    m_p(static_cast<char const *>(p)), m_Size(size), m_bFailOnError(failOnError),
    m_bSuccessful(true)
  {
  }

  // In fail-on-error mode the object must not be destroyed after a failure.
  ~MMParseInfo()
  {
    CHECK(!m_bFailOnError || m_bSuccessful, ());
  }

  // Checks that the current position is aligned for an element of the given size:
  // 8-byte alignment for sizes divisible by 8, else 4-byte for sizes divisible by 4,
  // else 2-byte for even sizes; odd sizes are not checked.
  void CheckAligned(size_t size)
  {
    size_t p = reinterpret_cast<size_t>(m_p);
    if (!(size & 7)) {
      CHECK_OR_CALL(m_bFailOnError, Fail, !(p & 7), (p, size));
    }
    else if (!(size & 3)) {
      CHECK_OR_CALL(m_bFailOnError, Fail, !(p & 3), (p, size));
    }
    else if (!(size & 1)) {
      CHECK_OR_CALL(m_bFailOnError, Fail, !(p & 1), (p, size));
    }
  }

  // Consumes one T and returns a pointer to it.
  template <typename T> T const * Advance()
  {
    CheckAligned(sizeof(T));
    size_t const advanceSize = sizeof(T);
    CHECK_OR_CALL(m_bFailOnError, Fail, advanceSize <= m_Size, (sizeof(T), m_Size));
    void const * p = m_p;
    m_p += advanceSize;
    m_Size -= advanceSize;
    return static_cast<T const *>(p);
  }

  // Consumes `size` consecutive Ts and returns a pointer to the first one.
  template <typename T> T const * Advance(size_t size)
  {
    CheckAligned(sizeof(T));
    // Compare element counts, not byte counts: size * sizeof(T) can wrap around size_t for a
    // huge element count and would then slip past a byte-based bound check.
    CHECK_OR_CALL(m_bFailOnError, Fail, size <= m_Size / sizeof(T), (size, sizeof(T), m_Size));
    size_t const advanceSize = size * sizeof(T);
    void const * p = m_p;
    m_p += advanceSize;
    m_Size -= advanceSize;
    return static_cast<T const *>(p);
  }

  bool Successful() const
  {
    return m_bSuccessful;
  }

  // Records a failure; must not be reached in fail-on-error mode.
  void Fail()
  {
    CHECK(!m_bFailOnError, (m_bSuccessful));
    m_bSuccessful = false;
  }

  bool FailOnError() const
  {
    return m_bFailOnError;
  }

private:
  char const * m_p;     // Current read position.
  size_t m_Size;        // Bytes remaining after m_p.
  bool m_bFailOnError;
  bool m_bSuccessful;
};
#include "../base/stop_mem_debug.hpp"

View file

@ -1,189 +0,0 @@
#pragma once
#include "endianness.hpp"
#include "mm_base.hpp"
#include "../base/assert.hpp"
#include "../base/base.hpp"
#include "../base/bits.hpp"
#include "../std/algorithm.hpp"
#include <boost/iterator/transform_iterator.hpp>
#include "../base/start_mem_debug.hpp"
// Bit vector over a raw array of TWord words; the array is not owned.
// Words pass through SwapIfBigEndian whenever bit positions matter.
// NOTE(review): the original comment here was garbled in extraction; it mentioned network
// byte order (big endian) - confirm the intended byte-order contract.
template <typename TWord> class MMBitVector
{
public:
  typedef TWord WordType;

  // An unparsed vector is empty and points at nothing; without this, size() and empty()
  // would read indeterminate values.
  MMBitVector() : m_pWords(0), m_Size(0)
  {
  }

  // Parses the vector (size word followed by data words) from a buffer, failing hard on error.
  MMBitVector(void const *p, size_t size) : m_pWords(0), m_Size(0)
  {
    MMParseInfo parseInfo(p, size, true);
    this->Parse(parseInfo);
  }

  // Value of bit i.
  bool operator[](TWord i) const
  {
    ASSERT(i < size(), (i, size()));
    return (WordAt(i / 8 / sizeof(TWord)) & (TWord(1) << (i & (8 * sizeof(TWord) - 1)))) != 0;
  }

  // Number of bits.
  TWord size() const
  {
    return m_Size;
  }

  // Number of words needed to hold size() bits (rounded up).
  TWord size_words() const
  {
    return (m_Size + sizeof(TWord)*8 - 1) / 8 / sizeof(TWord);
  }

  // Bytes occupied by the serialized form: one size word plus the data words.
  size_t bytes_used() const
  {
    return (size_words() + 1) * sizeof(TWord);
  }

  bool empty() const
  {
    return m_Size == 0;
  }

  // i-th word, passed through SwapIfBigEndian.
  TWord WordAt(TWord i) const
  {
    ASSERT(i < size_words(), (i));
    return SwapIfBigEndian(m_pWords[i]);
  }

  // Bit offset, relative to the start of word `word`, of the i-th set bit found when scanning
  // forward from that word. The caller must make sure at least i set bits follow.
  TWord Select1FromWord(TWord word, TWord i) const
  {
    ASSERT(word < size_words(), (word, size_words(), i));
    TWord const * const pStartWord = m_pWords + word;
    TWord const * pWord = pStartWord;
    TWord wordRank = bits::popcount(*pWord);
    while (wordRank < i)
    {
      i -= wordRank;
      wordRank = bits::popcount(*++pWord);
    }
    return static_cast<TWord>(pWord - pStartWord) * sizeof(TWord) * 8 +
           bits::select1(SwapIfBigEndian(*pWord), i);
  }

  // Number of set bits in the words [begWord, endWord).
  TWord PopCountWords(TWord begWord, TWord endWord) const
  {
    // popcount doesn't depend on byte order.
    ASSERT(begWord < size_words(), (begWord));
    ASSERT(endWord <= size_words(), (endWord));
    return bits::popcount(m_pWords + begWord, endWord - begWord);
  }

  // Reads the bit count (one TWord), then the words.
  void Parse(MMParseInfo & info)
  {
    if (!info.Successful()) return;
    TWord size = *info.Advance<TWord>(1);
    Parse(info, SwapIfBigEndian(size));
  }

  // Reads only the words; the bit count is supplied by the caller.
  void Parse(MMParseInfo & info, TWord vectorSize)
  {
    m_Size = vectorSize;
    if (!info.Successful()) return;
    m_pWords = info.Advance<TWord>(static_cast<size_t>(size_words()));
  }

private:
  TWord const * m_pWords;  // Not owned; points into the parsed buffer.
  TWord m_Size;            // Number of bits.
};
// , Rank0 Rank1
// Select0 Select1 .
class MMBitVector32RankDirectory
{
public:
MMBitVector32RankDirectory(MMBitVector<uint32_t> const & bitVector) : m_BitVector(bitVector)
{
}
MMBitVector32RankDirectory(MMBitVector<uint32_t> const & bitVector, void const * p, size_t size)
: m_BitVector(bitVector)
{
MMParseInfo parseInfo(p, size, true);
this->Parse(parseInfo);
}
uint32_t Rank0(uint32_t x) const
{
ASSERT(x < m_BitVector.size(), (x, m_BitVector.size()));
return x + 1 - Rank1(x);
}
uint32_t Rank1(uint32_t x) const
{
ASSERT(x < m_BitVector.size(), (x, m_BitVector.size()));
uint32_t const iWord = x >> 5;
uint32_t const iChunk = iWord >> m_LogChunkSize;
return (iChunk == 0 ? 0 : SwapIfBigEndian(m_pChunks[iChunk - 1])) +
m_BitVector.PopCountWords(iChunk << m_LogChunkSize, iWord) +
bits::popcount(m_BitVector.WordAt(iWord) & (0xFFFFFFFFU >> (31 - (x - (iWord << 5)))));
}
uint32_t Select1(uint32_t i) const
{
ASSERT(i > 0 && i <= m_MaxRank, (i, m_MaxRank));
// TODO: First try approximate lower and upper bound.
uint32_t iChunk = lower_bound(
boost::make_transform_iterator(m_pChunks, &SwapIfBigEndian<uint32_t>),
boost::make_transform_iterator(m_pChunks + size_chunks(), &SwapIfBigEndian<uint32_t>),
i) - boost::make_transform_iterator(m_pChunks, &SwapIfBigEndian<uint32_t>);
ASSERT_LESS(iChunk, size_chunks(), ());
ASSERT_GREATER_OR_EQUAL(SwapIfBigEndian(m_pChunks[iChunk]), i, ());
ASSERT_LESS((iChunk << m_LogChunkSize), m_BitVector.size_words(), (iChunk, m_LogChunkSize));
return (iChunk << (5 + m_LogChunkSize)) +
m_BitVector.Select1FromWord(
iChunk << m_LogChunkSize,
i - (iChunk == 0 ? 0 : SwapIfBigEndian(m_pChunks[iChunk - 1])));
}
uint32_t size_chunks() const
{
return bits::RoundLastBitsUpAndShiftRight(m_BitVector.size(), 5 + m_LogChunkSize);
}
size_t bytes_used() const
{
return (1 + size_chunks()) << (2 + m_LogChunkSize);
}
void Parse(MMParseInfo & info)
{
// TODO: Store version in MMBitVector32RankDirectory?
if (!info.Successful()) return;
m_LogChunkSize = SwapIfBigEndian(*info.Advance<uint32_t>());
if (!info.Successful()) return;
m_pChunks = info.Advance<uint32_t>(size_chunks());
#ifdef DEBUG
m_MaxRank = (m_BitVector.empty() ? 0 : Rank1(m_BitVector.size() - 1));
if (m_MaxRank > m_BitVector.size())
{
CHECK(!info.FailOnError(), (m_MaxRank, m_BitVector.size()));
info.Fail();
}
#endif
}
protected:
MMBitVector<uint32_t> const & m_BitVector;
uint32_t const * m_pChunks;
uint32_t m_LogChunkSize; // 32 .
#ifdef DEBUG
uint32_t m_MaxRank;
#endif
};
#include "../base/stop_mem_debug.hpp"

View file

@ -1,113 +0,0 @@
#pragma once
#include "mm_bit_vector.hpp"
#include "../base/base.hpp"
// Tree navigation over two 32-bit bit vectors of equal length - m_IsParent (node has
// children) and m_IsFirstChild (node is the first child of its parent) - each paired with a
// rank directory. Nodes are identified by their index in those vectors; the root is node 0.
// NOTE(review): each directory stores a reference to its sibling bit-vector member, so a
// compiler-generated copy of this object would keep referring to the source's vectors.
class MMCompactTree
{
public:
  // Node id.
  typedef uint32_t Id;
  static Id const INVALID_ID = 0xFFFFFFFF;

  MMCompactTree()
    : m_IsParentDir(m_IsParent), m_IsFirstChildDir(m_IsFirstChild)
  {
  }

  // Parses the tree from a buffer, failing hard on error.
  MMCompactTree(void const * p, size_t size)
    : m_IsParentDir(m_IsParent), m_IsFirstChildDir(m_IsFirstChild)
  {
    MMParseInfo info(p, size, true);
    Parse(info);
  }

  // Id of the root.
  Id Root() const
  {
    return 0;
  }

  // Parent id and INVALID_ID for root.
  Id Parent(Id id) const
  {
    if (id)
      return m_IsParentDir.Select1(m_IsFirstChildDir.Rank1(id));
    else
      return INVALID_ID;
  }

  // First child id and INVALID_ID for leaf.
  Id FirstChild(Id id) const
  {
    if (m_IsParent[id])
      return m_IsFirstChildDir.Select1(m_IsParentDir.Rank1(id));
    else
      return INVALID_ID;
  }

  // Next sibling id and INVALID_ID if there is no next sibling.
  Id NextSibling(Id id) const
  {
    // The following node is a sibling unless it starts another parent's child list
    // or does not exist at all.
    if (id + 1 == m_IsFirstChild.size() || m_IsFirstChild[id + 1])
      return INVALID_ID;
    else
      return id + 1;
  }

  // Reads both vectors and their directories. The is-first-child vector carries no size of
  // its own: it is parsed with the size of the is-parent vector, which must be non-zero.
  void Parse(MMParseInfo & info)
  {
    if (info.Successful())
    {
      m_IsParent.Parse(info);
      m_IsParentDir.Parse(info);
      CHECK_OR_CALL(info.FailOnError(), info.Fail, m_IsParent.size() > 0, ());
      m_IsFirstChild.Parse(info, m_IsParent.size());
      m_IsFirstChildDir.Parse(info);
    }
  }

protected:
  friend class MMCompactTreeTester;
  MMBitVector<uint32_t> m_IsParent;
  MMBitVector32RankDirectory m_IsParentDir;
  MMBitVector<uint32_t> m_IsFirstChild;
  MMBitVector32RankDirectory m_IsFirstChildDir;
};
// Compact tree whose nodes may carry data. m_ParentHasData has one bit per parent node (in
// parent order) telling whether that parent carries data; every non-parent node carries data.
// Data ids of parents come first, then those of leaves.
class MMCompactTreeWithData : public MMCompactTree
{
public:
  // Both counters start at zero; previously m_ParentsWithDataCount was left uninitialized
  // until Parse() ran.
  MMCompactTreeWithData()
    : m_ParentHasDataDir(m_ParentHasData), m_ParentsWithDataCount(0), m_Size(0)
  {
  }

  // Number of nodes with data. Const, since it only reads a member.
  size_t NodesWithData() const
  {
    return m_Size;
  }

  // Id of the data for a given node id and INVALID_ID if node doesn't have any data.
  uint32_t Data(Id id) const
  {
    if (m_IsParent[id])
    {
      // Index of this node among the parents, then its rank among the parents with data.
      uint32_t const parentIndex = m_IsParentDir.Rank1(id) - 1;
      return m_ParentHasData[parentIndex] ? m_ParentHasDataDir.Rank1(parentIndex) - 1 : INVALID_ID;
    }
    else
    {
      // Leaves are numbered after all parents with data, in node order.
      return m_ParentsWithDataCount + m_IsParentDir.Rank0(id) - 1;
    }
  }

  // Reads the tree structure, then the parent-has-data vector (its size is the number of
  // parent nodes, not read from the buffer) and its directory, and derives both counters.
  void Parse(MMParseInfo & info)
  {
    MMCompactTree::Parse(info);
    if (!info.Successful())
      return;
    m_ParentHasData.Parse(
        info, m_IsParent.empty() ? 0 : m_IsParentDir.Rank1(m_IsParent.size() - 1));
    m_ParentHasDataDir.Parse(info);
    m_ParentsWithDataCount =
        m_ParentHasData.empty() ? 0 : m_ParentHasDataDir.Rank1(m_ParentHasData.size() - 1);
    m_Size = m_ParentsWithDataCount +
             (m_IsParent.empty() ? 0 : m_IsParentDir.Rank0(m_IsParent.size() - 1));
  }

protected:
  MMBitVector<uint32_t> m_ParentHasData;
  MMBitVector32RankDirectory m_ParentHasDataDir;
  uint32_t m_ParentsWithDataCount;
  size_t m_Size;
};

View file

@ -1,70 +0,0 @@
#pragma once
#include "mm_base.hpp"
#include "mm_compact_tree.hpp"
#include "mm_vector.hpp"
#include "../base/assert.hpp"
#include "../base/base.hpp"
#include "../std/string.hpp"
#include "../base/start_mem_debug.hpp"
// Test accessor named as a friend below.
class MMCompactTrieTester;

// Compact tree with data whose non-root nodes each carry one character of type TChar.
template <class TChar> class MMCompactTrie : public MMCompactTreeWithData
{
public:
  MMCompactTrie()
  {
  }

  // Parses the trie from a buffer, failing hard on error.
  MMCompactTrie(void const * p, size_t size)
  {
    MMParseInfo info(p, size, true);
    this->Parse(info);
  }

  // Character attached to a non-root node.
  TChar Char(Id id) const
  {
    ASSERT(id != 0, ());
    ASSERT(id != INVALID_ID, ());
    // There is no char for the root, so node ids are shifted by one.
    size_t const charIndex = id - 1;
    return m_Chars[charIndex];
  }

  // Reads the tree with data, then one character per non-root node.
  void Parse(MMParseInfo & info)
  {
    MMCompactTreeWithData::Parse(info);
    if (info.Successful())
      m_Chars.Parse(info, m_IsParent.size() - 1);
  }

protected:
  friend class MMCompactTrieTester;
  MMVector<TChar> m_Chars;
};
// Disabled (compiled out by #if 0): path lookup written against MMCompactTree ids.
// Starting at node 0, for each path element it scans the current node's children in sibling
// order for one whose Char() equals the element, and returns INVALID_ID when none matches.
#if 0
template <class TrieT, typename ItT>
MMCompactTree::Id FindNodeByPath(TrieT const & trie, ItT pathBegin, ItT pathEnd)
{
MMCompactTree::Id nodeId = 0;
for (ItT edge = pathBegin; edge != pathEnd; ++edge)
{
bool found = false;
for (MMCompactTree::Id child = trie.FirstChild(nodeId);
child != MMCompactTree::INVALID_ID;
child = trie.NextSibling(child))
{
if (trie.Char(child) == *edge)
{
nodeId = child;
found = true;
break;
}
}
if (!found) return MMCompactTree::INVALID_ID;
}
return nodeId;
}
#endif
#include "../base/stop_mem_debug.hpp"

View file

@ -1,58 +0,0 @@
#pragma once
#include "endianness.hpp"
#include "mm_base.hpp"
#include "../base/assert.hpp"
#include "../base/base.hpp"
#include "../std/memcpy.hpp"
#include "../base/start_mem_debug.hpp"
// Read-only view of an array of T located inside a parsed memory buffer; the memory is not
// owned. The Align parameter is not used by any member shown here.
template <typename T, int Align = sizeof(T)> class MMVector
{
public:
  // Iterators are pointers to const: the stored pointer is T const *, so the previous
  // `T *` typedef made begin()/end() ill-formed as soon as they were instantiated.
  typedef T const * const_iterator;
  typedef const_iterator iterator;
  typedef T value_type;

  // An unparsed vector is empty and points at nothing.
  MMVector() : m_p(0), m_Size(0)
  {
  }

  const_iterator begin() const
  {
    return m_p;
  }

  const_iterator end() const
  {
    return m_p + m_Size;
  }

  T const & operator [] (size_t i) const
  {
    ASSERT_LESS(i, m_Size, ());
    return m_p[i];
  }

  // Number of elements.
  size_t size() const
  {
    return m_Size;
  }

  // Reads a 4-byte element count, then the elements.
  void Parse(MMParseInfo & info)
  {
    if (!info.Successful()) return;
    uint32_t size;
    // Copy byte-wise into the local: the count is fetched as 4 single bytes. memcpy needs the
    // destination's address (the old call passed the value itself).
    memcpy(&size, info.Advance<uint8_t>(4), 4);
    Parse(info, SwapIfBigEndian(size));
  }

  // Takes a pointer to vectorSize elements; the count is supplied by the caller.
  void Parse(MMParseInfo & info, size_t vectorSize)
  {
    m_Size = vectorSize;
    if (!info.Successful()) return;
    m_p = info.Advance<T>(m_Size);
  }

private:
  T const * m_p;   // Not owned; points into the parsed buffer.
  size_t m_Size;   // Number of elements.
};
#include "../base/stop_mem_debug.hpp"