forked from organicmaps/organicmaps
Remove old mm_* and dd_* code from coding. Now only dd_vector.hpp is used.
This commit is contained in:
parent
751ac71b07
commit
6db20eaea2
16 changed files with 0 additions and 1400 deletions
|
@ -1,52 +0,0 @@
|
|||
#pragma once
|
||||
#include "varint.hpp"
|
||||
#include "write_to_sink.hpp"
|
||||
#include "../base/base.hpp"
|
||||
#include "../std/iterator.hpp"
|
||||
|
||||
// Serializes a sequence of booleans as a packed bit vector.
//
// TWord - unsigned word type used for packing (the tests in this commit use
//         uint8_t, uint16_t, uint32_t and uint64_t).
// TSink - where to write.
// TIter - iterator to bool.
//
// Layout: an optional varuint with the number of bits (only if bWriteSize),
// followed by the bits packed into TWord values, lowest bit first. A trailing,
// partially filled word is written with its unused high bits set to zero.
//
// size - number of bits to announce in the header; when left at the default
//        (size_t(-1)) it is computed as distance(beg, end).
template <typename TWord, typename TSink, typename TIter>
void BuildMMBitVector(TSink & sink, TIter beg, TIter end, bool bWriteSize = true, size_t size = -1)
{
  if (size == size_t(-1))
    size = distance(beg, end);
  // The bit count has to be representable in a single word.
  CHECK(static_cast<TWord>(size) == size, ("Vector is more than word size.", size));
  if (bWriteSize)
    WriteVarUint(sink, size);

  int const bitsPerWord = static_cast<int>(8 * sizeof(TWord));
  TWord pending = 0;  // Word currently being assembled.
  int filled = 0;     // Number of bits already placed into |pending|.
  while (beg != end)
  {
    if (*beg)
      pending |= (TWord(1) << filled);
    ++filled;
    if (filled == bitsPerWord)
    {
      // The word is complete: emit it and start a fresh one.
      WriteToSink(sink, pending);
      pending = 0;
      filled = 0;
    }
    ++beg;
  }

  // Flush the last, partially filled word (if any).
  if (filled > 0)
    WriteToSink(sink, pending);
}
|
||||
|
||||
// Serializes the rank directory that accompanies a bit vector built from
// 32-bit words.
//
// TSink - where to write.
// TIter - iterator to bool.
//
// Layout: varuint logChunkSize, then one uint32_t per chunk holding the number
// of set bits from the beginning of the vector up to the end of that chunk.
// A chunk spans 2^logChunkSize words of 32 bits, i.e. 2^(logChunkSize + 5) bits;
// the last chunk may be incomplete.
//
// For an empty range only logChunkSize is written. The reader in
// dd_bit_rank_directory.hpp reads no chunk entries when its chunk count is
// zero (presumably the case for an empty vector - depends on
// bits::RoundLastBitsUpAndShiftRight), so an extra entry would desynchronize
// whatever follows in the stream.
// TODO: optimize logChunkSize default value.
template <typename TSink, typename TIter>
void BuildMMBitVector32RankDirectory(TSink & sink, TIter beg, TIter end, uint32_t logChunkSize = 5)
{
  WriteVarUint(sink, logChunkSize);
  if (beg == end)
    return;

  // Low bits of the bit index that are all zero exactly at a chunk boundary.
  // Computed once, in unsigned arithmetic, instead of shifting a signed int
  // on every iteration.
  uint32_t const chunkMask = (uint32_t(1) << (logChunkSize + 5)) - 1;

  uint32_t rank1 = 0;
  for (uint32_t i = 0; beg != end; ++beg, ++i)
  {
    // Crossing into a new chunk: store the rank accumulated so far.
    if (i != 0 && (i & chunkMask) == 0)
      WriteToSink(sink, rank1);
    if (*beg)
      ++rank1;
  }
  // Entry for the last (possibly incomplete) chunk.
  WriteToSink(sink, rank1);
}
|
|
@ -34,27 +34,15 @@ HEADERS += \
|
|||
internal/file64_api.hpp \
|
||||
parse_xml.hpp \
|
||||
varint.hpp \
|
||||
mm_vector.hpp \
|
||||
mm_bit_vector.hpp \
|
||||
mm_base.hpp \
|
||||
endianness.hpp \
|
||||
byte_stream.hpp \
|
||||
var_serial_vector.hpp \
|
||||
hex.hpp \
|
||||
mm_compact_trie.hpp \
|
||||
mm_compact_tree.hpp \
|
||||
compact_trie_builder.hpp \
|
||||
compact_tree_builder.hpp \
|
||||
bit_vector_builder.hpp \
|
||||
dd_vector.hpp \
|
||||
dd_bit_vector.hpp \
|
||||
dd_base.hpp \
|
||||
writer.hpp \
|
||||
write_to_sink.hpp \
|
||||
reader.hpp \
|
||||
dd_bit_rank_directory.hpp \
|
||||
dd_compact_tree.hpp \
|
||||
dd_compact_trie.hpp \
|
||||
diff.hpp \
|
||||
diff_patch_common.hpp \
|
||||
source.hpp \
|
||||
|
|
|
@ -12,8 +12,6 @@ include($$ROOT_DIR/common.pri)
|
|||
SOURCES += ../../testing/testingmain.cpp \
|
||||
endianness_test.cpp \
|
||||
varint_test.cpp \
|
||||
mm_bit_vector_test.cpp \
|
||||
mm_compact_trie_test.cpp \
|
||||
mem_file_reader_test.cpp \
|
||||
mem_file_writer_test.cpp \
|
||||
var_serial_vector_test.cpp \
|
||||
|
|
|
@ -1,177 +0,0 @@
|
|||
#include "../../testing/testing.hpp"
|
||||
#include "../dd_vector.hpp"
|
||||
#include "../dd_bit_vector.hpp"
|
||||
#include "../dd_bit_rank_directory.hpp"
|
||||
#include "../bit_vector_builder.hpp"
|
||||
#include "../byte_stream.hpp"
|
||||
#include "../reader.hpp"
|
||||
#include "../../base/base.hpp"
|
||||
#include "../../base/macros.hpp"
|
||||
#include "../../std/cstdlib.hpp"
|
||||
|
||||
#include "../../base/start_mem_debug.hpp"
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename TWord>
|
||||
void TestBitVector(unsigned char const * bits, size_t N)
|
||||
{
|
||||
typedef PushBackByteSink<vector<char> > SinkType;
|
||||
vector<char> data;
|
||||
SinkType sink(data);
|
||||
BuildMMBitVector<TWord>(sink, &bits[0], &bits[N]);
|
||||
|
||||
DDBitVector<DDVector<TWord, MemReader> > bitVectorDD;
|
||||
MemReader reader(&data[0], data.size());
|
||||
DDParseInfo<MemReader> info(reader, true);
|
||||
bitVectorDD.Parse(info);
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
TEST_EQUAL(bitVectorDD[i], bits[i] != 0, (i));
|
||||
}
|
||||
|
||||
void TestBitVector32Rank(unsigned char const * bits, size_t N, size_t logChunkSize)
|
||||
{
|
||||
typedef PushBackByteSink<vector<char> > SinkType;
|
||||
vector<char> data;
|
||||
SinkType sink(data);
|
||||
BuildMMBitVector<uint32_t>(sink, &bits[0], &bits[N]);
|
||||
BuildMMBitVector32RankDirectory(sink, &bits[0], &bits[N], logChunkSize);
|
||||
|
||||
DDBitRankDirectory<DDBitVector<DDVector<uint32_t, MemReader> > > rankDirectoryDD;
|
||||
MemReader reader(&data[0], data.size());
|
||||
{
|
||||
DDParseInfo<MemReader> info(reader, true);
|
||||
rankDirectoryDD.Parse(info);
|
||||
}
|
||||
|
||||
uint32_t rank1 = 0;
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
{
|
||||
if (rankDirectoryDD[i])
|
||||
{
|
||||
++rank1;
|
||||
TEST_EQUAL(rankDirectoryDD.BitVector().Select1FromWord(0, rank1), i,
|
||||
(rank1, N, logChunkSize));
|
||||
TEST_EQUAL(rankDirectoryDD.Select1(rank1), i, (rank1, N, logChunkSize));
|
||||
}
|
||||
TEST_EQUAL(rankDirectoryDD.Rank1(i), rank1, (i, N, logChunkSize));
|
||||
TEST_EQUAL(rankDirectoryDD.Rank0(i), i + 1 - rank1, (i, N, logChunkSize));
|
||||
}
|
||||
}
|
||||
|
||||
unsigned char const simpleBits[] = {
|
||||
1,1,0,1,0,0,0,1,
|
||||
1,1,0,0,0,0,0,1,
|
||||
1,1,0,0,1,1,1,0,
|
||||
0,1,1,1,0,0,1,1,
|
||||
0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,1,0,0,
|
||||
0,0,0,1,0,0,0,1 };
|
||||
static size_t const simpleBitsSize = ARRAY_SIZE(simpleBits);
|
||||
}
|
||||
|
||||
// Round-trip of the fixed 64-bit pattern |simpleBits| for every supported
// word width.

// 8-bit words.
UNIT_TEST(BitVector8Simple)
{
  TestBitVector<uint8_t>(simpleBits, simpleBitsSize);
}

// 16-bit words.
UNIT_TEST(BitVector16Simple)
{
  TestBitVector<uint16_t>(simpleBits, simpleBitsSize);
}

// 32-bit words.
UNIT_TEST(BitVector32Simple)
{
  TestBitVector<uint32_t>(simpleBits, simpleBitsSize);
}

// 64-bit words.
UNIT_TEST(BitVector64Simple)
{
  TestBitVector<uint64_t>(simpleBits, simpleBitsSize);
}
|
||||
|
||||
// Rank/select over the fixed pattern for log chunk sizes 1 through 7.
UNIT_TEST(BitVector32RankSimple)
{
  size_t const maxLogChunkSize = 7;
  size_t logChunkSize = 1;
  while (logChunkSize <= maxLogChunkSize)
  {
    TestBitVector32Rank(simpleBits, simpleBitsSize, logChunkSize);
    ++logChunkSize;
  }
}
|
||||
|
||||
// With logChunkSize == 2 a chunk covers 4 * 32 = 128 bits, so 129 bits is
// exactly one full chunk plus a single bit in the next one.

// All bits cleared.
UNIT_TEST(BitVector32RankChunkSizePlus1All0)
{
  size_t const bitCount = 129;
  vector<unsigned char> bits(bitCount, 0);
  unsigned char const * const p = &bits[0];
  TestBitVector<uint32_t>(p, bits.size());
  TestBitVector32Rank(p, bits.size(), 2);
}

// All bits set.
UNIT_TEST(BitVector32RankChunkSizePlus1All1)
{
  size_t const bitCount = 129;
  vector<unsigned char> bits(bitCount, 1);
  unsigned char const * const p = &bits[0];
  TestBitVector<uint32_t>(p, bits.size());
  TestBitVector32Rank(p, bits.size(), 2);
}
|
||||
|
||||
// Empty input must round-trip for every word width.
UNIT_TEST(BitVector32Empty)
{
  unsigned char const * const noBits = NULL;
  size_t const noBitsSize = 0;
  TestBitVector<uint8_t>(noBits, noBitsSize);
  TestBitVector<uint16_t>(noBits, noBitsSize);
  TestBitVector<uint32_t>(noBits, noBitsSize);
  TestBitVector<uint64_t>(noBits, noBitsSize);
  // TODO: When uncommented, there is an error:
  // malloc: *** error for object 0x12c2e: pointer being freed was not allocated
  // TestBitVector32Rank(NULL, 0, 2);
}
|
||||
|
||||
// Random bit patterns of every length from 1 to 150, checked with log chunk
// sizes 1, 2 and 3.
UNIT_TEST(BitVector32RankRandom)
{
  // TODO: When l == 0, there is an error:
  // malloc: *** error for object 0x12c2e: pointer being freed was not allocated
  // *** set a breakpoint in malloc_error_break to debug
  size_t const maxLength = 150;
  for (size_t l = 1; l <= maxLength; ++l)
  {
    vector<unsigned char> bits(l);
    for (size_t i = 0; i != bits.size(); ++i)
      bits[i] = (rand() & 1);

    // Guard kept for the day the l == 0 case above is enabled.
    unsigned char * p = NULL;
    if (!bits.empty())
      p = &bits[0];

    TestBitVector<uint32_t>(p, bits.size());
    for (size_t logChunkSize = 1; logChunkSize <= 3; ++logChunkSize)
      TestBitVector32Rank(p, bits.size(), logChunkSize);
  }
}
|
||||
|
||||
// Random bit patterns whose lengths lie within +/-2 of a power of two, i.e.
// right around word and chunk boundaries.
UNIT_TEST(BitVector32RankRandomPow2)
{
  uint32_t values[] = {16, 32, 64, 128, 256, 1024, 2048, 4096};
  for (size_t j = 0; j < ARRAY_SIZE(values); ++j)
  {
    size_t const shortest = values[j] - 2;
    size_t const longest = values[j] + 2;
    for (size_t l = shortest; l <= longest; ++l)
    {
      vector<unsigned char> bits(l);
      for (size_t i = 0; i != bits.size(); ++i)
        bits[i] = (rand() & 1);

      unsigned char * const p = &bits[0];
      TestBitVector<uint32_t>(p, bits.size());
      for (size_t logChunkSize = 1; logChunkSize <= 3; ++logChunkSize)
        TestBitVector32Rank(p, bits.size(), logChunkSize);
    }
  }
}
|
||||
|
||||
// One random 9000-bit vector, checked with log chunk sizes 1, 2 and 3.
UNIT_TEST(BitVector32RankRandomLargeVector)
{
  size_t const l = 9000;
  vector<unsigned char> bits(l);
  for (size_t i = 0; i != bits.size(); ++i)
    bits[i] = (rand() & 1);

  unsigned char * p = &bits[0];
  TestBitVector<uint32_t>(p, bits.size());
  for (size_t logChunkSize = 1; logChunkSize <= 3; ++logChunkSize)
    TestBitVector32Rank(p, bits.size(), logChunkSize);
}
|
||||
|
||||
// TODO: Test large bitVector.
|
||||
// TODO: Test max size bitVector.
|
||||
// TODO: Test wrong data size for BitVector and BitVector32RankDirectory.
|
||||
#include "../../base/stop_mem_debug.hpp"
|
|
@ -1,97 +0,0 @@
|
|||
#include "../../testing/testing.hpp"
|
||||
|
||||
#include "../byte_stream.hpp"
|
||||
#include "../compact_trie_builder.hpp"
|
||||
#include "../dd_compact_trie.hpp"
|
||||
#include "../reader.hpp"
|
||||
#include "../../base/base.hpp"
|
||||
#include "../../base/macros.hpp"
|
||||
#include "../../std/string.hpp"
|
||||
#include "../../std/vector.hpp"
|
||||
|
||||
//namespace
|
||||
//{
|
||||
class DDCompactTrieTester
|
||||
{
|
||||
public:
|
||||
template <class TChar, class TBitRankDirectory>
|
||||
static typename TBitRankDirectory::BitVectorType const &
|
||||
IsParentVector(DDCompactTrie<TChar, TBitRankDirectory> const & trie)
|
||||
{
|
||||
return trie.m_IsParent.BitVector();
|
||||
}
|
||||
|
||||
template <class TChar, class TBitRankDirectory>
|
||||
static typename TBitRankDirectory::BitVectorType const &
|
||||
IsFirstChildVector(DDCompactTrie<TChar, TBitRankDirectory> const & trie)
|
||||
{
|
||||
return trie.m_IsFirstChild.BitVector();
|
||||
}
|
||||
template <class TChar, class TBitRankDirectory>
|
||||
static int ParentsWithDataCount(
|
||||
DDCompactTrie<TChar, TBitRankDirectory> const & trie)
|
||||
{
|
||||
return trie.m_ParentsWithDataCount;
|
||||
}
|
||||
};
|
||||
//}
|
||||
|
||||
// Builds a trie from six sorted words, parses it back and compares every node
// property against hand-computed tables.
UNIT_TEST(CompactTrieSimple)
{
  // The builder expects its input sorted.
  char const * const sortedWords[] = { "hello", "help", "sim", "simple", "world", "z" };
  vector<string> words(sortedWords, sortedWords + ARRAY_SIZE(sortedWords));

  typedef PushBackByteSink<vector<char> > SinkType;
  vector<char> data;
  SinkType sink(data);
  BuildMMCompactTrie<char>(sink, words.begin(), words.end());

  typedef DDBitVector<DDVector<uint32_t, MemReader> > BitVectorDD;
  typedef DDBitRankDirectory<BitVectorDD> RankDirDD;
  typedef DDCompactTrie<char, RankDirDD> TrieDD;
  TrieDD trie;
  MemReader reader(&data[0], data.size());
  {
    DDParseInfo<MemReader> info(reader, true);
    trie.Parse(info);
  }

  // Expected trie, level by level (node ids in parentheses):
  // 0  (0)
  // 1  h(1)   s(2)  w(3)  z(4)
  // 2  e(5)   i(6)  o(7)
  // 3  l(8)   m(9)  r(10)
  // 4  l(11)  p(12) p(13) l(14)
  // 5  o(15)  l(16) d(17)
  // 6  e(18)

  TEST_EQUAL(trie.NodesWithData(), words.size(), ());
  TEST_EQUAL(trie.Root(), 0U, ());
  TEST_EQUAL(DDCompactTrieTester::ParentsWithDataCount(trie), 1, ());

  // Per-node expectations, indexed by node id.
  //                       0123456789012345678
  char const * chars    = "$hswzeiolmrlpplolde";
  char const * isParent = "1111011111110110100";
  char const * isFChild = "0100011111110111111";
  char const * nexts    = "0111000000010000000";
  //                       0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18
  TrieDD::Id fchild[] = {  1, 5, 6, 7,-1, 8, 9,10,11,13,14,15,-1,16,17,-1,18,-1,-1 };
  TrieDD::Id parent[] = { -1, 0, 0, 0, 0, 1, 2, 3, 5, 6, 7, 8, 8, 9,10,11,13,14,16 };
  TrieDD::Id dataI[]  = { -1,-1,-1,-1, 1,-1,-1,-1,-1, 0,-1,-1, 2,-1,-1, 3,-1, 4, 5 };

  // NOTE(review): the tables describe 19 nodes, but the loop stops at node 17,
  // so node 18 is never checked - confirm whether that is intentional.
  for (size_t i = 0; i <= 17; ++i)
  {
    // wcout << "!! " << i << endl;
    // The root has no character.
    if (i > 0)
    {
      TEST_EQUAL(trie.Char(i), chars[i], (i, string(1, trie.Char(i)), string(1, chars[i])));
    }
    TEST_EQUAL(DDCompactTrieTester::IsParentVector(trie)[i], isParent[i] == '1', (i));
    TEST_EQUAL(DDCompactTrieTester::IsFirstChildVector(trie)[i], isFChild[i] == '1', (i));
    TEST_EQUAL(trie.NextSibling(i), nexts[i] == '1' ? i+1 : trie.INVALID_ID, (i));
    TEST_EQUAL(trie.FirstChild(i), fchild[i], (i));
    TEST_EQUAL(trie.Parent(i), parent[i], (i));
    TEST_EQUAL(trie.Data(i), dataI[i], (i));
  }
}
|
|
@ -1,30 +0,0 @@
|
|||
#pragma once
|
||||
#include "writer.hpp"
|
||||
#include "bit_vector_builder.hpp"
|
||||
#include "../base/base.hpp"
|
||||
#include "../base/start_mem_debug.hpp"
|
||||
#include "../std/iterator.hpp"
|
||||
|
||||
template <typename TSink, typename TIter>
|
||||
void BuildCompactTree(TSink & sink,
|
||||
TIter const isParentBeg, TIter const isParentEnd,
|
||||
TIter const isFirstChildBeg, TIter const isFirstChildEnd)
|
||||
{
|
||||
size_t const size = distance(isParentBeg, isParentEnd);
|
||||
BuildMMBitVector<uint32_t>(sink, isParentBeg, isParentEnd, true, size);
|
||||
BuildMMBitVector32RankDirectory(sink, isParentBeg, isParentEnd);
|
||||
BuildMMBitVector<uint32_t>(sink, isFirstChildBeg, isFirstChildEnd, false, size);
|
||||
BuildMMBitVector32RankDirectory(sink, isFirstChildBeg, isFirstChildEnd);
|
||||
}
|
||||
|
||||
template <typename TSink, typename TIter>
|
||||
void BuildCompactTreeWithData(TSink & sink,
|
||||
TIter const isParentBeg, TIter const isParentEnd,
|
||||
TIter const isFirstChildBeg, TIter const isFirstChildEnd,
|
||||
TIter const parentHasDataBeg, TIter const parentHasDataEnd)
|
||||
{
|
||||
BuildCompactTree(sink, isParentBeg, isParentEnd, isFirstChildBeg, isFirstChildEnd);
|
||||
BuildMMBitVector<uint32_t>(sink, parentHasDataBeg, parentHasDataEnd);
|
||||
BuildMMBitVector32RankDirectory(sink, parentHasDataBeg, parentHasDataEnd);
|
||||
}
|
||||
|
|
@ -1,98 +0,0 @@
|
|||
#pragma once
|
||||
#include "compact_tree_builder.hpp"
|
||||
#include "../base/assert.hpp"
|
||||
#include "../base/base.hpp"
|
||||
#include "../std/algorithm.hpp"
|
||||
#include "../std/vector.hpp"
|
||||
#include "../base/start_mem_debug.hpp"
|
||||
|
||||
// Build compact trie given a sorted sequence on strings.
|
||||
// If writeChars == false, characters will not be written
|
||||
template <class TChar, class TSink, class TIter>
|
||||
void BuildMMCompactTrie(TSink & sink, TIter const beg, TIter const end, bool writeChars = true)
|
||||
{
|
||||
size_t maxLen = 0;
|
||||
size_t size = 0;
|
||||
for (TIter it = beg; it != end; ++it, ++size)
|
||||
maxLen = max(maxLen, it->size());
|
||||
vector<vector<bool> > isParent(maxLen + 1);
|
||||
vector<vector<bool> > isFirstChild(maxLen + 1);
|
||||
vector<vector<bool> > hasData(maxLen + 1);
|
||||
vector<vector<TChar> > chars(maxLen);
|
||||
isParent[0].push_back(true);
|
||||
isFirstChild[0].push_back(false);
|
||||
hasData[0].push_back(false);
|
||||
TIter prev;
|
||||
size_t word = 0;
|
||||
for (TIter it = beg; it != end; ++word, prev = it++)
|
||||
{
|
||||
CHECK_NOT_EQUAL(it->size(), 0U, ());
|
||||
size_t commonLen = 0;
|
||||
bool nextIsExtensionOfPrev = true;
|
||||
if (it != beg)
|
||||
{
|
||||
while (commonLen < it->size() && commonLen < prev->size() &&
|
||||
(*it)[commonLen] == (*prev)[commonLen])
|
||||
++commonLen;
|
||||
// Next string should be strictly greater than previous.
|
||||
CHECK(commonLen != it->size(),
|
||||
(commonLen, it->size(), prev->size(), word));
|
||||
CHECK(commonLen == prev->size() || (*prev)[commonLen] != (*it)[commonLen],
|
||||
(commonLen, it->size(), prev->size(), word));
|
||||
nextIsExtensionOfPrev = (commonLen == prev->size());
|
||||
}
|
||||
isParent[commonLen].back() = true;
|
||||
size_t last = it->size() - 1;
|
||||
for (size_t i = commonLen; i <= last; ++i)
|
||||
{
|
||||
isParent[i+1].push_back(i != last);
|
||||
isFirstChild[i+1].push_back(i != commonLen || nextIsExtensionOfPrev);
|
||||
hasData[i+1].push_back(i == last);
|
||||
if (writeChars)
|
||||
chars[i].push_back((*it)[i]);
|
||||
}
|
||||
}
|
||||
|
||||
vector<bool> isParentComined, isFirstChildCombined, parentHasDataCombined;
|
||||
isParentComined.reserve(size);
|
||||
isFirstChildCombined.reserve(size);
|
||||
parentHasDataCombined.reserve(size);
|
||||
for (vector<vector<bool> >::const_iterator i = isParent.begin(); i != isParent.end(); ++i)
|
||||
isParentComined.insert(isParentComined.end(), i->begin(), i->end());
|
||||
for (vector<vector<bool> >::const_iterator i = isFirstChild.begin(); i != isFirstChild.end(); ++i)
|
||||
isFirstChildCombined.insert(isFirstChildCombined.end(), i->begin(), i->end());
|
||||
for (size_t i = 0; i < isParent.size(); ++i)
|
||||
{
|
||||
for (size_t j = 0; j < isParent[i].size(); ++j)
|
||||
{
|
||||
if (isParent[i][j])
|
||||
{
|
||||
parentHasDataCombined.push_back(hasData[i][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BuildCompactTreeWithData(sink, isParentComined.begin(), isParentComined.end(),
|
||||
isFirstChildCombined.begin(), isFirstChildCombined.end(),
|
||||
parentHasDataCombined.begin(), parentHasDataCombined.end());
|
||||
size_t numChars = 0;
|
||||
if (writeChars)
|
||||
{
|
||||
for (typename vector<vector<TChar> >::const_iterator i = chars.begin(); i != chars.end(); ++i)
|
||||
{
|
||||
for (typename vector<TChar>::const_iterator it = i->begin(); it != i->end(); ++it)
|
||||
{
|
||||
WriteToSink(sink, TChar(*it));
|
||||
++numChars;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t padding = (4 - (numChars * sizeof(TChar)) % 4) % 4;
|
||||
for (size_t i = 0; i < padding; ++i)
|
||||
{
|
||||
WriteToSink(sink, uint8_t(0));
|
||||
}
|
||||
}
|
||||
|
||||
#include "../base/stop_mem_debug.hpp"
|
|
@ -1,121 +0,0 @@
|
|||
#pragma once
|
||||
#include "dd_base.hpp"
|
||||
#include "dd_vector.hpp"
|
||||
#include "endianness.hpp"
|
||||
#include "varint.hpp"
|
||||
#include "../base/assert.hpp"
|
||||
#include "../base/bits.hpp"
|
||||
#include <boost/iterator/transform_iterator.hpp>
|
||||
#include "../base/start_mem_debug.hpp"
|
||||
|
||||
template <class TBitVector, class SizeT = typename TBitVector::size_type> class DDBitRankDirectory
|
||||
{
|
||||
public:
|
||||
typedef TBitVector BitVectorType;
|
||||
typedef SizeT size_type;
|
||||
|
||||
DDBitRankDirectory()
|
||||
{
|
||||
}
|
||||
|
||||
template <class ReaderT>
|
||||
void Parse(DDParseInfo<ReaderT> & info)
|
||||
{
|
||||
BitVectorType bitVector;
|
||||
bitVector.Parse(info);
|
||||
Parse(info, bitVector);
|
||||
}
|
||||
|
||||
template <class ReaderT>
|
||||
void Parse(DDParseInfo<ReaderT> & info, BitVectorType const & bitVector)
|
||||
{
|
||||
Parse(info, bitVector, ReadVarUint<size_type>(info.Source()));
|
||||
}
|
||||
|
||||
template <class ReaderT>
|
||||
void Parse(DDParseInfo<ReaderT> &info, BitVectorType const & bitVector, size_type logChunkSize)
|
||||
{
|
||||
m_BitVector = bitVector;
|
||||
m_LogChunkSize = logChunkSize;
|
||||
size_type const sizeChunks = bits::RoundLastBitsUpAndShiftRight(
|
||||
m_BitVector.size(), bits::LogBitSizeOfType<size_type>::value + m_LogChunkSize);
|
||||
if (sizeChunks != 0)
|
||||
{
|
||||
m_Chunks = ChunkVectorType(info.Source().SubReader(sizeChunks * sizeof(size_type)),
|
||||
sizeChunks);
|
||||
if (SwapIfBigEndian(m_Chunks[m_Chunks.size() - 1]) > m_BitVector.size())
|
||||
MYTHROW(DDParseException, (m_Chunks.size(),
|
||||
SwapIfBigEndian(m_Chunks[m_Chunks.size() - 1]),
|
||||
m_BitVector.size()));
|
||||
}
|
||||
}
|
||||
|
||||
size_type Rank0(size_type x) const
|
||||
{
|
||||
ASSERT_LESS(x, m_BitVector.size(), ());
|
||||
return x + 1 - Rank1(x);
|
||||
}
|
||||
|
||||
size_type Rank1(size_type x) const
|
||||
{
|
||||
ASSERT_LESS(x, m_BitVector.size(), ());
|
||||
size_type const logBitSize = bits::LogBitSizeOfType<size_type>::value;
|
||||
size_type const iWord = x >> logBitSize;
|
||||
size_type const iChunk = iWord >> m_LogChunkSize;
|
||||
return (iChunk == 0 ? 0 : SwapIfBigEndian(m_Chunks[iChunk - 1])) +
|
||||
m_BitVector.PopCountWords(iChunk << m_LogChunkSize, iWord) +
|
||||
bits::popcount(
|
||||
m_BitVector.WordAt(iWord) &
|
||||
(size_type(-1) >> ((8 * sizeof(size_type) - 1) - (x - (iWord << logBitSize)))));
|
||||
}
|
||||
|
||||
size_type Select1(size_type i) const
|
||||
{
|
||||
ASSERT_GREATER(i, 0, ());
|
||||
ASSERT_LESS_OR_EQUAL(i, m_Chunks[m_Chunks.size() - 1], ());
|
||||
// TODO: First try approximate lower and upper bound.
|
||||
size_type iChunk = lower_bound(
|
||||
boost::make_transform_iterator(m_Chunks.begin(),
|
||||
&SwapIfBigEndian<size_type>),
|
||||
boost::make_transform_iterator(m_Chunks.end(),
|
||||
&SwapIfBigEndian<size_type>),
|
||||
i) -
|
||||
boost::make_transform_iterator(m_Chunks.begin(),
|
||||
&SwapIfBigEndian<size_type>);
|
||||
ASSERT_LESS(iChunk, m_Chunks.size(), ());
|
||||
ASSERT_GREATER_OR_EQUAL(SwapIfBigEndian(m_Chunks[iChunk]), i, ());
|
||||
ASSERT_LESS((iChunk << m_LogChunkSize), m_BitVector.size_words(), (iChunk, m_LogChunkSize));
|
||||
return (iChunk << (bits::LogBitSizeOfType<size_type>::value + m_LogChunkSize)) +
|
||||
m_BitVector.Select1FromWord(
|
||||
iChunk << m_LogChunkSize,
|
||||
i - (iChunk == 0 ? 0 : SwapIfBigEndian(m_Chunks[iChunk - 1])));
|
||||
}
|
||||
|
||||
size_type size() const
|
||||
{
|
||||
return m_BitVector.size();
|
||||
}
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
return size() == 0;
|
||||
}
|
||||
|
||||
bool operator[](size_type i) const
|
||||
{
|
||||
return m_BitVector[i];
|
||||
}
|
||||
|
||||
TBitVector const & BitVector() const
|
||||
{
|
||||
return m_BitVector;
|
||||
}
|
||||
|
||||
private:
|
||||
TBitVector m_BitVector;
|
||||
typedef DDVector<size_type, typename TBitVector::VectorType::ReaderType> ChunkVectorType;
|
||||
ChunkVectorType m_Chunks;
|
||||
size_type m_LogChunkSize; // Chunk size in size_types.
|
||||
};
|
||||
|
||||
#include "../base/stop_mem_debug.hpp"
|
|
@ -1,103 +0,0 @@
|
|||
#pragma once
|
||||
#include "dd_base.hpp"
|
||||
#include "endianness.hpp"
|
||||
#include "varint.hpp"
|
||||
#include "../base/assert.hpp"
|
||||
#include "../base/base.hpp"
|
||||
#include "../base/bits.hpp"
|
||||
#include "../std/type_traits.hpp"
|
||||
|
||||
#include "../base/start_mem_debug.hpp"
|
||||
|
||||
template <
|
||||
class TWordVector,
|
||||
typename TSize = typename TWordVector::size_type,
|
||||
typename TDifference = typename make_signed<TSize>::type
|
||||
> class DDBitVector
|
||||
{
|
||||
public:
|
||||
typedef TWordVector VectorType;
|
||||
typedef typename VectorType::value_type WordType;
|
||||
typedef TSize size_type;
|
||||
typedef TDifference difference_type;
|
||||
|
||||
DDBitVector()
|
||||
{
|
||||
}
|
||||
|
||||
template <class ReaderT>
|
||||
void Parse(DDParseInfo<ReaderT> & info)
|
||||
{
|
||||
Parse(info, ReadVarUint<size_type>(info.Source()));
|
||||
}
|
||||
|
||||
template <class ReaderT>
|
||||
void Parse(DDParseInfo<ReaderT> & info, size_type vectorSize)
|
||||
{
|
||||
m_Size = vectorSize;
|
||||
m_Data = VectorType(info.Source().SubReader(size_words() * sizeof(WordType)), size_words());
|
||||
}
|
||||
|
||||
bool operator[](size_type i) const
|
||||
{
|
||||
ASSERT(i < size(), (i, size()));
|
||||
return 0 !=
|
||||
(WordAt(i / 8 / sizeof(WordType)) & (WordType(1) << (i & (8 * sizeof(WordType) - 1))));
|
||||
}
|
||||
|
||||
size_type size() const
|
||||
{
|
||||
return m_Size;
|
||||
}
|
||||
|
||||
size_type size_words() const
|
||||
{
|
||||
return (m_Size + sizeof(WordType) * 8 - 1) / 8 / sizeof(WordType);
|
||||
}
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
return m_Size == 0;
|
||||
}
|
||||
|
||||
WordType WordAt(size_type i) const
|
||||
{
|
||||
ASSERT(i < size_words(), (i));
|
||||
return SwapIfBigEndian(m_Data[i]);
|
||||
}
|
||||
|
||||
size_type PopCountWords(size_type begWord, size_type endWord) const
|
||||
{
|
||||
if (begWord == endWord)
|
||||
return 0;
|
||||
ASSERT_LESS(begWord, endWord, ());
|
||||
ASSERT_LESS(begWord, size_words(), ());
|
||||
ASSERT_LESS_OR_EQUAL(endWord, size_words(), ());
|
||||
|
||||
// popcount doesn't depend on byte order.
|
||||
size_type result = 0;
|
||||
while (begWord != endWord)
|
||||
result += bits::popcount(m_Data[begWord++]);
|
||||
return result;
|
||||
}
|
||||
|
||||
size_type Select1FromWord(size_type iWord, size_type i) const
|
||||
{
|
||||
ASSERT(iWord < size_words(), (iWord, size_words(), i));
|
||||
size_type const startWord = iWord;
|
||||
size_type wordRank = bits::popcount(m_Data[iWord]);
|
||||
while (wordRank < i)
|
||||
{
|
||||
i -= wordRank;
|
||||
wordRank = bits::popcount(m_Data[++iWord]);
|
||||
}
|
||||
return (iWord - startWord) * sizeof(WordType) * 8 +
|
||||
bits::select1(SwapIfBigEndian(m_Data[iWord]), i);
|
||||
}
|
||||
|
||||
private:
|
||||
size_type m_Size;
|
||||
VectorType m_Data;
|
||||
};
|
||||
|
||||
#include "../base/stop_mem_debug.hpp"
|
|
@ -1,128 +0,0 @@
|
|||
#pragma once
|
||||
#include "dd_bit_vector.hpp"
|
||||
#include "dd_bit_rank_directory.hpp"
|
||||
#include "../base/base.hpp"
|
||||
|
||||
#include "../base/start_mem_debug.hpp"
|
||||
|
||||
template <class BitRankDirT>
|
||||
class DDCompactTree
|
||||
{
|
||||
public:
|
||||
typedef BitRankDirT BitRankDirType;
|
||||
|
||||
// Node id.
|
||||
typedef typename BitRankDirT::size_type Id;
|
||||
static Id const INVALID_ID = -1;
|
||||
|
||||
DDCompactTree()
|
||||
{
|
||||
}
|
||||
|
||||
// Id of the root.
|
||||
Id Root() const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Parent id and INVALID_ID for root.
|
||||
Id Parent(Id id) const
|
||||
{
|
||||
ASSERT(id != INVALID_ID, ());
|
||||
return id ? m_IsParent.Select1(m_IsFirstChild.Rank1(id)) : INVALID_ID;
|
||||
}
|
||||
|
||||
// First child id and INVALID_ID for leaf.
|
||||
Id FirstChild(Id id) const
|
||||
{
|
||||
ASSERT(id != INVALID_ID, ());
|
||||
return m_IsParent[id] ? m_IsFirstChild.Select1(m_IsParent.Rank1(id)) : INVALID_ID;
|
||||
}
|
||||
|
||||
// Next sibling id and INVALID_ID if there is no next sibling.
|
||||
Id NextSibling(Id id) const
|
||||
{
|
||||
ASSERT(id != INVALID_ID, ());
|
||||
return (id + 1 == m_IsFirstChild.size() || m_IsFirstChild[id + 1]) ? INVALID_ID : id + 1;
|
||||
}
|
||||
|
||||
template <class ReaderT>
|
||||
void Parse(DDParseInfo<ReaderT> & info)
|
||||
{
|
||||
typedef typename BitRankDirT::BitVectorType BitVectorType;
|
||||
{
|
||||
BitVectorType isParent;
|
||||
isParent.Parse(info);
|
||||
if (isParent.size() == 0)
|
||||
MYTHROW(DDParseException, ());
|
||||
m_IsParent.Parse(info, isParent);
|
||||
}
|
||||
{
|
||||
// TODO: Don't write logRankChunkSize twice.
|
||||
// TODO: Allow logRankChunkSize be explicitly specified.
|
||||
BitVectorType isFirstChild;
|
||||
isFirstChild.Parse(info, m_IsParent.size());
|
||||
if (isFirstChild.size() == 0)
|
||||
MYTHROW(DDParseException, ());
|
||||
m_IsFirstChild.Parse(info, isFirstChild);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
friend class MMCompactTreeTester;
|
||||
BitRankDirT m_IsParent;
|
||||
BitRankDirT m_IsFirstChild;
|
||||
};
|
||||
|
||||
template <class BitRankDirT>
|
||||
class DDCompactTreeWithData : public DDCompactTree<BitRankDirT>
|
||||
{
|
||||
public:
|
||||
typedef DDCompactTree<BitRankDirT> BaseType;
|
||||
typedef BitRankDirT BitRankDirType;
|
||||
typedef typename BaseType::Id Id;
|
||||
static Id const INVALID_ID = BaseType::INVALID_ID;
|
||||
|
||||
DDCompactTreeWithData() : m_NodesWithData(0)
|
||||
{
|
||||
}
|
||||
|
||||
// Number of nodes with data.
|
||||
Id NodesWithData() const
|
||||
{
|
||||
return m_NodesWithData;
|
||||
}
|
||||
|
||||
// Id of the data for a given node id and INVALID_ID if node doesn't have any data.
|
||||
Id Data(Id id) const
|
||||
{
|
||||
ASSERT(id != INVALID_ID, ());
|
||||
if (BaseType::m_IsParent[id])
|
||||
{
|
||||
Id const parentIndex = BaseType::m_IsParent.Rank1(id) - 1;
|
||||
return m_ParentHasData[parentIndex] ? m_ParentHasData.Rank1(parentIndex) - 1 : INVALID_ID;
|
||||
}
|
||||
else
|
||||
{
|
||||
return m_ParentsWithDataCount + BaseType::m_IsParent.Rank0(id) - 1;
|
||||
}
|
||||
}
|
||||
|
||||
template <class ReaderT>
|
||||
void Parse(DDParseInfo<ReaderT> & info)
|
||||
{
|
||||
BaseType::Parse(info);
|
||||
// TODO: Pass the vector size here.
|
||||
m_ParentHasData.Parse(info);
|
||||
m_ParentsWithDataCount =
|
||||
m_ParentHasData.empty() ? 0 : m_ParentHasData.Rank1(m_ParentHasData.size() - 1);
|
||||
m_NodesWithData = m_ParentsWithDataCount +
|
||||
BaseType::m_IsParent.Rank0(BaseType::m_IsParent.size() - 1);
|
||||
}
|
||||
protected:
|
||||
BitRankDirType m_ParentHasData;
|
||||
uint32_t m_ParentsWithDataCount;
|
||||
size_t m_NodesWithData;
|
||||
};
|
||||
|
||||
#include "../base/stop_mem_debug.hpp"
|
|
@ -1,70 +0,0 @@
|
|||
#pragma once
|
||||
#include "dd_compact_tree.hpp"
|
||||
#include "dd_vector.hpp"
|
||||
#include "../base/assert.hpp"
|
||||
#include "../base/base.hpp"
|
||||
#include "../std/string.hpp"
|
||||
#include "../base/start_mem_debug.hpp"
|
||||
|
||||
class MMCompactTTrieester;
|
||||
|
||||
template <class TChar, class BitRankDirT>
|
||||
class DDCompactTrie : public DDCompactTreeWithData<BitRankDirT>
|
||||
{
|
||||
public:
|
||||
typedef DDCompactTreeWithData<BitRankDirT> BaseType;
|
||||
typedef typename BaseType::Id Id;
|
||||
static Id const INVALID_ID = BaseType::INVALID_ID;
|
||||
|
||||
DDCompactTrie()
|
||||
{
|
||||
}
|
||||
|
||||
TChar Char(Id id) const
|
||||
{
|
||||
ASSERT(id != 0, ());
|
||||
ASSERT(id != INVALID_ID, ());
|
||||
return m_Chars[id - 1]; // There is no char for the root.
|
||||
}
|
||||
|
||||
template <class ReaderT>
|
||||
void Parse(DDParseInfo<ReaderT> & info)
|
||||
{
|
||||
BaseType::Parse(info);
|
||||
m_Chars =
|
||||
CharVectorType(info.Source().SubReader((BaseType::m_IsParent.size() - 1) * sizeof(TChar)),
|
||||
BaseType::m_IsParent.size() - 1);
|
||||
}
|
||||
|
||||
protected:
|
||||
friend class DDCompactTrieTester;
|
||||
typedef DDVector<TChar, typename BaseType::BitRankDirType::BitVectorType::VectorType::ReaderType>
|
||||
CharVectorType;
|
||||
CharVectorType m_Chars;
|
||||
};
|
||||
|
||||
// Walks |trie| from the root along the characters in [pathBegin, pathEnd).
// Returns the id of the node reached, or TTrie::INVALID_ID as soon as some
// character has no matching child. An empty path yields the root.
template <class TTrie, typename TIter>
typename TTrie::Id FindNodeByPath(TTrie const & trie, TIter pathBegin, TIter pathEnd)
{
  typedef typename TTrie::Id IdType;

  IdType current = trie.Root();
  for (TIter step = pathBegin; step != pathEnd; ++step)
  {
    // Scan the children of |current| for one labelled with *step.
    IdType candidate = trie.FirstChild(current);
    while (candidate != TTrie::INVALID_ID && !(trie.Char(candidate) == *step))
      candidate = trie.NextSibling(candidate);

    if (candidate == TTrie::INVALID_ID)
      return TTrie::INVALID_ID;
    current = candidate;
  }
  return current;
}
|
||||
|
||||
#include "../base/stop_mem_debug.hpp"
|
|
@ -1,80 +0,0 @@
|
|||
#pragma once
|
||||
#include "../base/assert.hpp"
|
||||
#include "../base/base.hpp"
|
||||
|
||||
#include "../base/start_mem_debug.hpp"
|
||||
|
||||
class MMParseInfo
|
||||
{
|
||||
public:
|
||||
MMParseInfo(void const * p, size_t size, bool failOnError) :
|
||||
m_p(static_cast<char const *>(p)), m_Size(size), m_bFailOnError(failOnError),
|
||||
m_bSuccessful(true)
|
||||
{
|
||||
}
|
||||
|
||||
~MMParseInfo()
|
||||
{
|
||||
CHECK(!m_bFailOnError || m_bSuccessful, ());
|
||||
}
|
||||
|
||||
void CheckAligned(size_t size)
|
||||
{
|
||||
size_t p = reinterpret_cast<size_t>(m_p);
|
||||
if (!(size & 7)) {
|
||||
CHECK_OR_CALL(m_bFailOnError, Fail, !(p & 7), (p, size));
|
||||
}
|
||||
else if (!(size & 3)) {
|
||||
CHECK_OR_CALL(m_bFailOnError, Fail, !(p & 3), (p, size));
|
||||
}
|
||||
else if (!(size & 1)) {
|
||||
CHECK_OR_CALL(m_bFailOnError, Fail, !(p & 1), (p, size));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T> T const * Advance()
|
||||
{
|
||||
CheckAligned(sizeof(T));
|
||||
size_t const advanceSize = sizeof(T);
|
||||
CHECK_OR_CALL(m_bFailOnError, Fail, advanceSize <= m_Size, (sizeof(T), m_Size));
|
||||
void const * p = m_p;
|
||||
m_p += advanceSize;
|
||||
m_Size -= advanceSize;
|
||||
return static_cast<T const *>(p);
|
||||
}
|
||||
|
||||
template <typename T> T const * Advance(size_t size)
|
||||
{
|
||||
CheckAligned(sizeof(T));
|
||||
size_t const advanceSize = size * sizeof(T);
|
||||
CHECK_OR_CALL(m_bFailOnError, Fail, advanceSize <= m_Size, (size, sizeof(T), m_Size));
|
||||
void const * p = m_p;
|
||||
m_p += advanceSize;
|
||||
m_Size -= advanceSize;
|
||||
return static_cast<T const *>(p);
|
||||
}
|
||||
|
||||
bool Successful() const
|
||||
{
|
||||
return m_bSuccessful;
|
||||
}
|
||||
|
||||
void Fail()
|
||||
{
|
||||
CHECK(!m_bFailOnError, (m_bSuccessful));
|
||||
m_bSuccessful = false;
|
||||
}
|
||||
|
||||
bool FailOnError() const
|
||||
{
|
||||
return m_bFailOnError;
|
||||
}
|
||||
|
||||
private:
|
||||
char const * m_p;
|
||||
size_t m_Size;
|
||||
bool m_bFailOnError;
|
||||
bool m_bSuccessful;
|
||||
};
|
||||
|
||||
#include "../base/stop_mem_debug.hpp"
|
|
@ -1,189 +0,0 @@
|
|||
#pragma once
|
||||
#include "endianness.hpp"
|
||||
#include "mm_base.hpp"
|
||||
#include "../base/assert.hpp"
|
||||
#include "../base/base.hpp"
|
||||
#include "../base/bits.hpp"
|
||||
#include "../std/algorithm.hpp"
|
||||
#include <boost/iterator/transform_iterator.hpp>
|
||||
|
||||
#include "../base/start_mem_debug.hpp"
|
||||
|
||||
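// Bit vector read in place from a parsed memory block.
// NOTE(review): the original comment text was lost; only the words
// "network byte order (big endian)" survive. Words are read through
// SwapIfBigEndian - confirm the stored byte order against that helper.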
|
||||
template <typename TWord> class MMBitVector
|
||||
{
|
||||
public:
|
||||
typedef TWord WordType;
|
||||
|
||||
MMBitVector()
|
||||
{
|
||||
}
|
||||
|
||||
MMBitVector(void const *p, size_t size)
|
||||
{
|
||||
MMParseInfo parseInfo(p, size, true);
|
||||
this->Parse(parseInfo);
|
||||
}
|
||||
|
||||
bool operator[](TWord i) const
|
||||
{
|
||||
ASSERT(i < size(), (i, size()));
|
||||
return (WordAt(i / 8 / sizeof(TWord)) & (TWord(1) << (i & (8 * sizeof(TWord) - 1)))) != 0;
|
||||
}
|
||||
|
||||
TWord size() const
|
||||
{
|
||||
return m_Size;
|
||||
}
|
||||
|
||||
TWord size_words() const
|
||||
{
|
||||
return (m_Size + sizeof(TWord)*8 - 1) / 8 / sizeof(TWord);
|
||||
}
|
||||
|
||||
size_t bytes_used() const
|
||||
{
|
||||
return (size_words() + 1) * sizeof(TWord);
|
||||
}
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
return m_Size == 0;
|
||||
}
|
||||
|
||||
TWord WordAt(TWord i) const
|
||||
{
|
||||
ASSERT(i < size_words(), (i));
|
||||
return SwapIfBigEndian(m_pWords[i]);
|
||||
}
|
||||
|
||||
TWord Select1FromWord(TWord word, TWord i) const
|
||||
{
|
||||
ASSERT(word < size_words(), (word, size_words(), i));
|
||||
TWord const * const pStartWord = m_pWords + word;
|
||||
TWord const * pWord = pStartWord;
|
||||
TWord wordRank = bits::popcount(*pWord);
|
||||
while (wordRank < i)
|
||||
{
|
||||
i -= wordRank;
|
||||
wordRank = bits::popcount(*++pWord);
|
||||
}
|
||||
return static_cast<TWord>(pWord - pStartWord) * sizeof(TWord) * 8 +
|
||||
bits::select1(SwapIfBigEndian(*pWord), i);
|
||||
}
|
||||
|
||||
TWord PopCountWords(TWord begWord, TWord endWord) const
|
||||
{
|
||||
// popcount byte order.
|
||||
ASSERT(begWord < size_words(), (begWord));
|
||||
ASSERT(endWord <= size_words(), (endWord));
|
||||
return bits::popcount(m_pWords + begWord, endWord - begWord);
|
||||
}
|
||||
|
||||
void Parse(MMParseInfo & info)
|
||||
{
|
||||
if (!info.Successful()) return;
|
||||
TWord size = *info.Advance<TWord>(1);
|
||||
Parse(info, SwapIfBigEndian(size));
|
||||
}
|
||||
|
||||
void Parse(MMParseInfo & info, TWord vectorSize)
|
||||
{
|
||||
m_Size = vectorSize;
|
||||
if (!info.Successful()) return;
|
||||
m_pWords = info.Advance<TWord>(static_cast<size_t>(size_words()));
|
||||
}
|
||||
|
||||
private:
|
||||
TWord const * m_pWords;
|
||||
TWord m_Size;
|
||||
};
|
||||
|
||||
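// Rank directory over an MMBitVector<uint32_t>: provides Rank0, Rank1 and Select1 queries.
// NOTE(review): the original comment text was lost; it also mentioned Select0,
// which this class does not define.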
|
||||
class MMBitVector32RankDirectory
|
||||
{
|
||||
public:
|
||||
MMBitVector32RankDirectory(MMBitVector<uint32_t> const & bitVector) : m_BitVector(bitVector)
|
||||
{
|
||||
}
|
||||
|
||||
MMBitVector32RankDirectory(MMBitVector<uint32_t> const & bitVector, void const * p, size_t size)
|
||||
: m_BitVector(bitVector)
|
||||
{
|
||||
MMParseInfo parseInfo(p, size, true);
|
||||
this->Parse(parseInfo);
|
||||
}
|
||||
|
||||
uint32_t Rank0(uint32_t x) const
|
||||
{
|
||||
ASSERT(x < m_BitVector.size(), (x, m_BitVector.size()));
|
||||
return x + 1 - Rank1(x);
|
||||
}
|
||||
|
||||
uint32_t Rank1(uint32_t x) const
|
||||
{
|
||||
ASSERT(x < m_BitVector.size(), (x, m_BitVector.size()));
|
||||
uint32_t const iWord = x >> 5;
|
||||
uint32_t const iChunk = iWord >> m_LogChunkSize;
|
||||
return (iChunk == 0 ? 0 : SwapIfBigEndian(m_pChunks[iChunk - 1])) +
|
||||
m_BitVector.PopCountWords(iChunk << m_LogChunkSize, iWord) +
|
||||
bits::popcount(m_BitVector.WordAt(iWord) & (0xFFFFFFFFU >> (31 - (x - (iWord << 5)))));
|
||||
}
|
||||
|
||||
uint32_t Select1(uint32_t i) const
|
||||
{
|
||||
ASSERT(i > 0 && i <= m_MaxRank, (i, m_MaxRank));
|
||||
// TODO: First try approximate lower and upper bound.
|
||||
uint32_t iChunk = lower_bound(
|
||||
boost::make_transform_iterator(m_pChunks, &SwapIfBigEndian<uint32_t>),
|
||||
boost::make_transform_iterator(m_pChunks + size_chunks(), &SwapIfBigEndian<uint32_t>),
|
||||
i) - boost::make_transform_iterator(m_pChunks, &SwapIfBigEndian<uint32_t>);
|
||||
ASSERT_LESS(iChunk, size_chunks(), ());
|
||||
ASSERT_GREATER_OR_EQUAL(SwapIfBigEndian(m_pChunks[iChunk]), i, ());
|
||||
ASSERT_LESS((iChunk << m_LogChunkSize), m_BitVector.size_words(), (iChunk, m_LogChunkSize));
|
||||
return (iChunk << (5 + m_LogChunkSize)) +
|
||||
m_BitVector.Select1FromWord(
|
||||
iChunk << m_LogChunkSize,
|
||||
i - (iChunk == 0 ? 0 : SwapIfBigEndian(m_pChunks[iChunk - 1])));
|
||||
}
|
||||
|
||||
uint32_t size_chunks() const
|
||||
{
|
||||
return bits::RoundLastBitsUpAndShiftRight(m_BitVector.size(), 5 + m_LogChunkSize);
|
||||
}
|
||||
|
||||
size_t bytes_used() const
|
||||
{
|
||||
return (1 + size_chunks()) << (2 + m_LogChunkSize);
|
||||
}
|
||||
|
||||
void Parse(MMParseInfo & info)
|
||||
{
|
||||
// TODO: Store version in MMBitVector32RankDirectory?
|
||||
if (!info.Successful()) return;
|
||||
m_LogChunkSize = SwapIfBigEndian(*info.Advance<uint32_t>());
|
||||
if (!info.Successful()) return;
|
||||
m_pChunks = info.Advance<uint32_t>(size_chunks());
|
||||
#ifdef DEBUG
|
||||
m_MaxRank = (m_BitVector.empty() ? 0 : Rank1(m_BitVector.size() - 1));
|
||||
if (m_MaxRank > m_BitVector.size())
|
||||
{
|
||||
CHECK(!info.FailOnError(), (m_MaxRank, m_BitVector.size()));
|
||||
info.Fail();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
protected:
|
||||
MMBitVector<uint32_t> const & m_BitVector;
|
||||
uint32_t const * m_pChunks;
|
||||
uint32_t m_LogChunkSize; // 32 .
|
||||
#ifdef DEBUG
|
||||
uint32_t m_MaxRank;
|
||||
#endif
|
||||
};
|
||||
|
||||
#include "../base/stop_mem_debug.hpp"
|
|
@ -1,113 +0,0 @@
|
|||
#pragma once
|
||||
#include "mm_bit_vector.hpp"
|
||||
#include "../base/base.hpp"
|
||||
|
||||
class MMCompactTree
|
||||
{
|
||||
public:
|
||||
// Node id.
|
||||
typedef uint32_t Id;
|
||||
static Id const INVALID_ID = 0xFFFFFFFF;
|
||||
|
||||
MMCompactTree() :
|
||||
m_IsParentDir(m_IsParent), m_IsFirstChildDir(m_IsFirstChild)
|
||||
{
|
||||
}
|
||||
|
||||
MMCompactTree(void const * p, size_t size) :
|
||||
m_IsParentDir(m_IsParent), m_IsFirstChildDir(m_IsFirstChild)
|
||||
{
|
||||
MMParseInfo info(p, size, true);
|
||||
this->Parse(info);
|
||||
}
|
||||
|
||||
// Id of the root.
|
||||
Id Root() const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Parent id and INVALID_ID for root.
|
||||
Id Parent(Id id) const
|
||||
{
|
||||
return id ? m_IsParentDir.Select1(m_IsFirstChildDir.Rank1(id)) : INVALID_ID;
|
||||
}
|
||||
|
||||
// First child id and INVALID_ID for leaf.
|
||||
Id FirstChild(Id id) const
|
||||
{
|
||||
return m_IsParent[id] ? m_IsFirstChildDir.Select1(m_IsParentDir.Rank1(id)) : INVALID_ID;
|
||||
}
|
||||
|
||||
// Next sibling id and INVALID_ID if there is no next sibling.
|
||||
Id NextSibling(Id id) const
|
||||
{
|
||||
return (id + 1 == m_IsFirstChild.size() || m_IsFirstChild[id + 1]) ? INVALID_ID : id + 1;
|
||||
}
|
||||
|
||||
void Parse(MMParseInfo & info)
|
||||
{
|
||||
if (!info.Successful())
|
||||
return;
|
||||
m_IsParent.Parse(info);
|
||||
m_IsParentDir.Parse(info);
|
||||
CHECK_OR_CALL(info.FailOnError(), info.Fail, m_IsParent.size() > 0, ());
|
||||
m_IsFirstChild.Parse(info, m_IsParent.size());
|
||||
m_IsFirstChildDir.Parse(info);
|
||||
}
|
||||
|
||||
protected:
|
||||
friend class MMCompactTreeTester;
|
||||
MMBitVector<uint32_t> m_IsParent;
|
||||
MMBitVector32RankDirectory m_IsParentDir;
|
||||
MMBitVector<uint32_t> m_IsFirstChild;
|
||||
MMBitVector32RankDirectory m_IsFirstChildDir;
|
||||
};
|
||||
|
||||
class MMCompactTreeWithData : public MMCompactTree
|
||||
{
|
||||
public:
|
||||
MMCompactTreeWithData() : m_ParentHasDataDir(m_ParentHasData), m_Size(0)
|
||||
{
|
||||
}
|
||||
|
||||
// Number of nodes with data.
|
||||
size_t NodesWithData()
|
||||
{
|
||||
return m_Size;
|
||||
}
|
||||
|
||||
// Id of the data for a given node id and INVALID_ID if node doesn't have any data.
|
||||
uint32_t Data(Id id) const
|
||||
{
|
||||
if (m_IsParent[id])
|
||||
{
|
||||
uint32_t const parentIndex = m_IsParentDir.Rank1(id) - 1;
|
||||
return m_ParentHasData[parentIndex] ? m_ParentHasDataDir.Rank1(parentIndex) - 1 : INVALID_ID;
|
||||
}
|
||||
else
|
||||
{
|
||||
return m_ParentsWithDataCount + m_IsParentDir.Rank0(id) - 1;
|
||||
}
|
||||
}
|
||||
|
||||
void Parse(MMParseInfo & info)
|
||||
{
|
||||
MMCompactTree::Parse(info);
|
||||
if (!info.Successful())
|
||||
return;
|
||||
m_ParentHasData.Parse(
|
||||
info, m_IsParent.empty() ? 0 : m_IsParentDir.Rank1(m_IsParent.size() - 1));
|
||||
m_ParentHasDataDir.Parse(info);
|
||||
m_ParentsWithDataCount =
|
||||
m_ParentHasData.empty() ? 0 : m_ParentHasDataDir.Rank1(m_ParentHasData.size() - 1);
|
||||
m_Size = m_ParentsWithDataCount +
|
||||
(m_IsParent.empty() ? 0 : m_IsParentDir.Rank0(m_IsParent.size() - 1));
|
||||
}
|
||||
protected:
|
||||
MMBitVector<uint32_t> m_ParentHasData;
|
||||
MMBitVector32RankDirectory m_ParentHasDataDir;
|
||||
uint32_t m_ParentsWithDataCount;
|
||||
size_t m_Size;
|
||||
|
||||
};
|
|
@ -1,70 +0,0 @@
|
|||
#pragma once
|
||||
#include "mm_base.hpp"
|
||||
#include "mm_compact_tree.hpp"
|
||||
#include "mm_vector.hpp"
|
||||
#include "../base/assert.hpp"
|
||||
#include "../base/base.hpp"
|
||||
#include "../std/string.hpp"
|
||||
#include "../base/start_mem_debug.hpp"
|
||||
|
||||
class MMCompactTrieTester;
|
||||
|
||||
template <class TChar> class MMCompactTrie : public MMCompactTreeWithData
|
||||
{
|
||||
public:
|
||||
MMCompactTrie()
|
||||
{
|
||||
}
|
||||
|
||||
MMCompactTrie(void const * p, size_t size) : MMCompactTreeWithData()
|
||||
{
|
||||
MMParseInfo info(p, size, true);
|
||||
Parse(info);
|
||||
}
|
||||
|
||||
TChar Char(Id id) const
|
||||
{
|
||||
ASSERT(id != 0, ());
|
||||
ASSERT(id != INVALID_ID, ());
|
||||
return m_Chars[id - 1]; // There is no char for the root.
|
||||
}
|
||||
|
||||
void Parse(MMParseInfo & info)
|
||||
{
|
||||
MMCompactTreeWithData::Parse(info);
|
||||
if (!info.Successful())
|
||||
return;
|
||||
m_Chars.Parse(info, m_IsParent.size() - 1);
|
||||
}
|
||||
|
||||
protected:
|
||||
friend class MMCompactTrieTester;
|
||||
MMVector<TChar> m_Chars;
|
||||
};
|
||||
|
||||
#if 0
// Disabled by the preprocessor guard.
// Follows the edge labels in [pathBegin, pathEnd) starting from node 0 and
// returns the node reached, or MMCompactTree::INVALID_ID if some label has no
// matching child.
template <class TrieT, typename ItT>
MMCompactTree::Id FindNodeByPath(TrieT const & trie, ItT pathBegin, ItT pathEnd)
{
  MMCompactTree::Id current = 0;
  for (; pathBegin != pathEnd; ++pathBegin)
  {
    // Scan the sibling chain until a child carries the wanted label or the chain ends.
    MMCompactTree::Id match = trie.FirstChild(current);
    while (match != MMCompactTree::INVALID_ID && !(trie.Char(match) == *pathBegin))
      match = trie.NextSibling(match);

    if (match != MMCompactTree::INVALID_ID)
      current = match;
    else
      return MMCompactTree::INVALID_ID;
  }
  return current;
}
#endif
|
||||
|
||||
#include "../base/stop_mem_debug.hpp"
|
|
@ -1,58 +0,0 @@
|
|||
#pragma once
|
||||
#include "endianness.hpp"
|
||||
#include "mm_base.hpp"
|
||||
#include "../base/assert.hpp"
|
||||
#include "../base/base.hpp"
|
||||
#include "../std/memcpy.hpp"
|
||||
|
||||
#include "../base/start_mem_debug.hpp"
|
||||
|
||||
template <typename T, int Align = sizeof(T)> class MMVector
|
||||
{
|
||||
public:
|
||||
typedef T * const_iterator;
|
||||
typedef const_iterator iterator;
|
||||
typedef T value_type;
|
||||
|
||||
const_iterator begin() const
|
||||
{
|
||||
return m_p;
|
||||
}
|
||||
|
||||
const_iterator end() const
|
||||
{
|
||||
return m_p + m_Size;
|
||||
}
|
||||
|
||||
T const & operator [] (size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, m_Size, ());
|
||||
return m_p[i];
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return m_Size;
|
||||
}
|
||||
|
||||
void Parse(MMParseInfo & info)
|
||||
{
|
||||
if (!info.Successful()) return;
|
||||
uint32_t size;
|
||||
memcpy(size, info.Advance<uint8_t>(4), 4);
|
||||
Parse(info, SwapIfBigEndian(size));
|
||||
}
|
||||
|
||||
void Parse(MMParseInfo & info, size_t vectorSize)
|
||||
{
|
||||
m_Size = vectorSize;
|
||||
if (!info.Successful()) return;
|
||||
m_p = info.Advance<T>(m_Size);
|
||||
}
|
||||
|
||||
private:
|
||||
T const * m_p;
|
||||
size_t m_Size;
|
||||
};
|
||||
|
||||
#include "../base/stop_mem_debug.hpp"
|
Loading…
Add table
Reference in a new issue