diff --git a/coding/bit_vector_builder.hpp b/coding/bit_vector_builder.hpp deleted file mode 100644 index bd038ef43f..0000000000 --- a/coding/bit_vector_builder.hpp +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once -#include "varint.hpp" -#include "write_to_sink.hpp" -#include "../base/base.hpp" -#include "../std/iterator.hpp" - -// TWord - word type, uint32_t or uint64_t. -// TSink - where to write. -// TIter - iterator to bool. -template -void BuildMMBitVector(TSink & sink, TIter beg, TIter end, bool bWriteSize = true, size_t size = -1) -{ - if (size == size_t(-1)) - size = distance(beg, end); - CHECK(static_cast(size) == size, ("Vector is more than word size.", size)); - if (bWriteSize) - WriteVarUint(sink, size); - - int bitInWord = 0; - TWord word = 0; - for (; beg != end; ++beg) - { - if (*beg) - word |= (TWord(1) << bitInWord); - if (++bitInWord == 8 * sizeof(TWord)) - { - WriteToSink(sink, word); - bitInWord = 0; - word = 0; - } - } - if (bitInWord != 0) - WriteToSink(sink, word); -} - -// TSink - where to write. -// TIter - iterator to bool. -// TODO: optimize logChunkSize default value. -template -void BuildMMBitVector32RankDirectory(TSink & sink, TIter beg, TIter end, uint32_t logChunkSize = 5) -{ - WriteVarUint(sink, logChunkSize); - uint32_t rank1 = 0; - for (uint32_t i = 0; beg != end; ++beg,++i) - { - if ((i & ((1 << (logChunkSize + 5)) - 1)) == 0 && i != 0) - WriteToSink(sink, rank1); - if (*beg) - ++rank1; - } - WriteToSink(sink, rank1); -} diff --git a/coding/coding.pro b/coding/coding.pro index c3ceeaa944..240b200e58 100644 --- a/coding/coding.pro +++ b/coding/coding.pro @@ -34,27 +34,15 @@ HEADERS += \ internal/file64_api.hpp \ parse_xml.hpp \ varint.hpp \ - mm_vector.hpp \ - mm_bit_vector.hpp \ - mm_base.hpp \ endianness.hpp \ byte_stream.hpp \ var_serial_vector.hpp \ hex.hpp \ - mm_compact_trie.hpp \ - mm_compact_tree.hpp \ - compact_trie_builder.hpp \ - compact_tree_builder.hpp \ - bit_vector_builder.hpp \ dd_vector.hpp \ - dd_bit_vector.hpp \ dd_base.hpp \ writer.hpp \ write_to_sink.hpp \ reader.hpp \ - dd_bit_rank_directory.hpp \ - dd_compact_tree.hpp \ - dd_compact_trie.hpp \ diff.hpp \ diff_patch_common.hpp \ source.hpp \ diff --git a/coding/coding_tests/coding_tests.pro b/coding/coding_tests/coding_tests.pro index a52cd810a0..fb93f233ec 100644 --- a/coding/coding_tests/coding_tests.pro +++ b/coding/coding_tests/coding_tests.pro @@ -12,8 +12,6 @@ include($$ROOT_DIR/common.pri) SOURCES += ../../testing/testingmain.cpp \ endianness_test.cpp \ varint_test.cpp \ - mm_bit_vector_test.cpp \ - mm_compact_trie_test.cpp \ mem_file_reader_test.cpp \ mem_file_writer_test.cpp \ var_serial_vector_test.cpp \ diff --git a/coding/coding_tests/mm_bit_vector_test.cpp b/coding/coding_tests/mm_bit_vector_test.cpp deleted file mode 100644 index cebab9f86a..0000000000 --- a/coding/coding_tests/mm_bit_vector_test.cpp +++ /dev/null @@ -1,177 +0,0 @@ -#include "../../testing/testing.hpp" -#include "../dd_vector.hpp" -#include "../dd_bit_vector.hpp" -#include "../dd_bit_rank_directory.hpp" -#include "../bit_vector_builder.hpp" -#include "../byte_stream.hpp" -#include "../reader.hpp" -#include "../../base/base.hpp" -#include "../../base/macros.hpp" -#include "../../std/cstdlib.hpp" - -#include "../../base/start_mem_debug.hpp" - -namespace -{ - template - void TestBitVector(unsigned char const * bits, size_t N) - { - typedef PushBackByteSink > SinkType; - vector data; - SinkType sink(data); - BuildMMBitVector(sink, &bits[0], &bits[N]); - - DDBitVector > bitVectorDD; - MemReader reader(&data[0], data.size()); - DDParseInfo info(reader, true); - bitVectorDD.Parse(info); - for (size_t i = 0; i < N; ++i) - TEST_EQUAL(bitVectorDD[i], bits[i] != 0, (i)); - } - - void TestBitVector32Rank(unsigned char const * bits, size_t N, size_t logChunkSize) - { - typedef PushBackByteSink > SinkType; - vector data; - SinkType sink(data); - BuildMMBitVector(sink, &bits[0], &bits[N]); - BuildMMBitVector32RankDirectory(sink, &bits[0], &bits[N], logChunkSize); - - DDBitRankDirectory > > rankDirectoryDD; - MemReader reader(&data[0], data.size()); - { - DDParseInfo info(reader, true); - rankDirectoryDD.Parse(info); - } - - uint32_t rank1 = 0; - for (size_t i = 0; i < N; ++i) - { - if (rankDirectoryDD[i]) - { - ++rank1; - TEST_EQUAL(rankDirectoryDD.BitVector().Select1FromWord(0, rank1), i, - (rank1, N, logChunkSize)); - TEST_EQUAL(rankDirectoryDD.Select1(rank1), i, (rank1, N, logChunkSize)); - } - TEST_EQUAL(rankDirectoryDD.Rank1(i), rank1, (i, N, logChunkSize)); - TEST_EQUAL(rankDirectoryDD.Rank0(i), i + 1 - rank1, (i, N, logChunkSize)); - } - } - - unsigned char const simpleBits[] = { - 1,1,0,1,0,0,0,1, - 1,1,0,0,0,0,0,1, - 1,1,0,0,1,1,1,0, - 0,1,1,1,0,0,1,1, - 0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0, - 0,0,1,0,0,1,0,0, - 0,0,0,1,0,0,0,1 }; - static size_t const simpleBitsSize = ARRAY_SIZE(simpleBits); -} - -UNIT_TEST(BitVector8Simple) -{ - TestBitVector(simpleBits, simpleBitsSize); -} - -UNIT_TEST(BitVector16Simple) -{ - TestBitVector(simpleBits, simpleBitsSize); -} - -UNIT_TEST(BitVector32Simple) -{ - TestBitVector(simpleBits, simpleBitsSize); -} - -UNIT_TEST(BitVector64Simple) -{ - TestBitVector(simpleBits, simpleBitsSize); -} - -UNIT_TEST(BitVector32RankSimple) -{ - for (size_t chunkSize = 1; chunkSize <= 7; ++chunkSize) - TestBitVector32Rank(simpleBits, simpleBitsSize, chunkSize); -} - -UNIT_TEST(BitVector32RankChunkSizePlus1All0) -{ - vector bits(129, 0); - TestBitVector(&bits[0], bits.size()); - TestBitVector32Rank(&bits[0], bits.size(), 2); -} - -UNIT_TEST(BitVector32RankChunkSizePlus1All1) -{ - vector bits(129, 1); - TestBitVector(&bits[0], bits.size()); - TestBitVector32Rank(&bits[0], bits.size(), 2); -} - -UNIT_TEST(BitVector32Empty) -{ - TestBitVector(NULL, 0); - TestBitVector(NULL, 0); - TestBitVector(NULL, 0); - TestBitVector(NULL, 0); - // TODO: When uncommented, there is an error: - // malloc: *** error for object 0x12c2e: pointer being freed was not allocated - // TestBitVector32Rank(NULL, 0, 2); -} - -UNIT_TEST(BitVector32RankRandom) -{ - // TODO: When l == 0, there is an error: - // malloc: *** error for object 0x12c2e: pointer being freed was not allocated - // *** set a breakpoint in malloc_error_break to debug - for (size_t l = 1; l <= 150; ++l) - { - vector bits(l); - for (size_t i = 0; i < bits.size(); ++i) - bits[i] = (rand() & 1); - unsigned char * p = bits.empty() ? NULL : &bits[0]; - TestBitVector(p, bits.size()); - TestBitVector32Rank(p, bits.size(), 1); - TestBitVector32Rank(p, bits.size(), 2); - TestBitVector32Rank(p, bits.size(), 3); - } -} - -UNIT_TEST(BitVector32RankRandomPow2) -{ - uint32_t values[] = {16, 32, 64, 128, 256, 1024, 2048, 4096}; - for (size_t j = 0; j < ARRAY_SIZE(values); ++j) - { - for (size_t l = values[j] - 2; l <= values[j] + 2; ++l) - { - vector bits(l); - for (size_t i = 0; i < bits.size(); ++i) - bits[i] = (rand() & 1); - TestBitVector(&bits[0], bits.size()); - TestBitVector32Rank(&bits[0], bits.size(), 1); - TestBitVector32Rank(&bits[0], bits.size(), 2); - TestBitVector32Rank(&bits[0], bits.size(), 3); - } - } -} - -UNIT_TEST(BitVector32RankRandomLargeVector) -{ - size_t const l = 9000; - vector bits(l); - for (size_t i = 0; i < bits.size(); ++i) - bits[i] = (rand() & 1); - unsigned char * p = &bits[0]; - TestBitVector(p, bits.size()); - TestBitVector32Rank(p, bits.size(), 1); - TestBitVector32Rank(p, bits.size(), 2); - TestBitVector32Rank(p, bits.size(), 3); -} - -// TODO: Test large bitVector. -// TODO: Test max size bitVector. -// TODO: Test wrong data size for BitVector and BitVector32RankDirectory. -#include "../../base/stop_mem_debug.hpp" diff --git a/coding/coding_tests/mm_compact_trie_test.cpp b/coding/coding_tests/mm_compact_trie_test.cpp deleted file mode 100644 index f45566c7d6..0000000000 --- a/coding/coding_tests/mm_compact_trie_test.cpp +++ /dev/null @@ -1,97 +0,0 @@ -#include "../../testing/testing.hpp" - -#include "../byte_stream.hpp" -#include "../compact_trie_builder.hpp" -#include "../dd_compact_trie.hpp" -#include "../reader.hpp" -#include "../../base/base.hpp" -#include "../../base/macros.hpp" -#include "../../std/string.hpp" -#include "../../std/vector.hpp" - -//namespace -//{ -class DDCompactTrieTester -{ -public: - template - static typename TBitRankDirectory::BitVectorType const & - IsParentVector(DDCompactTrie const & trie) - { - return trie.m_IsParent.BitVector(); - } - - template - static typename TBitRankDirectory::BitVectorType const & - IsFirstChildVector(DDCompactTrie const & trie) - { - return trie.m_IsFirstChild.BitVector(); - } - template - static int ParentsWithDataCount( - DDCompactTrie const & trie) - { - return trie.m_ParentsWithDataCount; - } - }; -//} - -UNIT_TEST(CompactTrieSimple) -{ - vector words; - words.push_back("hello"); - words.push_back("help"); - words.push_back("sim"); - words.push_back("simple"); - words.push_back("world"); - words.push_back("z"); - - typedef PushBackByteSink > SinkType; - vector data; - SinkType sink(data); - BuildMMCompactTrie(sink, words.begin(), words.end()); - typedef DDBitVector > BitVectorDD; - typedef DDBitRankDirectory RankDirDD; - typedef DDCompactTrie TrieDD; - TrieDD trie; - MemReader reader(&data[0], data.size()); - { - DDParseInfo info(reader, true); - trie.Parse(info); - } - - // 0 (0) - // 1 h(1) s(2) w(3) z(4) - // 2 e(5) i(6) o(7) - // 3 l(8) m(9) r(10) - // 4 l(11) p(12) p(13) l(14) - // 5 o(15) l(16) d(17) - // 6 e(18) - - TEST_EQUAL(trie.NodesWithData(), words.size(), ()); - TEST_EQUAL(trie.Root(), 0U, ()); - TEST_EQUAL(DDCompactTrieTester::ParentsWithDataCount(trie), 1, ()); - - // 0123456789012345678 - char const * chars = "$hswzeiolmrlpplolde"; - char const * isParent = "1111011111110110100"; - char const * isFChild = "0100011111110111111"; - char const * nexts = "0111000000010000000"; - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 - TrieDD::Id fchild[] = { 1, 5, 6, 7,-1, 8, 9,10,11,13,14,15,-1,16,17,-1,18,-1,-1 }; - TrieDD::Id parent[] = { -1, 0, 0, 0, 0, 1, 2, 3, 5, 6, 7, 8, 8, 9,10,11,13,14,16 }; - TrieDD::Id dataI[] = { -1,-1,-1,-1, 1,-1,-1,-1,-1, 0,-1,-1, 2,-1,-1, 3,-1, 4, 5 }; - for (size_t i = 0; i <= 17; ++i) { - // wcout << "!! " << i << endl; - if (i != 0) { - TEST_EQUAL(trie.Char(i), chars[i], (i, string(1, trie.Char(i)), string(1, chars[i]))); - } - TEST_EQUAL(DDCompactTrieTester::IsParentVector(trie)[i], isParent[i] == '1', (i)); - TEST_EQUAL(DDCompactTrieTester::IsFirstChildVector(trie)[i], isFChild[i] == '1', (i)); - TEST_EQUAL(trie.NextSibling(i), nexts[i] == '1' ? i+1 : trie.INVALID_ID, (i)); - TEST_EQUAL(trie.FirstChild(i), fchild[i], (i)); - TEST_EQUAL(trie.Parent(i), parent[i], (i)); - TEST_EQUAL(trie.Data(i), dataI[i], (i)); - } - -} diff --git a/coding/compact_tree_builder.hpp b/coding/compact_tree_builder.hpp deleted file mode 100644 index 2254f286bd..0000000000 --- a/coding/compact_tree_builder.hpp +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once -#include "writer.hpp" -#include "bit_vector_builder.hpp" -#include "../base/base.hpp" -#include "../base/start_mem_debug.hpp" -#include "../std/iterator.hpp" - -template -void BuildCompactTree(TSink & sink, - TIter const isParentBeg, TIter const isParentEnd, - TIter const isFirstChildBeg, TIter const isFirstChildEnd) -{ - size_t const size = distance(isParentBeg, isParentEnd); - BuildMMBitVector(sink, isParentBeg, isParentEnd, true, size); - BuildMMBitVector32RankDirectory(sink, isParentBeg, isParentEnd); - BuildMMBitVector(sink, isFirstChildBeg, isFirstChildEnd, false, size); - BuildMMBitVector32RankDirectory(sink, isFirstChildBeg, isFirstChildEnd); -} - -template -void BuildCompactTreeWithData(TSink & sink, - TIter const isParentBeg, TIter const isParentEnd, - TIter const isFirstChildBeg, TIter const isFirstChildEnd, - TIter const parentHasDataBeg, TIter const parentHasDataEnd) -{ - BuildCompactTree(sink, isParentBeg, isParentEnd, isFirstChildBeg, isFirstChildEnd); - BuildMMBitVector(sink, parentHasDataBeg, parentHasDataEnd); - BuildMMBitVector32RankDirectory(sink, parentHasDataBeg, parentHasDataEnd); -} - diff --git a/coding/compact_trie_builder.hpp b/coding/compact_trie_builder.hpp deleted file mode 100644 index 69ca99c2b5..0000000000 --- a/coding/compact_trie_builder.hpp +++ /dev/null @@ -1,98 +0,0 @@ -#pragma once -#include "compact_tree_builder.hpp" -#include "../base/assert.hpp" -#include "../base/base.hpp" -#include "../std/algorithm.hpp" -#include "../std/vector.hpp" -#include "../base/start_mem_debug.hpp" - -// Build compact trie given a sorted sequence on strings. -// If writeChars == false, characters will not be written -template -void BuildMMCompactTrie(TSink & sink, TIter const beg, TIter const end, bool writeChars = true) -{ - size_t maxLen = 0; - size_t size = 0; - for (TIter it = beg; it != end; ++it, ++size) - maxLen = max(maxLen, it->size()); - vector > isParent(maxLen + 1); - vector > isFirstChild(maxLen + 1); - vector > hasData(maxLen + 1); - vector > chars(maxLen); - isParent[0].push_back(true); - isFirstChild[0].push_back(false); - hasData[0].push_back(false); - TIter prev; - size_t word = 0; - for (TIter it = beg; it != end; ++word, prev = it++) - { - CHECK_NOT_EQUAL(it->size(), 0U, ()); - size_t commonLen = 0; - bool nextIsExtensionOfPrev = true; - if (it != beg) - { - while (commonLen < it->size() && commonLen < prev->size() && - (*it)[commonLen] == (*prev)[commonLen]) - ++commonLen; - // Next string should be strictly greater than previous. - CHECK(commonLen != it->size(), - (commonLen, it->size(), prev->size(), word)); - CHECK(commonLen == prev->size() || (*prev)[commonLen] != (*it)[commonLen], - (commonLen, it->size(), prev->size(), word)); - nextIsExtensionOfPrev = (commonLen == prev->size()); - } - isParent[commonLen].back() = true; - size_t last = it->size() - 1; - for (size_t i = commonLen; i <= last; ++i) - { - isParent[i+1].push_back(i != last); - isFirstChild[i+1].push_back(i != commonLen || nextIsExtensionOfPrev); - hasData[i+1].push_back(i == last); - if (writeChars) - chars[i].push_back((*it)[i]); - } - } - - vector isParentComined, isFirstChildCombined, parentHasDataCombined; - isParentComined.reserve(size); - isFirstChildCombined.reserve(size); - parentHasDataCombined.reserve(size); - for (vector >::const_iterator i = isParent.begin(); i != isParent.end(); ++i) - isParentComined.insert(isParentComined.end(), i->begin(), i->end()); - for (vector >::const_iterator i = isFirstChild.begin(); i != isFirstChild.end(); ++i) - isFirstChildCombined.insert(isFirstChildCombined.end(), i->begin(), i->end()); - for (size_t i = 0; i < isParent.size(); ++i) - { - for (size_t j = 0; j < isParent[i].size(); ++j) - { - if (isParent[i][j]) - { - parentHasDataCombined.push_back(hasData[i][j]); - } - } - } - - BuildCompactTreeWithData(sink, isParentComined.begin(), isParentComined.end(), - isFirstChildCombined.begin(), isFirstChildCombined.end(), - parentHasDataCombined.begin(), parentHasDataCombined.end()); - size_t numChars = 0; - if (writeChars) - { - for (typename vector >::const_iterator i = chars.begin(); i != chars.end(); ++i) - { - for (typename vector::const_iterator it = i->begin(); it != i->end(); ++it) - { - WriteToSink(sink, TChar(*it)); - ++numChars; - } - } - } - - size_t padding = (4 - (numChars * sizeof(TChar)) % 4) % 4; - for (size_t i = 0; i < padding; ++i) - { - WriteToSink(sink, uint8_t(0)); - } -} - -#include "../base/stop_mem_debug.hpp" diff --git a/coding/dd_bit_rank_directory.hpp b/coding/dd_bit_rank_directory.hpp deleted file mode 100644 index d3873e5134..0000000000 --- a/coding/dd_bit_rank_directory.hpp +++ /dev/null @@ -1,121 +0,0 @@ -#pragma once -#include "dd_base.hpp" -#include "dd_vector.hpp" -#include "endianness.hpp" -#include "varint.hpp" -#include "../base/assert.hpp" -#include "../base/bits.hpp" -#include -#include "../base/start_mem_debug.hpp" - -template class DDBitRankDirectory -{ -public: - typedef TBitVector BitVectorType; - typedef SizeT size_type; - - DDBitRankDirectory() - { - } - - template - void Parse(DDParseInfo & info) - { - BitVectorType bitVector; - bitVector.Parse(info); - Parse(info, bitVector); - } - - template - void Parse(DDParseInfo & info, BitVectorType const & bitVector) - { - Parse(info, bitVector, ReadVarUint(info.Source())); - } - - template - void Parse(DDParseInfo &info, BitVectorType const & bitVector, size_type logChunkSize) - { - m_BitVector = bitVector; - m_LogChunkSize = logChunkSize; - size_type const sizeChunks = bits::RoundLastBitsUpAndShiftRight( - m_BitVector.size(), bits::LogBitSizeOfType::value + m_LogChunkSize); - if (sizeChunks != 0) - { - m_Chunks = ChunkVectorType(info.Source().SubReader(sizeChunks * sizeof(size_type)), - sizeChunks); - if (SwapIfBigEndian(m_Chunks[m_Chunks.size() - 1]) > m_BitVector.size()) - MYTHROW(DDParseException, (m_Chunks.size(), - SwapIfBigEndian(m_Chunks[m_Chunks.size() - 1]), - m_BitVector.size())); - } - } - - size_type Rank0(size_type x) const - { - ASSERT_LESS(x, m_BitVector.size(), ()); - return x + 1 - Rank1(x); - } - - size_type Rank1(size_type x) const - { - ASSERT_LESS(x, m_BitVector.size(), ()); - size_type const logBitSize = bits::LogBitSizeOfType::value; - size_type const iWord = x >> logBitSize; - size_type const iChunk = iWord >> m_LogChunkSize; - return (iChunk == 0 ? 0 : SwapIfBigEndian(m_Chunks[iChunk - 1])) + - m_BitVector.PopCountWords(iChunk << m_LogChunkSize, iWord) + - bits::popcount( - m_BitVector.WordAt(iWord) & - (size_type(-1) >> ((8 * sizeof(size_type) - 1) - (x - (iWord << logBitSize))))); - } - - size_type Select1(size_type i) const - { - ASSERT_GREATER(i, 0, ()); - ASSERT_LESS_OR_EQUAL(i, m_Chunks[m_Chunks.size() - 1], ()); - // TODO: First try approximate lower and upper bound. - size_type iChunk = lower_bound( - boost::make_transform_iterator(m_Chunks.begin(), - &SwapIfBigEndian), - boost::make_transform_iterator(m_Chunks.end(), - &SwapIfBigEndian), - i) - - boost::make_transform_iterator(m_Chunks.begin(), - &SwapIfBigEndian); - ASSERT_LESS(iChunk, m_Chunks.size(), ()); - ASSERT_GREATER_OR_EQUAL(SwapIfBigEndian(m_Chunks[iChunk]), i, ()); - ASSERT_LESS((iChunk << m_LogChunkSize), m_BitVector.size_words(), (iChunk, m_LogChunkSize)); - return (iChunk << (bits::LogBitSizeOfType::value + m_LogChunkSize)) + - m_BitVector.Select1FromWord( - iChunk << m_LogChunkSize, - i - (iChunk == 0 ? 0 : SwapIfBigEndian(m_Chunks[iChunk - 1]))); - } - - size_type size() const - { - return m_BitVector.size(); - } - - bool empty() const - { - return size() == 0; - } - - bool operator[](size_type i) const - { - return m_BitVector[i]; - } - - TBitVector const & BitVector() const - { - return m_BitVector; - } - -private: - TBitVector m_BitVector; - typedef DDVector ChunkVectorType; - ChunkVectorType m_Chunks; - size_type m_LogChunkSize; // Chunk size in size_types. -}; - -#include "../base/stop_mem_debug.hpp" diff --git a/coding/dd_bit_vector.hpp b/coding/dd_bit_vector.hpp deleted file mode 100644 index bc5fbe1921..0000000000 --- a/coding/dd_bit_vector.hpp +++ /dev/null @@ -1,103 +0,0 @@ -#pragma once -#include "dd_base.hpp" -#include "endianness.hpp" -#include "varint.hpp" -#include "../base/assert.hpp" -#include "../base/base.hpp" -#include "../base/bits.hpp" -#include "../std/type_traits.hpp" - -#include "../base/start_mem_debug.hpp" - -template < - class TWordVector, - typename TSize = typename TWordVector::size_type, - typename TDifference = typename make_signed::type - > class DDBitVector -{ -public: - typedef TWordVector VectorType; - typedef typename VectorType::value_type WordType; - typedef TSize size_type; - typedef TDifference difference_type; - - DDBitVector() - { - } - - template - void Parse(DDParseInfo & info) - { - Parse(info, ReadVarUint(info.Source())); - } - - template - void Parse(DDParseInfo & info, size_type vectorSize) - { - m_Size = vectorSize; - m_Data = VectorType(info.Source().SubReader(size_words() * sizeof(WordType)), size_words()); - } - - bool operator[](size_type i) const - { - ASSERT(i < size(), (i, size())); - return 0 != - (WordAt(i / 8 / sizeof(WordType)) & (WordType(1) << (i & (8 * sizeof(WordType) - 1)))); - } - - size_type size() const - { - return m_Size; - } - - size_type size_words() const - { - return (m_Size + sizeof(WordType) * 8 - 1) / 8 / sizeof(WordType); - } - - bool empty() const - { - return m_Size == 0; - } - - WordType WordAt(size_type i) const - { - ASSERT(i < size_words(), (i)); - return SwapIfBigEndian(m_Data[i]); - } - - size_type PopCountWords(size_type begWord, size_type endWord) const - { - if (begWord == endWord) - return 0; - ASSERT_LESS(begWord, endWord, ()); - ASSERT_LESS(begWord, size_words(), ()); - ASSERT_LESS_OR_EQUAL(endWord, size_words(), ()); - - // popcount doesn't depend on byte order. - size_type result = 0; - while (begWord != endWord) - result += bits::popcount(m_Data[begWord++]); - return result; - } - - size_type Select1FromWord(size_type iWord, size_type i) const - { - ASSERT(iWord < size_words(), (iWord, size_words(), i)); - size_type const startWord = iWord; - size_type wordRank = bits::popcount(m_Data[iWord]); - while (wordRank < i) - { - i -= wordRank; - wordRank = bits::popcount(m_Data[++iWord]); - } - return (iWord - startWord) * sizeof(WordType) * 8 + - bits::select1(SwapIfBigEndian(m_Data[iWord]), i); - } - -private: - size_type m_Size; - VectorType m_Data; -}; - -#include "../base/stop_mem_debug.hpp" diff --git a/coding/dd_compact_tree.hpp b/coding/dd_compact_tree.hpp deleted file mode 100644 index 1a46cbc6e1..0000000000 --- a/coding/dd_compact_tree.hpp +++ /dev/null @@ -1,128 +0,0 @@ -#pragma once -#include "dd_bit_vector.hpp" -#include "dd_bit_rank_directory.hpp" -#include "../base/base.hpp" - -#include "../base/start_mem_debug.hpp" - -template -class DDCompactTree -{ -public: - typedef BitRankDirT BitRankDirType; - - // Node id. - typedef typename BitRankDirT::size_type Id; - static Id const INVALID_ID = -1; - - DDCompactTree() - { - } - - // Id of the root. - Id Root() const - { - return 0; - } - - // Parent id and INVALID_ID for root. - Id Parent(Id id) const - { - ASSERT(id != INVALID_ID, ()); - return id ? m_IsParent.Select1(m_IsFirstChild.Rank1(id)) : INVALID_ID; - } - - // First child id and INVALID_ID for leaf. - Id FirstChild(Id id) const - { - ASSERT(id != INVALID_ID, ()); - return m_IsParent[id] ? m_IsFirstChild.Select1(m_IsParent.Rank1(id)) : INVALID_ID; - } - - // Next sibling id and INVALID_ID if there is no next sibling. - Id NextSibling(Id id) const - { - ASSERT(id != INVALID_ID, ()); - return (id + 1 == m_IsFirstChild.size() || m_IsFirstChild[id + 1]) ? INVALID_ID : id + 1; - } - - template - void Parse(DDParseInfo & info) - { - typedef typename BitRankDirT::BitVectorType BitVectorType; - { - BitVectorType isParent; - isParent.Parse(info); - if (isParent.size() == 0) - MYTHROW(DDParseException, ()); - m_IsParent.Parse(info, isParent); - } - { - // TODO: Don't write logRankChunkSize twice. - // TODO: Allow logRankChunkSize be explicitly specified. - BitVectorType isFirstChild; - isFirstChild.Parse(info, m_IsParent.size()); - if (isFirstChild.size() == 0) - MYTHROW(DDParseException, ()); - m_IsFirstChild.Parse(info, isFirstChild); - } - } - -protected: - friend class MMCompactTreeTester; - BitRankDirT m_IsParent; - BitRankDirT m_IsFirstChild; -}; - -template -class DDCompactTreeWithData : public DDCompactTree -{ -public: - typedef DDCompactTree BaseType; - typedef BitRankDirT BitRankDirType; - typedef typename BaseType::Id Id; - static Id const INVALID_ID = BaseType::INVALID_ID; - - DDCompactTreeWithData() : m_NodesWithData(0) - { - } - - // Number of nodes with data. - Id NodesWithData() const - { - return m_NodesWithData; - } - - // Id of the data for a given node id and INVALID_ID if node doesn't have any data. - Id Data(Id id) const - { - ASSERT(id != INVALID_ID, ()); - if (BaseType::m_IsParent[id]) - { - Id const parentIndex = BaseType::m_IsParent.Rank1(id) - 1; - return m_ParentHasData[parentIndex] ? m_ParentHasData.Rank1(parentIndex) - 1 : INVALID_ID; - } - else - { - return m_ParentsWithDataCount + BaseType::m_IsParent.Rank0(id) - 1; - } - } - - template - void Parse(DDParseInfo & info) - { - BaseType::Parse(info); - // TODO: Pass the vector size here. - m_ParentHasData.Parse(info); - m_ParentsWithDataCount = - m_ParentHasData.empty() ? 0 : m_ParentHasData.Rank1(m_ParentHasData.size() - 1); - m_NodesWithData = m_ParentsWithDataCount + - BaseType::m_IsParent.Rank0(BaseType::m_IsParent.size() - 1); - } -protected: - BitRankDirType m_ParentHasData; - uint32_t m_ParentsWithDataCount; - size_t m_NodesWithData; -}; - -#include "../base/stop_mem_debug.hpp" diff --git a/coding/dd_compact_trie.hpp b/coding/dd_compact_trie.hpp deleted file mode 100644 index f6a8321cb4..0000000000 --- a/coding/dd_compact_trie.hpp +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once -#include "dd_compact_tree.hpp" -#include "dd_vector.hpp" -#include "../base/assert.hpp" -#include "../base/base.hpp" -#include "../std/string.hpp" -#include "../base/start_mem_debug.hpp" - -class MMCompactTTrieester; - -template -class DDCompactTrie : public DDCompactTreeWithData -{ -public: - typedef DDCompactTreeWithData BaseType; - typedef typename BaseType::Id Id; - static Id const INVALID_ID = BaseType::INVALID_ID; - - DDCompactTrie() - { - } - - TChar Char(Id id) const - { - ASSERT(id != 0, ()); - ASSERT(id != INVALID_ID, ()); - return m_Chars[id - 1]; // There is no char for the root. - } - - template - void Parse(DDParseInfo & info) - { - BaseType::Parse(info); - m_Chars = - CharVectorType(info.Source().SubReader((BaseType::m_IsParent.size() - 1) * sizeof(TChar)), - BaseType::m_IsParent.size() - 1); - } - -protected: - friend class DDCompactTrieTester; - typedef DDVector - CharVectorType; - CharVectorType m_Chars; -}; - -template -typename TTrie::Id FindNodeByPath(TTrie const & trie, TIter pathBegin, TIter pathEnd) -{ - typename TTrie::Id nodeId = trie.Root(); - for (TIter edge = pathBegin; edge != pathEnd; ++edge) - { - bool found = false; - for (typename TTrie::Id child = trie.FirstChild(nodeId); - child != TTrie::INVALID_ID; - child = trie.NextSibling(child)) - { - if (trie.Char(child) == *edge) - { - nodeId = child; - found = true; - break; - } - } - if (!found) - return TTrie::INVALID_ID; - } - return nodeId; -} - -#include "../base/stop_mem_debug.hpp" diff --git a/coding/mm_base.hpp b/coding/mm_base.hpp deleted file mode 100644 index ee599ffd52..0000000000 --- a/coding/mm_base.hpp +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once -#include "../base/assert.hpp" -#include "../base/base.hpp" - -#include "../base/start_mem_debug.hpp" - -class MMParseInfo -{ -public: - MMParseInfo(void const * p, size_t size, bool failOnError) : - m_p(static_cast(p)), m_Size(size), m_bFailOnError(failOnError), - m_bSuccessful(true) - { - } - - ~MMParseInfo() - { - CHECK(!m_bFailOnError || m_bSuccessful, ()); - } - - void CheckAligned(size_t size) - { - size_t p = reinterpret_cast(m_p); - if (!(size & 7)) { - CHECK_OR_CALL(m_bFailOnError, Fail, !(p & 7), (p, size)); - } - else if (!(size & 3)) { - CHECK_OR_CALL(m_bFailOnError, Fail, !(p & 3), (p, size)); - } - else if (!(size & 1)) { - CHECK_OR_CALL(m_bFailOnError, Fail, !(p & 1), (p, size)); - } - } - - template T const * Advance() - { - CheckAligned(sizeof(T)); - size_t const advanceSize = sizeof(T); - CHECK_OR_CALL(m_bFailOnError, Fail, advanceSize <= m_Size, (sizeof(T), m_Size)); - void const * p = m_p; - m_p += advanceSize; - m_Size -= advanceSize; - return static_cast(p); - } - - template T const * Advance(size_t size) - { - CheckAligned(sizeof(T)); - size_t const advanceSize = size * sizeof(T); - CHECK_OR_CALL(m_bFailOnError, Fail, advanceSize <= m_Size, (size, sizeof(T), m_Size)); - void const * p = m_p; - m_p += advanceSize; - m_Size -= advanceSize; - return static_cast(p); - } - - bool Successful() const - { - return m_bSuccessful; - } - - void Fail() - { - CHECK(!m_bFailOnError, (m_bSuccessful)); - m_bSuccessful = false; - } - - bool FailOnError() const - { - return m_bFailOnError; - } - -private: - char const * m_p; - size_t m_Size; - bool m_bFailOnError; - bool m_bSuccessful; -}; - -#include "../base/stop_mem_debug.hpp" diff --git a/coding/mm_bit_vector.hpp b/coding/mm_bit_vector.hpp deleted file mode 100644 index 10b7dac733..0000000000 --- a/coding/mm_bit_vector.hpp +++ /dev/null @@ -1,189 +0,0 @@ -#pragma once -#include "endianness.hpp" -#include "mm_base.hpp" -#include "../base/assert.hpp" -#include "../base/base.hpp" -#include "../base/bits.hpp" -#include "../std/algorithm.hpp" -#include - -#include "../base/start_mem_debug.hpp" - -// . !! -// network byte order (big endian). -// . -template class MMBitVector -{ -public: - typedef TWord WordType; - - MMBitVector() - { - } - - MMBitVector(void const *p, size_t size) - { - MMParseInfo parseInfo(p, size, true); - this->Parse(parseInfo); - } - - bool operator[](TWord i) const - { - ASSERT(i < size(), (i, size())); - return (WordAt(i / 8 / sizeof(TWord)) & (TWord(1) << (i & (8 * sizeof(TWord) - 1)))) != 0; - } - - TWord size() const - { - return m_Size; - } - - TWord size_words() const - { - return (m_Size + sizeof(TWord)*8 - 1) / 8 / sizeof(TWord); - } - - size_t bytes_used() const - { - return (size_words() + 1) * sizeof(TWord); - } - - bool empty() const - { - return m_Size == 0; - } - - TWord WordAt(TWord i) const - { - ASSERT(i < size_words(), (i)); - return SwapIfBigEndian(m_pWords[i]); - } - - TWord Select1FromWord(TWord word, TWord i) const - { - ASSERT(word < size_words(), (word, size_words(), i)); - TWord const * const pStartWord = m_pWords + word; - TWord const * pWord = pStartWord; - TWord wordRank = bits::popcount(*pWord); - while (wordRank < i) - { - i -= wordRank; - wordRank = bits::popcount(*++pWord); - } - return static_cast(pWord - pStartWord) * sizeof(TWord) * 8 + - bits::select1(SwapIfBigEndian(*pWord), i); - } - - TWord PopCountWords(TWord begWord, TWord endWord) const - { - // popcount byte order. - ASSERT(begWord < size_words(), (begWord)); - ASSERT(endWord <= size_words(), (endWord)); - return bits::popcount(m_pWords + begWord, endWord - begWord); - } - - void Parse(MMParseInfo & info) - { - if (!info.Successful()) return; - TWord size = *info.Advance(1); - Parse(info, SwapIfBigEndian(size)); - } - - void Parse(MMParseInfo & info, TWord vectorSize) - { - m_Size = vectorSize; - if (!info.Successful()) return; - m_pWords = info.Advance(static_cast(size_words())); - } - -private: - TWord const * m_pWords; - TWord m_Size; -}; - -// , Rank0 Rank1 -// Select0 Select1 . -class MMBitVector32RankDirectory -{ -public: - MMBitVector32RankDirectory(MMBitVector const & bitVector) : m_BitVector(bitVector) - { - } - - MMBitVector32RankDirectory(MMBitVector const & bitVector, void const * p, size_t size) - : m_BitVector(bitVector) - { - MMParseInfo parseInfo(p, size, true); - this->Parse(parseInfo); - } - - uint32_t Rank0(uint32_t x) const - { - ASSERT(x < m_BitVector.size(), (x, m_BitVector.size())); - return x + 1 - Rank1(x); - } - - uint32_t Rank1(uint32_t x) const - { - ASSERT(x < m_BitVector.size(), (x, m_BitVector.size())); - uint32_t const iWord = x >> 5; - uint32_t const iChunk = iWord >> m_LogChunkSize; - return (iChunk == 0 ? 0 : SwapIfBigEndian(m_pChunks[iChunk - 1])) + - m_BitVector.PopCountWords(iChunk << m_LogChunkSize, iWord) + - bits::popcount(m_BitVector.WordAt(iWord) & (0xFFFFFFFFU >> (31 - (x - (iWord << 5))))); - } - - uint32_t Select1(uint32_t i) const - { - ASSERT(i > 0 && i <= m_MaxRank, (i, m_MaxRank)); - // TODO: First try approximate lower and upper bound. - uint32_t iChunk = lower_bound( - boost::make_transform_iterator(m_pChunks, &SwapIfBigEndian), - boost::make_transform_iterator(m_pChunks + size_chunks(), &SwapIfBigEndian), - i) - boost::make_transform_iterator(m_pChunks, &SwapIfBigEndian); - ASSERT_LESS(iChunk, size_chunks(), ()); - ASSERT_GREATER_OR_EQUAL(SwapIfBigEndian(m_pChunks[iChunk]), i, ()); - ASSERT_LESS((iChunk << m_LogChunkSize), m_BitVector.size_words(), (iChunk, m_LogChunkSize)); - return (iChunk << (5 + m_LogChunkSize)) + - m_BitVector.Select1FromWord( - iChunk << m_LogChunkSize, - i - (iChunk == 0 ? 0 : SwapIfBigEndian(m_pChunks[iChunk - 1]))); - } - - uint32_t size_chunks() const - { - return bits::RoundLastBitsUpAndShiftRight(m_BitVector.size(), 5 + m_LogChunkSize); - } - - size_t bytes_used() const - { - return (1 + size_chunks()) << (2 + m_LogChunkSize); - } - - void Parse(MMParseInfo & info) - { - // TODO: Store version in MMBitVector32RankDirectory? - if (!info.Successful()) return; - m_LogChunkSize = SwapIfBigEndian(*info.Advance()); - if (!info.Successful()) return; - m_pChunks = info.Advance(size_chunks()); -#ifdef DEBUG - m_MaxRank = (m_BitVector.empty() ? 0 : Rank1(m_BitVector.size() - 1)); - if (m_MaxRank > m_BitVector.size()) - { - CHECK(!info.FailOnError(), (m_MaxRank, m_BitVector.size())); - info.Fail(); - } -#endif - } - -protected: - MMBitVector const & m_BitVector; - uint32_t const * m_pChunks; - uint32_t m_LogChunkSize; // 32 . -#ifdef DEBUG - uint32_t m_MaxRank; -#endif -}; - -#include "../base/stop_mem_debug.hpp" diff --git a/coding/mm_compact_tree.hpp b/coding/mm_compact_tree.hpp deleted file mode 100644 index 35de2df36f..0000000000 --- a/coding/mm_compact_tree.hpp +++ /dev/null @@ -1,113 +0,0 @@ -#pragma once -#include "mm_bit_vector.hpp" -#include "../base/base.hpp" - -class MMCompactTree -{ -public: - // Node id. - typedef uint32_t Id; - static Id const INVALID_ID = 0xFFFFFFFF; - - MMCompactTree() : - m_IsParentDir(m_IsParent), m_IsFirstChildDir(m_IsFirstChild) - { - } - - MMCompactTree(void const * p, size_t size) : - m_IsParentDir(m_IsParent), m_IsFirstChildDir(m_IsFirstChild) - { - MMParseInfo info(p, size, true); - this->Parse(info); - } - - // Id of the root. - Id Root() const - { - return 0; - } - - // Parent id and INVALID_ID for root. - Id Parent(Id id) const - { - return id ? m_IsParentDir.Select1(m_IsFirstChildDir.Rank1(id)) : INVALID_ID; - } - - // First child id and INVALID_ID for leaf. - Id FirstChild(Id id) const - { - return m_IsParent[id] ? m_IsFirstChildDir.Select1(m_IsParentDir.Rank1(id)) : INVALID_ID; - } - - // Next sibling id and INVALID_ID if there is no next sibling. - Id NextSibling(Id id) const - { - return (id + 1 == m_IsFirstChild.size() || m_IsFirstChild[id + 1]) ? INVALID_ID : id + 1; - } - - void Parse(MMParseInfo & info) - { - if (!info.Successful()) - return; - m_IsParent.Parse(info); - m_IsParentDir.Parse(info); - CHECK_OR_CALL(info.FailOnError(), info.Fail, m_IsParent.size() > 0, ()); - m_IsFirstChild.Parse(info, m_IsParent.size()); - m_IsFirstChildDir.Parse(info); - } - -protected: - friend class MMCompactTreeTester; - MMBitVector m_IsParent; - MMBitVector32RankDirectory m_IsParentDir; - MMBitVector m_IsFirstChild; - MMBitVector32RankDirectory m_IsFirstChildDir; -}; - -class MMCompactTreeWithData : public MMCompactTree -{ -public: - MMCompactTreeWithData() : m_ParentHasDataDir(m_ParentHasData), m_Size(0) - { - } - - // Number of nodes with data. - size_t NodesWithData() - { - return m_Size; - } - - // Id of the data for a given node id and INVALID_ID if node doesn't have any data. - uint32_t Data(Id id) const - { - if (m_IsParent[id]) - { - uint32_t const parentIndex = m_IsParentDir.Rank1(id) - 1; - return m_ParentHasData[parentIndex] ? m_ParentHasDataDir.Rank1(parentIndex) - 1 : INVALID_ID; - } - else - { - return m_ParentsWithDataCount + m_IsParentDir.Rank0(id) - 1; - } - } - - void Parse(MMParseInfo & info) - { - MMCompactTree::Parse(info); - if (!info.Successful()) - return; - m_ParentHasData.Parse( - info, m_IsParent.empty() ? 0 : m_IsParentDir.Rank1(m_IsParent.size() - 1)); - m_ParentHasDataDir.Parse(info); - m_ParentsWithDataCount = - m_ParentHasData.empty() ? 0 : m_ParentHasDataDir.Rank1(m_ParentHasData.size() - 1); - m_Size = m_ParentsWithDataCount + - (m_IsParent.empty() ? 0 : m_IsParentDir.Rank0(m_IsParent.size() - 1)); - } -protected: - MMBitVector m_ParentHasData; - MMBitVector32RankDirectory m_ParentHasDataDir; - uint32_t m_ParentsWithDataCount; - size_t m_Size; - -}; diff --git a/coding/mm_compact_trie.hpp b/coding/mm_compact_trie.hpp deleted file mode 100644 index 0d1744d1ee..0000000000 --- a/coding/mm_compact_trie.hpp +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once -#include "mm_base.hpp" -#include "mm_compact_tree.hpp" -#include "mm_vector.hpp" -#include "../base/assert.hpp" -#include "../base/base.hpp" -#include "../std/string.hpp" -#include "../base/start_mem_debug.hpp" - -class MMCompactTrieTester; - -template class MMCompactTrie : public MMCompactTreeWithData -{ -public: - MMCompactTrie() - { - } - - MMCompactTrie(void const * p, size_t size) : MMCompactTreeWithData() - { - MMParseInfo info(p, size, true); - Parse(info); - } - - TChar Char(Id id) const - { - ASSERT(id != 0, ()); - ASSERT(id != INVALID_ID, ()); - return m_Chars[id - 1]; // There is no char for the root. - } - - void Parse(MMParseInfo & info) - { - MMCompactTreeWithData::Parse(info); - if (!info.Successful()) - return; - m_Chars.Parse(info, m_IsParent.size() - 1); - } - -protected: - friend class MMCompactTrieTester; - MMVector m_Chars; -}; - -#if 0 -template -MMCompactTree::Id FindNodeByPath(TrieT const & trie, ItT pathBegin, ItT pathEnd) -{ - MMCompactTree::Id nodeId = 0; - for (ItT edge = pathBegin; edge != pathEnd; ++edge) - { - bool found = false; - for (MMCompactTree::Id child = trie.FirstChild(nodeId); - child != MMCompactTree::INVALID_ID; - child = trie.NextSibling(child)) - { - if (trie.Char(child) == *edge) - { - nodeId = child; - found = true; - break; - } - } - if (!found) return MMCompactTree::INVALID_ID; - } - return nodeId; -} -#endif - -#include "../base/stop_mem_debug.hpp" diff --git a/coding/mm_vector.hpp b/coding/mm_vector.hpp deleted file mode 100644 index 58032879c6..0000000000 --- a/coding/mm_vector.hpp +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once -#include "endianness.hpp" -#include "mm_base.hpp" -#include "../base/assert.hpp" -#include "../base/base.hpp" -#include "../std/memcpy.hpp" - -#include "../base/start_mem_debug.hpp" - -template class MMVector -{ -public: - typedef T * const_iterator; - typedef const_iterator iterator; - typedef T value_type; - - const_iterator begin() const - { - return m_p; - } - - const_iterator end() const - { - return m_p + m_Size; - } - - T const & operator [] (size_t i) const - { - ASSERT_LESS(i, m_Size, ()); - return m_p[i]; - } - - size_t size() const - { - return m_Size; - } - - void Parse(MMParseInfo & info) - { - if (!info.Successful()) return; - uint32_t size; - memcpy(size, info.Advance(4), 4); - Parse(info, SwapIfBigEndian(size)); - } - - void Parse(MMParseInfo & info, size_t vectorSize) - { - m_Size = vectorSize; - if (!info.Successful()) return; - m_p = info.Advance(m_Size); - } - -private: - T const * m_p; - size_t m_Size; -}; - -#include "../base/stop_mem_debug.hpp"