forked from organicmaps/organicmaps
[omim] [coding] Compressed bit vectors.
This commit is contained in:
parent
119ade14dd
commit
e8acd6f459
8 changed files with 598 additions and 26 deletions
|
@ -20,8 +20,8 @@ UNIT_TEST(Popcount32)
|
|||
{
|
||||
for (uint32_t i = 0; i < 10000; ++i)
|
||||
{
|
||||
TEST_EQUAL(bits::popcount(i), PopCountSimple(i), (i));
|
||||
TEST_EQUAL(bits::popcount(0xC2000000 | i), PopCountSimple(0xC2000000 | i), (0xC2000000 | i));
|
||||
TEST_EQUAL(bits::PopCount(i), PopCountSimple(i), (i));
|
||||
TEST_EQUAL(bits::PopCount(0xC2000000 | i), PopCountSimple(0xC2000000 | i), (0xC2000000 | i));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ UNIT_TEST(PopcountArray32)
|
|||
uint32_t expectedPopCount = 0;
|
||||
for (size_t i = 0; i < v.size(); ++i)
|
||||
expectedPopCount += PopCountSimple(v[i]);
|
||||
TEST_EQUAL(bits::popcount(v.empty() ? NULL : &v[0], v.size()), expectedPopCount,
|
||||
TEST_EQUAL(bits::PopCount(v.empty() ? NULL : &v[0], v.size()), expectedPopCount,
|
||||
(j, v.size(), expectedPopCount));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
namespace bits
|
||||
{
|
||||
// Count the number of 1 bits. Implementation: see Hacker's delight book.
|
||||
inline uint32_t popcount(uint32_t x)
|
||||
inline uint32_t PopCount(uint32_t x)
|
||||
{
|
||||
x -= ((x >> 1) & 0x55555555);
|
||||
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
|
||||
|
@ -18,14 +18,14 @@ namespace bits
|
|||
return x & 0x3F;
|
||||
}
|
||||
|
||||
inline uint32_t popcount(uint8_t x)
|
||||
inline uint32_t PopCount(uint8_t x)
|
||||
{
|
||||
return popcount(static_cast<uint32_t>(x));
|
||||
return PopCount(static_cast<uint32_t>(x));
|
||||
}
|
||||
|
||||
// Count the number of 1 bits in array p, length n bits.
|
||||
// There is a better implementation at hackersdelight.org
|
||||
inline uint32_t popcount(uint32_t const * p, uint32_t n)
|
||||
inline uint32_t PopCount(uint32_t const * p, uint32_t n)
|
||||
{
|
||||
uint32_t s = 0;
|
||||
for (uint32_t i = 0; i < n; i += 31)
|
||||
|
@ -61,10 +61,15 @@ namespace bits
|
|||
return static_cast<unsigned int>(SELECT1_ERROR);
|
||||
}
|
||||
|
||||
// Counts the set bits of a 64-bit value as the sum of the counts of its
// lower and upper 32-bit halves.
inline uint32_t PopCount(uint64_t x)
{
  // The explicit casts pick the 32-bit overload for each half.
  return PopCount(static_cast<uint32_t>(x)) + PopCount(static_cast<uint32_t>(x >> 32));
}
|
||||
|
||||
// Will be implemented when needed.
|
||||
uint64_t popcount(uint64_t x);
|
||||
// Will be implemented when needed.
|
||||
uint64_t popcount(uint64_t const * p, uint64_t n);
|
||||
uint64_t PopCount(uint64_t const * p, uint64_t n);
|
||||
|
||||
template <typename T> T RoundLastBitsUpAndShiftRight(T x, T bits)
|
||||
{
|
||||
|
|
|
@ -52,6 +52,7 @@ HEADERS += \
|
|||
byte_stream.hpp \
|
||||
coder.hpp \
|
||||
coder_util.hpp \
|
||||
compressed_bit_vector.hpp \
|
||||
old_compressed_bit_vector.hpp \
|
||||
# compressed_varnum_vector.hpp \
|
||||
constants.hpp \
|
||||
|
|
|
@ -17,6 +17,7 @@ SOURCES += ../../testing/testingmain.cpp \
|
|||
bit_streams_test.cpp \
|
||||
# blob_storage_test.cpp \
|
||||
coder_util_test.cpp \
|
||||
compressed_bit_vector_test.cpp \
|
||||
old_compressed_bit_vector_test.cpp \
|
||||
# compressed_varnum_vector_test.cpp \
|
||||
dd_vector_test.cpp \
|
||||
|
|
165
coding/coding_tests/compressed_bit_vector_test.cpp
Normal file
165
coding/coding_tests/compressed_bit_vector_test.cpp
Normal file
|
@ -0,0 +1,165 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/compressed_bit_vector.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "std/algorithm.hpp"
|
||||
#include "std/iterator.hpp"
|
||||
|
||||
namespace
|
||||
{
|
||||
// Verifies that cbv holds the intersection of the two position lists:
// its population count must equal the size of the expected intersection
// and every expected position must be reported as set.
// Both input vectors are sorted in place as a side effect.
void CheckIntersection(vector<uint64_t> & setBits1, vector<uint64_t> & setBits2,
                       unique_ptr<coding::CompressedBitVector> const & cbv)
{
  TEST(cbv.get(), ());

  // set_intersection requires sorted ranges.
  sort(setBits1.begin(), setBits1.end());
  sort(setBits2.begin(), setBits2.end());

  vector<uint64_t> common;
  set_intersection(setBits1.begin(), setBits1.end(), setBits2.begin(), setBits2.end(),
                   back_inserter(common));

  TEST_EQUAL(common.size(), cbv->PopCount(), ());
  for (uint64_t const pos : common)
    TEST(cbv->GetBit(pos), ());
}
|
||||
} // namespace
|
||||
|
||||
// Placeholder test with an empty body; it performs no checks.
UNIT_TEST(CompressedBitVector_Smoke) {}
|
||||
|
||||
// Intersects the position ranges [1, n) and [0, n - 1); the result is
// expected to be stored densely.
UNIT_TEST(CompressedBitVector_Intersect1)
{
  size_t const n = 100;

  vector<uint64_t> setBits1;
  for (size_t i = 1; i < n; ++i)
    setBits1.push_back(i);

  vector<uint64_t> setBits2;
  for (size_t i = 0; i + 1 < n; ++i)
    setBits2.push_back(i);

  auto cbv1 = coding::CompressedBitVectorBuilder::Build(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::Build(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  auto cbv3 = coding::CompressedBitVector::Intersect(*cbv1, *cbv2);
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv3->GetStorageStrategy(), ());
  CheckIntersection(setBits1, setBits2, cbv3);
}
|
||||
|
||||
// Intersects [0, n / 2] with [n / 2, n): the two ranges share only the
// position n / 2, and the result is expected to be stored sparsely.
UNIT_TEST(CompressedBitVector_Intersect2)
{
  size_t const n = 100;
  size_t const mid = n / 2;

  vector<uint64_t> setBits1;
  for (size_t i = 0; i <= mid; ++i)
    setBits1.push_back(i);

  vector<uint64_t> setBits2;
  for (size_t i = mid; i < n; ++i)
    setBits2.push_back(i);

  auto cbv1 = coding::CompressedBitVectorBuilder::Build(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::Build(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  auto cbv3 = coding::CompressedBitVector::Intersect(*cbv1, *cbv2);
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv3->GetStorageStrategy(), ());
  CheckIntersection(setBits1, setBits2, cbv3);
}
|
||||
|
||||
// Intersects the multiples of 2 with the multiples of 3 below n; exactly
// the multiples of 6 must remain set, stored sparsely.
UNIT_TEST(CompressedBitVector_Intersect3)
{
  size_t const n = 100;

  vector<uint64_t> setBits1;
  for (size_t i = 0; i < n; i += 2)
    setBits1.push_back(i);

  vector<uint64_t> setBits2;
  for (size_t i = 0; i < n; i += 3)
    setBits2.push_back(i);

  auto cbv1 = coding::CompressedBitVectorBuilder::Build(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::Build(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  auto cbv3 = coding::CompressedBitVector::Intersect(*cbv1, *cbv2);
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv3->GetStorageStrategy(), ());

  // Check every position, both the set and the unset ones.
  for (size_t i = 0; i < n; ++i)
  {
    bool const expected = (i % 6 == 0);
    TEST_EQUAL(expected, cbv3->GetBit(i), (i));
  }
}
|
||||
|
||||
// Intersects the multiples of 100 with the multiples of 150 below n;
// exactly the multiples of 300 must remain set, stored sparsely.
UNIT_TEST(CompressedBitVector_Intersect4)
{
  size_t const n = 1000;

  vector<uint64_t> setBits1;
  for (size_t i = 0; i < n; i += 100)
    setBits1.push_back(i);

  vector<uint64_t> setBits2;
  for (size_t i = 0; i < n; i += 150)
    setBits2.push_back(i);

  auto cbv1 = coding::CompressedBitVectorBuilder::Build(setBits1);
  auto cbv2 = coding::CompressedBitVectorBuilder::Build(setBits2);
  TEST(cbv1.get(), ());
  TEST(cbv2.get(), ());

  auto cbv3 = coding::CompressedBitVector::Intersect(*cbv1, *cbv2);
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv3->GetStorageStrategy(), ());

  // Check every position, both the set and the unset ones.
  for (size_t i = 0; i < n; ++i)
  {
    bool const expected = (i % 300 == 0);
    TEST_EQUAL(expected, cbv3->GetBit(i), (i));
  }
}
|
||||
|
||||
// Serializes a bit vector with every position in [0, n) set, reads it back
// and checks that the storage strategy (Dense), the population count and
// every set bit survive the round trip.
UNIT_TEST(CompressedBitVector_SerializationDense)
{
  // size_t rather than int, so the loop compares values of the same signedness
  // and matches the other tests in this file.
  size_t const n = 100;
  vector<uint64_t> setBits;
  for (size_t i = 0; i < n; ++i)
    setBits.push_back(i);

  vector<uint8_t> buf;
  {
    // The writer is scoped so that it is destroyed before buf is read.
    MemWriter<vector<uint8_t>> writer(buf);
    auto cbv = coding::CompressedBitVectorBuilder::Build(setBits);
    cbv->Serialize(writer);
  }

  MemReader reader(buf.data(), buf.size());
  ReaderSource<MemReader> src(reader);
  auto cbv = coding::CompressedBitVectorBuilder::Deserialize(src);
  TEST(cbv.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv->GetStorageStrategy(), ());
  TEST_EQUAL(setBits.size(), cbv->PopCount(), ());
  for (size_t i = 0; i < setBits.size(); ++i)
    TEST(cbv->GetBit(setBits[i]), ());
}
|
||||
|
||||
// Serializes a bit vector with every tenth position in [0, n) set, reads it
// back and checks that the storage strategy (Sparse), the population count
// and every set bit survive the round trip.
UNIT_TEST(CompressedBitVector_SerializationSparse)
{
  // size_t rather than int, so the loop compares values of the same signedness
  // and matches the other tests in this file.
  size_t const n = 100;
  vector<uint64_t> setBits;
  for (size_t i = 0; i < n; ++i)
  {
    if (i % 10 == 0)
      setBits.push_back(i);
  }

  vector<uint8_t> buf;
  {
    // The writer is scoped so that it is destroyed before buf is read.
    MemWriter<vector<uint8_t>> writer(buf);
    auto cbv = coding::CompressedBitVectorBuilder::Build(setBits);
    cbv->Serialize(writer);
  }

  MemReader reader(buf.data(), buf.size());
  ReaderSource<MemReader> src(reader);
  auto cbv = coding::CompressedBitVectorBuilder::Deserialize(src);
  TEST(cbv.get(), ());
  TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv->GetStorageStrategy(), ());
  TEST_EQUAL(setBits.size(), cbv->PopCount(), ());
  for (size_t i = 0; i < setBits.size(); ++i)
    TEST(cbv->GetBit(setBits[i]), ());
}
|
218
coding/compressed_bit_vector.cpp
Normal file
218
coding/compressed_bit_vector.cpp
Normal file
|
@ -0,0 +1,218 @@
|
|||
#include "coding/compressed_bit_vector.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
#include "coding/write_to_sink.hpp"
|
||||
|
||||
#include "std/algorithm.hpp"
|
||||
|
||||
namespace
|
||||
{
|
||||
// Intersects two dense vectors by AND-ing their 64-bit groups pairwise and
// collecting the positions of the surviving bits. Groups beyond the shorter
// vector cannot contribute, so only the common prefix is scanned.
// The storage strategy of the result is chosen by the builder.
unique_ptr<coding::CompressedBitVector> IntersectImpl(coding::DenseCBV const & a,
                                                      coding::DenseCBV const & b)
{
  size_t const groupsA = a.NumBitGroups();
  size_t const groupsB = b.NumBitGroups();
  size_t const numGroups = min(groupsA, groupsB);

  vector<uint64_t> commonBits;
  for (size_t group = 0; group < numGroups; ++group)
  {
    uint64_t const mask = a.GetBitGroup(group) & b.GetBitGroup(group);
    for (size_t offset = 0; offset < 64; ++offset)
    {
      if (((mask >> offset) & 1) != 0)
        commonBits.push_back(64 * group + offset);
    }
  }
  return coding::CompressedBitVectorBuilder::Build(commonBits);
}
|
||||
|
||||
// The intersection of dense and sparse is always sparse.
|
||||
unique_ptr<coding::CompressedBitVector> IntersectImpl(coding::DenseCBV const & a,
|
||||
coding::SparseCBV const & b)
|
||||
{
|
||||
vector<uint64_t> resPos;
|
||||
for (size_t i = 0; i < b.PopCount(); ++i)
|
||||
{
|
||||
auto pos = b.Select(i);
|
||||
if (a.GetBit(pos))
|
||||
resPos.push_back(pos);
|
||||
}
|
||||
return make_unique<coding::SparseCBV>(move(resPos));
|
||||
}
|
||||
|
||||
// Sparse-with-dense intersection: forwards to the dense-with-sparse overload
// with the arguments swapped.
unique_ptr<coding::CompressedBitVector> IntersectImpl(coding::SparseCBV const & a,
                                                      coding::DenseCBV const & b)
{
  coding::DenseCBV const & dense = b;
  coding::SparseCBV const & sparse = a;
  return IntersectImpl(dense, sparse);
}
|
||||
|
||||
// Intersects two sparse vectors with a two-pointer merge over their position
// lists, which SparseCBV asserts to be sorted.
// The result is always returned as a SparseCBV.
unique_ptr<coding::CompressedBitVector> IntersectImpl(coding::SparseCBV const & a,
                                                      coding::SparseCBV const & b)
{
  size_t const countA = a.PopCount();
  size_t const countB = b.PopCount();
  vector<uint64_t> common;

  size_t ia = 0;
  size_t ib = 0;
  while (ia < countA && ib < countB)
  {
    uint64_t const posA = a.Select(ia);
    uint64_t const posB = b.Select(ib);
    if (posA < posB)
    {
      // a is behind: advance it.
      ++ia;
      continue;
    }
    if (posB < posA)
    {
      // b is behind: advance it.
      ++ib;
      continue;
    }
    // Same position in both lists.
    common.push_back(posA);
    ++ia;
    ++ib;
  }
  return make_unique<coding::SparseCBV>(move(common));
}
|
||||
} // namespace
|
||||
|
||||
namespace coding
|
||||
{
|
||||
// Builds a dense CBV from a list of positions of set bits.
// The positions do not have to be sorted. The population count is taken
// from setBits.size(), which is only correct if setBits has no duplicates.
DenseCBV::DenseCBV(vector<uint64_t> const & setBits)
{
  if (setBits.empty())
  {
    // m_bits is already empty after default construction.
    m_popCount = 0;
    return;
  }

  uint64_t maxBit = setBits[0];
  for (size_t i = 1; i < setBits.size(); ++i)
    maxBit = max(maxBit, setBits[i]);

  // Bit maxBit lives in group maxBit / 64, so maxBit / 64 + 1 groups are needed.
  // Rounding maxBit up to a multiple of 64 instead is one group short whenever
  // maxBit is itself a multiple of 64 (including 0) and made the loop below
  // write past the end of m_bits.
  size_t const sz = static_cast<size_t>(maxBit / 64) + 1;
  m_bits.resize(sz);
  m_popCount = static_cast<uint32_t>(setBits.size());
  for (uint64_t pos : setBits)
    m_bits[pos / 64] |= static_cast<uint64_t>(1) << (pos % 64);
}
|
||||
|
||||
// Returns the number of set bits, which is computed once at construction.
uint32_t DenseCBV::PopCount() const
{
  return m_popCount;
}
|
||||
|
||||
// Returns the number of set bits, i.e. the number of stored positions.
uint32_t SparseCBV::PopCount() const
{
  // Makes the size_t -> uint32_t conversion explicit.
  return static_cast<uint32_t>(m_positions.size());
}
|
||||
|
||||
bool DenseCBV::GetBit(uint32_t pos) const
|
||||
{
|
||||
uint64_t bitGroup = GetBitGroup(pos / 64);
|
||||
return ((bitGroup >> (pos % 64)) & 1) > 0;
|
||||
}
|
||||
|
||||
bool SparseCBV::GetBit(uint32_t pos) const
|
||||
{
|
||||
auto it = lower_bound(m_positions.begin(), m_positions.end(), pos);
|
||||
return it != m_positions.end() && *it == pos;
|
||||
}
|
||||
|
||||
// A DenseCBV always reports the Dense strategy.
CompressedBitVector::StorageStrategy DenseCBV::GetStorageStrategy() const
{
  return StorageStrategy::Dense;
}
|
||||
|
||||
// A SparseCBV always reports the Sparse strategy.
CompressedBitVector::StorageStrategy SparseCBV::GetStorageStrategy() const
{
  return StorageStrategy::Sparse;
}
|
||||
|
||||
template <typename F>
|
||||
void DenseCBV::ForEach(F && f) const
|
||||
{
|
||||
for (size_t i = 0; i < m_bits.size(); ++i)
|
||||
for (size_t j = 0; j < 64; ++j)
|
||||
if (((m_bits[i] >> j) & 1) > 0)
|
||||
f(64 * i + j);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void SparseCBV::ForEach(F && f) const
|
||||
{
|
||||
for (size_t i = 0; i < m_positions.size(); ++i)
|
||||
f(m_positions[i]);
|
||||
}
|
||||
|
||||
// Returns a human-readable name of the storage strategy.
string DebugPrint(CompressedBitVector::StorageStrategy strat)
{
  switch (strat)
  {
    case CompressedBitVector::StorageStrategy::Dense:
      return "Dense";
    case CompressedBitVector::StorageStrategy::Sparse:
      return "Sparse";
  }
  // Reached only when strat holds a value that is not a declared enumerator
  // (e.g. one produced by a cast from a raw byte). Without this return,
  // control would flow off the end of a non-void function, which is
  // undefined behaviour.
  return "Unknown";
}
|
||||
|
||||
// Writes the vector as: a one-byte header (the storage strategy), the number
// of 64-bit groups as uint32_t, then every group as uint64_t.
void DenseCBV::Serialize(Writer & writer) const
{
  uint8_t const header = static_cast<uint8_t>(GetStorageStrategy());
  WriteToSink(writer, header);

  uint32_t const numGroups = static_cast<uint32_t>(NumBitGroups());
  WriteToSink(writer, numGroups);

  for (uint64_t const group : m_bits)
    WriteToSink(writer, group);
}
|
||||
|
||||
// Writes the vector as: a one-byte header (the storage strategy), the number
// of set bits as uint32_t, then every set-bit position as uint64_t.
void SparseCBV::Serialize(Writer & writer) const
{
  uint8_t const header = static_cast<uint8_t>(GetStorageStrategy());
  WriteToSink(writer, header);

  uint32_t const numBits = PopCount();
  WriteToSink(writer, numBits);

  auto const writePosition = [&writer](uint64_t bitPos)
  {
    WriteToSink(writer, bitPos);
  };
  ForEach(writePosition);
}
|
||||
|
||||
// static
|
||||
unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::Build(vector<uint64_t> const & setBits)
|
||||
{
|
||||
if (setBits.empty())
|
||||
return make_unique<SparseCBV>(setBits);
|
||||
uint64_t maxBit = setBits[0];
|
||||
for (size_t i = 1; i < setBits.size(); ++i)
|
||||
maxBit = max(maxBit, setBits[i]);
|
||||
// 30% occupied is dense enough
|
||||
if (10 * setBits.size() >= 3 * maxBit)
|
||||
return make_unique<DenseCBV>(setBits);
|
||||
return make_unique<SparseCBV>(setBits);
|
||||
}
|
||||
|
||||
// static
|
||||
unique_ptr<CompressedBitVector> CompressedBitVector::Intersect(CompressedBitVector const & lhs,
|
||||
CompressedBitVector const & rhs)
|
||||
{
|
||||
auto stratA = lhs.GetStorageStrategy();
|
||||
auto stratB = rhs.GetStorageStrategy();
|
||||
auto stratDense = CompressedBitVector::StorageStrategy::Dense;
|
||||
auto stratSparse = CompressedBitVector::StorageStrategy::Sparse;
|
||||
if (stratA == stratDense && stratB == stratDense)
|
||||
{
|
||||
DenseCBV const & a = static_cast<DenseCBV const &>(lhs);
|
||||
DenseCBV const & b = static_cast<DenseCBV const &>(rhs);
|
||||
return IntersectImpl(a, b);
|
||||
}
|
||||
if (stratA == stratDense && stratB == stratSparse)
|
||||
{
|
||||
DenseCBV const & a = static_cast<DenseCBV const &>(lhs);
|
||||
SparseCBV const & b = static_cast<SparseCBV const &>(rhs);
|
||||
return IntersectImpl(a, b);
|
||||
}
|
||||
if (stratA == stratSparse && stratB == stratDense)
|
||||
{
|
||||
SparseCBV const & a = static_cast<SparseCBV const &>(lhs);
|
||||
DenseCBV const & b = static_cast<DenseCBV const &>(rhs);
|
||||
return IntersectImpl(a, b);
|
||||
}
|
||||
if (stratA == stratSparse && stratB == stratSparse)
|
||||
{
|
||||
SparseCBV const & a = static_cast<SparseCBV const &>(lhs);
|
||||
SparseCBV const & b = static_cast<SparseCBV const &>(rhs);
|
||||
return IntersectImpl(a, b);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
} // namespace coding
|
182
coding/compressed_bit_vector.hpp
Normal file
182
coding/compressed_bit_vector.hpp
Normal file
|
@ -0,0 +1,182 @@
|
|||
#include "std/vector.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/bits.hpp"
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "std/algorithm.hpp"
|
||||
#include "std/unique_ptr.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
|
||||
namespace coding
|
||||
{
|
||||
class CompressedBitVector
|
||||
{
|
||||
public:
|
||||
enum class StorageStrategy
|
||||
{
|
||||
Dense,
|
||||
Sparse
|
||||
};
|
||||
|
||||
virtual ~CompressedBitVector() = default;
|
||||
|
||||
// Executes f for each bit that is set to one using
|
||||
// the bit's 0-based position as argument.
|
||||
template <typename F>
|
||||
void ForEach(F && f) const;
|
||||
|
||||
// Intersects two bit vectors.
|
||||
static unique_ptr<CompressedBitVector> Intersect(CompressedBitVector const &,
|
||||
CompressedBitVector const &);
|
||||
|
||||
// Returns the number of set bits (population count).
|
||||
virtual uint32_t PopCount() const = 0;
|
||||
|
||||
// todo(@pimenov) How long will 32 bits be enough here?
|
||||
// Would operator[] look better?
|
||||
virtual bool GetBit(uint32_t pos) const = 0;
|
||||
|
||||
// Returns the strategy used when storing this bit vector.
|
||||
virtual StorageStrategy GetStorageStrategy() const = 0;
|
||||
|
||||
// Writes the contents of a bit vector to writer.
|
||||
// The first byte is always the header that defines the format.
|
||||
// Currently the header is 0 or 1 for Dense and Sparse strategies respectively.
|
||||
// It is easier to dispatch via virtual method calls and not bother
|
||||
// with template TWriters here as we do in similar places in our code.
|
||||
// This should not pose too much a problem because commonly
|
||||
// used writers are inhereted from Writer anyway.
|
||||
// todo(@pimenov). Think about rewriting Serialize and Deserialize to use the
|
||||
// code in old_compressed_bit_vector.{c,h}pp.
|
||||
virtual void Serialize(Writer & writer) const = 0;
|
||||
};
|
||||
|
||||
string DebugPrint(CompressedBitVector::StorageStrategy strat);
|
||||
|
||||
class DenseCBV : public CompressedBitVector
|
||||
{
|
||||
public:
|
||||
// Builds a dense CBV from a list of positions of set bits.
|
||||
DenseCBV(vector<uint64_t> const & setBits);
|
||||
|
||||
// Builds a dense CBV from a packed bitmap of set bits.
|
||||
// todo(@pimenov) This behaviour of & and && constructors is extremely error-prone.
|
||||
DenseCBV(vector<uint64_t> && bitMasks) : m_bits(move(bitMasks))
|
||||
{
|
||||
m_popCount = 0;
|
||||
for (size_t i = 0; i < m_bits.size(); ++i)
|
||||
m_popCount += bits::PopCount(m_bits[i]);
|
||||
}
|
||||
|
||||
~DenseCBV() = default;
|
||||
|
||||
size_t NumBitGroups() const { return m_bits.size(); }
|
||||
|
||||
template <typename F>
|
||||
void ForEach(F && f) const;
|
||||
|
||||
uint64_t GetBitGroup(size_t i) const
|
||||
{
|
||||
if (i < m_bits.size())
|
||||
return m_bits[i];
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CompressedBitVector overrides:
|
||||
|
||||
uint32_t PopCount() const override;
|
||||
|
||||
bool GetBit(uint32_t pos) const override;
|
||||
|
||||
StorageStrategy GetStorageStrategy() const override;
|
||||
|
||||
void Serialize(Writer & writer) const override;
|
||||
|
||||
private:
|
||||
vector<uint64_t> m_bits;
|
||||
uint32_t m_popCount;
|
||||
};
|
||||
|
||||
class SparseCBV : public CompressedBitVector
|
||||
{
|
||||
public:
|
||||
SparseCBV(vector<uint64_t> const & setBits) : m_positions(setBits)
|
||||
{
|
||||
ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
|
||||
}
|
||||
|
||||
SparseCBV(vector<uint64_t> && setBits) : m_positions(move(setBits))
|
||||
{
|
||||
ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
|
||||
}
|
||||
|
||||
~SparseCBV() = default;
|
||||
|
||||
// Returns the position of the i'th set bit.
|
||||
uint64_t Select(size_t i) const
|
||||
{
|
||||
ASSERT_LESS(i, m_positions.size(), ());
|
||||
return m_positions[i];
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void ForEach(F && f) const;
|
||||
|
||||
// CompressedBitVector overrides:
|
||||
|
||||
uint32_t PopCount() const override;
|
||||
|
||||
bool GetBit(uint32_t pos) const override;
|
||||
|
||||
StorageStrategy GetStorageStrategy() const override;
|
||||
|
||||
void Serialize(Writer & writer) const override;
|
||||
|
||||
private:
|
||||
// 0-based positions of the set bits.
|
||||
vector<uint64_t> m_positions;
|
||||
};
|
||||
|
||||
class CompressedBitVectorBuilder
|
||||
{
|
||||
public:
|
||||
// Chooses a strategy to store the bit vector with bits from setBits set to one
|
||||
// and returns a pointer to a class that fits best.
|
||||
static unique_ptr<CompressedBitVector> Build(vector<uint64_t> const & setBits);
|
||||
|
||||
// Reads a bit vector from reader which must contain a valid
|
||||
// bit vector representation (see CompressedBitVector::Serialize for the format).
|
||||
template <typename TReader>
|
||||
static unique_ptr<CompressedBitVector> Deserialize(TReader & reader)
|
||||
{
|
||||
ReaderSource<TReader> src(reader);
|
||||
uint8_t header = ReadPrimitiveFromSource<uint8_t>(reader);
|
||||
CompressedBitVector::StorageStrategy strat =
|
||||
static_cast<CompressedBitVector::StorageStrategy>(header);
|
||||
switch (strat)
|
||||
{
|
||||
case CompressedBitVector::StorageStrategy::Dense:
|
||||
{
|
||||
uint32_t numBitGroups = ReadPrimitiveFromSource<uint32_t>(reader);
|
||||
vector<uint64_t> bitGroups(numBitGroups);
|
||||
for (size_t i = 0; i < numBitGroups; ++i)
|
||||
bitGroups[i] = ReadPrimitiveFromSource<uint64_t>(reader);
|
||||
return make_unique<DenseCBV>(move(bitGroups));
|
||||
}
|
||||
case CompressedBitVector::StorageStrategy::Sparse:
|
||||
{
|
||||
uint32_t numBits = ReadPrimitiveFromSource<uint32_t>(reader);
|
||||
vector<uint64_t> setBits(numBits);
|
||||
for (size_t i = 0; i < numBits; ++i)
|
||||
setBits[i] = ReadPrimitiveFromSource<uint64_t>(reader);
|
||||
return make_unique<SparseCBV>(setBits);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
};
|
||||
} // namespace coding
|
|
@ -8,11 +8,14 @@
|
|||
|
||||
using std::all_of;
|
||||
using std::binary_search;
|
||||
using std::copy;
|
||||
using std::equal;
|
||||
using std::equal_range;
|
||||
using std::fill;
|
||||
using std::find;
|
||||
using std::find_if;
|
||||
using std::find_first_of;
|
||||
using std::find_if;
|
||||
using std::for_each;
|
||||
using std::is_sorted;
|
||||
using std::lexicographical_compare;
|
||||
using std::lower_bound;
|
||||
|
@ -20,20 +23,17 @@ using std::max;
|
|||
using std::max_element;
|
||||
using std::min;
|
||||
using std::next_permutation;
|
||||
using std::sort;
|
||||
using std::stable_sort;
|
||||
using std::partial_sort;
|
||||
using std::swap;
|
||||
using std::upper_bound;
|
||||
using std::unique;
|
||||
using std::equal_range;
|
||||
using std::for_each;
|
||||
using std::copy;
|
||||
using std::remove_if;
|
||||
using std::replace;
|
||||
using std::reverse;
|
||||
using std::set_union;
|
||||
using std::set_intersection;
|
||||
using std::set_union;
|
||||
using std::sort;
|
||||
using std::stable_sort;
|
||||
using std::swap;
|
||||
using std::unique;
|
||||
using std::upper_bound;
|
||||
// Bug workaround, see http://connect.microsoft.com/VisualStudio/feedbackdetail/view/840578/algorithm-possible-c-compiler-bug-when-using-std-set-difference-with-custom-comperator
|
||||
#ifdef _MSC_VER
|
||||
namespace vs_bug
|
||||
|
@ -81,14 +81,14 @@ OutputIt set_difference(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputI
|
|||
#else
|
||||
using std::set_difference;
|
||||
#endif
|
||||
using std::set_symmetric_difference;
|
||||
using std::transform;
|
||||
using std::push_heap;
|
||||
using std::pop_heap;
|
||||
using std::sort_heap;
|
||||
using std::distance;
|
||||
using std::remove_copy_if;
|
||||
using std::generate;
|
||||
using std::pop_heap;
|
||||
using std::push_heap;
|
||||
using std::remove_copy_if;
|
||||
using std::set_symmetric_difference;
|
||||
using std::sort_heap;
|
||||
using std::transform;
|
||||
|
||||
#ifdef DEBUG_NEW
|
||||
#define new DEBUG_NEW
|
||||
|
|
Loading…
Add table
Reference in a new issue