Review fixes.

This commit is contained in:
Maxim Pimenov 2015-09-17 12:10:21 +03:00 committed by Sergey Yershov
parent d6800004d7
commit 6746120143
4 changed files with 78 additions and 59 deletions

View file

@ -8,7 +8,7 @@
namespace bits
{
// Counts the number of 1 bits. Implementation: see the book "Hacker's Delight".
inline uint32_t PopCount(uint32_t x)
inline uint32_t PopCount(uint32_t x) noexcept
{
x -= ((x >> 1) & 0x55555555);
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
@ -18,7 +18,7 @@ namespace bits
return x & 0x3F;
}
inline uint32_t PopCount(uint8_t x)
// Counts the 1 bits of a single byte by widening it to 32 bits and delegating
// to the uint32_t overload.
inline uint32_t PopCount(uint8_t x) noexcept
{
  uint32_t const widened = x;
  return PopCount(widened);
}
@ -61,7 +61,7 @@ namespace bits
return static_cast<unsigned int>(SELECT1_ERROR);
}
inline uint32_t PopCount(uint64_t x)
inline uint32_t PopCount(uint64_t x) noexcept
{
x = (x & 0x5555555555555555) + ((x & 0xAAAAAAAAAAAAAAAA) >> 1);
x = (x & 0x3333333333333333) + ((x & 0xCCCCCCCCCCCCCCCC) >> 2);
@ -69,7 +69,7 @@ namespace bits
x = (x & 0x00FF00FF00FF00FF) + ((x & 0xFF00FF00FF00FF00) >> 8);
x = (x & 0x0000FFFF0000FFFF) + ((x & 0xFFFF0000FFFF0000) >> 16);
x = x + (x >> 32);
return static_cast<uint8_t>(x);
return static_cast<uint32_t>(x);
}
// Will be implemented when needed.

View file

@ -191,3 +191,14 @@ UNIT_TEST(CompressedBitVector_ForEach)
TEST_EQUAL(pos % 15, 0, ());
});
}
// A dense vector whose only set bit is bit 0 must report a pop count of 1 and
// enumerate exactly that one position.
UNIT_TEST(CompressedBitVector_DenseOneBit)
{
  vector<uint64_t> setBits = {0};
  unique_ptr<coding::DenseCBV> cbv(new coding::DenseCBV(setBits));
  TEST_EQUAL(cbv->PopCount(), 1, ());

  // Count the callback invocations: the per-position check alone would pass
  // vacuously if ForEach never called the callback.
  size_t numVisited = 0;
  coding::CompressedBitVectorEnumerator::ForEach(*cbv, [&](uint64_t pos)
  {
    TEST_EQUAL(pos, 0, ());
    ++numVisited;
  });
  TEST_EQUAL(numVisited, 1, ());
}

View file

@ -6,6 +6,12 @@
#include "std/algorithm.hpp"
namespace coding
{
// static
// Namespace-scope definition of the class constant; its value comes from the
// in-class initializer on the declaration (static uint32_t const kBlockSize = 64).
// NOTE(review): presumably added so that odr-uses of kBlockSize (for example
// binding it to a const reference) link correctly - confirm against the build.
uint32_t const DenseCBV::kBlockSize;
} // namespace coding
namespace
{
uint64_t const kBlockSize = coding::DenseCBV::kBlockSize;
@ -89,14 +95,12 @@ DenseCBV::DenseCBV(vector<uint64_t> const & setBits)
{
if (setBits.empty())
{
m_bitGroups.resize(0);
m_popCount = 0;
return;
}
uint64_t maxBit = setBits[0];
for (size_t i = 1; i < setBits.size(); ++i)
maxBit = max(maxBit, setBits[i]);
size_t sz = (maxBit + kBlockSize - 1) / kBlockSize;
size_t sz = 1 + maxBit / kBlockSize;
m_bitGroups.resize(sz);
m_popCount = static_cast<uint32_t>(setBits.size());
for (uint64_t pos : setBits)
@ -114,6 +118,33 @@ unique_ptr<DenseCBV> DenseCBV::BuildFromBitGroups(vector<uint64_t> && bitGroups)
return cbv;
}
// Returns the i'th group of bits, or 0 when i is past the last stored group.
uint64_t DenseCBV::GetBitGroup(size_t i) const
{
  if (i >= m_bitGroups.size())
    return 0;
  return m_bitGroups[i];
}
// Returns the number of set bits as stored in m_popCount.
uint32_t DenseCBV::PopCount() const
{
  return m_popCount;
}
bool DenseCBV::GetBit(uint32_t pos) const
{
uint64_t bitGroup = GetBitGroup(pos / kBlockSize);
return ((bitGroup >> (pos % kBlockSize)) & 1) > 0;
}
// Identifies this implementation as the Dense storage strategy.
CompressedBitVector::StorageStrategy DenseCBV::GetStorageStrategy() const
{
  using Strategy = CompressedBitVector::StorageStrategy;
  return Strategy::Dense;
}
// Writes the vector to writer: a one-byte storage strategy header, the number
// of bit groups as uint32_t, then every bit group in index order.
void DenseCBV::Serialize(Writer & writer) const
{
  WriteToSink(writer, static_cast<uint8_t>(GetStorageStrategy()));

  size_t const numGroups = NumBitGroups();
  WriteToSink(writer, static_cast<uint32_t>(numGroups));

  size_t index = 0;
  while (index != numGroups)
  {
    WriteToSink(writer, GetBitGroup(index));
    ++index;
  }
}
SparseCBV::SparseCBV(vector<uint64_t> const & setBits) : m_positions(setBits)
{
ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
@ -124,63 +155,25 @@ SparseCBV::SparseCBV(vector<uint64_t> && setBits) : m_positions(move(setBits))
ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
}
uint32_t DenseCBV::PopCount() const { return m_popCount; }
// Returns the position of the i'th set bit. The index is checked by an
// assertion to be less than the number of stored positions.
uint64_t SparseCBV::Select(size_t i) const
{
  ASSERT_LESS(i, m_positions.size(), ());
  uint64_t const position = m_positions[i];
  return position;
}
// Returns the number of set bits, i.e. the number of stored positions.
// The container size is narrowed to uint32_t explicitly, the same way the
// DenseCBV constructor does when it computes its pop count, instead of relying
// on an implicit size_t -> uint32_t conversion.
uint32_t SparseCBV::PopCount() const
{
  return static_cast<uint32_t>(m_positions.size());
}
bool DenseCBV::GetBit(uint32_t pos) const
{
uint64_t bitGroup = GetBitGroup(pos / kBlockSize);
return ((bitGroup >> (pos % kBlockSize)) & 1) > 0;
}
// Tells whether pos is one of the stored positions, using a binary search
// over m_positions.
bool SparseCBV::GetBit(uint32_t pos) const
{
  auto const last = m_positions.end();
  auto const found = lower_bound(m_positions.begin(), last, pos);
  if (found == last)
    return false;
  return *found == pos;
}
uint64_t DenseCBV::GetBitGroup(size_t i) const
{
return i < m_bitGroups.size() ? m_bitGroups[i] : 0;
}
uint64_t SparseCBV::Select(size_t i) const
{
ASSERT_LESS(i, m_positions.size(), ());
return m_positions[i];
}
CompressedBitVector::StorageStrategy DenseCBV::GetStorageStrategy() const
{
return CompressedBitVector::StorageStrategy::Dense;
}
// Identifies this implementation as the Sparse storage strategy.
CompressedBitVector::StorageStrategy SparseCBV::GetStorageStrategy() const
{
  auto const strategy = CompressedBitVector::StorageStrategy::Sparse;
  return strategy;
}
string DebugPrint(CompressedBitVector::StorageStrategy strat)
{
switch (strat)
{
case CompressedBitVector::StorageStrategy::Dense:
return "Dense";
case CompressedBitVector::StorageStrategy::Sparse:
return "Sparse";
}
}
void DenseCBV::Serialize(Writer & writer) const
{
uint8_t header = static_cast<uint8_t>(GetStorageStrategy());
WriteToSink(writer, header);
WriteToSink(writer, static_cast<uint32_t>(NumBitGroups()));
for (size_t i = 0; i < NumBitGroups(); ++i)
WriteToSink(writer, GetBitGroup(i));
}
void SparseCBV::Serialize(Writer & writer) const
{
uint8_t header = static_cast<uint8_t>(GetStorageStrategy());
@ -217,7 +210,7 @@ unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitGroups(
if (bitGroups.empty())
return make_unique<SparseCBV>(bitGroups);
uint64_t maxBit = kBlockSize * bitGroups.size() - 1;
uint64_t const maxBit = kBlockSize * bitGroups.size() - 1;
uint64_t popCount = 0;
for (size_t i = 0; i < bitGroups.size(); ++i)
popCount += bits::PopCount(bitGroups[i]);
@ -233,6 +226,17 @@ unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitGroups(
return make_unique<SparseCBV>(setBits);
}
// Returns a human-readable name for the given storage strategy.
string DebugPrint(CompressedBitVector::StorageStrategy strat)
{
  // No default label on purpose, so the compiler can still warn when a new
  // enumerator is added without a matching case.
  switch (strat)
  {
  case CompressedBitVector::StorageStrategy::Dense:
    return "Dense";
  case CompressedBitVector::StorageStrategy::Sparse:
    return "Sparse";
  }
  // Only reached for a value that is not one of the enumerators above. Without
  // this return, control would flow off the end of a non-void function, which
  // is undefined behavior.
  return "Unknown";
}
// static
unique_ptr<CompressedBitVector> CompressedBitVector::Intersect(CompressedBitVector const & lhs,
CompressedBitVector const & rhs)

View file

@ -65,6 +65,8 @@ string DebugPrint(CompressedBitVector::StorageStrategy strat);
class DenseCBV : public CompressedBitVector
{
public:
static uint32_t const kBlockSize = 64;
DenseCBV() = default;
// Builds a dense CBV from a list of positions of set bits.
@ -76,15 +78,17 @@ public:
size_t NumBitGroups() const { return m_bitGroups.size(); }
static uint32_t const kBlockSize = 64;
template <typename F>
void ForEach(F && f) const
// Calls f with the 0-based position of every set bit, visiting the bit groups
// in index order and the bits of each group from the lowest to the highest.
template <typename TFn>
void ForEach(TFn && f) const
{
  for (size_t group = 0; group != m_bitGroups.size(); ++group)
  {
    // Position of bit 0 of the current group.
    size_t const base = kBlockSize * group;
    for (size_t bit = 0; bit != kBlockSize; ++bit)
    {
      if (((m_bitGroups[group] >> bit) & 1) == 0)
        continue;
      f(base + bit);
    }
  }
}
// Returns 0 if the group number is too large to be contained in m_bitGroups.
@ -111,8 +115,8 @@ public:
// Returns the position of the i'th set bit.
uint64_t Select(size_t i) const;
template <typename F>
void ForEach(F && f) const
template <typename TFn>
void ForEach(TFn && f) const
{
for (auto const & position : m_positions)
f(position);
@ -174,8 +178,8 @@ class CompressedBitVectorEnumerator
public:
// Executes f for each bit that is set to one using
// the bit's 0-based position as argument.
template <typename F>
static void ForEach(CompressedBitVector const & cbv, F && f)
template <typename TFn>
static void ForEach(CompressedBitVector const & cbv, TFn && f)
{
CompressedBitVector::StorageStrategy strat = cbv.GetStorageStrategy();
switch (strat)