From 6746120143aadbf02b46708f771e3b1969db9eb0 Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Thu, 17 Sep 2015 12:10:21 +0300 Subject: [PATCH] Review fixes. --- base/bits.hpp | 8 +- .../compressed_bit_vector_test.cpp | 11 +++ coding/compressed_bit_vector.cpp | 98 ++++++++++--------- coding/compressed_bit_vector.hpp | 20 ++-- 4 files changed, 78 insertions(+), 59 deletions(-) diff --git a/base/bits.hpp b/base/bits.hpp index 6b40d2406d..1971e41c08 100644 --- a/base/bits.hpp +++ b/base/bits.hpp @@ -8,7 +8,7 @@ namespace bits { // Count the number of 1 bits. Implementation: see Hacker's delight book. - inline uint32_t PopCount(uint32_t x) + inline uint32_t PopCount(uint32_t x) noexcept { x -= ((x >> 1) & 0x55555555); x = (x & 0x33333333) + ((x >> 2) & 0x33333333); @@ -18,7 +18,7 @@ namespace bits return x & 0x3F; } - inline uint32_t PopCount(uint8_t x) + inline uint32_t PopCount(uint8_t x) noexcept { return PopCount(static_cast(x)); } @@ -61,7 +61,7 @@ namespace bits return static_cast(SELECT1_ERROR); } - inline uint32_t PopCount(uint64_t x) + inline uint32_t PopCount(uint64_t x) noexcept { x = (x & 0x5555555555555555) + ((x & 0xAAAAAAAAAAAAAAAA) >> 1); x = (x & 0x3333333333333333) + ((x & 0xCCCCCCCCCCCCCCCC) >> 2); @@ -69,7 +69,7 @@ namespace bits x = (x & 0x00FF00FF00FF00FF) + ((x & 0xFF00FF00FF00FF00) >> 8); x = (x & 0x0000FFFF0000FFFF) + ((x & 0xFFFF0000FFFF0000) >> 16); x = x + (x >> 32); - return static_cast(x); + return static_cast(x); } // Will be implemented when needed. diff --git a/coding/coding_tests/compressed_bit_vector_test.cpp b/coding/coding_tests/compressed_bit_vector_test.cpp index 74d9f9bb55..e74596bd0d 100644 --- a/coding/coding_tests/compressed_bit_vector_test.cpp +++ b/coding/coding_tests/compressed_bit_vector_test.cpp @@ -191,3 +191,14 @@ UNIT_TEST(CompressedBitVector_ForEach) TEST_EQUAL(pos % 15, 0, ()); }); } + +UNIT_TEST(CompressedBitVector_DenseOneBit) +{ + vector setBits = {0}; + unique_ptr cbv(new coding::DenseCBV(setBits)); + TEST_EQUAL(cbv->PopCount(), 1, ()); + coding::CompressedBitVectorEnumerator::ForEach(*cbv, [&](uint64_t pos) + { + TEST_EQUAL(pos, 0, ()); + }); +} diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp index 0b444b42ac..ab32d1af0c 100644 --- a/coding/compressed_bit_vector.cpp +++ b/coding/compressed_bit_vector.cpp @@ -6,6 +6,12 @@ #include "std/algorithm.hpp" +namespace coding +{ +// static +uint32_t const DenseCBV::kBlockSize; +} // namespace coding + namespace { uint64_t const kBlockSize = coding::DenseCBV::kBlockSize; @@ -89,14 +95,12 @@ DenseCBV::DenseCBV(vector const & setBits) { if (setBits.empty()) { - m_bitGroups.resize(0); - m_popCount = 0; return; } uint64_t maxBit = setBits[0]; for (size_t i = 1; i < setBits.size(); ++i) maxBit = max(maxBit, setBits[i]); - size_t sz = (maxBit + kBlockSize - 1) / kBlockSize; + size_t sz = 1 + maxBit / kBlockSize; m_bitGroups.resize(sz); m_popCount = static_cast(setBits.size()); for (uint64_t pos : setBits) @@ -114,6 +118,33 @@ unique_ptr DenseCBV::BuildFromBitGroups(vector && bitGroups) return cbv; } +uint64_t DenseCBV::GetBitGroup(size_t i) const +{ + return i < m_bitGroups.size() ? m_bitGroups[i] : 0; +} + +uint32_t DenseCBV::PopCount() const { return m_popCount; } + +bool DenseCBV::GetBit(uint32_t pos) const +{ + uint64_t bitGroup = GetBitGroup(pos / kBlockSize); + return ((bitGroup >> (pos % kBlockSize)) & 1) > 0; +} + +CompressedBitVector::StorageStrategy DenseCBV::GetStorageStrategy() const +{ + return CompressedBitVector::StorageStrategy::Dense; +} + +void DenseCBV::Serialize(Writer & writer) const +{ + uint8_t header = static_cast(GetStorageStrategy()); + WriteToSink(writer, header); + WriteToSink(writer, static_cast(NumBitGroups())); + for (size_t i = 0; i < NumBitGroups(); ++i) + WriteToSink(writer, GetBitGroup(i)); +} + SparseCBV::SparseCBV(vector const & setBits) : m_positions(setBits) { ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ()); @@ -124,63 +155,25 @@ SparseCBV::SparseCBV(vector && setBits) : m_positions(move(setBits)) ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ()); } -uint32_t DenseCBV::PopCount() const { return m_popCount; } +uint64_t SparseCBV::Select(size_t i) const +{ + ASSERT_LESS(i, m_positions.size(), ()); + return m_positions[i]; +} uint32_t SparseCBV::PopCount() const { return m_positions.size(); } -bool DenseCBV::GetBit(uint32_t pos) const -{ - uint64_t bitGroup = GetBitGroup(pos / kBlockSize); - return ((bitGroup >> (pos % kBlockSize)) & 1) > 0; -} - bool SparseCBV::GetBit(uint32_t pos) const { auto const it = lower_bound(m_positions.begin(), m_positions.end(), pos); return it != m_positions.end() && *it == pos; } -uint64_t DenseCBV::GetBitGroup(size_t i) const -{ - return i < m_bitGroups.size() ? m_bitGroups[i] : 0; -} - -uint64_t SparseCBV::Select(size_t i) const -{ - ASSERT_LESS(i, m_positions.size(), ()); - return m_positions[i]; -} - -CompressedBitVector::StorageStrategy DenseCBV::GetStorageStrategy() const -{ - return CompressedBitVector::StorageStrategy::Dense; -} - CompressedBitVector::StorageStrategy SparseCBV::GetStorageStrategy() const { return CompressedBitVector::StorageStrategy::Sparse; } -string DebugPrint(CompressedBitVector::StorageStrategy strat) -{ - switch (strat) - { - case CompressedBitVector::StorageStrategy::Dense: - return "Dense"; - case CompressedBitVector::StorageStrategy::Sparse: - return "Sparse"; - } -} - -void DenseCBV::Serialize(Writer & writer) const -{ - uint8_t header = static_cast(GetStorageStrategy()); - WriteToSink(writer, header); - WriteToSink(writer, static_cast(NumBitGroups())); - for (size_t i = 0; i < NumBitGroups(); ++i) - WriteToSink(writer, GetBitGroup(i)); -} - void SparseCBV::Serialize(Writer & writer) const { uint8_t header = static_cast(GetStorageStrategy()); @@ -217,7 +210,7 @@ unique_ptr CompressedBitVectorBuilder::FromBitGroups( if (bitGroups.empty()) return make_unique(bitGroups); - uint64_t maxBit = kBlockSize * bitGroups.size() - 1; + uint64_t const maxBit = kBlockSize * bitGroups.size() - 1; uint64_t popCount = 0; for (size_t i = 0; i < bitGroups.size(); ++i) popCount += bits::PopCount(bitGroups[i]); @@ -233,6 +226,17 @@ unique_ptr CompressedBitVectorBuilder::FromBitGroups( return make_unique(setBits); } +string DebugPrint(CompressedBitVector::StorageStrategy strat) +{ + switch (strat) + { + case CompressedBitVector::StorageStrategy::Dense: + return "Dense"; + case CompressedBitVector::StorageStrategy::Sparse: + return "Sparse"; + } +} + // static unique_ptr CompressedBitVector::Intersect(CompressedBitVector const & lhs, CompressedBitVector const & rhs) diff --git a/coding/compressed_bit_vector.hpp b/coding/compressed_bit_vector.hpp index 26a2182f77..1fca665dd4 100644 --- a/coding/compressed_bit_vector.hpp +++ b/coding/compressed_bit_vector.hpp @@ -65,6 +65,8 @@ string DebugPrint(CompressedBitVector::StorageStrategy strat); class DenseCBV : public CompressedBitVector { public: + static uint32_t const kBlockSize = 64; + DenseCBV() = default; // Builds a dense CBV from a list of positions of set bits. @@ -76,15 +78,17 @@ public: size_t NumBitGroups() const { return m_bitGroups.size(); } - static uint32_t const kBlockSize = 64; - - template - void ForEach(F && f) const + template + void ForEach(TFn && f) const { for (size_t i = 0; i < m_bitGroups.size(); ++i) + { for (size_t j = 0; j < kBlockSize; ++j) + { if (((m_bitGroups[i] >> j) & 1) > 0) f(kBlockSize * i + j); + } + } } // Returns 0 if the group number is too large to be contained in m_bits. @@ -111,8 +115,8 @@ public: // Returns the position of the i'th set bit. uint64_t Select(size_t i) const; - template - void ForEach(F && f) const + template + void ForEach(TFn && f) const { for (auto const & position : m_positions) f(position); @@ -174,8 +178,8 @@ class CompressedBitVectorEnumerator public: // Executes f for each bit that is set to one using // the bit's 0-based position as argument. - template - static void ForEach(CompressedBitVector const & cbv, F && f) + template + static void ForEach(CompressedBitVector const & cbv, TFn && f) { CompressedBitVector::StorageStrategy strat = cbv.GetStorageStrategy(); switch (strat)