From 004b63425fbaab6bb0a02ad4ade111cd5fa19d82 Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Tue, 9 Feb 2016 12:46:53 +0300 Subject: [PATCH] [coding] Added CBV::LeaveFirstSetNBits(). --- .../compressed_bit_vector_test.cpp | 80 +++++++++++++++++++ coding/compressed_bit_vector.cpp | 44 +++++++++- coding/compressed_bit_vector.hpp | 4 + 3 files changed, 124 insertions(+), 4 deletions(-) diff --git a/coding/coding_tests/compressed_bit_vector_test.cpp b/coding/coding_tests/compressed_bit_vector_test.cpp index f539704559..3507ec0903 100644 --- a/coding/coding_tests/compressed_bit_vector_test.cpp +++ b/coding/coding_tests/compressed_bit_vector_test.cpp @@ -387,3 +387,83 @@ UNIT_TEST(CompressedBitVector_DenseOneBit) TEST_EQUAL(pos, 0, ()); }); } + +UNIT_TEST(CompressedBitVector_LeaveFirstNBitsSmoke) +{ + auto cbv = coding::CompressedBitVectorBuilder::FromBitPositions(vector{}); + TEST_EQUAL(cbv->PopCount(), 0, ()); + + cbv = cbv->LeaveFirstSetNBits(0); + TEST_EQUAL(cbv->PopCount(), 0, ()); + + cbv = cbv->LeaveFirstSetNBits(200); + TEST_EQUAL(cbv->PopCount(), 0, ()); +} + +UNIT_TEST(CompressedBitVector_DenseLeaveFirstNBits) +{ + { + vector setBits; + setBits.assign(coding::DenseCBV::kBlockSize * 4, 1); + auto cbv = coding::CompressedBitVectorBuilder::FromBitPositions(setBits); + TEST_EQUAL(cbv->PopCount(), coding::DenseCBV::kBlockSize * 4, ()); + TEST_EQUAL(cbv->GetStorageStrategy(), coding::CompressedBitVector::StorageStrategy::Dense, ()); + + cbv = cbv->LeaveFirstSetNBits(0); + TEST_EQUAL(cbv->PopCount(), 0, ()); + } + + { + vector setBits; + for (int i = 0; i < 100; ++i) + setBits.push_back(2 * i); + auto cbv = coding::CompressedBitVectorBuilder::FromBitPositions(setBits); + TEST_EQUAL(cbv->PopCount(), 100, ()); + TEST_EQUAL(cbv->GetStorageStrategy(), coding::CompressedBitVector::StorageStrategy::Dense, ()); + + cbv = cbv->LeaveFirstSetNBits(50); + TEST_EQUAL(cbv->PopCount(), 50, ()); + TEST_EQUAL(cbv->GetStorageStrategy(), coding::CompressedBitVector::StorageStrategy::Dense, ()); + + for (int i = 0; i < 50; ++i) + { + TEST(cbv->GetBit(2 * i), ()); + TEST(!cbv->GetBit(2 * i + 1), ()); + } + } +} + +UNIT_TEST(CompressedBitVector_SparseLeaveFirstNBits) +{ + vector setBits; + for (int p = 0; p < 20; ++p) + setBits.push_back(static_cast(1) << p); + auto cbv = coding::CompressedBitVectorBuilder::FromBitPositions(setBits); + TEST_EQUAL(cbv->PopCount(), 20, ()); + TEST_EQUAL(cbv->GetStorageStrategy(), coding::CompressedBitVector::StorageStrategy::Sparse, ()); + + cbv = cbv->LeaveFirstSetNBits(100); + TEST_EQUAL(cbv->PopCount(), 20, ()); + for (uint64_t bit = 0; bit < (1 << 20); ++bit) + { + if (bit != 0 && (bit & (bit - 1)) == 0) + TEST(cbv->GetBit(bit), (bit)); + else + TEST(!cbv->GetBit(bit), (bit)); + } + + cbv = cbv->LeaveFirstSetNBits(10); + TEST_EQUAL(cbv->PopCount(), 10, ()); + for (uint64_t bit = 0; bit < (1 << 20); ++bit) + { + if (bit != 0 && (bit & (bit - 1)) == 0 && bit < (1 << 10)) + TEST(cbv->GetBit(bit), (bit)); + else + TEST(!cbv->GetBit(bit), (bit)); + } + + cbv = cbv->LeaveFirstSetNBits(0); + TEST_EQUAL(cbv->PopCount(), 0, ()); + for (uint64_t bit = 0; bit < (1 << 20); ++bit) + TEST(!cbv->GetBit(bit), (bit)); +} diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp index 78a8c7cfde..d52eaffb6d 100644 --- a/coding/compressed_bit_vector.cpp +++ b/coding/compressed_bit_vector.cpp @@ -309,6 +309,36 @@ bool DenseCBV::GetBit(uint64_t pos) const return ((bitGroup >> (pos % kBlockSize)) & 1) > 0; } +unique_ptr DenseCBV::LeaveFirstSetNBits(uint64_t n) const +{ + if (PopCount() <= n) + return Clone(); + + vector groups; + for (size_t i = 0; i < m_bitGroups.size() && n != 0; ++i) + { + uint64_t group = m_bitGroups[i]; + uint32_t const bits = bits::PopCount(group); + if (bits <= n) + { + n -= bits; + groups.push_back(group); + } + else + { + uint64_t part = 0; + while (n != 0) + { + part = part | (group & -group); + group = group & (group - 1); + --n; + } + groups.push_back(part); + } + } + return CompressedBitVectorBuilder::FromBitGroups(move(groups)); +} + CompressedBitVector::StorageStrategy DenseCBV::GetStorageStrategy() const { return CompressedBitVector::StorageStrategy::Dense; @@ -352,6 +382,14 @@ bool SparseCBV::GetBit(uint64_t pos) const return binary_search(m_positions.begin(), m_positions.end(), pos); } +unique_ptr SparseCBV::LeaveFirstSetNBits(uint64_t n) const +{ + if (PopCount() <= n) + return Clone(); + vector positions(m_positions.begin(), m_positions.begin() + n); + return CompressedBitVectorBuilder::FromBitPositions(move(positions)); +} + CompressedBitVector::StorageStrategy SparseCBV::GetStorageStrategy() const { return CompressedBitVector::StorageStrategy::Sparse; @@ -420,10 +458,8 @@ string DebugPrint(CompressedBitVector::StorageStrategy strat) { switch (strat) { - case CompressedBitVector::StorageStrategy::Dense: - return "Dense"; - case CompressedBitVector::StorageStrategy::Sparse: - return "Sparse"; + case CompressedBitVector::StorageStrategy::Dense: return "Dense"; + case CompressedBitVector::StorageStrategy::Sparse: return "Sparse"; } } diff --git a/coding/compressed_bit_vector.hpp b/coding/compressed_bit_vector.hpp index 23d7b1c04c..6e01ee7056 100644 --- a/coding/compressed_bit_vector.hpp +++ b/coding/compressed_bit_vector.hpp @@ -58,6 +58,8 @@ public: // Would operator[] look better? virtual bool GetBit(uint64_t pos) const = 0; + virtual unique_ptr LeaveFirstSetNBits(uint64_t n) const = 0; + // Returns the strategy used when storing this bit vector. virtual StorageStrategy GetStorageStrategy() const = 0; @@ -114,6 +116,7 @@ public: // CompressedBitVector overrides: uint64_t PopCount() const override; bool GetBit(uint64_t pos) const override; + unique_ptr LeaveFirstSetNBits(uint64_t n) const override; StorageStrategy GetStorageStrategy() const override; void Serialize(Writer & writer) const override; unique_ptr Clone() const override; @@ -148,6 +151,7 @@ public: // CompressedBitVector overrides: uint64_t PopCount() const override; bool GetBit(uint64_t pos) const override; + unique_ptr LeaveFirstSetNBits(uint64_t n) const override; StorageStrategy GetStorageStrategy() const override; void Serialize(Writer & writer) const override; unique_ptr Clone() const override;