diff --git a/base/base_tests/bits_test.cpp b/base/base_tests/bits_test.cpp index 79cac2c797..e919e154e0 100644 --- a/base/base_tests/bits_test.cpp +++ b/base/base_tests/bits_test.cpp @@ -100,3 +100,11 @@ UNIT_TEST(NumHiZeroBits64) TEST_EQUAL(bits::NumHiZeroBits64(0x0FABCDEF0FABCDEFULL), 4, ()); TEST_EQUAL(bits::NumHiZeroBits64(0x000000000000FDEFULL), 48, ()); } + +UNIT_TEST(NumUsedBits) +{ + TEST_EQUAL(bits::NumUsedBits(0), 0, ()); + TEST_EQUAL(bits::NumUsedBits(0xFFFFFFFFFFFFFFFFULL), 64, ()); + TEST_EQUAL(bits::NumUsedBits(0x0FABCDEF0FABCDEFULL), 60, ()); + TEST_EQUAL(bits::NumUsedBits(0x000000000000FDEFULL), 16, ()); +} diff --git a/base/bits.hpp b/base/bits.hpp index 111c1c36e0..374b70683e 100644 --- a/base/bits.hpp +++ b/base/bits.hpp @@ -164,4 +164,13 @@ namespace bits while ((n & (uint64_t(1) << 63)) == 0) { ++result; n <<= 1; } return result; } + + // Returns the number of bits needed to store n, i.e. the 1-based position of its highest set bit (0 when n == 0); this is not the number of set bits. + // E.g. for n = 00001000b (binary) NumUsedBits is 4, although only one bit is set. + inline uint32_t NumUsedBits(uint64_t n) + { + uint32_t result = 0; + while (n != 0) { ++result; n >>= 1; } + return result; + } } diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp index 422da6eb7a..d38cae30cb 100644 --- a/coding/compressed_bit_vector.cpp +++ b/coding/compressed_bit_vector.cpp @@ -5,6 +5,7 @@ #include "writer.hpp" #include "../base/assert.hpp" +#include "../base/bits.hpp" namespace { void VarintEncode(vector & dst, u64 n) @@ -73,12 +74,6 @@ namespace { return n; } - inline u32 NumUsedBits(u64 n) - { - u32 result = 0; - while (n != 0) { ++result; n >>= 1; } - return result; - } vector SerialFreqsToDistrTable(Reader & reader, u64 & decodeOffset, u64 cnt) { vector freqs; @@ -173,7 +168,7 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int CHECK_LESS(prevOnePos, posOnes[i], ()); // Accumulate size of diff encoding. 
u64 diff = posOnes[i] - prevOnePos; - u32 diffBitsize = NumUsedBits(diff - 1); + u32 diffBitsize = bits::NumUsedBits(diff - 1); numBytesDiffsEncVint += (diffBitsize + BLOCK_SIZE - 1) / BLOCK_SIZE; numBitsDiffsEncArith += diffBitsize > 0 ? diffBitsize - 1 : 0; ++diffsSizesFreqs[diffBitsize]; @@ -183,14 +178,14 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int if (onesRangeLen > 0) { // Accumulate size of ones-range encoding. - u32 onesRangeLenBitsize = NumUsedBits(onesRangeLen - 1); + u32 onesRangeLenBitsize = bits::NumUsedBits(onesRangeLen - 1); numBytesRangesEncVint += (onesRangeLenBitsize + BLOCK_SIZE - 1) / BLOCK_SIZE; numBitsRangesEncArith += onesRangeLenBitsize > 0 ? onesRangeLenBitsize - 1 : 0; ++ranges1SizesFreqs[onesRangeLenBitsize]; onesRangeLen = 0; } // Accumulate size of zeros-range encoding. - u32 zeros_range_len_bitsize = NumUsedBits(posOnes[i] - prevOnePos - 2); + u32 zeros_range_len_bitsize = bits::NumUsedBits(posOnes[i] - prevOnePos - 2); numBytesRangesEncVint += (zeros_range_len_bitsize + BLOCK_SIZE - 1) / BLOCK_SIZE; numBitsRangesEncArith += zeros_range_len_bitsize > 0 ? zeros_range_len_bitsize - 1 : 0; ++ranges0SizesFreqs[zeros_range_len_bitsize]; @@ -201,7 +196,7 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int // Accumulate size of remaining ones-range encoding. if (onesRangeLen > 0) { - u32 onesRangeLenBitsize = NumUsedBits(onesRangeLen - 1); + u32 onesRangeLenBitsize = bits::NumUsedBits(onesRangeLen - 1); numBytesRangesEncVint += (onesRangeLenBitsize + BLOCK_SIZE - 1) / BLOCK_SIZE; numBitsRangesEncArith = onesRangeLenBitsize > 0 ? 
onesRangeLenBitsize - 1 : 0; ++ranges1SizesFreqs[onesRangeLenBitsize]; @@ -312,7 +307,7 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int for (u64 i = 0; i < posOnes.size(); ++i) { CHECK_GREATER(posOnes[i], prevOnePos, ()); - u32 bitsUsed = NumUsedBits(posOnes[i] - prevOnePos - 1); + u32 bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 1); arithEnc.Encode(bitsUsed); ++cntElements; prevOnePos = posOnes[i]; @@ -336,7 +331,7 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int CHECK_GREATER(posOnes[i], prevOnePos, ()); // Encode one's pos (diff - 1). u64 diff = posOnes[i] - prevOnePos - 1; - u32 bitsUsed = NumUsedBits(diff); + u32 bitsUsed = bits::NumUsedBits(diff); if (bitsUsed > 1) { // Most significant bit is always 1 for non-zero diffs, so don't store it. @@ -424,13 +419,13 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int if (onesRangeLen > 0) { // Encode ones range bits size. - u32 bitsUsed = NumUsedBits(onesRangeLen - 1); + u32 bitsUsed = bits::NumUsedBits(onesRangeLen - 1); arith_enc1.Encode(bitsUsed); ++cntElements1; onesRangeLen = 0; } // Encode zeros range bits size - 1. - u32 bitsUsed = NumUsedBits(posOnes[i] - prevOnePos - 2); + u32 bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 2); arith_enc0.Encode(bitsUsed); ++cntElements0; } @@ -440,7 +435,7 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int if (onesRangeLen > 0) { // Encode last ones range size - 1. - u32 bitsUsed = NumUsedBits(onesRangeLen - 1); + u32 bitsUsed = bits::NumUsedBits(onesRangeLen - 1); arith_enc1.Encode(bitsUsed); ++cntElements1; onesRangeLen = 0; @@ -470,7 +465,7 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int if (onesRangeLen > 0) { // Encode ones range bits size. 
- u32 bitsUsed = NumUsedBits(onesRangeLen - 1); + u32 bitsUsed = bits::NumUsedBits(onesRangeLen - 1); if (bitsUsed > 1) { // Most significant bit for non-zero values is always 1, don't encode it. @@ -480,7 +475,7 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int onesRangeLen = 0; } // Encode zeros range bits size - 1. - u32 bitsUsed = NumUsedBits(posOnes[i] - prevOnePos - 2); + u32 bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 2); if (bitsUsed > 1) { // Most significant bit for non-zero values is always 1, don't encode it. @@ -494,7 +489,7 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int if (onesRangeLen > 0) { // Encode last ones range size - 1. - u32 bitsUsed = NumUsedBits(onesRangeLen - 1); + u32 bitsUsed = bits::NumUsedBits(onesRangeLen - 1); if (bitsUsed > 1) { // Most significant bit for non-zero values is always 1, don't encode it.