From f50ed69e87e037c519f2ede59aea582f30c6980a Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Fri, 17 Jul 2015 13:21:00 +0300 Subject: [PATCH] Review fixes. --- coding/bit_streams.hpp | 19 ++++++----- coding/compressed_bit_vector.cpp | 56 ++++++++++++++++++++++++-------- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/coding/bit_streams.hpp b/coding/bit_streams.hpp index 2751403cbf..3df06acd7b 100644 --- a/coding/bit_streams.hpp +++ b/coding/bit_streams.hpp @@ -1,6 +1,5 @@ #pragma once -#include "std/algorithm.hpp" #include "std/cstdint.hpp" #include "std/limits.hpp" @@ -9,9 +8,11 @@ namespace { -uint64_t const kByteMask = (static_cast(1) << CHAR_BIT) - 1; +uint8_t const kByteMask = 0xFF; } // namespace +static_assert(CHAR_BIT == 8, ""); + template class BitWriter { @@ -50,14 +51,14 @@ public: { if (n == 0) return; - CHECK_LESS_OR_EQUAL(n, CHAR_BIT, ()); + ASSERT_LESS_OR_EQUAL(n, CHAR_BIT, ()); uint32_t bufferedBits = m_bitsWritten % CHAR_BIT; m_bitsWritten += n; - if (n + bufferedBits > 8) + if (n + bufferedBits > CHAR_BIT) { uint8_t b = (bits << bufferedBits) | m_buf; m_writer.Write(&b, 1); - m_buf = bits >> (8 - bufferedBits); + m_buf = bits >> (CHAR_BIT - bufferedBits); } else { @@ -88,17 +89,17 @@ public: BitReader(TReader & reader) : m_reader(reader), m_bitsRead(0), m_bufferedBits(0), m_buf(0) {} // Returns the total number of bits read from this BitReader. - uint32_t BitsRead() const { return m_bitsRead; } + uint64_t BitsRead() const { return m_bitsRead; } // Reads n bits and returns them as the least significant bits of an 8-bit number. // The underlying m_reader is supposed to be byte-aligned (which is the - // case when it reads from the place that was written with BitWriter) because + // case when it reads from the place that was written to using BitWriter). // Read may use one lookahead byte. uint8_t Read(uint32_t n) { if (n == 0) return 0; - CHECK_LESS_OR_EQUAL(n, CHAR_BIT, ()); + ASSERT_LESS_OR_EQUAL(n, CHAR_BIT, ()); m_bitsRead += n; uint8_t result = 0; if (n <= m_bufferedBits) @@ -121,7 +122,7 @@ public: private: TReader & m_reader; - uint32_t m_bitsRead; + uint64_t m_bitsRead; uint32_t m_bufferedBits; uint8_t m_buf; }; diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp index d4dd942ade..d9a24d598b 100644 --- a/coding/compressed_bit_vector.cpp +++ b/coding/compressed_bit_vector.cpp @@ -202,7 +202,8 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, { // Most significant bit is always 1 for non-zero diffs, so don't store it. --bitsUsed; - bitWriter.Write(diff, bitsUsed); + for (size_t j = 0; j < bitsUsed; ++j) + bitWriter.Write((diff >> j) & 1, 1); totalReadBits += bitsUsed; ++totalReadCnts; } @@ -336,7 +337,8 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, { // Most significant bit for non-zero values is always 1, don't encode it. --bitsUsed; - bitWriter.Write(onesRangeLen - 1, bitsUsed); + for (size_t j = 0; j < bitsUsed; ++j) + bitWriter.Write(((onesRangeLen - 1) >> j) & 1, 1); } onesRangeLen = 0; } @@ -346,7 +348,8 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, { // Most significant bit for non-zero values is always 1, don't encode it. --bitsUsed; - bitWriter.Write(posOnes[i] - prevOnePos - 2, bitsUsed); + for (size_t j = 0; j < bitsUsed; ++j) + bitWriter.Write(((posOnes[i] - prevOnePos - 2) >> j) & 1, 1); } } ++onesRangeLen; @@ -360,7 +363,8 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, { // Most significant bit for non-zero values is always 1, don't encode it. --bitsUsed; - bitWriter.Write(onesRangeLen - 1, bitsUsed); + for (size_t j = 0; j < bitsUsed; ++j) + bitWriter.Write(((onesRangeLen - 1) >> j) & 1, 1); } onesRangeLen = 0; } @@ -404,9 +408,7 @@ vector DecodeCompressedBitVector(Reader & reader) { ArithmeticDecoder arithDec(*arithDecReader, distrTable); for (uint64_t i = 0; i < cntElements; ++i) bitsUsedVec.push_back(arithDec.Decode()); decodeOffset += encSizesBytesize; - unique_ptr bitMemReader( - reader.CreateSubReader(decodeOffset, serialSize - decodeOffset)); - ReaderPtr readerPtr(bitMemReader.get()); + ReaderPtr readerPtr(reader.CreateSubReader(decodeOffset, serialSize - decodeOffset)); ReaderSource> bitReaderSource(readerPtr); BitReader>> bitReader(bitReaderSource); int64_t prevOnePos = -1; @@ -414,7 +416,17 @@ vector DecodeCompressedBitVector(Reader & reader) { { uint32_t bitsUsed = bitsUsedVec[i]; uint64_t diff = 0; - if (bitsUsed > 0) diff = ((uint64_t(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else diff = 1; + if (bitsUsed > 0) + { + diff = static_cast(1) << (bitsUsed - 1); + for (size_t j = 0; j + 1 < bitsUsed; ++j) + diff |= bitReader.Read(1) << j; + ++diff; + } + else + { + diff = 1; + } posOnes.push_back(prevOnePos + diff); prevOnePos += diff; } @@ -459,9 +471,7 @@ vector DecodeCompressedBitVector(Reader & reader) { vector bitsSizes1; for (uint64_t i = 0; i < cntElements1; ++i) bitsSizes1.push_back(arith_dec1.Decode()); decodeOffset += enc1SizesBytesize; - unique_ptr bitMemReader( - reader.CreateSubReader(decodeOffset, serialSize - decodeOffset)); - ReaderPtr readerPtr(bitMemReader.get()); + ReaderPtr readerPtr(reader.CreateSubReader(decodeOffset, serialSize - decodeOffset)); ReaderSource> bitReaderSource(readerPtr); BitReader>> bitReader(bitReaderSource); uint64_t sum = 0, i0 = 0, i1 = 0; @@ -472,13 +482,33 @@ vector DecodeCompressedBitVector(Reader & reader) { if (!isFirstOne) { uint32_t bitsUsed = bitsSizes0[i0]; - if (bitsUsed > 0) zerosRangeSize = ((uint64_t(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else zerosRangeSize = 1; + if (bitsUsed > 0) + { + zerosRangeSize = static_cast(1) << (bitsUsed - 1); + for (size_t j = 0; j + 1 < bitsUsed; ++j) + zerosRangeSize |= bitReader.Read(1) << j; + ++zerosRangeSize; + } + else + { + zerosRangeSize = 1; + } ++i0; } else isFirstOne = false; uint64_t onesRangeSize = 0; uint32_t bitsUsed = bitsSizes1[i1]; - if (bitsUsed > 0) onesRangeSize = ((uint64_t(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else onesRangeSize = 1; + if (bitsUsed > 0) + { + onesRangeSize = static_cast(1) << (bitsUsed - 1); + for (size_t j = 0; j + 1 < bitsUsed; ++j) + onesRangeSize |= bitReader.Read(1) << j; + ++onesRangeSize; + } + else + { + onesRangeSize = 1; + } ++i1; sum += zerosRangeSize; for (uint64_t j = sum; j < sum + onesRangeSize; ++j) posOnes.push_back(j);