diff --git a/base/bits.hpp b/base/bits.hpp index 4c561f3422..94c5d501db 100644 --- a/base/bits.hpp +++ b/base/bits.hpp @@ -2,9 +2,9 @@ #include "base/assert.hpp" #include "std/cstdint.hpp" +#include "std/limits.hpp" #include "std/type_traits.hpp" - namespace bits { // Count the number of 1 bits. Implementation: see Hacker's delight book. @@ -201,4 +201,11 @@ namespace bits while (n != 0) { ++result; n >>= 1; } return result; } -} + + inline uint64_t GetFullMask(uint8_t numBits) + { + ASSERT_LESS_OR_EQUAL(numBits, 64, ()); + return numBits == 64 ? numeric_limits::max() + : (static_cast(1) << numBits) - 1; + } +} // namespace bits diff --git a/coding/bit_streams.hpp b/coding/bit_streams.hpp index 702e31145d..c79249cbc7 100644 --- a/coding/bit_streams.hpp +++ b/coding/bit_streams.hpp @@ -1,13 +1,13 @@ #pragma once #include "base/assert.hpp" +#include "base/bits.hpp" #include "base/logging.hpp" #include "std/algorithm.hpp" #include "std/cstdint.hpp" #include "std/limits.hpp" - namespace { uint8_t const kByteMask = 0xFF; @@ -34,17 +34,19 @@ public: } // Returns the number of bits that have been sent to BitWriter, - // including those that are in m_buf and are possibly - // not flushed yet. + // including those that are in m_buf and are possibly not flushed + // yet. uint64_t BitsWritten() const { return m_bitsWritten; } - // Writes n bits starting with the least significant bit. - // They are written one byte at a time so endianness is of no concern. - // All the other bits except for the first n must be set to zero. + // Writes n bits starting with the least significant bit. They are + // written one byte at a time so endianness is of no concern. void Write(uint8_t bits, uint8_t n) { if (n == 0) return; + + bits = bits & bits::GetFullMask(n); + ASSERT_LESS_OR_EQUAL(n, CHAR_BIT, ()); uint32_t bufferedBits = m_bitsWritten % CHAR_BIT; m_bitsWritten += n; @@ -70,21 +72,53 @@ public: } } +#define WRITE_BYTE() \ + { \ + Write(bits, min(kMinBits, n)); \ + if (n <= kMinBits) \ + return; \ + n -= kMinBits; \ + bits >>= kMinBits; \ + } + // Same as Write but accept up to 32 bits to write. void WriteAtMost32Bits(uint32_t bits, uint8_t n) { + uint8_t const kMinBits = CHAR_BIT; + ASSERT_LESS_OR_EQUAL(n, 32, ()); - uint8_t constexpr kMinBits = CHAR_BIT; - Write(static_cast(bits), min(n, kMinBits)); - if (n > kMinBits) - WriteAtMost32Bits(bits >> kMinBits, n - kMinBits); + WRITE_BYTE(); + WRITE_BYTE(); + WRITE_BYTE(); + + Write(bits, n); } + // Same as Write but accept up to 64 bits to write. + void WriteAtMost64Bits(uint64_t bits, uint8_t n) + { + uint8_t const kMinBits = CHAR_BIT; + + ASSERT_LESS_OR_EQUAL(n, 64, ()); + + WRITE_BYTE(); + WRITE_BYTE(); + WRITE_BYTE(); + WRITE_BYTE(); + WRITE_BYTE(); + WRITE_BYTE(); + WRITE_BYTE(); + + Write(bits, n); + } + +#undef WRITE_BYTE + private: - // Writes up to CHAR_BIT-1 last bits if they have not been written yet - // and pads them with zeros. - // This method cannot be made public because once a byte has been flushed there is no going back. + // Writes up to CHAR_BIT-1 last bits if they have not been written + // yet and pads them with zeros. This method cannot be made public + // because once a byte has been flushed there is no going back. void Flush() { if (m_bitsWritten % CHAR_BIT != 0) @@ -105,10 +139,11 @@ public: // Returns the total number of bits read from this BitReader. uint64_t BitsRead() const { return m_bitsRead; } - // Reads n bits and returns them as the least significant bits of an 8-bit number. - // The underlying m_src is supposed to be byte-aligned (which is the - // case when it reads from the place that was written to using BitWriter). - // Read may use one lookahead byte. + // Reads n bits and returns them as the least significant bits of an + // 8-bit number. The underlying m_src is supposed to be + // byte-aligned (which is the case when it reads from the place that + // was written to using BitWriter). Read may use one lookahead + // byte. uint8_t Read(uint8_t n) { if (n == 0) @@ -134,6 +169,52 @@ public: return result; } +#define READ_BYTE(i) \ + { \ + result = result | (static_cast(Read(min(n, kMinBits))) << (i * kMinBits)); \ + if (n <= kMinBits) \ + return result; \ + n -= kMinBits; \ + } + + // Same as Read but accept up to 32 bits to read. + uint32_t ReadAtMost32Bits(uint8_t n) + { + uint8_t constexpr kMinBits = CHAR_BIT; + + ASSERT_LESS_OR_EQUAL(n, 32, ()); + + uint32_t result = 0; + + READ_BYTE(0); + READ_BYTE(1); + READ_BYTE(2); + + return result | (static_cast(Read(n)) << (3 * kMinBits)); + } + + // Same as Read but accept up to 64 bits to read. + uint64_t ReadAtMost64Bits(uint8_t n) + { + uint8_t constexpr kMinBits = CHAR_BIT; + + ASSERT_LESS_OR_EQUAL(n, 64, ()); + + uint64_t result = 0; + + READ_BYTE(0); + READ_BYTE(1); + READ_BYTE(2); + READ_BYTE(3); + READ_BYTE(4); + READ_BYTE(5); + READ_BYTE(6); + + return result | (static_cast(Read(n)) << (7 * kMinBits)); + } + +#undef READ_BYTE + private: TSource & m_src; uint64_t m_bitsRead; diff --git a/coding/coding.pro b/coding/coding.pro index b4a9ed2f92..638202e5be 100644 --- a/coding/coding.pro +++ b/coding/coding.pro @@ -49,6 +49,7 @@ HEADERS += \ dd_vector.hpp \ diff.hpp \ diff_patch_common.hpp \ + elias_coder.hpp \ endianness.hpp \ file_container.hpp \ file_name_utils.hpp \ diff --git a/coding/coding_tests/bit_streams_test.cpp b/coding/coding_tests/bit_streams_test.cpp index ac60178ecb..0b060403fd 100644 --- a/coding/coding_tests/bit_streams_test.cpp +++ b/coding/coding_tests/bit_streams_test.cpp @@ -4,11 +4,15 @@ #include "coding/reader.hpp" #include "coding/writer.hpp" +#include "base/assert.hpp" +#include "base/bits.hpp" + #include "std/random.hpp" #include "std/utility.hpp" #include "std/vector.hpp" - +namespace +{ UNIT_TEST(BitStreams_Smoke) { uniform_int_distribution randomBytesDistribution(0, 255); @@ -62,3 +66,38 @@ UNIT_TEST(BitStreams_T1) TEST_EQUAL(buf.size(), 2, ()); } + +UNIT_TEST(BitStreams_Large) +{ + using TBuffer = vector; + using TWriter = MemWriter; + + uint64_t const kMask = 0x0123456789abcdef; + + TBuffer buf; + { + TWriter w(buf); + BitWriter bits(w); + + for (int i = 0; i <= 64; ++i) + { + if (i <= 32) + bits.WriteAtMost32Bits(static_cast(kMask), i); + else + bits.WriteAtMost64Bits(kMask, i); + } + } + + { + MemReader r(buf.data(), buf.size()); + ReaderSource src(r); + BitReader> bits(src); + for (int i = 0; i <= 64; ++i) + { + uint64_t const mask = bits::GetFullMask(i); + uint64_t const value = i <= 32 ? bits.ReadAtMost32Bits(i) : bits.ReadAtMost64Bits(i); + TEST_EQUAL(value, kMask & mask, (i)); + } + } +} +} // namespace diff --git a/coding/coding_tests/coding_tests.pro b/coding/coding_tests/coding_tests.pro index f4b0d58fd7..46049648d0 100644 --- a/coding/coding_tests/coding_tests.pro +++ b/coding/coding_tests/coding_tests.pro @@ -20,6 +20,7 @@ SOURCES += ../../testing/testingmain.cpp \ compressed_bit_vector_test.cpp \ dd_vector_test.cpp \ diff_test.cpp \ + elias_coder_test.cpp \ endianness_test.cpp \ file_container_test.cpp \ file_data_test.cpp \ diff --git a/coding/coding_tests/elias_coder_test.cpp b/coding/coding_tests/elias_coder_test.cpp new file mode 100644 index 0000000000..1abab6283d --- /dev/null +++ b/coding/coding_tests/elias_coder_test.cpp @@ -0,0 +1,54 @@ +#include "testing/testing.hpp" + +#include "coding/bit_streams.hpp" +#include "coding/elias_coder.hpp" +#include "coding/reader.hpp" +#include "coding/writer.hpp" + +#include "base/bits.hpp" + +#include "std/vector.hpp" + +namespace +{ +template +void TestCoder(string const & name) +{ + using TBuffer = vector; + using TWriter = MemWriter; + + uint64_t const kMask = 0xfedcba9876543210; + + TBuffer buf; + { + TWriter w(buf); + BitWriter bits(w); + for (int i = 0; i <= 64; ++i) + { + uint64_t const mask = bits::GetFullMask(i); + uint64_t const value = kMask & mask; + if (value == 0) + TEST(!TCoder::Encode(bits, value), (name, i)); + else + TEST(TCoder::Encode(bits, value), (name, i)); + } + } + + { + MemReader r(buf.data(), buf.size()); + ReaderSource src(r); + BitReader> bits(src); + for (int i = 0; i <= 64; ++i) + { + uint64_t const mask = bits::GetFullMask(i); + uint64_t const expected = kMask & mask; + if (expected == 0) + continue; + TEST_EQUAL(expected, TCoder::Decode(bits), (name, i)); + } + } +} + +UNIT_TEST(EliasCoder_Gamma) { TestCoder("Gamma"); } +UNIT_TEST(EliasCoder_Delta) { TestCoder("Delta"); } +} // namespace diff --git a/coding/elias_coder.hpp b/coding/elias_coder.hpp new file mode 100644 index 0000000000..e7cc92e494 --- /dev/null +++ b/coding/elias_coder.hpp @@ -0,0 +1,76 @@ +#pragma once + +#include "coding/bit_streams.hpp" + +#include "base/assert.hpp" +#include "base/bits.hpp" + +#include "std/cstdint.hpp" + +namespace coding +{ +class GammaCoder +{ +public: + template + static bool Encode(BitWriter & writer, uint64_t value) + { + if (value == 0) + return false; + + uint8_t const n = bits::CeilLog(value); + ASSERT_LESS_OR_EQUAL(n, 63, ()); + + uint64_t const msb = static_cast(1) << n; + writer.WriteAtMost64Bits(msb, n + 1); + writer.WriteAtMost64Bits(value, n); + return true; + } + + template + static uint64_t Decode(BitReader & reader) + { + uint8_t n = 0; + while (reader.Read(1) == 0) + ++n; + + ASSERT_LESS_OR_EQUAL(n, 63, ()); + + uint64_t const msb = static_cast(1) << n; + return msb | reader.ReadAtMost64Bits(n); + } +}; + +class DeltaCoder +{ +public: + template + static bool Encode(BitWriter & writer, uint64_t value) + { + if (value == 0) + return false; + + uint8_t const n = bits::CeilLog(value); + ASSERT_LESS_OR_EQUAL(n, 63, ()); + if (!GammaCoder::Encode(writer, n + 1)) + return false; + + writer.WriteAtMost64Bits(value, n); + return true; + } + + template + static uint64_t Decode(BitReader & reader) + { + uint8_t n = GammaCoder::Decode(reader); + + ASSERT_GREATER(n, 0, ()); + --n; + + ASSERT_LESS_OR_EQUAL(n, 63, ()); + + uint64_t const msb = static_cast(1) << n; + return msb | reader.ReadAtMost64Bits(n); + } +}; +} // namespace coding