From b4bd3a3e802fc25665b7cbcb2a2af2d45b79913c Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Wed, 2 Sep 2015 15:44:15 +0300 Subject: [PATCH] Review fixes. --- .../coding_tests/simple_dense_coding_test.cpp | 4 +- coding/coding_tests/succinct_mapper_test.cpp | 4 +- coding/simple_dense_coding.cpp | 86 ++++++------- coding/succinct_mapper.hpp | 113 +++++++++--------- 4 files changed, 107 insertions(+), 100 deletions(-) diff --git a/coding/coding_tests/simple_dense_coding_test.cpp b/coding/coding_tests/simple_dense_coding_test.cpp index 79bbd1f880..a7d7561f54 100644 --- a/coding/coding_tests/simple_dense_coding_test.cpp +++ b/coding/coding_tests/simple_dense_coding_test.cpp @@ -38,13 +38,13 @@ UNIT_TEST(SimpleDenseCoding_Smoke) SimpleDenseCoding coding(data); TestSDC(data, coding); FileWriter writer(kTestFile); - Freeze(coding, writer); + Freeze(coding, writer, "SimpleDenseCoding"); } { MmapReader reader(kTestFile); SimpleDenseCoding coding; - Map(coding, reader.Data()); + Map(coding, reader.Data(), "SimpleDenseCoding"); TestSDC(data, coding); } } diff --git a/coding/coding_tests/succinct_mapper_test.cpp b/coding/coding_tests/succinct_mapper_test.cpp index 97dec21539..c46008a41d 100644 --- a/coding/coding_tests/succinct_mapper_test.cpp +++ b/coding/coding_tests/succinct_mapper_test.cpp @@ -28,11 +28,11 @@ UNIT_TEST(Freeze_Smoke) { MemWriter writer(data); uint64_t const data = 0x0123456789abcdef; - Freeze(data, writer); + Freeze(data, writer, "uint64_t"); } TEST_EQUAL(8, data.size(), ()); uint64_t value = 0x0; - TEST_EQUAL(8, Map(value, reinterpret_cast(data.data())), ()); + TEST_EQUAL(8, Map(value, reinterpret_cast(data.data()), "uint64_t"), ()); TEST_EQUAL(0x0123456789abcdef, value, ()); } diff --git a/coding/simple_dense_coding.cpp b/coding/simple_dense_coding.cpp index f2e4acfb95..03d88f1b8a 100644 --- a/coding/simple_dense_coding.cpp +++ b/coding/simple_dense_coding.cpp @@ -9,6 +9,8 @@ namespace coding { namespace { +size_t const kAlphabetSize = static_cast(numeric_limits::max()) + 1; + struct Code { Code() : m_code(0), m_length(0) {} @@ -17,42 +19,44 @@ struct Code uint8_t m_length; }; -size_t const kAlphabetSize = static_cast(numeric_limits::max()) + 1; -Code g_codeTable[kAlphabetSize]; -bool g_codeTableInitialized = false; - -// Returns pointer to an initialized code table. If necessary, -// initializes it. In the latter case, code table is filled with -// following code words: 0, 1, 00, 01, 10, 11, 000, 001, ... -Code const * GetCodeTable() +// Initializes code table for simple dense coding with following code +// words: 0, 1, 00, 01, 10, 11, 000, 001, ... +struct CodeTable { - if (g_codeTableInitialized) - return g_codeTable; - - unsigned length = 1; - size_t rank = 0; - while (rank < kAlphabetSize) +public: + CodeTable() { - // Number of codes with the same bit length. - size_t const numCodes = static_cast(1) << length; - - size_t base = rank; - while (rank - base < numCodes && rank < kAlphabetSize) + size_t rank = 0; + uint8_t length = 1; + while (rank < kAlphabetSize) { - g_codeTable[rank].m_code = rank - base; - g_codeTable[rank].m_length = length; - ++rank; - } + // Number of codes with the same bit length. + size_t const numCodes = static_cast(1) << length; - ++length; + uint8_t code = 0; + for (; code < numCodes && rank + code < kAlphabetSize; ++code) + { + size_t const pos = rank + code; + m_table[pos].m_code = code; + m_table[pos].m_length = length; + } + + rank += code; + length += 1; + } } - g_codeTableInitialized = true; - return g_codeTable; -} + inline Code const & GetCode(uint8_t rank) const + { + return m_table[rank]; + } -// Computes frequences for data symbols. -void GetFrequences(vector const & data, uint64_t frequency[]) +private: + Code m_table[kAlphabetSize]; +}; + +// Calculates frequences for data symbols. +void CalcFrequences(vector const & data, uint64_t frequency[]) { memset(frequency, 0, sizeof(*frequency) * kAlphabetSize); for (uint8_t symbol : data) @@ -62,8 +66,11 @@ void GetFrequences(vector const & data, uint64_t frequency[]) SimpleDenseCoding::SimpleDenseCoding(vector const & data) { + // This static initialization isn't thread safe prior to C++11. + static CodeTable codeTable; + uint64_t frequency[kAlphabetSize]; // Maps symbols to frequences. - GetFrequences(data, frequency); + CalcFrequences(data, frequency); uint8_t symbols[kAlphabetSize]; // Maps ranks to symbols. uint8_t rank[kAlphabetSize]; // Maps symbols to ranks. @@ -77,27 +84,22 @@ SimpleDenseCoding::SimpleDenseCoding(vector const & data) for (size_t r = 0; r < kAlphabetSize; ++r) rank[symbols[r]] = r; - Code const * codeTable = GetCodeTable(); - ASSERT(codeTable, ()); - uint64_t bitLength = 0; for (size_t symbol = 0; symbol < kAlphabetSize; ++symbol) - bitLength += static_cast(frequency[symbol]) * codeTable[rank[symbol]].m_length; + bitLength += frequency[symbol] * codeTable.GetCode(rank[symbol]).m_length; succinct::bit_vector_builder bitsBuilder; bitsBuilder.reserve(bitLength); vector indexBuilder(bitLength); size_t pos = 0; + for (uint8_t symbol : data) { - for (uint8_t symbol : data) - { - Code const & code = codeTable[rank[symbol]]; - ASSERT_LESS(pos, bitLength, ()); - indexBuilder[pos] = 1; + Code const & code = codeTable.GetCode(rank[symbol]); + ASSERT_LESS(pos, bitLength, ()); + indexBuilder[pos] = 1; - bitsBuilder.append_bits(code.m_code, code.m_length); - pos += code.m_length; - } + bitsBuilder.append_bits(code.m_code, code.m_length); + pos += code.m_length; } ASSERT_EQUAL(pos, bitLength, ()); diff --git a/coding/succinct_mapper.hpp b/coding/succinct_mapper.hpp index 3800f3fea0..a5c288dac0 100644 --- a/coding/succinct_mapper.hpp +++ b/coding/succinct_mapper.hpp @@ -2,6 +2,9 @@ #include "coding/endianness.hpp" +#include "base/assert.hpp" +#include "base/macros.hpp" + #include "std/type_traits.hpp" #include "3party/succinct/mappable_vector.hpp" @@ -12,67 +15,64 @@ namespace coding template static T * Align8Ptr(T * ptr) { - uint64_t value = (reinterpret_cast(ptr) + 0x7) & 0xfffffffffffffff8; + uint64_t const value = (reinterpret_cast(ptr) + 0x7) & 0xfffffffffffffff8; return reinterpret_cast(value); } -inline uint32_t NeedToAlign8(uint64_t written) { return 0x8 - (written & 0x7); } +inline uint32_t ToAlign8(uint64_t written) { return (0x8 - (written & 0x7)) & 0x7; } class MapVisitor { public: - MapVisitor(uint8_t const * base) : m_base(base), m_cur(m_base) {} + explicit MapVisitor(uint8_t const * base) : m_base(base), m_cur(m_base) {} template - typename enable_if::value, MapVisitor &>::type operator()( - T & val, char const * /* friendlyName */) + typename enable_if::value, MapVisitor &>::type operator()(T & val, + char const * /* name */) { val.map(*this); return *this; } template - typename enable_if::value, MapVisitor &>::type operator()( - T & val, char const * /* friendlyName */) + typename enable_if::value, MapVisitor &>::type operator()(T & val, + char const * /* name */) { T const * valPtr = reinterpret_cast(m_cur); val = *valPtr; - m_cur += sizeof(T); - m_cur = Align8Ptr(m_cur); + m_cur = Align8Ptr(m_cur + sizeof(T)); return *this; } template - MapVisitor & operator()(succinct::mapper::mappable_vector & vec, - char const * /* friendlyName */) + MapVisitor & operator()(succinct::mapper::mappable_vector & vec, char const * /* name */) { vec.clear(); (*this)(vec.m_size, "size"); - vec.m_data = reinterpret_cast(m_cur); - size_t const bytes = vec.m_size * sizeof(T); - m_cur += bytes; - m_cur = Align8Ptr(m_cur); + m_cur = Align8Ptr(m_cur + vec.m_size * sizeof(T)); return *this; } - size_t BytesRead() const { return static_cast(m_cur - m_base); } + uint64_t BytesRead() const { return static_cast(m_cur - m_base); } private: uint8_t const * const m_base; uint8_t const * m_cur; + + DISALLOW_COPY_AND_MOVE(MapVisitor); }; class ReverseMapVisitor { public: - ReverseMapVisitor(uint8_t * base) : m_base(base), m_cur(m_base) {} + explicit ReverseMapVisitor(uint8_t * base) : m_base(base), m_cur(m_base) {} template typename enable_if::value, ReverseMapVisitor &>::type operator()( - T & val, char const * /* friendlyName */) + T & val, char const * /* name */) { val.map(*this); return *this; @@ -80,117 +80,122 @@ public: template typename enable_if::value, ReverseMapVisitor &>::type operator()( - T & val, char const * /* friendlyName */) + T & val, char const * /* name */) { T * valPtr = reinterpret_cast(m_cur); *valPtr = ReverseByteOrder(*valPtr); val = *valPtr; - m_cur += sizeof(T); - m_cur = Align8Ptr(m_cur); + m_cur = Align8Ptr(m_cur + sizeof(T)); return *this; } template ReverseMapVisitor & operator()(succinct::mapper::mappable_vector & vec, - char const * /* friendlyName */) + char const * /* name */) { vec.clear(); (*this)(vec.m_size, "size"); vec.m_data = reinterpret_cast(m_cur); - for (auto const it = vec.begin(); it != vec.end(); ++it) + for (auto const it = vec.cbegin(); it != vec.cend(); ++it) *it = ReverseByteOrder(*it); - size_t const bytes = vec.m_size * sizeof(T); - m_cur += bytes; - m_cur = Align8Ptr(m_cur); + m_cur = Align8Ptr(m_cur + vec.m_size * sizeof(T)); return *this; } - size_t BytesRead() const { return static_cast(m_cur - m_base); } + uint64_t BytesRead() const { return static_cast(m_cur - m_base); } private: - uint8_t * m_base; + uint8_t * const m_base; uint8_t * m_cur; + + DISALLOW_COPY_AND_MOVE(ReverseMapVisitor); }; template class FreezeVisitor { public: - FreezeVisitor(TWriter & writer) : m_writer(writer), m_written(0) {} + explicit FreezeVisitor(TWriter & writer) : m_writer(writer), m_bytesWritten(0) {} template - typename enable_if::value, FreezeVisitor &>::type operator()( - T & val, char const * /* friendlyName */) + typename enable_if::value, FreezeVisitor &>::type operator()(T & val, + char const * /* name */) { + ASSERT(IsAligned(), ()); val.map(*this); return *this; } template - typename enable_if::value, FreezeVisitor &>::type operator()( - T & val, char const * /* friendlyName */) + typename enable_if::value, FreezeVisitor &>::type operator()(T & val, + char const * /* name */) { - m_writer.Write(reinterpret_cast(&val), sizeof(T)); - m_written += sizeof(T); + ASSERT(IsAligned(), ()); + m_writer.Write(&val, sizeof(T)); + m_bytesWritten += sizeof(T); WritePadding(); return *this; } template - FreezeVisitor & operator()(succinct::mapper::mappable_vector & vec, - char const * /* friendlyName */) + FreezeVisitor & operator()(succinct::mapper::mappable_vector & vec, char const * /* name */) { + ASSERT(IsAligned(), ()); (*this)(vec.m_size, "size"); size_t const bytes = static_cast(vec.m_size * sizeof(T)); - m_writer.Write(vec.m_data, static_cast(bytes)); - m_written += bytes; + m_writer.Write(vec.m_data, bytes); + m_bytesWritten += bytes; WritePadding(); return *this; } - size_t Written() const { return m_written; } + uint64_t BytesWritten() const { return m_bytesWritten; } private: + bool IsAligned() const { return ToAlign8(m_writer.Pos()) == 0; } + void WritePadding() { - uint32_t const padding = NeedToAlign8(m_written); static uint64_t const zero = 0; - if (padding > 0 && padding < 8) - { - m_writer.Write(reinterpret_cast(&zero), padding); - m_written += padding; - } + + uint32_t const padding = ToAlign8(m_bytesWritten); + if (padding == 0) + return; + m_writer.Write(&zero, padding); + m_bytesWritten += padding; } TWriter & m_writer; - uint64_t m_written; + uint64_t m_bytesWritten; + + DISALLOW_COPY_AND_MOVE(FreezeVisitor); }; template -size_t Map(T & value, uint8_t const * base, char const * friendlyName = "") +uint64_t Map(T & value, uint8_t const * base, char const * name) { MapVisitor visitor(base); - visitor(value, friendlyName); + visitor(value, name); return visitor.BytesRead(); } template -size_t ReverseMap(T & value, uint8_t * base, char const * friendlyName = "") +uint64_t ReverseMap(T & value, uint8_t * base, char const * name) { ReverseMapVisitor visitor(base); - visitor(value, friendlyName); + visitor(value, name); return visitor.BytesRead(); } template -size_t Freeze(T & val, TWriter & writer, char const * friendlyName = "") +uint64_t Freeze(T & val, TWriter & writer, char const * name) { FreezeVisitor visitor(writer); - visitor(val, friendlyName); - return visitor.Written(); + visitor(val, name); + return visitor.BytesWritten(); } } // namespace coding