Review fixes.

This commit is contained in:
Yuri Gorshenin 2015-09-02 15:44:15 +03:00 committed by Sergey Yershov
parent 122fdfda89
commit b4bd3a3e80
4 changed files with 107 additions and 100 deletions

View file

@ -38,13 +38,13 @@ UNIT_TEST(SimpleDenseCoding_Smoke)
SimpleDenseCoding coding(data);
TestSDC(data, coding);
FileWriter writer(kTestFile);
Freeze(coding, writer);
Freeze(coding, writer, "SimpleDenseCoding");
}
{
MmapReader reader(kTestFile);
SimpleDenseCoding coding;
Map(coding, reader.Data());
Map(coding, reader.Data(), "SimpleDenseCoding");
TestSDC(data, coding);
}
}

View file

@ -28,11 +28,11 @@ UNIT_TEST(Freeze_Smoke)
{
MemWriter<decltype(data)> writer(data);
uint64_t const data = 0x0123456789abcdef;
Freeze(data, writer);
Freeze(data, writer, "uint64_t");
}
TEST_EQUAL(8, data.size(), ());
uint64_t value = 0x0;
TEST_EQUAL(8, Map(value, reinterpret_cast<uint8_t const *>(data.data())), ());
TEST_EQUAL(8, Map(value, reinterpret_cast<uint8_t const *>(data.data()), "uint64_t"), ());
TEST_EQUAL(0x0123456789abcdef, value, ());
}

View file

@ -9,6 +9,8 @@ namespace coding
{
namespace
{
size_t const kAlphabetSize = static_cast<size_t>(numeric_limits<uint8_t>::max()) + 1;
struct Code
{
Code() : m_code(0), m_length(0) {}
@ -17,42 +19,44 @@ struct Code
uint8_t m_length;
};
size_t const kAlphabetSize = static_cast<size_t>(numeric_limits<uint8_t>::max()) + 1;
Code g_codeTable[kAlphabetSize];
bool g_codeTableInitialized = false;
// Returns pointer to an initialized code table. If necessary,
// initializes it. In the latter case, code table is filled with
// following code words: 0, 1, 00, 01, 10, 11, 000, 001, ...
Code const * GetCodeTable()
// Initializes code table for simple dense coding with following code
// words: 0, 1, 00, 01, 10, 11, 000, 001, ...
struct CodeTable
{
if (g_codeTableInitialized)
return g_codeTable;
unsigned length = 1;
size_t rank = 0;
while (rank < kAlphabetSize)
public:
CodeTable()
{
// Number of codes with the same bit length.
size_t const numCodes = static_cast<size_t>(1) << length;
size_t base = rank;
while (rank - base < numCodes && rank < kAlphabetSize)
size_t rank = 0;
uint8_t length = 1;
while (rank < kAlphabetSize)
{
g_codeTable[rank].m_code = rank - base;
g_codeTable[rank].m_length = length;
++rank;
}
// Number of codes with the same bit length.
size_t const numCodes = static_cast<size_t>(1) << length;
++length;
uint8_t code = 0;
for (; code < numCodes && rank + code < kAlphabetSize; ++code)
{
size_t const pos = rank + code;
m_table[pos].m_code = code;
m_table[pos].m_length = length;
}
rank += code;
length += 1;
}
}
g_codeTableInitialized = true;
return g_codeTable;
}
inline Code const & GetCode(uint8_t rank) const
{
return m_table[rank];
}
// Computes frequences for data symbols.
void GetFrequences(vector<uint8_t> const & data, uint64_t frequency[])
private:
Code m_table[kAlphabetSize];
};
// Calculates frequences for data symbols.
void CalcFrequences(vector<uint8_t> const & data, uint64_t frequency[])
{
memset(frequency, 0, sizeof(*frequency) * kAlphabetSize);
for (uint8_t symbol : data)
@ -62,8 +66,11 @@ void GetFrequences(vector<uint8_t> const & data, uint64_t frequency[])
SimpleDenseCoding::SimpleDenseCoding(vector<uint8_t> const & data)
{
// This static initialization isn't thread safe prior to C++11.
static CodeTable codeTable;
uint64_t frequency[kAlphabetSize]; // Maps symbols to frequences.
GetFrequences(data, frequency);
CalcFrequences(data, frequency);
uint8_t symbols[kAlphabetSize]; // Maps ranks to symbols.
uint8_t rank[kAlphabetSize]; // Maps symbols to ranks.
@ -77,27 +84,22 @@ SimpleDenseCoding::SimpleDenseCoding(vector<uint8_t> const & data)
for (size_t r = 0; r < kAlphabetSize; ++r)
rank[symbols[r]] = r;
Code const * codeTable = GetCodeTable();
ASSERT(codeTable, ());
uint64_t bitLength = 0;
for (size_t symbol = 0; symbol < kAlphabetSize; ++symbol)
bitLength += static_cast<uint64_t>(frequency[symbol]) * codeTable[rank[symbol]].m_length;
bitLength += frequency[symbol] * codeTable.GetCode(rank[symbol]).m_length;
succinct::bit_vector_builder bitsBuilder;
bitsBuilder.reserve(bitLength);
vector<bool> indexBuilder(bitLength);
size_t pos = 0;
for (uint8_t symbol : data)
{
for (uint8_t symbol : data)
{
Code const & code = codeTable[rank[symbol]];
ASSERT_LESS(pos, bitLength, ());
indexBuilder[pos] = 1;
Code const & code = codeTable.GetCode(rank[symbol]);
ASSERT_LESS(pos, bitLength, ());
indexBuilder[pos] = 1;
bitsBuilder.append_bits(code.m_code, code.m_length);
pos += code.m_length;
}
bitsBuilder.append_bits(code.m_code, code.m_length);
pos += code.m_length;
}
ASSERT_EQUAL(pos, bitLength, ());

View file

@ -2,6 +2,9 @@
#include "coding/endianness.hpp"
#include "base/assert.hpp"
#include "base/macros.hpp"
#include "std/type_traits.hpp"
#include "3party/succinct/mappable_vector.hpp"
@ -12,67 +15,64 @@ namespace coding
template <typename T>
static T * Align8Ptr(T * ptr)
{
uint64_t value = (reinterpret_cast<uint64_t>(ptr) + 0x7) & 0xfffffffffffffff8;
uint64_t const value = (reinterpret_cast<uint64_t>(ptr) + 0x7) & 0xfffffffffffffff8;
return reinterpret_cast<T *>(value);
}
inline uint32_t NeedToAlign8(uint64_t written) { return 0x8 - (written & 0x7); }
inline uint32_t ToAlign8(uint64_t written) { return (0x8 - (written & 0x7)) & 0x7; }
class MapVisitor
{
public:
MapVisitor(uint8_t const * base) : m_base(base), m_cur(m_base) {}
explicit MapVisitor(uint8_t const * base) : m_base(base), m_cur(m_base) {}
template <typename T>
typename enable_if<!is_pod<T>::value, MapVisitor &>::type operator()(
T & val, char const * /* friendlyName */)
typename enable_if<!is_pod<T>::value, MapVisitor &>::type operator()(T & val,
char const * /* name */)
{
val.map(*this);
return *this;
}
template <typename T>
typename enable_if<is_pod<T>::value, MapVisitor &>::type operator()(
T & val, char const * /* friendlyName */)
typename enable_if<is_pod<T>::value, MapVisitor &>::type operator()(T & val,
char const * /* name */)
{
T const * valPtr = reinterpret_cast<T const *>(m_cur);
val = *valPtr;
m_cur += sizeof(T);
m_cur = Align8Ptr(m_cur);
m_cur = Align8Ptr(m_cur + sizeof(T));
return *this;
}
template <typename T>
MapVisitor & operator()(succinct::mapper::mappable_vector<T> & vec,
char const * /* friendlyName */)
MapVisitor & operator()(succinct::mapper::mappable_vector<T> & vec, char const * /* name */)
{
vec.clear();
(*this)(vec.m_size, "size");
vec.m_data = reinterpret_cast<const T *>(m_cur);
size_t const bytes = vec.m_size * sizeof(T);
m_cur += bytes;
m_cur = Align8Ptr(m_cur);
m_cur = Align8Ptr(m_cur + vec.m_size * sizeof(T));
return *this;
}
size_t BytesRead() const { return static_cast<size_t>(m_cur - m_base); }
uint64_t BytesRead() const { return static_cast<uint64_t>(m_cur - m_base); }
private:
uint8_t const * const m_base;
uint8_t const * m_cur;
DISALLOW_COPY_AND_MOVE(MapVisitor);
};
class ReverseMapVisitor
{
public:
ReverseMapVisitor(uint8_t * base) : m_base(base), m_cur(m_base) {}
explicit ReverseMapVisitor(uint8_t * base) : m_base(base), m_cur(m_base) {}
template <typename T>
typename enable_if<!is_pod<T>::value, ReverseMapVisitor &>::type operator()(
T & val, char const * /* friendlyName */)
T & val, char const * /* name */)
{
val.map(*this);
return *this;
@ -80,117 +80,122 @@ public:
template <typename T>
typename enable_if<is_pod<T>::value, ReverseMapVisitor &>::type operator()(
T & val, char const * /* friendlyName */)
T & val, char const * /* name */)
{
T * valPtr = reinterpret_cast<T *>(m_cur);
*valPtr = ReverseByteOrder(*valPtr);
val = *valPtr;
m_cur += sizeof(T);
m_cur = Align8Ptr(m_cur);
m_cur = Align8Ptr(m_cur + sizeof(T));
return *this;
}
template <typename T>
ReverseMapVisitor & operator()(succinct::mapper::mappable_vector<T> & vec,
char const * /* friendlyName */)
char const * /* name */)
{
vec.clear();
(*this)(vec.m_size, "size");
vec.m_data = reinterpret_cast<const T *>(m_cur);
for (auto const it = vec.begin(); it != vec.end(); ++it)
for (auto const it = vec.cbegin(); it != vec.cend(); ++it)
*it = ReverseByteOrder(*it);
size_t const bytes = vec.m_size * sizeof(T);
m_cur += bytes;
m_cur = Align8Ptr(m_cur);
m_cur = Align8Ptr(m_cur + vec.m_size * sizeof(T));
return *this;
}
size_t BytesRead() const { return static_cast<size_t>(m_cur - m_base); }
uint64_t BytesRead() const { return static_cast<uint64_t>(m_cur - m_base); }
private:
uint8_t * m_base;
uint8_t * const m_base;
uint8_t * m_cur;
DISALLOW_COPY_AND_MOVE(ReverseMapVisitor);
};
template <typename TWriter>
class FreezeVisitor
{
public:
FreezeVisitor(TWriter & writer) : m_writer(writer), m_written(0) {}
explicit FreezeVisitor(TWriter & writer) : m_writer(writer), m_bytesWritten(0) {}
template <typename T>
typename enable_if<!is_pod<T>::value, FreezeVisitor &>::type operator()(
T & val, char const * /* friendlyName */)
typename enable_if<!is_pod<T>::value, FreezeVisitor &>::type operator()(T & val,
char const * /* name */)
{
ASSERT(IsAligned(), ());
val.map(*this);
return *this;
}
template <typename T>
typename enable_if<is_pod<T>::value, FreezeVisitor &>::type operator()(
T & val, char const * /* friendlyName */)
typename enable_if<is_pod<T>::value, FreezeVisitor &>::type operator()(T & val,
char const * /* name */)
{
m_writer.Write(reinterpret_cast<void const *>(&val), sizeof(T));
m_written += sizeof(T);
ASSERT(IsAligned(), ());
m_writer.Write(&val, sizeof(T));
m_bytesWritten += sizeof(T);
WritePadding();
return *this;
}
template <typename T>
FreezeVisitor & operator()(succinct::mapper::mappable_vector<T> & vec,
char const * /* friendlyName */)
FreezeVisitor & operator()(succinct::mapper::mappable_vector<T> & vec, char const * /* name */)
{
ASSERT(IsAligned(), ());
(*this)(vec.m_size, "size");
size_t const bytes = static_cast<size_t>(vec.m_size * sizeof(T));
m_writer.Write(vec.m_data, static_cast<size_t>(bytes));
m_written += bytes;
m_writer.Write(vec.m_data, bytes);
m_bytesWritten += bytes;
WritePadding();
return *this;
}
size_t Written() const { return m_written; }
uint64_t BytesWritten() const { return m_bytesWritten; }
private:
bool IsAligned() const { return ToAlign8(m_writer.Pos()) == 0; }
void WritePadding()
{
uint32_t const padding = NeedToAlign8(m_written);
static uint64_t const zero = 0;
if (padding > 0 && padding < 8)
{
m_writer.Write(reinterpret_cast<void const *>(&zero), padding);
m_written += padding;
}
uint32_t const padding = ToAlign8(m_bytesWritten);
if (padding == 0)
return;
m_writer.Write(&zero, padding);
m_bytesWritten += padding;
}
TWriter & m_writer;
uint64_t m_written;
uint64_t m_bytesWritten;
DISALLOW_COPY_AND_MOVE(FreezeVisitor);
};
template <typename T>
size_t Map(T & value, uint8_t const * base, char const * friendlyName = "<TOP>")
uint64_t Map(T & value, uint8_t const * base, char const * name)
{
MapVisitor visitor(base);
visitor(value, friendlyName);
visitor(value, name);
return visitor.BytesRead();
}
template <typename T>
size_t ReverseMap(T & value, uint8_t * base, char const * friendlyName = "<TOP>")
uint64_t ReverseMap(T & value, uint8_t * base, char const * name)
{
ReverseMapVisitor visitor(base);
visitor(value, friendlyName);
visitor(value, name);
return visitor.BytesRead();
}
template <typename T, typename TWriter>
size_t Freeze(T & val, TWriter & writer, char const * friendlyName = "<TOP>")
uint64_t Freeze(T & val, TWriter & writer, char const * name)
{
FreezeVisitor<TWriter> visitor(writer);
visitor(val, friendlyName);
return visitor.Written();
visitor(val, name);
return visitor.BytesWritten();
}
} // namespace coding