forked from organicmaps/organicmaps
[search] Added FixedBitsDDVector container.
This commit is contained in:
parent
bae2445580
commit
83690aa414
7 changed files with 281 additions and 6 deletions
|
@ -1,11 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include "std/cstdint.hpp"
|
||||
#include "std/limits.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/logging.hpp"
|
||||
|
||||
#include "std/algorithm.hpp"
|
||||
#include "std/cstdint.hpp"
|
||||
#include "std/limits.hpp"
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
uint8_t const kByteMask = 0xFF;
|
||||
|
@ -39,7 +41,7 @@ public:
|
|||
// Writes n bits starting with the least significant bit.
|
||||
// They are written one byte at a time so endianness is of no concern.
|
||||
// All the other bits except for the first n must be set to zero.
|
||||
void Write(uint8_t bits, uint32_t n)
|
||||
void Write(uint8_t bits, uint8_t n)
|
||||
{
|
||||
if (n == 0)
|
||||
return;
|
||||
|
@ -68,6 +70,17 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
// Same as Write but accept up to 32 bits to write.
|
||||
void WriteAtMost32Bits(uint32_t bits, uint8_t n)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(n, 32, ());
|
||||
|
||||
uint8_t constexpr kMinBits = CHAR_BIT;
|
||||
Write(static_cast<uint8_t>(bits), min(n, kMinBits));
|
||||
if (n > kMinBits)
|
||||
WriteAtMost32Bits(bits >> kMinBits, n - kMinBits);
|
||||
}
|
||||
|
||||
private:
|
||||
// Writes up to CHAR_BIT-1 last bits if they have not been written yet
|
||||
// and pads them with zeros.
|
||||
|
@ -96,7 +109,7 @@ public:
|
|||
// The underlying m_src is supposed to be byte-aligned (which is the
|
||||
// case when it reads from the place that was written to using BitWriter).
|
||||
// Read may use one lookahead byte.
|
||||
uint8_t Read(uint32_t n)
|
||||
uint8_t Read(uint8_t n)
|
||||
{
|
||||
if (n == 0)
|
||||
return 0;
|
||||
|
|
|
@ -67,10 +67,12 @@ HEADERS += \
|
|||
file_sort.hpp \
|
||||
file_writer.hpp \
|
||||
file_writer_stream.hpp \
|
||||
fixed_bits_ddvector.hpp \
|
||||
hex.hpp \
|
||||
huffman.hpp \
|
||||
internal/file64_api.hpp \
|
||||
internal/file_data.hpp \
|
||||
internal/xmlparser.hpp \
|
||||
matrix_traversal.hpp \
|
||||
mmap_reader.hpp \
|
||||
multilang_utf8_string.hpp \
|
||||
|
@ -101,4 +103,3 @@ HEADERS += \
|
|||
writer.hpp \
|
||||
zip_creator.hpp \
|
||||
zip_reader.hpp \
|
||||
internal/xmlparser.hpp \
|
||||
|
|
|
@ -44,3 +44,21 @@ UNIT_TEST(BitStreams_Smoke)
|
|||
TEST_EQUAL(num, nums[i].first, (i));
|
||||
}
|
||||
}
|
||||
|
||||
UNIT_TEST(BitStreams_T1)
|
||||
{
|
||||
using TBuffer = vector<uint8_t>;
|
||||
using TWriter = MemWriter<TBuffer>;
|
||||
|
||||
TBuffer buf;
|
||||
{
|
||||
TWriter w(buf);
|
||||
BitWriter<TWriter> bits(w);
|
||||
|
||||
bits.Write(0, 3);
|
||||
bits.Write(3, 3);
|
||||
bits.Write(6, 3);
|
||||
}
|
||||
|
||||
TEST_EQUAL(buf.size(), 2, ());
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@ SOURCES += ../../testing/testingmain.cpp \
|
|||
file_data_test.cpp \
|
||||
file_sort_test.cpp \
|
||||
file_utils_test.cpp \
|
||||
fixed_bits_ddvector_test.cpp \
|
||||
hex_test.cpp \
|
||||
huffman_test.cpp \
|
||||
mem_file_reader_test.cpp \
|
||||
|
|
74
coding/coding_tests/fixed_bits_ddvector_test.cpp
Normal file
74
coding/coding_tests/fixed_bits_ddvector_test.cpp
Normal file
|
@ -0,0 +1,74 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "coding/fixed_bits_ddvector.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "std/initializer_list.hpp"
|
||||
#include "std/random.hpp"
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <size_t Bits> void TestWithData(vector<uint32_t> const & lst)
|
||||
{
|
||||
using TVector = FixedBitsDDVector<Bits, MemReader>;
|
||||
using TBuffer = vector<uint8_t>;
|
||||
using TWriter = MemWriter<TBuffer>;
|
||||
|
||||
TBuffer buf;
|
||||
{
|
||||
TWriter writer(buf);
|
||||
typename TVector::template Builder<TWriter> builder(writer);
|
||||
|
||||
uint32_t optCount = 0;
|
||||
uint32_t const optBound = (1 << Bits) - 1;
|
||||
|
||||
for (uint32_t v : lst)
|
||||
{
|
||||
if (v < optBound)
|
||||
++optCount;
|
||||
|
||||
builder.PushBack(v);
|
||||
}
|
||||
|
||||
pair<uint32_t, uint32_t> exp(optCount, lst.size());
|
||||
TEST_EQUAL(builder.GetCount(), exp, ());
|
||||
}
|
||||
|
||||
MemReader reader(buf.data(), buf.size());
|
||||
auto const vec = TVector::Create(reader);
|
||||
|
||||
size_t i = 0;
|
||||
for (uint32_t v : lst)
|
||||
TEST_EQUAL(vec->Get(i++), v, ());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
UNIT_TEST(FixedBitsDDVector_Smoke)
|
||||
{
|
||||
TestWithData<3>({0, 3, 6});
|
||||
TestWithData<3>({7, 20, 50});
|
||||
TestWithData<3>({1, 0, 4, 30, 5, 3, 6, 7, 2, 8, 0});
|
||||
}
|
||||
|
||||
UNIT_TEST(FixedBitsDDVector_Rand)
|
||||
{
|
||||
vector<uint32_t> v;
|
||||
|
||||
default_random_engine gen;
|
||||
uniform_int_distribution<uint32_t> distribution(0, 1000);
|
||||
|
||||
size_t constexpr kMaxCount = 1000;
|
||||
for (size_t i = 0; i < kMaxCount; ++i)
|
||||
v.push_back(distribution(gen));
|
||||
|
||||
TestWithData<3>(v);
|
||||
TestWithData<4>(v);
|
||||
TestWithData<5>(v);
|
||||
TestWithData<6>(v);
|
||||
TestWithData<7>(v);
|
||||
TestWithData<8>(v);
|
||||
TestWithData<9>(v);
|
||||
}
|
167
coding/fixed_bits_ddvector.hpp
Normal file
167
coding/fixed_bits_ddvector.hpp
Normal file
|
@ -0,0 +1,167 @@
|
|||
#pragma once
|
||||
|
||||
#include "bit_streams.hpp"
|
||||
#include "byte_stream.hpp"
|
||||
#include "dd_vector.hpp"
|
||||
#include "reader.hpp"
|
||||
#include "write_to_sink.hpp"
|
||||
|
||||
#include "std/algorithm.hpp"
|
||||
#include "std/unique_ptr.hpp"
|
||||
#include "std/vector.hpp"
|
||||
|
||||
|
||||
/// Disk driven vector for optimal storing small values with rare big values.
|
||||
/// Format:
|
||||
/// 4 bytes to store vector's size
|
||||
/// Buffer of ceil(Size * Bits / 8) bytes, e.g. vector of Bits-sized elements.
|
||||
/// - values in range [0, (1 << Bits) - 2] stored as is
|
||||
/// - value (1 << Bits) - 1 tells that actual value is stored in the exceptions table below.
|
||||
/// Buffer with exceptions table, e.g. vector of (index, value) pairs till the end of the reader,
|
||||
/// sorted by index parameter.
|
||||
/// Component works in little endian without any host conversions.
|
||||
|
||||
template
|
||||
<
|
||||
size_t Bits, /// number of fixed bits
|
||||
class TReader, /// reader with random offset read functions
|
||||
typename TSize = uint32_t, /// vector index type (platform independent)
|
||||
typename TValue = uint32_t /// vector value type (platform independent)
|
||||
>
|
||||
class FixedBitsDDVector
|
||||
{
|
||||
static_assert(is_unsigned<TSize>::value, "");
|
||||
static_assert(is_unsigned<TValue>::value, "");
|
||||
// 16 - is the maximum bits count to get all needed bits in random access within uint32_t.
|
||||
static_assert(Bits <= 16, "");
|
||||
|
||||
using TSelf = FixedBitsDDVector<Bits, TReader, TSize, TValue>;
|
||||
|
||||
struct IndexValue
|
||||
{
|
||||
TSize m_index;
|
||||
TValue m_value;
|
||||
bool operator<(IndexValue const & rhs) const { return m_index < rhs.m_index; }
|
||||
};
|
||||
|
||||
TReader m_bits;
|
||||
DDVector<IndexValue, TReader, TSize> m_vector;
|
||||
|
||||
#ifdef DEBUG
|
||||
TSize const m_size;
|
||||
#endif
|
||||
|
||||
using TBlock = uint32_t;
|
||||
|
||||
static uint64_t AlignBytesCount(uint64_t count)
|
||||
{
|
||||
return max(count, static_cast<uint64_t>(sizeof(TBlock)));
|
||||
}
|
||||
|
||||
static TBlock constexpr kMask = (1 << Bits) - 1;
|
||||
|
||||
TValue FindInVector(TSize index) const
|
||||
{
|
||||
auto const it = lower_bound(m_vector.begin(), m_vector.end(), IndexValue{index, 0});
|
||||
ASSERT(it != m_vector.end() && it->m_index == index, ());
|
||||
return it->m_value;
|
||||
}
|
||||
|
||||
FixedBitsDDVector(TReader const & bitsReader, TReader const & vecReader, TSize size)
|
||||
: m_bits(bitsReader)
|
||||
, m_vector(vecReader)
|
||||
#ifdef DEBUG
|
||||
, m_size(size)
|
||||
#endif
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
static unique_ptr<TSelf> Create(TReader const & reader)
|
||||
{
|
||||
TSize const size = ReadPrimitiveFromPos<TSize>(reader, 0);
|
||||
|
||||
uint64_t const off1 = sizeof(TSize);
|
||||
uint64_t const off2 = AlignBytesCount((size * Bits + CHAR_BIT - 1) / CHAR_BIT) + off1;
|
||||
return unique_ptr<TSelf>(new TSelf(reader.SubReader(off1, off2 - off1),
|
||||
reader.SubReader(off2, reader.Size() - off2),
|
||||
size));
|
||||
}
|
||||
|
||||
TValue Get(TSize index) const
|
||||
{
|
||||
ASSERT_LESS(index, m_size, ());
|
||||
uint64_t const bitsOffset = index * Bits;
|
||||
|
||||
uint64_t bytesOffset = bitsOffset / CHAR_BIT;
|
||||
size_t constexpr kBlockSize = sizeof(TBlock);
|
||||
if (bytesOffset + kBlockSize > m_bits.Size())
|
||||
bytesOffset = m_bits.Size() - kBlockSize;
|
||||
|
||||
TBlock v = ReadPrimitiveFromPos<TBlock>(m_bits, bytesOffset);
|
||||
v >>= (bitsOffset - bytesOffset * CHAR_BIT);
|
||||
v &= kMask;
|
||||
return (v == kMask ? FindInVector(index) : v);
|
||||
}
|
||||
|
||||
template <class TWriter> class Builder
|
||||
{
|
||||
using TData = vector<uint8_t>;
|
||||
using TempWriter = PushBackByteSink<TData>;
|
||||
using TBits = BitWriter<TempWriter>;
|
||||
|
||||
TData m_data;
|
||||
TempWriter m_writer;
|
||||
unique_ptr<TBits> m_bits;
|
||||
|
||||
vector<IndexValue> m_excepts;
|
||||
TSize m_count = 0;
|
||||
TSize m_optCount = 0;
|
||||
|
||||
TWriter & m_finalWriter;
|
||||
|
||||
public:
|
||||
explicit Builder(TWriter & writer)
|
||||
: m_writer(m_data), m_bits(new TBits(m_writer)), m_finalWriter(writer)
|
||||
{
|
||||
}
|
||||
|
||||
~Builder()
|
||||
{
|
||||
// Final serialization is in dtor only.
|
||||
// You can't do any intermediate flushes during building vector.
|
||||
|
||||
// Reset the bit stream first.
|
||||
m_bits.reset();
|
||||
|
||||
// Write size of vector.
|
||||
WriteToSink(m_finalWriter, m_count);
|
||||
|
||||
// Write bits vector, alignes at least to 4 bytes.
|
||||
m_data.resize(AlignBytesCount(m_data.size()));
|
||||
m_finalWriter.Write(m_data.data(), m_data.size());
|
||||
|
||||
// Write exceptions table.
|
||||
m_finalWriter.Write(m_excepts.data(), m_excepts.size() * sizeof(IndexValue));
|
||||
}
|
||||
|
||||
void PushBack(TValue v)
|
||||
{
|
||||
if (v >= kMask)
|
||||
{
|
||||
m_bits->WriteAtMost32Bits(kMask, Bits);
|
||||
m_excepts.push_back({m_count, v});
|
||||
}
|
||||
else
|
||||
{
|
||||
++m_optCount;
|
||||
m_bits->WriteAtMost32Bits(v, Bits);
|
||||
}
|
||||
|
||||
++m_count;
|
||||
}
|
||||
|
||||
/// @return (number of stored as-is elements, number of all elements)
|
||||
pair<TSize, TSize> GetCount() const { return make_pair(m_optCount, m_count); }
|
||||
};
|
||||
};
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <random>
|
||||
|
||||
using std::default_random_engine;
|
||||
using std::mt19937;
|
||||
using std::uniform_int_distribution;
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue