forked from organicmaps/organicmaps
[coding] [compressed_bit_vector] Convert uX -> uintX_t.
This commit is contained in:
parent
64a8008f44
commit
fdef7a4210
3 changed files with 195 additions and 199 deletions
|
@ -5,44 +5,44 @@
|
|||
#include "../../testing/testing.hpp"
|
||||
#include "../../base/pseudo_random.hpp"
|
||||
|
||||
u32 const NUMS_COUNT = 12345;
|
||||
uint32_t const NUMS_COUNT = 12345;
|
||||
|
||||
namespace
|
||||
{
|
||||
u64 GetRand64()
|
||||
uint64_t GetRand64()
|
||||
{
|
||||
static PseudoRNG32 g_rng;
|
||||
u64 result = g_rng.Generate();
|
||||
result ^= u64(g_rng.Generate()) << 32;
|
||||
uint64_t result = g_rng.Generate();
|
||||
result ^= uint64_t(g_rng.Generate()) << 32;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
UNIT_TEST(CompressedBitVector_Sparse)
|
||||
{
|
||||
vector<u32> posOnes;
|
||||
u32 sum = 0;
|
||||
for (u32 i = 0; i < NUMS_COUNT; ++i)
|
||||
vector<uint32_t> posOnes;
|
||||
uint32_t sum = 0;
|
||||
for (uint32_t i = 0; i < NUMS_COUNT; ++i)
|
||||
{
|
||||
u32 byteSize = GetRand64() % 2 + 1;
|
||||
u64 num = GetRand64() & ((u64(1) << (byteSize * 7)) - 1);
|
||||
uint32_t byteSize = GetRand64() % 2 + 1;
|
||||
uint64_t num = GetRand64() & ((uint64_t(1) << (byteSize * 7)) - 1);
|
||||
if (num == 0) num = 1;
|
||||
sum += num;
|
||||
posOnes.push_back(sum);
|
||||
}
|
||||
for (u32 j = 0; j < 5; ++j)
|
||||
for (uint32_t j = 0; j < 5; ++j)
|
||||
{
|
||||
if (j == 1) posOnes.insert(posOnes.begin(), 1, 0);
|
||||
if (j == 2) posOnes.clear();
|
||||
if (j == 3) posOnes.push_back(1);
|
||||
if (j == 4) { posOnes.clear(); posOnes.push_back(10); }
|
||||
for (u32 ienc = 0; ienc < 4; ++ienc)
|
||||
for (uint32_t ienc = 0; ienc < 4; ++ienc)
|
||||
{
|
||||
vector<u8> serialBitVector;
|
||||
MemWriter< vector<u8> > writer(serialBitVector);
|
||||
vector<uint8_t> serialBitVector;
|
||||
MemWriter< vector<uint8_t> > writer(serialBitVector);
|
||||
BuildCompressedBitVector(writer, posOnes, ienc);
|
||||
MemReader reader(serialBitVector.data(), serialBitVector.size());
|
||||
vector<u32> decPosOnes = DecodeCompressedBitVector(reader);
|
||||
vector<uint32_t> decPosOnes = DecodeCompressedBitVector(reader);
|
||||
TEST_EQUAL(posOnes, decPosOnes, ());
|
||||
}
|
||||
}
|
||||
|
@ -50,32 +50,32 @@ UNIT_TEST(CompressedBitVector_Sparse)
|
|||
|
||||
UNIT_TEST(CompressedBitVector_Dense)
|
||||
{
|
||||
vector<u32> posOnes;
|
||||
u32 prevPos = 0;
|
||||
u32 sum = 0;
|
||||
for (u32 i = 0; i < NUMS_COUNT; ++i)
|
||||
vector<uint32_t> posOnes;
|
||||
uint32_t prevPos = 0;
|
||||
uint32_t sum = 0;
|
||||
for (uint32_t i = 0; i < NUMS_COUNT; ++i)
|
||||
{
|
||||
u32 zeroesByteSize = GetRand64() % 2 + 1;
|
||||
u64 zeroesRangeSize = (GetRand64() & ((u64(1) << (zeroesByteSize * 7)) - 1)) + 1;
|
||||
uint32_t zeroesByteSize = GetRand64() % 2 + 1;
|
||||
uint64_t zeroesRangeSize = (GetRand64() & ((uint64_t(1) << (zeroesByteSize * 7)) - 1)) + 1;
|
||||
sum += zeroesRangeSize;
|
||||
u32 onesByteSize = GetRand64() % 1 + 1;
|
||||
u64 onesRangeSize = (GetRand64() & ((u64(1) << (onesByteSize * 7)) - 1)) + 1;
|
||||
for (u32 j = 0; j < onesRangeSize; ++j) posOnes.push_back(sum + j);
|
||||
uint32_t onesByteSize = GetRand64() % 1 + 1;
|
||||
uint64_t onesRangeSize = (GetRand64() & ((uint64_t(1) << (onesByteSize * 7)) - 1)) + 1;
|
||||
for (uint32_t j = 0; j < onesRangeSize; ++j) posOnes.push_back(sum + j);
|
||||
sum += onesRangeSize;
|
||||
}
|
||||
for (u32 j = 0; j < 5; ++j)
|
||||
for (uint32_t j = 0; j < 5; ++j)
|
||||
{
|
||||
if (j == 1) posOnes.insert(posOnes.begin(), 1, 0);
|
||||
if (j == 2) posOnes.clear();
|
||||
if (j == 3) posOnes.push_back(1);
|
||||
if (j == 4) { posOnes.clear(); posOnes.push_back(10); }
|
||||
for (u32 ienc = 0; ienc < 4; ++ienc)
|
||||
for (uint32_t ienc = 0; ienc < 4; ++ienc)
|
||||
{
|
||||
vector<u8> serialBitVector;
|
||||
MemWriter< vector<u8> > writer(serialBitVector);
|
||||
vector<uint8_t> serialBitVector;
|
||||
MemWriter< vector<uint8_t> > writer(serialBitVector);
|
||||
BuildCompressedBitVector(writer, posOnes, ienc);
|
||||
MemReader reader(serialBitVector.data(), serialBitVector.size());
|
||||
vector<u32> decPosOnes = DecodeCompressedBitVector(reader);
|
||||
vector<uint32_t> decPosOnes = DecodeCompressedBitVector(reader);
|
||||
TEST_EQUAL(posOnes, decPosOnes, ());
|
||||
}
|
||||
}
|
||||
|
@ -84,53 +84,53 @@ UNIT_TEST(CompressedBitVector_Dense)
|
|||
UNIT_TEST(BitVectors_And)
|
||||
{
|
||||
vector<bool> v1(NUMS_COUNT * 2, false), v2(NUMS_COUNT * 2, false);
|
||||
for (u32 i = 0; i < NUMS_COUNT; ++i)
|
||||
for (uint32_t i = 0; i < NUMS_COUNT; ++i)
|
||||
{
|
||||
v1[GetRand64() % v1.size()] = true;
|
||||
v2[GetRand64() % v2.size()] = true;
|
||||
}
|
||||
vector<u32> posOnes1, posOnes2, andPos;
|
||||
for (u32 i = 0; i < v1.size(); ++i)
|
||||
vector<uint32_t> posOnes1, posOnes2, andPos;
|
||||
for (uint32_t i = 0; i < v1.size(); ++i)
|
||||
{
|
||||
if (v1[i]) posOnes1.push_back(i);
|
||||
if (v2[i]) posOnes2.push_back(i);
|
||||
if (v1[i] && v2[i]) andPos.push_back(i);
|
||||
}
|
||||
vector<u32> actualAndPos = BitVectorsAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end());
|
||||
vector<uint32_t> actualAndPos = BitVectorsAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end());
|
||||
TEST_EQUAL(andPos, actualAndPos, ());
|
||||
}
|
||||
|
||||
UNIT_TEST(BitVectors_Or)
|
||||
{
|
||||
vector<bool> v1(NUMS_COUNT * 2, false), v2(NUMS_COUNT * 2, false);
|
||||
for (u32 i = 0; i < NUMS_COUNT; ++i)
|
||||
for (uint32_t i = 0; i < NUMS_COUNT; ++i)
|
||||
{
|
||||
v1[GetRand64() % v1.size()] = true;
|
||||
v2[GetRand64() % v2.size()] = true;
|
||||
}
|
||||
vector<u32> posOnes1, posOnes2, orPos;
|
||||
for (u32 i = 0; i < v1.size(); ++i)
|
||||
vector<uint32_t> posOnes1, posOnes2, orPos;
|
||||
for (uint32_t i = 0; i < v1.size(); ++i)
|
||||
{
|
||||
if (v1[i]) posOnes1.push_back(i);
|
||||
if (v2[i]) posOnes2.push_back(i);
|
||||
if (v1[i] || v2[i]) orPos.push_back(i);
|
||||
}
|
||||
vector<u32> actualOrPos = BitVectorsOr(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end());
|
||||
vector<uint32_t> actualOrPos = BitVectorsOr(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end());
|
||||
TEST_EQUAL(orPos, actualOrPos, ());
|
||||
}
|
||||
|
||||
UNIT_TEST(BitVectors_SubAnd)
|
||||
{
|
||||
vector<bool> v1(NUMS_COUNT * 2, false);
|
||||
u64 numV1Ones = 0;
|
||||
for (u32 i = 0; i < v1.size(); ++i) v1[i] = (GetRand64() % 2) == 0;
|
||||
vector<u32> posOnes1;
|
||||
for (u32 i = 0; i < v1.size(); ++i) if (v1[i]) posOnes1.push_back(i);
|
||||
uint64_t numV1Ones = 0;
|
||||
for (uint32_t i = 0; i < v1.size(); ++i) v1[i] = (GetRand64() % 2) == 0;
|
||||
vector<uint32_t> posOnes1;
|
||||
for (uint32_t i = 0; i < v1.size(); ++i) if (v1[i]) posOnes1.push_back(i);
|
||||
vector<bool> v2(posOnes1.size(), false);
|
||||
for (u32 i = 0; i < v2.size(); ++i) v2[i] = (GetRand64() % 2) == 0;
|
||||
vector<u32> posOnes2, subandPos;
|
||||
for (u32 i = 0; i < v2.size(); ++i) if (v2[i]) posOnes2.push_back(i);
|
||||
for (u32 i = 0, j = 0; i < v1.size(); ++i)
|
||||
for (uint32_t i = 0; i < v2.size(); ++i) v2[i] = (GetRand64() % 2) == 0;
|
||||
vector<uint32_t> posOnes2, subandPos;
|
||||
for (uint32_t i = 0; i < v2.size(); ++i) if (v2[i]) posOnes2.push_back(i);
|
||||
for (uint32_t i = 0, j = 0; i < v1.size(); ++i)
|
||||
{
|
||||
if (v1[i])
|
||||
{
|
||||
|
@ -138,6 +138,6 @@ UNIT_TEST(BitVectors_SubAnd)
|
|||
++j;
|
||||
}
|
||||
}
|
||||
vector<u32> actualSubandPos = BitVectorsSubAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end());
|
||||
vector<uint32_t> actualSubandPos = BitVectorsSubAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end());
|
||||
TEST_EQUAL(subandPos, actualSubandPos, ());
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#include "../base/bits.hpp"
|
||||
|
||||
namespace {
|
||||
void VarintEncode(vector<u8> & dst, u64 n)
|
||||
void VarintEncode(vector<uint8_t> & dst, uint64_t n)
|
||||
{
|
||||
if (n == 0)
|
||||
{
|
||||
|
@ -18,14 +18,14 @@ namespace {
|
|||
{
|
||||
while (n != 0)
|
||||
{
|
||||
u8 b = n & 0x7F;
|
||||
uint8_t b = n & 0x7F;
|
||||
n >>= 7;
|
||||
b |= n == 0 ? 0 : 0x80;
|
||||
dst.push_back(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
void VarintEncode(Writer & writer, u64 n)
|
||||
void VarintEncode(Writer & writer, uint64_t n)
|
||||
{
|
||||
if (n == 0)
|
||||
{
|
||||
|
@ -35,38 +35,38 @@ namespace {
|
|||
{
|
||||
while (n != 0)
|
||||
{
|
||||
u8 b = n & 0x7F;
|
||||
uint8_t b = n & 0x7F;
|
||||
n >>= 7;
|
||||
b |= n == 0 ? 0 : 0x80;
|
||||
writer.Write(&b, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
u64 VarintDecode(void * src, u64 & offset)
|
||||
uint64_t VarintDecode(void * src, uint64_t & offset)
|
||||
{
|
||||
u64 n = 0;
|
||||
uint64_t n = 0;
|
||||
int shift = 0;
|
||||
while (1)
|
||||
{
|
||||
u8 b = *(((u8*)src) + offset);
|
||||
uint8_t b = *(((uint8_t*)src) + offset);
|
||||
CHECK_LESS_OR_EQUAL(shift, 56, ());
|
||||
n |= u64(b & 0x7F) << shift;
|
||||
n |= uint64_t(b & 0x7F) << shift;
|
||||
++offset;
|
||||
if ((b & 0x80) == 0) break;
|
||||
shift += 7;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
u64 VarintDecode(Reader & reader, u64 & offset)
|
||||
uint64_t VarintDecode(Reader & reader, uint64_t & offset)
|
||||
{
|
||||
u64 n = 0;
|
||||
uint64_t n = 0;
|
||||
int shift = 0;
|
||||
while (1)
|
||||
{
|
||||
u8 b = 0;
|
||||
uint8_t b = 0;
|
||||
reader.Read(offset, &b, 1);
|
||||
CHECK_LESS_OR_EQUAL(shift, 56, ());
|
||||
n |= u64(b & 0x7F) << shift;
|
||||
n |= uint64_t(b & 0x7F) << shift;
|
||||
++offset;
|
||||
if ((b & 0x80) == 0) break;
|
||||
shift += 7;
|
||||
|
@ -74,10 +74,10 @@ namespace {
|
|||
return n;
|
||||
}
|
||||
|
||||
vector<u32> SerialFreqsToDistrTable(Reader & reader, u64 & decodeOffset, u64 cnt)
|
||||
vector<uint32_t> SerialFreqsToDistrTable(Reader & reader, uint64_t & decodeOffset, uint64_t cnt)
|
||||
{
|
||||
vector<u32> freqs;
|
||||
for (u64 i = 0; i < cnt; ++i) freqs.push_back(VarintDecode(reader, decodeOffset));
|
||||
vector<uint32_t> freqs;
|
||||
for (uint64_t i = 0; i < cnt; ++i) freqs.push_back(VarintDecode(reader, decodeOffset));
|
||||
return FreqsToDistrTable(freqs);
|
||||
}
|
||||
}
|
||||
|
@ -88,12 +88,12 @@ public:
|
|||
BitWriter(Writer & writer)
|
||||
: m_writer(writer), m_lastByte(0), m_size(0) {}
|
||||
~BitWriter() { if (m_size % 8 > 0) m_writer.Write(&m_lastByte, 1); }
|
||||
u64 NumBitsWritten() const { return m_size; }
|
||||
void Write(u64 bits, u32 writeSize)
|
||||
uint64_t NumBitsWritten() const { return m_size; }
|
||||
void Write(uint64_t bits, uint32_t writeSize)
|
||||
{
|
||||
if (writeSize == 0) return;
|
||||
m_totalBits += writeSize;
|
||||
u32 remSize = m_size % 8;
|
||||
uint32_t remSize = m_size % 8;
|
||||
CHECK_LESS_OR_EQUAL(writeSize, 64 - remSize, ());
|
||||
if (remSize > 0)
|
||||
{
|
||||
|
@ -102,16 +102,16 @@ public:
|
|||
writeSize += remSize;
|
||||
m_size -= remSize;
|
||||
}
|
||||
u32 writeBytesSize = writeSize / 8;
|
||||
uint32_t writeBytesSize = writeSize / 8;
|
||||
m_writer.Write(&bits, writeBytesSize);
|
||||
m_lastByte = (bits >> (writeBytesSize * 8)) & ((1 << (writeSize % 8)) - 1);
|
||||
m_size += writeSize;
|
||||
}
|
||||
private:
|
||||
Writer & m_writer;
|
||||
u8 m_lastByte;
|
||||
u64 m_size;
|
||||
u64 m_totalBits;
|
||||
uint8_t m_lastByte;
|
||||
uint64_t m_size;
|
||||
uint64_t m_totalBits;
|
||||
};
|
||||
|
||||
class BitReader {
|
||||
|
@ -119,27 +119,27 @@ public:
|
|||
BitReader(Reader & reader)
|
||||
: m_reader(reader), m_serialCur(0), m_serialEnd(reader.Size()),
|
||||
m_bits(0), m_bitsSize(0), m_totalBitsRead(0) {}
|
||||
u64 NumBitsRead() const { return m_totalBitsRead; }
|
||||
u64 Read(u32 readSize)
|
||||
uint64_t NumBitsRead() const { return m_totalBitsRead; }
|
||||
uint64_t Read(uint32_t readSize)
|
||||
{
|
||||
m_totalBitsRead += readSize;
|
||||
if (readSize == 0) return 0;
|
||||
CHECK_LESS_OR_EQUAL(readSize, 64, ());
|
||||
// First read, sets bits that are in the m_bits buffer.
|
||||
u32 firstReadSize = readSize <= m_bitsSize ? readSize : m_bitsSize;
|
||||
u64 result = m_bits & (~u64(0) >> (64 - firstReadSize));
|
||||
uint32_t firstReadSize = readSize <= m_bitsSize ? readSize : m_bitsSize;
|
||||
uint64_t result = m_bits & (~uint64_t(0) >> (64 - firstReadSize));
|
||||
m_bits >>= firstReadSize;
|
||||
m_bitsSize -= firstReadSize;
|
||||
readSize -= firstReadSize;
|
||||
// Second read, does an extra read using m_reader.
|
||||
if (readSize > 0)
|
||||
{
|
||||
u32 readByteSize = m_serialCur + sizeof(m_bits) <= m_serialEnd ? sizeof(m_bits) : m_serialEnd - m_serialCur;
|
||||
uint32_t readByteSize = m_serialCur + sizeof(m_bits) <= m_serialEnd ? sizeof(m_bits) : m_serialEnd - m_serialCur;
|
||||
m_reader.Read(m_serialCur, &m_bits, readByteSize);
|
||||
m_serialCur += readByteSize;
|
||||
m_bitsSize += readByteSize * 8;
|
||||
if (readSize > m_bitsSize) CHECK_LESS_OR_EQUAL(readSize, m_bitsSize, ());
|
||||
result |= (m_bits & (~u64(0) >> (64 - readSize))) << firstReadSize;
|
||||
result |= (m_bits & (~uint64_t(0) >> (64 - readSize))) << firstReadSize;
|
||||
m_bits >>= readSize;
|
||||
m_bitsSize -= readSize;
|
||||
readSize = 0;
|
||||
|
@ -148,27 +148,27 @@ public:
|
|||
}
|
||||
private:
|
||||
Reader & m_reader;
|
||||
u64 m_serialCur;
|
||||
u64 m_serialEnd;
|
||||
u64 m_bits;
|
||||
u32 m_bitsSize;
|
||||
u64 m_totalBitsRead;
|
||||
uint64_t m_serialCur;
|
||||
uint64_t m_serialEnd;
|
||||
uint64_t m_bits;
|
||||
uint32_t m_bitsSize;
|
||||
uint64_t m_totalBitsRead;
|
||||
};
|
||||
|
||||
void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int chosenEncType)
|
||||
void BuildCompressedBitVector(Writer & writer, vector<uint32_t> const & posOnes, int chosenEncType)
|
||||
{
|
||||
u32 const BLOCK_SIZE = 7;
|
||||
uint32_t const BLOCK_SIZE = 7;
|
||||
// First stage of compression is analysis run through data ones.
|
||||
u64 numBytesDiffsEncVint = 0, numBytesRangesEncVint = 0, numBitsDiffsEncArith = 0, numBitsRangesEncArith = 0;
|
||||
uint64_t numBytesDiffsEncVint = 0, numBytesRangesEncVint = 0, numBitsDiffsEncArith = 0, numBitsRangesEncArith = 0;
|
||||
int64_t prevOnePos = -1;
|
||||
u64 onesRangeLen = 0;
|
||||
vector<u32> diffsSizesFreqs(65, 0), ranges0SizesFreqs(65, 0), ranges1SizesFreqs(65, 0);
|
||||
for (u32 i = 0; i < posOnes.size(); ++i)
|
||||
uint64_t onesRangeLen = 0;
|
||||
vector<uint32_t> diffsSizesFreqs(65, 0), ranges0SizesFreqs(65, 0), ranges1SizesFreqs(65, 0);
|
||||
for (uint32_t i = 0; i < posOnes.size(); ++i)
|
||||
{
|
||||
CHECK_LESS(prevOnePos, posOnes[i], ());
|
||||
// Accumulate size of diff encoding.
|
||||
u64 diff = posOnes[i] - prevOnePos;
|
||||
u32 diffBitsize = bits::NumUsedBits(diff - 1);
|
||||
uint64_t diff = posOnes[i] - prevOnePos;
|
||||
uint32_t diffBitsize = bits::NumUsedBits(diff - 1);
|
||||
numBytesDiffsEncVint += (diffBitsize + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
numBitsDiffsEncArith += diffBitsize > 0 ? diffBitsize - 1 : 0;
|
||||
++diffsSizesFreqs[diffBitsize];
|
||||
|
@ -178,14 +178,14 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
if (onesRangeLen > 0)
|
||||
{
|
||||
// Accumulate size of ones-range encoding.
|
||||
u32 onesRangeLenBitsize = bits::NumUsedBits(onesRangeLen - 1);
|
||||
uint32_t onesRangeLenBitsize = bits::NumUsedBits(onesRangeLen - 1);
|
||||
numBytesRangesEncVint += (onesRangeLenBitsize + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
numBitsRangesEncArith += onesRangeLenBitsize > 0 ? onesRangeLenBitsize - 1 : 0;
|
||||
++ranges1SizesFreqs[onesRangeLenBitsize];
|
||||
onesRangeLen = 0;
|
||||
}
|
||||
// Accumulate size of zeros-range encoding.
|
||||
u32 zeros_range_len_bitsize = bits::NumUsedBits(posOnes[i] - prevOnePos - 2);
|
||||
uint32_t zeros_range_len_bitsize = bits::NumUsedBits(posOnes[i] - prevOnePos - 2);
|
||||
numBytesRangesEncVint += (zeros_range_len_bitsize + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
numBitsRangesEncArith += zeros_range_len_bitsize > 0 ? zeros_range_len_bitsize - 1 : 0;
|
||||
++ranges0SizesFreqs[zeros_range_len_bitsize];
|
||||
|
@ -196,21 +196,21 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
// Accumulate size of remaining ones-range encoding.
|
||||
if (onesRangeLen > 0)
|
||||
{
|
||||
u32 onesRangeLenBitsize = bits::NumUsedBits(onesRangeLen - 1);
|
||||
uint32_t onesRangeLenBitsize = bits::NumUsedBits(onesRangeLen - 1);
|
||||
numBytesRangesEncVint += (onesRangeLenBitsize + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
numBitsRangesEncArith = onesRangeLenBitsize > 0 ? onesRangeLenBitsize - 1 : 0;
|
||||
++ranges1SizesFreqs[onesRangeLenBitsize];
|
||||
onesRangeLen = 0;
|
||||
}
|
||||
// Compute arithmetic encoding size.
|
||||
u64 diffsSizesTotalFreq = 0, ranges0_sizes_total_freq = 0, ranges1SizesTotalFreq = 0;
|
||||
for (u32 i = 0; i < diffsSizesFreqs.size(); ++i) diffsSizesTotalFreq += diffsSizesFreqs[i];
|
||||
for (u32 i = 0; i < ranges0SizesFreqs.size(); ++i) ranges0_sizes_total_freq += ranges0SizesFreqs[i];
|
||||
for (u32 i = 0; i < ranges1SizesFreqs.size(); ++i) ranges1SizesTotalFreq += ranges1SizesFreqs[i];
|
||||
uint64_t diffsSizesTotalFreq = 0, ranges0_sizes_total_freq = 0, ranges1SizesTotalFreq = 0;
|
||||
for (uint32_t i = 0; i < diffsSizesFreqs.size(); ++i) diffsSizesTotalFreq += diffsSizesFreqs[i];
|
||||
for (uint32_t i = 0; i < ranges0SizesFreqs.size(); ++i) ranges0_sizes_total_freq += ranges0SizesFreqs[i];
|
||||
for (uint32_t i = 0; i < ranges1SizesFreqs.size(); ++i) ranges1SizesTotalFreq += ranges1SizesFreqs[i];
|
||||
// Compute number of bits for arith encoded diffs sizes.
|
||||
double numSizesBitsDiffsEncArith = 0;
|
||||
u32 nonzeroDiffsSizesFreqsEnd = 0;
|
||||
for (u32 i = 0; i < diffsSizesFreqs.size(); ++i)
|
||||
uint32_t nonzeroDiffsSizesFreqsEnd = 0;
|
||||
for (uint32_t i = 0; i < diffsSizesFreqs.size(); ++i)
|
||||
{
|
||||
if (diffsSizesFreqs[i] > 0)
|
||||
{
|
||||
|
@ -219,13 +219,13 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
nonzeroDiffsSizesFreqsEnd = i + 1;
|
||||
}
|
||||
}
|
||||
vector<u8> diffsSizesFreqsSerial;
|
||||
for (u32 i = 0; i < nonzeroDiffsSizesFreqsEnd; ++i) VarintEncode(diffsSizesFreqsSerial, diffsSizesFreqs[i]);
|
||||
u64 numBytesDiffsEncArith = 4 + diffsSizesFreqsSerial.size() + (u64(numSizesBitsDiffsEncArith * diffsSizesTotalFreq + 0.999) + 7) / 8 + (numBitsDiffsEncArith + 7) /8;
|
||||
vector<uint8_t> diffsSizesFreqsSerial;
|
||||
for (uint32_t i = 0; i < nonzeroDiffsSizesFreqsEnd; ++i) VarintEncode(diffsSizesFreqsSerial, diffsSizesFreqs[i]);
|
||||
uint64_t numBytesDiffsEncArith = 4 + diffsSizesFreqsSerial.size() + (uint64_t(numSizesBitsDiffsEncArith * diffsSizesTotalFreq + 0.999) + 7) / 8 + (numBitsDiffsEncArith + 7) /8;
|
||||
// Compute number of bits for arith encoded ranges sizes.
|
||||
double numSizesBitsRanges0EncArith = 0;
|
||||
u32 nonzeroRanges0SizesFreqsEnd = 0;
|
||||
for (u32 i = 0; i < ranges0SizesFreqs.size(); ++i)
|
||||
uint32_t nonzeroRanges0SizesFreqsEnd = 0;
|
||||
for (uint32_t i = 0; i < ranges0SizesFreqs.size(); ++i)
|
||||
{
|
||||
if (ranges0SizesFreqs[i] > 0)
|
||||
{
|
||||
|
@ -235,8 +235,8 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
}
|
||||
}
|
||||
double numSizesBitsRanges1EncArith = 0;
|
||||
u32 nonzeroRanges1SizesFreqsEnd = 0;
|
||||
for (u32 i = 0; i < ranges1SizesFreqs.size(); ++i)
|
||||
uint32_t nonzeroRanges1SizesFreqsEnd = 0;
|
||||
for (uint32_t i = 0; i < ranges1SizesFreqs.size(); ++i)
|
||||
{
|
||||
if (ranges1SizesFreqs[i] > 0)
|
||||
{
|
||||
|
@ -245,16 +245,16 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
nonzeroRanges1SizesFreqsEnd = i + 1;
|
||||
}
|
||||
}
|
||||
vector<u8> ranges0SizesFreqsSerial, ranges1SizesFreqsSerial;
|
||||
for (u32 i = 0; i < nonzeroRanges0SizesFreqsEnd; ++i) VarintEncode(ranges0SizesFreqsSerial, ranges0SizesFreqs[i]);
|
||||
for (u32 i = 0; i < nonzeroRanges1SizesFreqsEnd; ++i) VarintEncode(ranges1SizesFreqsSerial, ranges1SizesFreqs[i]);
|
||||
u64 numBytesRangesEncArith = 4 + ranges0SizesFreqsSerial.size() + ranges1SizesFreqsSerial.size() +
|
||||
(u64(numSizesBitsRanges0EncArith * ranges0_sizes_total_freq + 0.999) + 7) / 8 + (u64(numSizesBitsRanges1EncArith * ranges1SizesTotalFreq + 0.999) + 7) / 8 +
|
||||
vector<uint8_t> ranges0SizesFreqsSerial, ranges1SizesFreqsSerial;
|
||||
for (uint32_t i = 0; i < nonzeroRanges0SizesFreqsEnd; ++i) VarintEncode(ranges0SizesFreqsSerial, ranges0SizesFreqs[i]);
|
||||
for (uint32_t i = 0; i < nonzeroRanges1SizesFreqsEnd; ++i) VarintEncode(ranges1SizesFreqsSerial, ranges1SizesFreqs[i]);
|
||||
uint64_t numBytesRangesEncArith = 4 + ranges0SizesFreqsSerial.size() + ranges1SizesFreqsSerial.size() +
|
||||
(uint64_t(numSizesBitsRanges0EncArith * ranges0_sizes_total_freq + 0.999) + 7) / 8 + (uint64_t(numSizesBitsRanges1EncArith * ranges1SizesTotalFreq + 0.999) + 7) / 8 +
|
||||
(numBitsRangesEncArith + 7) / 8;
|
||||
|
||||
// Find minimum among 4 types of encoding.
|
||||
vector<u64> numBytesPerEnc = {numBytesDiffsEncVint, numBytesRangesEncVint, numBytesDiffsEncArith, numBytesRangesEncArith};
|
||||
u32 encType = 0;
|
||||
vector<uint64_t> numBytesPerEnc = {numBytesDiffsEncVint, numBytesRangesEncVint, numBytesDiffsEncArith, numBytesRangesEncArith};
|
||||
uint32_t encType = 0;
|
||||
if (chosenEncType != -1) { CHECK(0 <= chosenEncType && chosenEncType <= 3, ()); encType = chosenEncType; }
|
||||
else if (numBytesPerEnc[0] <= numBytesPerEnc[1] && numBytesPerEnc[0] <= numBytesPerEnc[2] && numBytesPerEnc[0] <= numBytesPerEnc[3]) encType = 0;
|
||||
else if (numBytesPerEnc[1] <= numBytesPerEnc[0] && numBytesPerEnc[1] <= numBytesPerEnc[2] && numBytesPerEnc[1] <= numBytesPerEnc[3]) encType = 1;
|
||||
|
@ -277,7 +277,7 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
VarintEncode(writer, encType + (0 << 2) + ((posOnes[0] - prevOnePos - 1) << 3));
|
||||
prevOnePos = posOnes[0];
|
||||
}
|
||||
for (u32 i = 1; i < posOnes.size(); ++i)
|
||||
for (uint32_t i = 1; i < posOnes.size(); ++i)
|
||||
{
|
||||
CHECK_GREATER(posOnes[i], prevOnePos, ());
|
||||
// Encode one's pos (diff - 1).
|
||||
|
@ -293,9 +293,9 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
VarintEncode(writer, encType + (nonzeroDiffsSizesFreqsEnd << 2));
|
||||
// Encode freqs table.
|
||||
writer.Write(diffsSizesFreqsSerial.data(), diffsSizesFreqsSerial.size());
|
||||
u64 tmpOffset = 0;
|
||||
uint64_t tmpOffset = 0;
|
||||
MemReader diffsSizesFreqsSerialReader(diffsSizesFreqsSerial.data(), diffsSizesFreqsSerial.size());
|
||||
vector<u32> distrTable = SerialFreqsToDistrTable(
|
||||
vector<uint32_t> distrTable = SerialFreqsToDistrTable(
|
||||
diffsSizesFreqsSerialReader, tmpOffset, nonzeroDiffsSizesFreqsEnd
|
||||
);
|
||||
|
||||
|
@ -303,16 +303,16 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
// First stage. Encode all bits sizes of all diffs using ArithmeticEncoder.
|
||||
ArithmeticEncoder arithEnc(distrTable);
|
||||
int64_t prevOnePos = -1;
|
||||
u64 cntElements = 0;
|
||||
for (u64 i = 0; i < posOnes.size(); ++i)
|
||||
uint64_t cntElements = 0;
|
||||
for (uint64_t i = 0; i < posOnes.size(); ++i)
|
||||
{
|
||||
CHECK_GREATER(posOnes[i], prevOnePos, ());
|
||||
u32 bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 1);
|
||||
uint32_t bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 1);
|
||||
arithEnc.Encode(bitsUsed);
|
||||
++cntElements;
|
||||
prevOnePos = posOnes[i];
|
||||
}
|
||||
vector<u8> serialSizesEnc = arithEnc.Finalize();
|
||||
vector<uint8_t> serialSizesEnc = arithEnc.Finalize();
|
||||
// Store number of compressed elements.
|
||||
VarintEncode(writer, cntElements);
|
||||
// Store compressed size of encoded sizes.
|
||||
|
@ -324,14 +324,14 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
// Second Stage. Encode all bits of all diffs using BitWriter.
|
||||
BitWriter bitWriter(writer);
|
||||
int64_t prevOnePos = -1;
|
||||
u64 totalReadBits = 0;
|
||||
u64 totalReadCnts = 0;
|
||||
for (u64 i = 0; i < posOnes.size(); ++i)
|
||||
uint64_t totalReadBits = 0;
|
||||
uint64_t totalReadCnts = 0;
|
||||
for (uint64_t i = 0; i < posOnes.size(); ++i)
|
||||
{
|
||||
CHECK_GREATER(posOnes[i], prevOnePos, ());
|
||||
// Encode one's pos (diff - 1).
|
||||
u64 diff = posOnes[i] - prevOnePos - 1;
|
||||
u32 bitsUsed = bits::NumUsedBits(diff);
|
||||
uint64_t diff = posOnes[i] - prevOnePos - 1;
|
||||
uint32_t bitsUsed = bits::NumUsedBits(diff);
|
||||
if (bitsUsed > 1)
|
||||
{
|
||||
// Most significant bit is always 1 for non-zero diffs, so don't store it.
|
||||
|
@ -353,8 +353,8 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
// Encode encoding type plus flag if first is 1.
|
||||
VarintEncode(writer, encType + ((isFirstOne ? 1 : 0) << 2));
|
||||
int64_t prevOnePos = -1;
|
||||
u64 onesRangeLen = 0;
|
||||
for (u32 i = 0; i < posOnes.size(); ++i)
|
||||
uint64_t onesRangeLen = 0;
|
||||
for (uint32_t i = 0; i < posOnes.size(); ++i)
|
||||
{
|
||||
CHECK_GREATER(posOnes[i], prevOnePos, ());
|
||||
if (posOnes[i] - prevOnePos > 1)
|
||||
|
@ -391,14 +391,14 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
writer.Write(ranges0SizesFreqsSerial.data(), ranges0SizesFreqsSerial.size());
|
||||
writer.Write(ranges1SizesFreqsSerial.data(), ranges1SizesFreqsSerial.size());
|
||||
// Create distr tables.
|
||||
u64 tmpOffset = 0;
|
||||
uint64_t tmpOffset = 0;
|
||||
MemReader ranges0SizesFreqsSerialReader(ranges0SizesFreqsSerial.data(), ranges0SizesFreqsSerial.size());
|
||||
vector<u32> distrTable0 = SerialFreqsToDistrTable(
|
||||
vector<uint32_t> distrTable0 = SerialFreqsToDistrTable(
|
||||
ranges0SizesFreqsSerialReader, tmpOffset, nonzeroRanges0SizesFreqsEnd
|
||||
);
|
||||
tmpOffset = 0;
|
||||
MemReader ranges1SizesFreqsSerialReader(ranges1SizesFreqsSerial.data(), ranges1SizesFreqsSerial.size());
|
||||
vector<u32> distrTable1 = SerialFreqsToDistrTable(
|
||||
vector<uint32_t> distrTable1 = SerialFreqsToDistrTable(
|
||||
ranges1SizesFreqsSerialReader, tmpOffset, nonzeroRanges1SizesFreqsEnd
|
||||
);
|
||||
|
||||
|
@ -408,10 +408,10 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
// Encode number of compressed elements.
|
||||
ArithmeticEncoder arith_enc0(distrTable0), arith_enc1(distrTable1);
|
||||
int64_t prevOnePos = -1;
|
||||
u64 onesRangeLen = 0;
|
||||
uint64_t onesRangeLen = 0;
|
||||
// Total number of compressed elements (ranges sizes).
|
||||
u64 cntElements0 = 0, cntElements1 = 0;
|
||||
for (u32 i = 0; i < posOnes.size(); ++i)
|
||||
uint64_t cntElements0 = 0, cntElements1 = 0;
|
||||
for (uint32_t i = 0; i < posOnes.size(); ++i)
|
||||
{
|
||||
CHECK_GREATER(posOnes[i], prevOnePos, ());
|
||||
if (posOnes[i] - prevOnePos > 1)
|
||||
|
@ -419,13 +419,13 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
if (onesRangeLen > 0)
|
||||
{
|
||||
// Encode ones range bits size.
|
||||
u32 bitsUsed = bits::NumUsedBits(onesRangeLen - 1);
|
||||
uint32_t bitsUsed = bits::NumUsedBits(onesRangeLen - 1);
|
||||
arith_enc1.Encode(bitsUsed);
|
||||
++cntElements1;
|
||||
onesRangeLen = 0;
|
||||
}
|
||||
// Encode zeros range bits size - 1.
|
||||
u32 bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 2);
|
||||
uint32_t bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 2);
|
||||
arith_enc0.Encode(bitsUsed);
|
||||
++cntElements0;
|
||||
}
|
||||
|
@ -435,12 +435,12 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
if (onesRangeLen > 0)
|
||||
{
|
||||
// Encode last ones range size - 1.
|
||||
u32 bitsUsed = bits::NumUsedBits(onesRangeLen - 1);
|
||||
uint32_t bitsUsed = bits::NumUsedBits(onesRangeLen - 1);
|
||||
arith_enc1.Encode(bitsUsed);
|
||||
++cntElements1;
|
||||
onesRangeLen = 0;
|
||||
}
|
||||
vector<u8> serial0SizesEnc = arith_enc0.Finalize(), serial1SizesEnc = arith_enc1.Finalize();
|
||||
vector<uint8_t> serial0SizesEnc = arith_enc0.Finalize(), serial1SizesEnc = arith_enc1.Finalize();
|
||||
// Store number of compressed elements.
|
||||
VarintEncode(writer, cntElements0);
|
||||
VarintEncode(writer, cntElements1);
|
||||
|
@ -456,8 +456,8 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
// Second stage, encode all ranges bits using BitWriter.
|
||||
BitWriter bitWriter(writer);
|
||||
int64_t prevOnePos = -1;
|
||||
u64 onesRangeLen = 0;
|
||||
for (u32 i = 0; i < posOnes.size(); ++i)
|
||||
uint64_t onesRangeLen = 0;
|
||||
for (uint32_t i = 0; i < posOnes.size(); ++i)
|
||||
{
|
||||
CHECK_GREATER(posOnes[i], prevOnePos, ());
|
||||
if (posOnes[i] - prevOnePos > 1)
|
||||
|
@ -465,7 +465,7 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
if (onesRangeLen > 0)
|
||||
{
|
||||
// Encode ones range bits size.
|
||||
u32 bitsUsed = bits::NumUsedBits(onesRangeLen - 1);
|
||||
uint32_t bitsUsed = bits::NumUsedBits(onesRangeLen - 1);
|
||||
if (bitsUsed > 1)
|
||||
{
|
||||
// Most significant bit for non-zero values is always 1, don't encode it.
|
||||
|
@ -475,7 +475,7 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
onesRangeLen = 0;
|
||||
}
|
||||
// Encode zeros range bits size - 1.
|
||||
u32 bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 2);
|
||||
uint32_t bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 2);
|
||||
if (bitsUsed > 1)
|
||||
{
|
||||
// Most significant bit for non-zero values is always 1, don't encode it.
|
||||
|
@ -489,7 +489,7 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
if (onesRangeLen > 0)
|
||||
{
|
||||
// Encode last ones range size - 1.
|
||||
u32 bitsUsed = bits::NumUsedBits(onesRangeLen - 1);
|
||||
uint32_t bitsUsed = bits::NumUsedBits(onesRangeLen - 1);
|
||||
if (bitsUsed > 1)
|
||||
{
|
||||
// Most significant bit for non-zero values is always 1, don't encode it.
|
||||
|
@ -502,12 +502,12 @@ void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int
|
|||
}
|
||||
}
|
||||
|
||||
vector<u32> DecodeCompressedBitVector(Reader & reader) {
|
||||
u64 serialSize = reader.Size();
|
||||
vector<u32> posOnes;
|
||||
u64 decodeOffset = 0;
|
||||
u64 header = VarintDecode(reader, decodeOffset);
|
||||
u32 encType = header & 3;
|
||||
vector<uint32_t> DecodeCompressedBitVector(Reader & reader) {
|
||||
uint64_t serialSize = reader.Size();
|
||||
vector<uint32_t> posOnes;
|
||||
uint64_t decodeOffset = 0;
|
||||
uint64_t header = VarintDecode(reader, decodeOffset);
|
||||
uint32_t encType = header & 3;
|
||||
CHECK_LESS(encType, 4, ());
|
||||
if (encType == 0)
|
||||
{
|
||||
|
@ -529,23 +529,23 @@ vector<u32> DecodeCompressedBitVector(Reader & reader) {
|
|||
else if (encType == 2)
|
||||
{
|
||||
// Diffs-Arith encoded.
|
||||
u64 freqsCnt = header >> 2;
|
||||
vector<u32> distrTable = SerialFreqsToDistrTable(reader, decodeOffset, freqsCnt);
|
||||
u64 cntElements = VarintDecode(reader, decodeOffset);
|
||||
u64 encSizesBytesize = VarintDecode(reader, decodeOffset);
|
||||
vector<u32> bitsUsedVec;
|
||||
uint64_t freqsCnt = header >> 2;
|
||||
vector<uint32_t> distrTable = SerialFreqsToDistrTable(reader, decodeOffset, freqsCnt);
|
||||
uint64_t cntElements = VarintDecode(reader, decodeOffset);
|
||||
uint64_t encSizesBytesize = VarintDecode(reader, decodeOffset);
|
||||
vector<uint32_t> bitsUsedVec;
|
||||
Reader * arithDecReader = reader.CreateSubReader(decodeOffset, encSizesBytesize);
|
||||
ArithmeticDecoder arithDec(*arithDecReader, distrTable);
|
||||
for (u64 i = 0; i < cntElements; ++i) bitsUsedVec.push_back(arithDec.Decode());
|
||||
for (uint64_t i = 0; i < cntElements; ++i) bitsUsedVec.push_back(arithDec.Decode());
|
||||
decodeOffset += encSizesBytesize;
|
||||
Reader * bitReaderReader = reader.CreateSubReader(decodeOffset, serialSize - decodeOffset);
|
||||
BitReader bitReader(*bitReaderReader);
|
||||
int64_t prevOnePos = -1;
|
||||
for (u64 i = 0; i < cntElements; ++i)
|
||||
for (uint64_t i = 0; i < cntElements; ++i)
|
||||
{
|
||||
u32 bitsUsed = bitsUsedVec[i];
|
||||
u64 diff = 0;
|
||||
if (bitsUsed > 0) diff = ((u64(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else diff = 1;
|
||||
uint32_t bitsUsed = bitsUsedVec[i];
|
||||
uint64_t diff = 0;
|
||||
if (bitsUsed > 0) diff = ((uint64_t(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else diff = 1;
|
||||
posOnes.push_back(prevOnePos + diff);
|
||||
prevOnePos += diff;
|
||||
}
|
||||
|
@ -557,15 +557,15 @@ vector<u32> DecodeCompressedBitVector(Reader & reader) {
|
|||
|
||||
// If bit vector starts with 1.
|
||||
bool isFirstOne = ((header >> 2) & 1) == 1;
|
||||
u64 sum = 0;
|
||||
uint64_t sum = 0;
|
||||
while (decodeOffset < serialSize)
|
||||
{
|
||||
u64 zerosRangeSize = 0;
|
||||
uint64_t zerosRangeSize = 0;
|
||||
// Don't read zero range size for the first time if first bit is 1.
|
||||
if (!isFirstOne) zerosRangeSize = VarintDecode(reader, decodeOffset) + 1; else isFirstOne = false;
|
||||
u64 onesRangeSize = VarintDecode(reader, decodeOffset) + 1;
|
||||
uint64_t onesRangeSize = VarintDecode(reader, decodeOffset) + 1;
|
||||
sum += zerosRangeSize;
|
||||
for (u64 i = sum; i < sum + onesRangeSize; ++i) posOnes.push_back(i);
|
||||
for (uint64_t i = sum; i < sum + onesRangeSize; ++i) posOnes.push_back(i);
|
||||
sum += onesRangeSize;
|
||||
}
|
||||
}
|
||||
|
@ -575,41 +575,41 @@ vector<u32> DecodeCompressedBitVector(Reader & reader) {
|
|||
|
||||
// If bit vector starts with 1.
|
||||
bool isFirstOne = ((header >> 2) & 1) == 1;
|
||||
u64 freqs0Cnt = header >> 3, freqs1Cnt = VarintDecode(reader, decodeOffset);
|
||||
vector<u32> distrTable0 = SerialFreqsToDistrTable(reader, decodeOffset, freqs0Cnt);
|
||||
vector<u32> distrTable1 = SerialFreqsToDistrTable(reader, decodeOffset, freqs1Cnt);
|
||||
u64 cntElements0 = VarintDecode(reader, decodeOffset), cntElements1 = VarintDecode(reader, decodeOffset);
|
||||
u64 enc0SizesBytesize = VarintDecode(reader, decodeOffset), enc1SizesBytesize = VarintDecode(reader, decodeOffset);
|
||||
uint64_t freqs0Cnt = header >> 3, freqs1Cnt = VarintDecode(reader, decodeOffset);
|
||||
vector<uint32_t> distrTable0 = SerialFreqsToDistrTable(reader, decodeOffset, freqs0Cnt);
|
||||
vector<uint32_t> distrTable1 = SerialFreqsToDistrTable(reader, decodeOffset, freqs1Cnt);
|
||||
uint64_t cntElements0 = VarintDecode(reader, decodeOffset), cntElements1 = VarintDecode(reader, decodeOffset);
|
||||
uint64_t enc0SizesBytesize = VarintDecode(reader, decodeOffset), enc1SizesBytesize = VarintDecode(reader, decodeOffset);
|
||||
Reader * arithDec0Reader = reader.CreateSubReader(decodeOffset, enc0SizesBytesize);
|
||||
ArithmeticDecoder arithDec0(*arithDec0Reader, distrTable0);
|
||||
vector<u32> bitsSizes0;
|
||||
for (u64 i = 0; i < cntElements0; ++i) bitsSizes0.push_back(arithDec0.Decode());
|
||||
vector<uint32_t> bitsSizes0;
|
||||
for (uint64_t i = 0; i < cntElements0; ++i) bitsSizes0.push_back(arithDec0.Decode());
|
||||
decodeOffset += enc0SizesBytesize;
|
||||
Reader * arithDec1Reader = reader.CreateSubReader(decodeOffset, enc1SizesBytesize);
|
||||
ArithmeticDecoder arith_dec1(*arithDec1Reader, distrTable1);
|
||||
vector<u32> bitsSizes1;
|
||||
for (u64 i = 0; i < cntElements1; ++i) bitsSizes1.push_back(arith_dec1.Decode());
|
||||
vector<uint32_t> bitsSizes1;
|
||||
for (uint64_t i = 0; i < cntElements1; ++i) bitsSizes1.push_back(arith_dec1.Decode());
|
||||
decodeOffset += enc1SizesBytesize;
|
||||
Reader * bitReaderReader = reader.CreateSubReader(decodeOffset, serialSize - decodeOffset);
|
||||
BitReader bitReader(*bitReaderReader);
|
||||
u64 sum = 0, i0 = 0, i1 = 0;
|
||||
uint64_t sum = 0, i0 = 0, i1 = 0;
|
||||
while (i0 < cntElements0 && i1 < cntElements1)
|
||||
{
|
||||
u64 zerosRangeSize = 0;
|
||||
uint64_t zerosRangeSize = 0;
|
||||
// Don't read zero range size for the first time if first bit is 1.
|
||||
if (!isFirstOne)
|
||||
{
|
||||
u32 bitsUsed = bitsSizes0[i0];
|
||||
if (bitsUsed > 0) zerosRangeSize = ((u64(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else zerosRangeSize = 1;
|
||||
uint32_t bitsUsed = bitsSizes0[i0];
|
||||
if (bitsUsed > 0) zerosRangeSize = ((uint64_t(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else zerosRangeSize = 1;
|
||||
++i0;
|
||||
}
|
||||
else isFirstOne = false;
|
||||
u64 onesRangeSize = 0;
|
||||
u32 bitsUsed = bitsSizes1[i1];
|
||||
if (bitsUsed > 0) onesRangeSize = ((u64(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else onesRangeSize = 1;
|
||||
uint64_t onesRangeSize = 0;
|
||||
uint32_t bitsUsed = bitsSizes1[i1];
|
||||
if (bitsUsed > 0) onesRangeSize = ((uint64_t(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else onesRangeSize = 1;
|
||||
++i1;
|
||||
sum += zerosRangeSize;
|
||||
for (u64 j = sum; j < sum + onesRangeSize; ++j) posOnes.push_back(j);
|
||||
for (uint64_t j = sum; j < sum + onesRangeSize; ++j) posOnes.push_back(j);
|
||||
sum += onesRangeSize;
|
||||
}
|
||||
CHECK(i0 == cntElements0 && i1 == cntElements1, ());
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Author: Artyom.
|
||||
// Module for compressing/decompressing bit vectors.
|
||||
// Usage:
|
||||
// vector<u8> comprBits1;
|
||||
// MemWriter< vector<u8> > writer(comprBits1);
|
||||
// vector<uint8_t> comprBits1;
|
||||
// MemWriter< vector<uint8_t> > writer(comprBits1);
|
||||
// // Create a bit vector by storing increasing positions of ones.
|
||||
// vector<u32> posOnes1 = {12, 34, 75}, posOnes2 = {10, 34, 95};
|
||||
// vector<uint32_t> posOnes1 = {12, 34, 75}, posOnes2 = {10, 34, 95};
|
||||
// // Compress some vectors.
|
||||
// BuildCompressedBitVector(writer, posOnes1);
|
||||
// MemReader reader(comprBits1.data(), comprBits1.size());
|
||||
|
@ -12,12 +12,12 @@
|
|||
// MemReader reader(comprBits1.data(), comprBits1.size());
|
||||
// posOnes1 = DecodeCompressedBitVector(reader);
|
||||
// // Intersect two vectors.
|
||||
// vector<u32> andRes = BitVectorsAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end());
|
||||
// vector<uint32_t> andRes = BitVectorsAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end());
|
||||
// // Unite two vectors.
|
||||
// vector<u32> orRes = BitVectorsAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end());
|
||||
// vector<uint32_t> orRes = BitVectorsOr(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end());
|
||||
// // Sub-and two vectors (second vector-set is a subset of first vector-set as bit vectors,
|
||||
// // so that second vector size should be equal to number of ones of the first vector).
|
||||
// vector<u32> subandRes = BitVectorsSubAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end());
|
||||
// vector<uint32_t> subandRes = BitVectorsSubAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end());
|
||||
|
||||
#pragma once
|
||||
|
||||
|
@ -25,10 +25,6 @@
|
|||
#include "../std/stdint.hpp"
|
||||
#include "../std/vector.hpp"
|
||||
|
||||
typedef uint8_t u8;
|
||||
typedef uint32_t u32;
|
||||
typedef uint64_t u64;
|
||||
|
||||
// Forward declare used Reader/Writer.
|
||||
class Reader;
|
||||
class Writer;
|
||||
|
@ -40,22 +36,22 @@ class Writer;
|
|||
// "Ranges" creates a compressed array of lengths of zeros and ones ranges,
|
||||
// "Varint" encodes resulting sizes using varint encoding,
|
||||
// "Arith" encodes resulting sizes using arithmetic encoding).
|
||||
void BuildCompressedBitVector(Writer & writer, vector<u32> const & posOnes, int chosenEncType = -1);
|
||||
void BuildCompressedBitVector(Writer & writer, vector<uint32_t> const & posOnes, int chosenEncType = -1);
|
||||
// Decodes compressed bit vector to uncompressed array of ones positions.
|
||||
vector<u32> DecodeCompressedBitVector(Reader & reader);
|
||||
vector<uint32_t> DecodeCompressedBitVector(Reader & reader);
|
||||
|
||||
// Intersects two bit vectors based on their begin and end iterators.
|
||||
// Returns resulting positions of ones.
|
||||
template <typename It1T, typename It2T>
|
||||
vector<u32> BitVectorsAnd(It1T begin1, It1T end1, It2T begin2, It2T end2)
|
||||
vector<uint32_t> BitVectorsAnd(It1T begin1, It1T end1, It2T begin2, It2T end2)
|
||||
{
|
||||
vector<u32> result;
|
||||
vector<uint32_t> result;
|
||||
|
||||
It1T it1 = begin1;
|
||||
It2T it2 = begin2;
|
||||
while (it1 != end1 && it2 != end2)
|
||||
{
|
||||
u32 pos1 = *it1, pos2 = *it2;
|
||||
uint32_t pos1 = *it1, pos2 = *it2;
|
||||
if (pos1 == pos2)
|
||||
{
|
||||
result.push_back(pos1);
|
||||
|
@ -71,15 +67,15 @@ vector<u32> BitVectorsAnd(It1T begin1, It1T end1, It2T begin2, It2T end2)
|
|||
// Unites two bit vectors based on their begin and end iterators.
|
||||
// Returns resulting positions of ones.
|
||||
template <typename It1T, typename It2T>
|
||||
vector<u32> BitVectorsOr(It1T begin1, It1T end1, It2T begin2, It2T end2)
|
||||
vector<uint32_t> BitVectorsOr(It1T begin1, It1T end1, It2T begin2, It2T end2)
|
||||
{
|
||||
vector<u32> result;
|
||||
vector<uint32_t> result;
|
||||
|
||||
It1T it1 = begin1;
|
||||
It2T it2 = begin2;
|
||||
while (it1 != end1 && it2 != end2)
|
||||
{
|
||||
u32 pos1 = *it1, pos2 = *it2;
|
||||
uint32_t pos1 = *it1, pos2 = *it2;
|
||||
if (pos1 == pos2)
|
||||
{
|
||||
result.push_back(pos1);
|
||||
|
@ -101,7 +97,7 @@ vector<u32> BitVectorsOr(It1T begin1, It1T end1, It2T begin2, It2T end2)
|
|||
{
|
||||
while (it1 != end1)
|
||||
{
|
||||
u32 pos1 = *it1;
|
||||
uint32_t pos1 = *it1;
|
||||
result.push_back(pos1);
|
||||
++it1;
|
||||
}
|
||||
|
@ -110,7 +106,7 @@ vector<u32> BitVectorsOr(It1T begin1, It1T end1, It2T begin2, It2T end2)
|
|||
{
|
||||
while (it2 != end2)
|
||||
{
|
||||
u32 pos2 = *it2;
|
||||
uint32_t pos2 = *it2;
|
||||
result.push_back(pos2);
|
||||
++it2;
|
||||
}
|
||||
|
@ -122,16 +118,16 @@ vector<u32> BitVectorsOr(It1T begin1, It1T end1, It2T begin2, It2T end2)
|
|||
// second bit vector should have size equal to first vector's number of ones.
|
||||
// Returns resulting positions of ones.
|
||||
template <typename It1T, typename It2T>
|
||||
vector<u32> BitVectorsSubAnd(It1T begin1, It1T end1, It2T begin2, It2T end2)
|
||||
vector<uint32_t> BitVectorsSubAnd(It1T begin1, It1T end1, It2T begin2, It2T end2)
|
||||
{
|
||||
vector<u32> result;
|
||||
vector<uint32_t> result;
|
||||
|
||||
It1T it1 = begin1;
|
||||
It2T it2 = begin2;
|
||||
u64 index2 = 0;
|
||||
uint64_t index2 = 0;
|
||||
for (; it1 != end1 && it2 != end2; ++it1, ++index2)
|
||||
{
|
||||
u64 pos1 = *it1, pos2 = *it2;
|
||||
uint64_t pos1 = *it1, pos2 = *it2;
|
||||
if (pos2 == index2)
|
||||
{
|
||||
result.push_back(pos1);
|
||||
|
|
Loading…
Add table
Reference in a new issue