diff --git a/coding/coding_tests/compressed_bit_vector_test.cpp b/coding/coding_tests/compressed_bit_vector_test.cpp index 7b507f3dcd..7789abf224 100644 --- a/coding/coding_tests/compressed_bit_vector_test.cpp +++ b/coding/coding_tests/compressed_bit_vector_test.cpp @@ -5,44 +5,44 @@ #include "../../testing/testing.hpp" #include "../../base/pseudo_random.hpp" -u32 const NUMS_COUNT = 12345; +uint32_t const NUMS_COUNT = 12345; namespace { - u64 GetRand64() + uint64_t GetRand64() { static PseudoRNG32 g_rng; - u64 result = g_rng.Generate(); - result ^= u64(g_rng.Generate()) << 32; + uint64_t result = g_rng.Generate(); + result ^= uint64_t(g_rng.Generate()) << 32; return result; } } UNIT_TEST(CompressedBitVector_Sparse) { - vector posOnes; - u32 sum = 0; - for (u32 i = 0; i < NUMS_COUNT; ++i) + vector posOnes; + uint32_t sum = 0; + for (uint32_t i = 0; i < NUMS_COUNT; ++i) { - u32 byteSize = GetRand64() % 2 + 1; - u64 num = GetRand64() & ((u64(1) << (byteSize * 7)) - 1); + uint32_t byteSize = GetRand64() % 2 + 1; + uint64_t num = GetRand64() & ((uint64_t(1) << (byteSize * 7)) - 1); if (num == 0) num = 1; sum += num; posOnes.push_back(sum); } - for (u32 j = 0; j < 5; ++j) + for (uint32_t j = 0; j < 5; ++j) { if (j == 1) posOnes.insert(posOnes.begin(), 1, 0); if (j == 2) posOnes.clear(); if (j == 3) posOnes.push_back(1); if (j == 4) { posOnes.clear(); posOnes.push_back(10); } - for (u32 ienc = 0; ienc < 4; ++ienc) + for (uint32_t ienc = 0; ienc < 4; ++ienc) { - vector serialBitVector; - MemWriter< vector > writer(serialBitVector); + vector serialBitVector; + MemWriter< vector > writer(serialBitVector); BuildCompressedBitVector(writer, posOnes, ienc); MemReader reader(serialBitVector.data(), serialBitVector.size()); - vector decPosOnes = DecodeCompressedBitVector(reader); + vector decPosOnes = DecodeCompressedBitVector(reader); TEST_EQUAL(posOnes, decPosOnes, ()); } } @@ -50,32 +50,32 @@ UNIT_TEST(CompressedBitVector_Sparse) UNIT_TEST(CompressedBitVector_Dense) { - vector posOnes; - u32 prevPos = 0; - u32 sum = 0; - for (u32 i = 0; i < NUMS_COUNT; ++i) + vector posOnes; + uint32_t prevPos = 0; + uint32_t sum = 0; + for (uint32_t i = 0; i < NUMS_COUNT; ++i) { - u32 zeroesByteSize = GetRand64() % 2 + 1; - u64 zeroesRangeSize = (GetRand64() & ((u64(1) << (zeroesByteSize * 7)) - 1)) + 1; + uint32_t zeroesByteSize = GetRand64() % 2 + 1; + uint64_t zeroesRangeSize = (GetRand64() & ((uint64_t(1) << (zeroesByteSize * 7)) - 1)) + 1; sum += zeroesRangeSize; - u32 onesByteSize = GetRand64() % 1 + 1; - u64 onesRangeSize = (GetRand64() & ((u64(1) << (onesByteSize * 7)) - 1)) + 1; - for (u32 j = 0; j < onesRangeSize; ++j) posOnes.push_back(sum + j); + uint32_t onesByteSize = GetRand64() % 1 + 1; + uint64_t onesRangeSize = (GetRand64() & ((uint64_t(1) << (onesByteSize * 7)) - 1)) + 1; + for (uint32_t j = 0; j < onesRangeSize; ++j) posOnes.push_back(sum + j); sum += onesRangeSize; } - for (u32 j = 0; j < 5; ++j) + for (uint32_t j = 0; j < 5; ++j) { if (j == 1) posOnes.insert(posOnes.begin(), 1, 0); if (j == 2) posOnes.clear(); if (j == 3) posOnes.push_back(1); if (j == 4) { posOnes.clear(); posOnes.push_back(10); } - for (u32 ienc = 0; ienc < 4; ++ienc) + for (uint32_t ienc = 0; ienc < 4; ++ienc) { - vector serialBitVector; - MemWriter< vector > writer(serialBitVector); + vector serialBitVector; + MemWriter< vector > writer(serialBitVector); BuildCompressedBitVector(writer, posOnes, ienc); MemReader reader(serialBitVector.data(), serialBitVector.size()); - vector decPosOnes = DecodeCompressedBitVector(reader); + vector decPosOnes = DecodeCompressedBitVector(reader); TEST_EQUAL(posOnes, decPosOnes, ()); } } @@ -84,53 +84,53 @@ UNIT_TEST(CompressedBitVector_Dense) UNIT_TEST(BitVectors_And) { vector v1(NUMS_COUNT * 2, false), v2(NUMS_COUNT * 2, false); - for (u32 i = 0; i < NUMS_COUNT; ++i) + for (uint32_t i = 0; i < NUMS_COUNT; ++i) { v1[GetRand64() % v1.size()] = true; v2[GetRand64() % v2.size()] = true; } - vector posOnes1, posOnes2, andPos; - for (u32 i = 0; i < v1.size(); ++i) + vector posOnes1, posOnes2, andPos; + for (uint32_t i = 0; i < v1.size(); ++i) { if (v1[i]) posOnes1.push_back(i); if (v2[i]) posOnes2.push_back(i); if (v1[i] && v2[i]) andPos.push_back(i); } - vector actualAndPos = BitVectorsAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end()); + vector actualAndPos = BitVectorsAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end()); TEST_EQUAL(andPos, actualAndPos, ()); } UNIT_TEST(BitVectors_Or) { vector v1(NUMS_COUNT * 2, false), v2(NUMS_COUNT * 2, false); - for (u32 i = 0; i < NUMS_COUNT; ++i) + for (uint32_t i = 0; i < NUMS_COUNT; ++i) { v1[GetRand64() % v1.size()] = true; v2[GetRand64() % v2.size()] = true; } - vector posOnes1, posOnes2, orPos; - for (u32 i = 0; i < v1.size(); ++i) + vector posOnes1, posOnes2, orPos; + for (uint32_t i = 0; i < v1.size(); ++i) { if (v1[i]) posOnes1.push_back(i); if (v2[i]) posOnes2.push_back(i); if (v1[i] || v2[i]) orPos.push_back(i); } - vector actualOrPos = BitVectorsOr(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end()); + vector actualOrPos = BitVectorsOr(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end()); TEST_EQUAL(orPos, actualOrPos, ()); } UNIT_TEST(BitVectors_SubAnd) { vector v1(NUMS_COUNT * 2, false); - u64 numV1Ones = 0; - for (u32 i = 0; i < v1.size(); ++i) v1[i] = (GetRand64() % 2) == 0; - vector posOnes1; - for (u32 i = 0; i < v1.size(); ++i) if (v1[i]) posOnes1.push_back(i); + uint64_t numV1Ones = 0; + for (uint32_t i = 0; i < v1.size(); ++i) v1[i] = (GetRand64() % 2) == 0; + vector posOnes1; + for (uint32_t i = 0; i < v1.size(); ++i) if (v1[i]) posOnes1.push_back(i); vector v2(posOnes1.size(), false); - for (u32 i = 0; i < v2.size(); ++i) v2[i] = (GetRand64() % 2) == 0; - vector posOnes2, subandPos; - for (u32 i = 0; i < v2.size(); ++i) if (v2[i]) posOnes2.push_back(i); - for (u32 i = 0, j = 0; i < v1.size(); ++i) + for (uint32_t i = 0; i < v2.size(); ++i) v2[i] = (GetRand64() % 2) == 0; + vector posOnes2, subandPos; + for (uint32_t i = 0; i < v2.size(); ++i) if (v2[i]) posOnes2.push_back(i); + for (uint32_t i = 0, j = 0; i < v1.size(); ++i) { if (v1[i]) { @@ -138,6 +138,6 @@ UNIT_TEST(BitVectors_SubAnd) ++j; } } - vector actualSubandPos = BitVectorsSubAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end()); + vector actualSubandPos = BitVectorsSubAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end()); TEST_EQUAL(subandPos, actualSubandPos, ()); } diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp index d38cae30cb..03d83ebddf 100644 --- a/coding/compressed_bit_vector.cpp +++ b/coding/compressed_bit_vector.cpp @@ -8,7 +8,7 @@ #include "../base/bits.hpp" namespace { - void VarintEncode(vector & dst, u64 n) + void VarintEncode(vector & dst, uint64_t n) { if (n == 0) { @@ -18,14 +18,14 @@ namespace { { while (n != 0) { - u8 b = n & 0x7F; + uint8_t b = n & 0x7F; n >>= 7; b |= n == 0 ? 0 : 0x80; dst.push_back(b); } } } - void VarintEncode(Writer & writer, u64 n) + void VarintEncode(Writer & writer, uint64_t n) { if (n == 0) { @@ -35,38 +35,38 @@ namespace { { while (n != 0) { - u8 b = n & 0x7F; + uint8_t b = n & 0x7F; n >>= 7; b |= n == 0 ? 0 : 0x80; writer.Write(&b, 1); } } } - u64 VarintDecode(void * src, u64 & offset) + uint64_t VarintDecode(void * src, uint64_t & offset) { - u64 n = 0; + uint64_t n = 0; int shift = 0; while (1) { - u8 b = *(((u8*)src) + offset); + uint8_t b = *(((uint8_t*)src) + offset); CHECK_LESS_OR_EQUAL(shift, 56, ()); - n |= u64(b & 0x7F) << shift; + n |= uint64_t(b & 0x7F) << shift; ++offset; if ((b & 0x80) == 0) break; shift += 7; } return n; } - u64 VarintDecode(Reader & reader, u64 & offset) + uint64_t VarintDecode(Reader & reader, uint64_t & offset) { - u64 n = 0; + uint64_t n = 0; int shift = 0; while (1) { - u8 b = 0; + uint8_t b = 0; reader.Read(offset, &b, 1); CHECK_LESS_OR_EQUAL(shift, 56, ()); - n |= u64(b & 0x7F) << shift; + n |= uint64_t(b & 0x7F) << shift; ++offset; if ((b & 0x80) == 0) break; shift += 7; @@ -74,10 +74,10 @@ namespace { return n; } - vector SerialFreqsToDistrTable(Reader & reader, u64 & decodeOffset, u64 cnt) + vector SerialFreqsToDistrTable(Reader & reader, uint64_t & decodeOffset, uint64_t cnt) { - vector freqs; - for (u64 i = 0; i < cnt; ++i) freqs.push_back(VarintDecode(reader, decodeOffset)); + vector freqs; + for (uint64_t i = 0; i < cnt; ++i) freqs.push_back(VarintDecode(reader, decodeOffset)); return FreqsToDistrTable(freqs); } } @@ -88,12 +88,12 @@ public: BitWriter(Writer & writer) : m_writer(writer), m_lastByte(0), m_size(0) {} ~BitWriter() { if (m_size % 8 > 0) m_writer.Write(&m_lastByte, 1); } - u64 NumBitsWritten() const { return m_size; } - void Write(u64 bits, u32 writeSize) + uint64_t NumBitsWritten() const { return m_size; } + void Write(uint64_t bits, uint32_t writeSize) { if (writeSize == 0) return; m_totalBits += writeSize; - u32 remSize = m_size % 8; + uint32_t remSize = m_size % 8; CHECK_LESS_OR_EQUAL(writeSize, 64 - remSize, ()); if (remSize > 0) { @@ -102,16 +102,16 @@ public: writeSize += remSize; m_size -= remSize; } - u32 writeBytesSize = writeSize / 8; + uint32_t writeBytesSize = writeSize / 8; m_writer.Write(&bits, writeBytesSize); m_lastByte = (bits >> (writeBytesSize * 8)) & ((1 << (writeSize % 8)) - 1); m_size += writeSize; } private: Writer & m_writer; - u8 m_lastByte; - u64 m_size; - u64 m_totalBits; + uint8_t m_lastByte; + uint64_t m_size; + uint64_t m_totalBits; }; class BitReader { @@ -119,27 +119,27 @@ public: BitReader(Reader & reader) : m_reader(reader), m_serialCur(0), m_serialEnd(reader.Size()), m_bits(0), m_bitsSize(0), m_totalBitsRead(0) {} - u64 NumBitsRead() const { return m_totalBitsRead; } - u64 Read(u32 readSize) + uint64_t NumBitsRead() const { return m_totalBitsRead; } + uint64_t Read(uint32_t readSize) { m_totalBitsRead += readSize; if (readSize == 0) return 0; CHECK_LESS_OR_EQUAL(readSize, 64, ()); // First read, sets bits that are in the m_bits buffer. - u32 firstReadSize = readSize <= m_bitsSize ? readSize : m_bitsSize; - u64 result = m_bits & (~u64(0) >> (64 - firstReadSize)); + uint32_t firstReadSize = readSize <= m_bitsSize ? readSize : m_bitsSize; + uint64_t result = m_bits & (~uint64_t(0) >> (64 - firstReadSize)); m_bits >>= firstReadSize; m_bitsSize -= firstReadSize; readSize -= firstReadSize; // Second read, does an extra read using m_reader. if (readSize > 0) { - u32 readByteSize = m_serialCur + sizeof(m_bits) <= m_serialEnd ? sizeof(m_bits) : m_serialEnd - m_serialCur; + uint32_t readByteSize = m_serialCur + sizeof(m_bits) <= m_serialEnd ? sizeof(m_bits) : m_serialEnd - m_serialCur; m_reader.Read(m_serialCur, &m_bits, readByteSize); m_serialCur += readByteSize; m_bitsSize += readByteSize * 8; if (readSize > m_bitsSize) CHECK_LESS_OR_EQUAL(readSize, m_bitsSize, ()); - result |= (m_bits & (~u64(0) >> (64 - readSize))) << firstReadSize; + result |= (m_bits & (~uint64_t(0) >> (64 - readSize))) << firstReadSize; m_bits >>= readSize; m_bitsSize -= readSize; readSize = 0; @@ -148,27 +148,27 @@ public: } private: Reader & m_reader; - u64 m_serialCur; - u64 m_serialEnd; - u64 m_bits; - u32 m_bitsSize; - u64 m_totalBitsRead; + uint64_t m_serialCur; + uint64_t m_serialEnd; + uint64_t m_bits; + uint32_t m_bitsSize; + uint64_t m_totalBitsRead; }; -void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int chosenEncType) +void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int chosenEncType) { - u32 const BLOCK_SIZE = 7; + uint32_t const BLOCK_SIZE = 7; // First stage of compression is analysis run through data ones. - u64 numBytesDiffsEncVint = 0, numBytesRangesEncVint = 0, numBitsDiffsEncArith = 0, numBitsRangesEncArith = 0; + uint64_t numBytesDiffsEncVint = 0, numBytesRangesEncVint = 0, numBitsDiffsEncArith = 0, numBitsRangesEncArith = 0; int64_t prevOnePos = -1; - u64 onesRangeLen = 0; - vector diffsSizesFreqs(65, 0), ranges0SizesFreqs(65, 0), ranges1SizesFreqs(65, 0); - for (u32 i = 0; i < posOnes.size(); ++i) + uint64_t onesRangeLen = 0; + vector diffsSizesFreqs(65, 0), ranges0SizesFreqs(65, 0), ranges1SizesFreqs(65, 0); + for (uint32_t i = 0; i < posOnes.size(); ++i) { CHECK_LESS(prevOnePos, posOnes[i], ()); // Accumulate size of diff encoding. - u64 diff = posOnes[i] - prevOnePos; - u32 diffBitsize = bits::NumUsedBits(diff - 1); + uint64_t diff = posOnes[i] - prevOnePos; + uint32_t diffBitsize = bits::NumUsedBits(diff - 1); numBytesDiffsEncVint += (diffBitsize + BLOCK_SIZE - 1) / BLOCK_SIZE; numBitsDiffsEncArith += diffBitsize > 0 ? diffBitsize - 1 : 0; ++diffsSizesFreqs[diffBitsize]; @@ -178,14 +178,14 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int if (onesRangeLen > 0) { // Accumulate size of ones-range encoding. - u32 onesRangeLenBitsize = bits::NumUsedBits(onesRangeLen - 1); + uint32_t onesRangeLenBitsize = bits::NumUsedBits(onesRangeLen - 1); numBytesRangesEncVint += (onesRangeLenBitsize + BLOCK_SIZE - 1) / BLOCK_SIZE; numBitsRangesEncArith += onesRangeLenBitsize > 0 ? onesRangeLenBitsize - 1 : 0; ++ranges1SizesFreqs[onesRangeLenBitsize]; onesRangeLen = 0; } // Accumulate size of zeros-range encoding. - u32 zeros_range_len_bitsize = bits::NumUsedBits(posOnes[i] - prevOnePos - 2); + uint32_t zeros_range_len_bitsize = bits::NumUsedBits(posOnes[i] - prevOnePos - 2); numBytesRangesEncVint += (zeros_range_len_bitsize + BLOCK_SIZE - 1) / BLOCK_SIZE; numBitsRangesEncArith += zeros_range_len_bitsize > 0 ? zeros_range_len_bitsize - 1 : 0; ++ranges0SizesFreqs[zeros_range_len_bitsize]; @@ -196,21 +196,21 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int // Accumulate size of remaining ones-range encoding. if (onesRangeLen > 0) { - u32 onesRangeLenBitsize = bits::NumUsedBits(onesRangeLen - 1); + uint32_t onesRangeLenBitsize = bits::NumUsedBits(onesRangeLen - 1); numBytesRangesEncVint += (onesRangeLenBitsize + BLOCK_SIZE - 1) / BLOCK_SIZE; numBitsRangesEncArith = onesRangeLenBitsize > 0 ? onesRangeLenBitsize - 1 : 0; ++ranges1SizesFreqs[onesRangeLenBitsize]; onesRangeLen = 0; } // Compute arithmetic encoding size. - u64 diffsSizesTotalFreq = 0, ranges0_sizes_total_freq = 0, ranges1SizesTotalFreq = 0; - for (u32 i = 0; i < diffsSizesFreqs.size(); ++i) diffsSizesTotalFreq += diffsSizesFreqs[i]; - for (u32 i = 0; i < ranges0SizesFreqs.size(); ++i) ranges0_sizes_total_freq += ranges0SizesFreqs[i]; - for (u32 i = 0; i < ranges1SizesFreqs.size(); ++i) ranges1SizesTotalFreq += ranges1SizesFreqs[i]; + uint64_t diffsSizesTotalFreq = 0, ranges0_sizes_total_freq = 0, ranges1SizesTotalFreq = 0; + for (uint32_t i = 0; i < diffsSizesFreqs.size(); ++i) diffsSizesTotalFreq += diffsSizesFreqs[i]; + for (uint32_t i = 0; i < ranges0SizesFreqs.size(); ++i) ranges0_sizes_total_freq += ranges0SizesFreqs[i]; + for (uint32_t i = 0; i < ranges1SizesFreqs.size(); ++i) ranges1SizesTotalFreq += ranges1SizesFreqs[i]; // Compute number of bits for arith encoded diffs sizes. double numSizesBitsDiffsEncArith = 0; - u32 nonzeroDiffsSizesFreqsEnd = 0; - for (u32 i = 0; i < diffsSizesFreqs.size(); ++i) + uint32_t nonzeroDiffsSizesFreqsEnd = 0; + for (uint32_t i = 0; i < diffsSizesFreqs.size(); ++i) { if (diffsSizesFreqs[i] > 0) { @@ -219,13 +219,13 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int nonzeroDiffsSizesFreqsEnd = i + 1; } } - vector diffsSizesFreqsSerial; - for (u32 i = 0; i < nonzeroDiffsSizesFreqsEnd; ++i) VarintEncode(diffsSizesFreqsSerial, diffsSizesFreqs[i]); - u64 numBytesDiffsEncArith = 4 + diffsSizesFreqsSerial.size() + (u64(numSizesBitsDiffsEncArith * diffsSizesTotalFreq + 0.999) + 7) / 8 + (numBitsDiffsEncArith + 7) /8; + vector diffsSizesFreqsSerial; + for (uint32_t i = 0; i < nonzeroDiffsSizesFreqsEnd; ++i) VarintEncode(diffsSizesFreqsSerial, diffsSizesFreqs[i]); + uint64_t numBytesDiffsEncArith = 4 + diffsSizesFreqsSerial.size() + (uint64_t(numSizesBitsDiffsEncArith * diffsSizesTotalFreq + 0.999) + 7) / 8 + (numBitsDiffsEncArith + 7) /8; // Compute number of bits for arith encoded ranges sizes. double numSizesBitsRanges0EncArith = 0; - u32 nonzeroRanges0SizesFreqsEnd = 0; - for (u32 i = 0; i < ranges0SizesFreqs.size(); ++i) + uint32_t nonzeroRanges0SizesFreqsEnd = 0; + for (uint32_t i = 0; i < ranges0SizesFreqs.size(); ++i) { if (ranges0SizesFreqs[i] > 0) { @@ -235,8 +235,8 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int } } double numSizesBitsRanges1EncArith = 0; - u32 nonzeroRanges1SizesFreqsEnd = 0; - for (u32 i = 0; i < ranges1SizesFreqs.size(); ++i) + uint32_t nonzeroRanges1SizesFreqsEnd = 0; + for (uint32_t i = 0; i < ranges1SizesFreqs.size(); ++i) { if (ranges1SizesFreqs[i] > 0) { @@ -245,16 +245,16 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int nonzeroRanges1SizesFreqsEnd = i + 1; } } - vector ranges0SizesFreqsSerial, ranges1SizesFreqsSerial; - for (u32 i = 0; i < nonzeroRanges0SizesFreqsEnd; ++i) VarintEncode(ranges0SizesFreqsSerial, ranges0SizesFreqs[i]); - for (u32 i = 0; i < nonzeroRanges1SizesFreqsEnd; ++i) VarintEncode(ranges1SizesFreqsSerial, ranges1SizesFreqs[i]); - u64 numBytesRangesEncArith = 4 + ranges0SizesFreqsSerial.size() + ranges1SizesFreqsSerial.size() + - (u64(numSizesBitsRanges0EncArith * ranges0_sizes_total_freq + 0.999) + 7) / 8 + (u64(numSizesBitsRanges1EncArith * ranges1SizesTotalFreq + 0.999) + 7) / 8 + + vector ranges0SizesFreqsSerial, ranges1SizesFreqsSerial; + for (uint32_t i = 0; i < nonzeroRanges0SizesFreqsEnd; ++i) VarintEncode(ranges0SizesFreqsSerial, ranges0SizesFreqs[i]); + for (uint32_t i = 0; i < nonzeroRanges1SizesFreqsEnd; ++i) VarintEncode(ranges1SizesFreqsSerial, ranges1SizesFreqs[i]); + uint64_t numBytesRangesEncArith = 4 + ranges0SizesFreqsSerial.size() + ranges1SizesFreqsSerial.size() + + (uint64_t(numSizesBitsRanges0EncArith * ranges0_sizes_total_freq + 0.999) + 7) / 8 + (uint64_t(numSizesBitsRanges1EncArith * ranges1SizesTotalFreq + 0.999) + 7) / 8 + (numBitsRangesEncArith + 7) / 8; // Find minimum among 4 types of encoding. - vector numBytesPerEnc = {numBytesDiffsEncVint, numBytesRangesEncVint, numBytesDiffsEncArith, numBytesRangesEncArith}; - u32 encType = 0; + vector numBytesPerEnc = {numBytesDiffsEncVint, numBytesRangesEncVint, numBytesDiffsEncArith, numBytesRangesEncArith}; + uint32_t encType = 0; if (chosenEncType != -1) { CHECK(0 <= chosenEncType && chosenEncType <= 3, ()); encType = chosenEncType; } else if (numBytesPerEnc[0] <= numBytesPerEnc[1] && numBytesPerEnc[0] <= numBytesPerEnc[2] && numBytesPerEnc[0] <= numBytesPerEnc[3]) encType = 0; else if (numBytesPerEnc[1] <= numBytesPerEnc[0] && numBytesPerEnc[1] <= numBytesPerEnc[2] && numBytesPerEnc[1] <= numBytesPerEnc[3]) encType = 1; @@ -277,7 +277,7 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int VarintEncode(writer, encType + (0 << 2) + ((posOnes[0] - prevOnePos - 1) << 3)); prevOnePos = posOnes[0]; } - for (u32 i = 1; i < posOnes.size(); ++i) + for (uint32_t i = 1; i < posOnes.size(); ++i) { CHECK_GREATER(posOnes[i], prevOnePos, ()); // Encode one's pos (diff - 1). @@ -293,9 +293,9 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int VarintEncode(writer, encType + (nonzeroDiffsSizesFreqsEnd << 2)); // Encode freqs table. writer.Write(diffsSizesFreqsSerial.data(), diffsSizesFreqsSerial.size()); - u64 tmpOffset = 0; + uint64_t tmpOffset = 0; MemReader diffsSizesFreqsSerialReader(diffsSizesFreqsSerial.data(), diffsSizesFreqsSerial.size()); - vector distrTable = SerialFreqsToDistrTable( + vector distrTable = SerialFreqsToDistrTable( diffsSizesFreqsSerialReader, tmpOffset, nonzeroDiffsSizesFreqsEnd ); @@ -303,16 +303,16 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int // First stage. Encode all bits sizes of all diffs using ArithmeticEncoder. ArithmeticEncoder arithEnc(distrTable); int64_t prevOnePos = -1; - u64 cntElements = 0; - for (u64 i = 0; i < posOnes.size(); ++i) + uint64_t cntElements = 0; + for (uint64_t i = 0; i < posOnes.size(); ++i) { CHECK_GREATER(posOnes[i], prevOnePos, ()); - u32 bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 1); + uint32_t bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 1); arithEnc.Encode(bitsUsed); ++cntElements; prevOnePos = posOnes[i]; } - vector serialSizesEnc = arithEnc.Finalize(); + vector serialSizesEnc = arithEnc.Finalize(); // Store number of compressed elements. VarintEncode(writer, cntElements); // Store compressed size of encoded sizes. @@ -324,14 +324,14 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int // Second Stage. Encode all bits of all diffs using BitWriter. BitWriter bitWriter(writer); int64_t prevOnePos = -1; - u64 totalReadBits = 0; - u64 totalReadCnts = 0; - for (u64 i = 0; i < posOnes.size(); ++i) + uint64_t totalReadBits = 0; + uint64_t totalReadCnts = 0; + for (uint64_t i = 0; i < posOnes.size(); ++i) { CHECK_GREATER(posOnes[i], prevOnePos, ()); // Encode one's pos (diff - 1). - u64 diff = posOnes[i] - prevOnePos - 1; - u32 bitsUsed = bits::NumUsedBits(diff); + uint64_t diff = posOnes[i] - prevOnePos - 1; + uint32_t bitsUsed = bits::NumUsedBits(diff); if (bitsUsed > 1) { // Most significant bit is always 1 for non-zero diffs, so don't store it. @@ -353,8 +353,8 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int // Encode encoding type plus flag if first is 1. VarintEncode(writer, encType + ((isFirstOne ? 1 : 0) << 2)); int64_t prevOnePos = -1; - u64 onesRangeLen = 0; - for (u32 i = 0; i < posOnes.size(); ++i) + uint64_t onesRangeLen = 0; + for (uint32_t i = 0; i < posOnes.size(); ++i) { CHECK_GREATER(posOnes[i], prevOnePos, ()); if (posOnes[i] - prevOnePos > 1) @@ -391,14 +391,14 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int writer.Write(ranges0SizesFreqsSerial.data(), ranges0SizesFreqsSerial.size()); writer.Write(ranges1SizesFreqsSerial.data(), ranges1SizesFreqsSerial.size()); // Create distr tables. - u64 tmpOffset = 0; + uint64_t tmpOffset = 0; MemReader ranges0SizesFreqsSerialReader(ranges0SizesFreqsSerial.data(), ranges0SizesFreqsSerial.size()); - vector distrTable0 = SerialFreqsToDistrTable( + vector distrTable0 = SerialFreqsToDistrTable( ranges0SizesFreqsSerialReader, tmpOffset, nonzeroRanges0SizesFreqsEnd ); tmpOffset = 0; MemReader ranges1SizesFreqsSerialReader(ranges1SizesFreqsSerial.data(), ranges1SizesFreqsSerial.size()); - vector distrTable1 = SerialFreqsToDistrTable( + vector distrTable1 = SerialFreqsToDistrTable( ranges1SizesFreqsSerialReader, tmpOffset, nonzeroRanges1SizesFreqsEnd ); @@ -408,10 +408,10 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int // Encode number of compressed elements. ArithmeticEncoder arith_enc0(distrTable0), arith_enc1(distrTable1); int64_t prevOnePos = -1; - u64 onesRangeLen = 0; + uint64_t onesRangeLen = 0; // Total number of compressed elements (ranges sizes). - u64 cntElements0 = 0, cntElements1 = 0; - for (u32 i = 0; i < posOnes.size(); ++i) + uint64_t cntElements0 = 0, cntElements1 = 0; + for (uint32_t i = 0; i < posOnes.size(); ++i) { CHECK_GREATER(posOnes[i], prevOnePos, ()); if (posOnes[i] - prevOnePos > 1) @@ -419,13 +419,13 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int if (onesRangeLen > 0) { // Encode ones range bits size. - u32 bitsUsed = bits::NumUsedBits(onesRangeLen - 1); + uint32_t bitsUsed = bits::NumUsedBits(onesRangeLen - 1); arith_enc1.Encode(bitsUsed); ++cntElements1; onesRangeLen = 0; } // Encode zeros range bits size - 1. - u32 bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 2); + uint32_t bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 2); arith_enc0.Encode(bitsUsed); ++cntElements0; } @@ -435,12 +435,12 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int if (onesRangeLen > 0) { // Encode last ones range size - 1. - u32 bitsUsed = bits::NumUsedBits(onesRangeLen - 1); + uint32_t bitsUsed = bits::NumUsedBits(onesRangeLen - 1); arith_enc1.Encode(bitsUsed); ++cntElements1; onesRangeLen = 0; } - vector serial0SizesEnc = arith_enc0.Finalize(), serial1SizesEnc = arith_enc1.Finalize(); + vector serial0SizesEnc = arith_enc0.Finalize(), serial1SizesEnc = arith_enc1.Finalize(); // Store number of compressed elements. VarintEncode(writer, cntElements0); VarintEncode(writer, cntElements1); @@ -456,8 +456,8 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int // Second stage, encode all ranges bits using BitWriter. BitWriter bitWriter(writer); int64_t prevOnePos = -1; - u64 onesRangeLen = 0; - for (u32 i = 0; i < posOnes.size(); ++i) + uint64_t onesRangeLen = 0; + for (uint32_t i = 0; i < posOnes.size(); ++i) { CHECK_GREATER(posOnes[i], prevOnePos, ()); if (posOnes[i] - prevOnePos > 1) @@ -465,7 +465,7 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int if (onesRangeLen > 0) { // Encode ones range bits size. - u32 bitsUsed = bits::NumUsedBits(onesRangeLen - 1); + uint32_t bitsUsed = bits::NumUsedBits(onesRangeLen - 1); if (bitsUsed > 1) { // Most significant bit for non-zero values is always 1, don't encode it. @@ -475,7 +475,7 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int onesRangeLen = 0; } // Encode zeros range bits size - 1. - u32 bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 2); + uint32_t bitsUsed = bits::NumUsedBits(posOnes[i] - prevOnePos - 2); if (bitsUsed > 1) { // Most significant bit for non-zero values is always 1, don't encode it. @@ -489,7 +489,7 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int if (onesRangeLen > 0) { // Encode last ones range size - 1. - u32 bitsUsed = bits::NumUsedBits(onesRangeLen - 1); + uint32_t bitsUsed = bits::NumUsedBits(onesRangeLen - 1); if (bitsUsed > 1) { // Most significant bit for non-zero values is always 1, don't encode it. @@ -502,12 +502,12 @@ void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int } } -vector DecodeCompressedBitVector(Reader & reader) { - u64 serialSize = reader.Size(); - vector posOnes; - u64 decodeOffset = 0; - u64 header = VarintDecode(reader, decodeOffset); - u32 encType = header & 3; +vector DecodeCompressedBitVector(Reader & reader) { + uint64_t serialSize = reader.Size(); + vector posOnes; + uint64_t decodeOffset = 0; + uint64_t header = VarintDecode(reader, decodeOffset); + uint32_t encType = header & 3; CHECK_LESS(encType, 4, ()); if (encType == 0) { @@ -529,23 +529,23 @@ vector DecodeCompressedBitVector(Reader & reader) { else if (encType == 2) { // Diffs-Arith encoded. - u64 freqsCnt = header >> 2; - vector distrTable = SerialFreqsToDistrTable(reader, decodeOffset, freqsCnt); - u64 cntElements = VarintDecode(reader, decodeOffset); - u64 encSizesBytesize = VarintDecode(reader, decodeOffset); - vector bitsUsedVec; + uint64_t freqsCnt = header >> 2; + vector distrTable = SerialFreqsToDistrTable(reader, decodeOffset, freqsCnt); + uint64_t cntElements = VarintDecode(reader, decodeOffset); + uint64_t encSizesBytesize = VarintDecode(reader, decodeOffset); + vector bitsUsedVec; Reader * arithDecReader = reader.CreateSubReader(decodeOffset, encSizesBytesize); ArithmeticDecoder arithDec(*arithDecReader, distrTable); - for (u64 i = 0; i < cntElements; ++i) bitsUsedVec.push_back(arithDec.Decode()); + for (uint64_t i = 0; i < cntElements; ++i) bitsUsedVec.push_back(arithDec.Decode()); decodeOffset += encSizesBytesize; Reader * bitReaderReader = reader.CreateSubReader(decodeOffset, serialSize - decodeOffset); BitReader bitReader(*bitReaderReader); int64_t prevOnePos = -1; - for (u64 i = 0; i < cntElements; ++i) + for (uint64_t i = 0; i < cntElements; ++i) { - u32 bitsUsed = bitsUsedVec[i]; - u64 diff = 0; - if (bitsUsed > 0) diff = ((u64(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else diff = 1; + uint32_t bitsUsed = bitsUsedVec[i]; + uint64_t diff = 0; + if (bitsUsed > 0) diff = ((uint64_t(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else diff = 1; posOnes.push_back(prevOnePos + diff); prevOnePos += diff; } @@ -557,15 +557,15 @@ vector DecodeCompressedBitVector(Reader & reader) { // If bit vector starts with 1. bool isFirstOne = ((header >> 2) & 1) == 1; - u64 sum = 0; + uint64_t sum = 0; while (decodeOffset < serialSize) { - u64 zerosRangeSize = 0; + uint64_t zerosRangeSize = 0; // Don't read zero range size for the first time if first bit is 1. if (!isFirstOne) zerosRangeSize = VarintDecode(reader, decodeOffset) + 1; else isFirstOne = false; - u64 onesRangeSize = VarintDecode(reader, decodeOffset) + 1; + uint64_t onesRangeSize = VarintDecode(reader, decodeOffset) + 1; sum += zerosRangeSize; - for (u64 i = sum; i < sum + onesRangeSize; ++i) posOnes.push_back(i); + for (uint64_t i = sum; i < sum + onesRangeSize; ++i) posOnes.push_back(i); sum += onesRangeSize; } } @@ -575,41 +575,41 @@ vector DecodeCompressedBitVector(Reader & reader) { // If bit vector starts with 1. bool isFirstOne = ((header >> 2) & 1) == 1; - u64 freqs0Cnt = header >> 3, freqs1Cnt = VarintDecode(reader, decodeOffset); - vector distrTable0 = SerialFreqsToDistrTable(reader, decodeOffset, freqs0Cnt); - vector distrTable1 = SerialFreqsToDistrTable(reader, decodeOffset, freqs1Cnt); - u64 cntElements0 = VarintDecode(reader, decodeOffset), cntElements1 = VarintDecode(reader, decodeOffset); - u64 enc0SizesBytesize = VarintDecode(reader, decodeOffset), enc1SizesBytesize = VarintDecode(reader, decodeOffset); + uint64_t freqs0Cnt = header >> 3, freqs1Cnt = VarintDecode(reader, decodeOffset); + vector distrTable0 = SerialFreqsToDistrTable(reader, decodeOffset, freqs0Cnt); + vector distrTable1 = SerialFreqsToDistrTable(reader, decodeOffset, freqs1Cnt); + uint64_t cntElements0 = VarintDecode(reader, decodeOffset), cntElements1 = VarintDecode(reader, decodeOffset); + uint64_t enc0SizesBytesize = VarintDecode(reader, decodeOffset), enc1SizesBytesize = VarintDecode(reader, decodeOffset); Reader * arithDec0Reader = reader.CreateSubReader(decodeOffset, enc0SizesBytesize); ArithmeticDecoder arithDec0(*arithDec0Reader, distrTable0); - vector bitsSizes0; - for (u64 i = 0; i < cntElements0; ++i) bitsSizes0.push_back(arithDec0.Decode()); + vector bitsSizes0; + for (uint64_t i = 0; i < cntElements0; ++i) bitsSizes0.push_back(arithDec0.Decode()); decodeOffset += enc0SizesBytesize; Reader * arithDec1Reader = reader.CreateSubReader(decodeOffset, enc1SizesBytesize); ArithmeticDecoder arith_dec1(*arithDec1Reader, distrTable1); - vector bitsSizes1; - for (u64 i = 0; i < cntElements1; ++i) bitsSizes1.push_back(arith_dec1.Decode()); + vector bitsSizes1; + for (uint64_t i = 0; i < cntElements1; ++i) bitsSizes1.push_back(arith_dec1.Decode()); decodeOffset += enc1SizesBytesize; Reader * bitReaderReader = reader.CreateSubReader(decodeOffset, serialSize - decodeOffset); BitReader bitReader(*bitReaderReader); - u64 sum = 0, i0 = 0, i1 = 0; + uint64_t sum = 0, i0 = 0, i1 = 0; while (i0 < cntElements0 && i1 < cntElements1) { - u64 zerosRangeSize = 0; + uint64_t zerosRangeSize = 0; // Don't read zero range size for the first time if first bit is 1. if (!isFirstOne) { - u32 bitsUsed = bitsSizes0[i0]; - if (bitsUsed > 0) zerosRangeSize = ((u64(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else zerosRangeSize = 1; + uint32_t bitsUsed = bitsSizes0[i0]; + if (bitsUsed > 0) zerosRangeSize = ((uint64_t(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else zerosRangeSize = 1; ++i0; } else isFirstOne = false; - u64 onesRangeSize = 0; - u32 bitsUsed = bitsSizes1[i1]; - if (bitsUsed > 0) onesRangeSize = ((u64(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else onesRangeSize = 1; + uint64_t onesRangeSize = 0; + uint32_t bitsUsed = bitsSizes1[i1]; + if (bitsUsed > 0) onesRangeSize = ((uint64_t(1) << (bitsUsed - 1)) | bitReader.Read(bitsUsed - 1)) + 1; else onesRangeSize = 1; ++i1; sum += zerosRangeSize; - for (u64 j = sum; j < sum + onesRangeSize; ++j) posOnes.push_back(j); + for (uint64_t j = sum; j < sum + onesRangeSize; ++j) posOnes.push_back(j); sum += onesRangeSize; } CHECK(i0 == cntElements0 && i1 == cntElements1, ()); diff --git a/coding/compressed_bit_vector.hpp b/coding/compressed_bit_vector.hpp index 024b3fef47..169d82e644 100644 --- a/coding/compressed_bit_vector.hpp +++ b/coding/compressed_bit_vector.hpp @@ -1,10 +1,10 @@ // Author: Artyom. // Module for compressing/decompressing bit vectors. // Usage: -// vector comprBits1; -// MemWriter< vector > writer(comprBits1); +// vector comprBits1; +// MemWriter< vector > writer(comprBits1); // // Create a bit vector by storing increasing positions of ones. -// vector posOnes1 = {12, 34, 75}, posOnes2 = {10, 34, 95}; +// vector posOnes1 = {12, 34, 75}, posOnes2 = {10, 34, 95}; // // Compress some vectors. // BuildCompressedBitVector(writer, posOnes1); // MemReader reader(comprBits1.data(), comprBits1.size()); @@ -12,12 +12,12 @@ // MemReader reader(comprBits1.data(), comprBits1.size()); // posOnes1 = DecodeCompressedBitVector(reader); // // Intersect two vectors. -// vector andRes = BitVectorsAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end()); +// vector andRes = BitVectorsAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end()); // // Unite two vectors. -// vector orRes = BitVectorsAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end()); +// vector orRes = BitVectorsAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end()); // // Sub-and two vectors (second vector-set is a subset of first vector-set as bit vectors, // // so that second vector size should be equal to number of ones of the first vector). -// vector subandRes = BitVectorsSubAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end()); +// vector subandRes = BitVectorsSubAnd(posOnes1.begin(), posOnes1.end(), posOnes2.begin(), posOnes2.end()); #pragma once @@ -25,10 +25,6 @@ #include "../std/stdint.hpp" #include "../std/vector.hpp" -typedef uint8_t u8; -typedef uint32_t u32; -typedef uint64_t u64; - // Forward declare used Reader/Writer. class Reader; class Writer; @@ -40,22 +36,22 @@ class Writer; // "Ranges" creates a compressed array of lengths of zeros and ones ranges, // "Varint" encodes resulting sizes using varint encoding, // "Arith" encodes resulting sizes using arithmetic encoding). -void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int chosenEncType = -1); +void BuildCompressedBitVector(Writer & writer, vector const & posOnes, int chosenEncType = -1); // Decodes compressed bit vector to uncompressed array of ones positions. -vector DecodeCompressedBitVector(Reader & reader); +vector DecodeCompressedBitVector(Reader & reader); // Intersects two bit vectors based on theirs begin and end iterators. // Returns resulting positions of ones. template -vector BitVectorsAnd(It1T begin1, It1T end1, It2T begin2, It2T end2) +vector BitVectorsAnd(It1T begin1, It1T end1, It2T begin2, It2T end2) { - vector result; + vector result; It1T it1 = begin1; It2T it2 = begin2; while (it1 != end1 && it2 != end2) { - u32 pos1 = *it1, pos2 = *it2; + uint32_t pos1 = *it1, pos2 = *it2; if (pos1 == pos2) { result.push_back(pos1); @@ -71,15 +67,15 @@ vector BitVectorsAnd(It1T begin1, It1T end1, It2T begin2, It2T end2) // Unites two bit vectors based on theirs begin and end iterators. // Returns resulting positions of ones. template -vector BitVectorsOr(It1T begin1, It1T end1, It2T begin2, It2T end2) +vector BitVectorsOr(It1T begin1, It1T end1, It2T begin2, It2T end2) { - vector result; + vector result; It1T it1 = begin1; It2T it2 = begin2; while (it1 != end1 && it2 != end2) { - u32 pos1 = *it1, pos2 = *it2; + uint32_t pos1 = *it1, pos2 = *it2; if (pos1 == pos2) { result.push_back(pos1); @@ -101,7 +97,7 @@ vector BitVectorsOr(It1T begin1, It1T end1, It2T begin2, It2T end2) { while (it1 != end1) { - u32 pos1 = *it1; + uint32_t pos1 = *it1; result.push_back(pos1); ++it1; } @@ -110,7 +106,7 @@ vector BitVectorsOr(It1T begin1, It1T end1, It2T begin2, It2T end2) { while (it2 != end2) { - u32 pos2 = *it2; + uint32_t pos2 = *it2; result.push_back(pos2); ++it2; } @@ -122,16 +118,16 @@ vector BitVectorsOr(It1T begin1, It1T end1, It2T begin2, It2T end2) // second bit vector should have size equal to first vector's number of ones. // Returns resulting positions of ones. template -vector BitVectorsSubAnd(It1T begin1, It1T end1, It2T begin2, It2T end2) +vector BitVectorsSubAnd(It1T begin1, It1T end1, It2T begin2, It2T end2) { - vector result; + vector result; It1T it1 = begin1; It2T it2 = begin2; - u64 index2 = 0; + uint64_t index2 = 0; for (; it1 != end1 && it2 != end2; ++it1, ++index2) { - u64 pos1 = *it1, pos2 = *it2; + uint64_t pos1 = *it1, pos2 = *it2; if (pos2 == index2) { result.push_back(pos1);