diff --git a/coding/coding.pro b/coding/coding.pro index 99f91663ab..4a20f93ba6 100644 --- a/coding/coding.pro +++ b/coding/coding.pro @@ -15,7 +15,6 @@ SOURCES += \ # blob_indexer.cpp \ # blob_storage.cpp \ compressed_bit_vector.cpp \ -# compressed_varnum_vector.cpp \ file_container.cpp \ file_name_utils.cpp \ file_reader.cpp \ @@ -50,7 +49,6 @@ HEADERS += \ coder.hpp \ coder_util.hpp \ compressed_bit_vector.hpp \ -# compressed_varnum_vector.hpp \ constants.hpp \ dd_vector.hpp \ diff.hpp \ diff --git a/coding/coding_tests/coding_tests.pro b/coding/coding_tests/coding_tests.pro index 899ca4f740..b119dc39e5 100644 --- a/coding/coding_tests/coding_tests.pro +++ b/coding/coding_tests/coding_tests.pro @@ -19,7 +19,6 @@ SOURCES += ../../testing/testingmain.cpp \ # blob_storage_test.cpp \ coder_util_test.cpp \ compressed_bit_vector_test.cpp \ -# compressed_varnum_vector_test.cpp \ dd_vector_test.cpp \ diff_test.cpp \ endianness_test.cpp \ diff --git a/coding/coding_tests/compressed_varnum_vector_test.cpp b/coding/coding_tests/compressed_varnum_vector_test.cpp deleted file mode 100644 index ca1143f819..0000000000 --- a/coding/coding_tests/compressed_varnum_vector_test.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include "testing/testing.hpp" - -#include "coding/compressed_varnum_vector.hpp" - -/* -#include "coding/reader.hpp" -#include "coding/writer.hpp" - -#include "std/random.hpp" - - -struct NumsSource -{ - NumsSource(vector const & v) : m_v(v) {} - uint64_t operator()(uint64_t pos) { ASSERT_LESS(pos, m_v.size(), ()); return m_v[pos]; } - - vector const & m_v; -}; - -UNIT_TEST(CompressedVarnumVector) -{ - mt19937 rng(0); - uint32_t const NUMS_CNT = 5000; - uint32_t const MAX_NUM_BYTESIZE = 5; - vector nums, sums(1, 0); - uint64_t sum = 0; - for (uint32_t i = 0; i < NUMS_CNT; ++i) - { - uint32_t byteSize = rng() % MAX_NUM_BYTESIZE + 1; - uint64_t num = rng() & ((uint64_t(1) << (byteSize * 8)) - 1); - nums.push_back(num); - sum += num; - sums.push_back(sum); - } - vector encodedVector; - MemWriter< vector > encodedVectorWriter(encodedVector); - BuildCompressedVarnumVector(encodedVectorWriter, NumsSource(nums), nums.size(), true); - MemReader reader(encodedVector.data(), encodedVector.size()); - CompressedVarnumVectorReader comprNums(reader); - // Find by index. - for (uint32_t i = 0; i < nums.size(); ++i) - { - uint64_t sumBefore = 0; - comprNums.FindByIndex(i, sumBefore); - TEST_EQUAL(sumBefore, sums[i], ()); - uint64_t num = comprNums.Read(); - TEST_EQUAL(num, nums[i], ()); - } - // Sequential read. - uint64_t sumBefore = 0; - comprNums.FindByIndex(200, sumBefore); - for (uint32_t i = 200; i < 300; ++i) - { - uint64_t num = comprNums.Read(); - TEST_EQUAL(num, nums[i], ()); - } - // Find by sum. - for (uint32_t i = 1; i < nums.size() - 1; ++i) - { - // Find strict equal sum. - if (nums[i - 1] >= 1) - { - uint64_t sumIncl = 0, cntIncl = 0; - uint64_t num = comprNums.FindBySum(sums[i], sumIncl, cntIncl); - TEST_EQUAL(sumIncl, sums[i], ()); - TEST_EQUAL(cntIncl, i, ()); - TEST_EQUAL(num, nums[i - 1], ()); - } - // Find by intermediate sum (not strictly equal). - if (nums[i] > 1) - { - uint64_t sumIncl = 0, cntIncl = 0; - uint64_t num = comprNums.FindBySum(sums[i] + 1, sumIncl, cntIncl); - TEST_EQUAL(sumIncl, sums[i + 1], ()); - TEST_EQUAL(cntIncl, i + 1, ()); - TEST_EQUAL(num, nums[i], ()); - } - } -} -*/ diff --git a/coding/compressed_varnum_vector.cpp b/coding/compressed_varnum_vector.cpp deleted file mode 100644 index 72d77945de..0000000000 --- a/coding/compressed_varnum_vector.cpp +++ /dev/null @@ -1,220 +0,0 @@ -#include "coding/compressed_varnum_vector.hpp" - -/* -#include "coding/arithmetic_codec.hpp" -#include "coding/bit_streams.hpp" -#include "coding/reader.hpp" -#include "coding/writer.hpp" -#include "coding/varint_misc.hpp" - -#include "base/bits.hpp" -#include "std/algorithm.hpp" -#include "std/unique_ptr.hpp" -#include "std/vector.hpp" - -//namespace { -// vector SerialFreqsToDistrTable(Reader & reader, uint64_t & decodeOffset, uint64_t cnt) -// { -// vector freqs; -// for (uint64_t i = 0; i < cnt; ++i) freqs.push_back(VarintDecode(reader, decodeOffset)); -// return FreqsToDistrTable(freqs); -// } -//} - -void BuildCompressedVarnumVector(Writer & writer, NumsSourceFuncT numsSource, uint64_t numsCnt, bool supportSums) -{ - // Encode header. - VarintEncode(writer, numsCnt); - VarintEncode(writer, NUM_ELEM_PER_TABLE_ENTRY); - VarintEncode(writer, supportSums ? 1 : 0); - - // Compute frequencies of bits sizes of all nums. - vector sizesFreqs(65, 0); - int32_t maxBitsSize = -1; - for (uint64_t i = 0; i < numsCnt; ++i) - { - uint64_t num = numsSource(i); - uint32_t bitsUsed = bits::NumUsedBits(num); - ++sizesFreqs[bitsUsed]; - if (int32_t(bitsUsed) > maxBitsSize) maxBitsSize = bitsUsed; - } - sizesFreqs.resize(maxBitsSize + 1); - VarintEncode(writer, sizesFreqs.size()); - for (uint32_t i = 0; i < sizesFreqs.size(); ++i) VarintEncode(writer, sizesFreqs[i]); - - vector distr_table = FreqsToDistrTable(sizesFreqs); - - vector encoded_table; - uint64_t tableSize = numsCnt == 0 ? 1 : ((numsCnt - 1) / NUM_ELEM_PER_TABLE_ENTRY) + 2; - uint64_t inum = 0, prevChunkPos = 0, encodedNumsSize = 0, prevChunkSum = 0, sum = 0; - { - // Encode starting table entry. - VarintEncode(encoded_table, 0); - if (supportSums) VarintEncode(encoded_table, 0); - } - for (uint64_t itable = 0; itable < tableSize && inum < numsCnt; ++itable) - { - // Encode chunk of nums (one chunk for one table entry). - vector encodedChunk, encodedBits; - ArithmeticEncoder arithEncSizes(distr_table); - { - MemWriter< vector > encoded_bits_writer(encodedBits); - BitSink bitsWriter(encoded_bits_writer); - for (uint64_t ichunkNum = 0; ichunkNum < NUM_ELEM_PER_TABLE_ENTRY && inum < numsCnt; ++ichunkNum, ++inum) - { - uint64_t num = numsSource(inum); - uint32_t bitsUsed = bits::NumUsedBits(num); - arithEncSizes.Encode(bitsUsed); - if (bitsUsed > 1) bitsWriter.Write(num, bitsUsed - 1); - sum += num; - } - } - vector encodedChunkSizes = arithEncSizes.Finalize(); - VarintEncode(encodedChunk, encodedChunkSizes.size()); - encodedChunk.insert(encodedChunk.end(), encodedChunkSizes.begin(), encodedChunkSizes.end()); - encodedChunk.insert(encodedChunk.end(), encodedBits.begin(), encodedBits.end()); - writer.Write(encodedChunk.data(), encodedChunk.size()); - encodedNumsSize += encodedChunk.size(); - - // Encode table entry. - VarintEncode(encoded_table, encodedNumsSize - prevChunkPos); - if (supportSums) VarintEncode(encoded_table, sum - prevChunkSum); - prevChunkPos = encodedNumsSize; - prevChunkSum = sum; - } - writer.Write(encoded_table.data(), encoded_table.size()); - VarintEncode(writer, encoded_table.size()); -} - -struct CompressedVarnumVectorReader::DecodeContext -{ - unique_ptr m_sizesArithDecReader; - unique_ptr m_sizesArithDec; - unique_ptr m_numsBitsReaderReader; - unique_ptr m_numsBitsReader; - uint64_t m_numsLeftInChunk; -}; - -CompressedVarnumVectorReader::CompressedVarnumVectorReader(Reader & reader) - : m_reader(reader), m_numsCnt(0), m_numElemPerTableEntry(0), m_supportSums(false), - m_numsEncodedOffset(0), m_decodeCtx(0) -{ - CHECK_GREATER(reader.Size(), 0, ()); - // Decode header. - uint64_t offset = 0; - m_numsCnt = VarintDecode(m_reader, offset); - m_numElemPerTableEntry = VarintDecode(m_reader, offset); - m_supportSums = VarintDecode(m_reader, offset) != 0; - vector sizesFreqs; - uint64_t freqsCnt = VarintDecode(m_reader, offset); - for (uint32_t i = 0; i < freqsCnt; ++i) sizesFreqs.push_back(VarintDecode(m_reader, offset)); - m_distrTable = FreqsToDistrTable(sizesFreqs); - m_numsEncodedOffset = offset; - - // Decode jump table. - //uint64_t tableSize = m_numsCnt == 0 ? 0 : ((m_numsCnt - 1) / m_numElemPerTableEntry) + 1; - uint64_t tableDecodeOffset = reader.Size() - 1; - uint64_t tableSizeEncodedSize = VarintDecodeReverse(reader, tableDecodeOffset); - // Advance offset to point to the first byte of table size encoded varint. - ++tableDecodeOffset; - uint64_t tableEncodedBegin = tableDecodeOffset - tableSizeEncodedSize; - uint64_t tableEncodedEnd = tableDecodeOffset; - uint64_t prevPos = 0, prevSum = 0; - for (uint64_t tableOffset = tableEncodedBegin; tableOffset < tableEncodedEnd;) - { - uint64_t posDiff = VarintDecode(reader, tableOffset); - m_tablePos.push_back(prevPos + posDiff); - prevPos += posDiff; - if (m_supportSums) - { - uint64_t sumDiff = VarintDecode(reader, tableOffset); - m_tableSum.push_back(prevSum + sumDiff); - prevSum += sumDiff; - } - } -} - -CompressedVarnumVectorReader::~CompressedVarnumVectorReader() -{ - if (m_decodeCtx) delete m_decodeCtx; -} - -void CompressedVarnumVectorReader::SetDecodeContext(uint64_t tableEntryIndex) -{ - CHECK_LESS(tableEntryIndex, m_tablePos.size() - 1, ()); - uint64_t decodeOffset = m_numsEncodedOffset + m_tablePos[tableEntryIndex]; - uint64_t encodedSizesSize = VarintDecode(m_reader, decodeOffset); - // Create decode context. - if (m_decodeCtx) delete m_decodeCtx; - m_decodeCtx = new DecodeContext; - m_decodeCtx->m_sizesArithDecReader.reset(m_reader.CreateSubReader(decodeOffset, encodedSizesSize)); - m_decodeCtx->m_sizesArithDec.reset(new ArithmeticDecoder(*m_decodeCtx->m_sizesArithDecReader, m_distrTable)); - m_decodeCtx->m_numsBitsReaderReader.reset(m_reader.CreateSubReader(decodeOffset + encodedSizesSize, m_numsEncodedOffset + m_tablePos[tableEntryIndex + 1] - decodeOffset - encodedSizesSize)); - m_decodeCtx->m_numsBitsReader.reset(new BitSource(*m_decodeCtx->m_numsBitsReaderReader)); - m_decodeCtx->m_numsLeftInChunk = min((tableEntryIndex + 1) * m_numElemPerTableEntry, m_numsCnt) - tableEntryIndex * m_numElemPerTableEntry; -} - -void CompressedVarnumVectorReader::FindByIndex(uint64_t index, uint64_t & sumBefore) -{ - CHECK_LESS(index, m_numsCnt, ()); - uint64_t tableEntryIndex = index / m_numElemPerTableEntry; - uint64_t indexWithinRange = index % m_numElemPerTableEntry; - - this->SetDecodeContext(tableEntryIndex); - - uint64_t sum = 0; - if (m_supportSums) sum = m_tableSum[tableEntryIndex]; - for (uint64_t i = 0; i < indexWithinRange; ++i) - { - uint64_t num = this->Read(); - if (m_supportSums) sum += num; - } - if (m_supportSums) sumBefore = sum; -} - -uint64_t CompressedVarnumVectorReader::FindBySum(uint64_t sum, uint64_t & sumIncl, uint64_t & cntIncl) -{ - CHECK(m_supportSums, ()); - // First do binary search over select table to find the biggest - // sum that is less than our. - uint64_t l = 0, r = m_tablePos.size(); - while (r - l > 1) - { - uint64_t m = (l + r) / 2; - if (sum > m_tableSum[m]) - { - l = m; - } - else - { - r = m; - } - } - uint64_t tableEntryIndex = l; - cntIncl = tableEntryIndex * m_numElemPerTableEntry; - - this->SetDecodeContext(tableEntryIndex); - - sumIncl = m_tableSum[tableEntryIndex]; - uint64_t num = 0; - while (sumIncl < sum && cntIncl < m_numsCnt) - { - num = this->Read(); - sumIncl += num; - ++cntIncl; - if (sumIncl >= sum) break; - } - return num; -} - -uint64_t CompressedVarnumVectorReader::Read() -{ - CHECK(m_decodeCtx != 0, ()); - CHECK_GREATER(m_decodeCtx->m_numsLeftInChunk, 0, ()); - uint32_t bitsUsed = m_decodeCtx->m_sizesArithDec->Decode(); - if (bitsUsed == 0) return 0; - uint64_t num = (uint64_t(1) << (bitsUsed - 1)) | m_decodeCtx->m_numsBitsReader->Read(bitsUsed - 1); - --m_decodeCtx->m_numsLeftInChunk; - return num; -} -*/ diff --git a/coding/compressed_varnum_vector.hpp b/coding/compressed_varnum_vector.hpp deleted file mode 100644 index 4d6bcae955..0000000000 --- a/coding/compressed_varnum_vector.hpp +++ /dev/null @@ -1,60 +0,0 @@ -// Author: Artyom. -// A module for storing arbitrary variable-bitsize numbers in a compressed form so that later -// you can access any number searching it by index or sum of numbers preceeding and including searched number. - -#pragma once - -/* -#include "std/function.hpp" -#include "std/stdint.hpp" -#include "std/vector.hpp" - -// Forward declarations. -class Reader; -class Writer; - -// Number of nums in a chunk per one table entry. -uint64_t const NUM_ELEM_PER_TABLE_ENTRY = 1024; - -// A source of nums. -typedef function NumsSourceFuncT; -// Builds CompressedVarnumVector based on source of numbers. -// If supportSums is true then sums are included in the table otherwise sums are not computed. -void BuildCompressedVarnumVector(Writer & writer, NumsSourceFuncT numsSource, uint64_t numsCnt, bool supportSums); - -// Reader of CompressedVarnumVector. -class CompressedVarnumVectorReader -{ -public: - // Bytes are read from Reader on the flight while decoding. - CompressedVarnumVectorReader(Reader & reader); - ~CompressedVarnumVectorReader(); - - // Set current number decoding context to number at given index. - // sumBefore will contain total sum of numbers before indexed number, computed only if sums are supported. - void FindByIndex(uint64_t index, uint64_t & sumBefore); - // Works only if sums are supported. Finds ith number by total sum of numbers in the range [0, i], i.e. - // finds such first number that sum of all number before and including it are equal or greater to sum. - // sumIncl will contain the actual sum including found number, cntIncl contains count of numbers including - // found one. Function returns found number. - uint64_t FindBySum(uint64_t sum, uint64_t & sumIncl, uint64_t & cntIncl); - // After setting position by FindByIndex and FindBySum functions Read() function will sequentially read - // next number. It is only allowed to read numbers in same chunk as the first number found (one chunk is - // created for one table entry). - uint64_t Read(); -private: - void SetDecodeContext(uint64_t table_entry_index); -private: - Reader & m_reader; - uint64_t m_numsCnt; - uint64_t m_numElemPerTableEntry; - bool m_supportSums; - uint64_t m_numsEncodedOffset; - vector m_distrTable; - vector m_tablePos; - vector m_tableSum; - // Decode context. - struct DecodeContext; - DecodeContext * m_decodeCtx; -}; -*/