From 45edf6980579ceb0c0cb145b3457e7cfd99a1e5d Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Fri, 29 Jan 2016 18:01:35 +0300 Subject: [PATCH] [coding] Removed arithmetic codec. --- coding/arithmetic_codec.cpp | 145 ------------------ coding/arithmetic_codec.hpp | 76 --------- coding/coding.pro | 2 - coding/coding_tests/arithmetic_codec_test.cpp | 43 ------ coding/coding_tests/coding_tests.pro | 1 - 5 files changed, 267 deletions(-) delete mode 100644 coding/arithmetic_codec.cpp delete mode 100644 coding/arithmetic_codec.hpp delete mode 100644 coding/coding_tests/arithmetic_codec_test.cpp diff --git a/coding/arithmetic_codec.cpp b/coding/arithmetic_codec.cpp deleted file mode 100644 index c659120eaf..0000000000 --- a/coding/arithmetic_codec.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include "coding/arithmetic_codec.hpp" - -#include "coding/writer.hpp" -#include "coding/reader.hpp" - -#include "base/assert.hpp" -#include "base/bits.hpp" - -vector FreqsToDistrTable(vector const & origFreqs) -{ - uint64_t freqLowerBound = 0; - while (1) - { - // Resulting distr table is initialized with first zero value. - vector result(1, 0); - vector freqs; - uint64_t sum = 0; - uint64_t minFreq = ~uint64_t(0); - for (uint32_t i = 0; i < origFreqs.size(); ++i) - { - uint32_t freq = origFreqs[i]; - if (freq > 0 && freq < minFreq) minFreq = freq; - if (freq > 0 && freq < freqLowerBound) freq = freqLowerBound; - freqs.push_back(freq); - sum += freq; - result.push_back(sum); - } - if (freqLowerBound == 0) freqLowerBound = minFreq; - // This flag shows that some interval with non-zero freq has - // degraded to zero interval in normalized distribution table. - bool hasDegradedZeroInterval = false; - for (uint32_t i = 1; i < result.size(); ++i) - { - result[i] = (result[i] << DISTR_SHIFT) / sum; - if (freqs[i - 1] > 0 && (result[i] - result[i - 1] == 0)) - { - hasDegradedZeroInterval = true; - break; - } - } - if (!hasDegradedZeroInterval) return vector(result.begin(), result.end()); - ++freqLowerBound; - } -} - -ArithmeticEncoder::ArithmeticEncoder(vector const & distrTable) - : m_begin(0), m_size(-1), m_distrTable(distrTable) {} - -void ArithmeticEncoder::Encode(uint32_t symbol) -{ - CHECK_LESS(symbol + 1, m_distrTable.size(), ()); - uint32_t distrBegin = m_distrTable[symbol]; - uint32_t distrEnd = m_distrTable[symbol + 1]; - CHECK_LESS(distrBegin, distrEnd, ()); - uint32_t prevBegin = m_begin; - m_begin += (m_size >> DISTR_SHIFT) * distrBegin; - m_size = (m_size >> DISTR_SHIFT) * (distrEnd - distrBegin); - if (m_begin < prevBegin) PropagateCarry(); - while (m_size < (uint32_t(1) << 24)) - { - m_output.push_back(uint8_t(m_begin >> 24)); - m_begin <<= 8; - m_size <<= 8; - } -} - -vector ArithmeticEncoder::Finalize() -{ - CHECK_GREATER(m_size, 0, ()); - uint32_t last = m_begin + m_size - 1; - if (last < m_begin) - { - PropagateCarry(); - } - else - { - uint32_t resultHiBits = bits::NumHiZeroBits32(m_begin ^ last) + 1; - uint32_t value = last & (~uint32_t(0) << (32 - resultHiBits)); - while (value != 0) - { - m_output.push_back(uint8_t(value >> 24)); - value <<= 8; - } - } - m_begin = 0; - m_size = 0; - return m_output; -} - -void ArithmeticEncoder::PropagateCarry() -{ - int i = m_output.size() - 1; - while (i >= 0 && m_output[i] == 0xFF) - { - m_output[i] = 0; - --i; - } - CHECK_GREATER_OR_EQUAL(i, 0, ()); - ++m_output[i]; -} - -ArithmeticDecoder::ArithmeticDecoder(Reader & reader, vector const & distrTable) - : m_codeValue(0), m_size(-1), m_reader(reader), m_serialCur(0), - m_serialEnd(reader.Size()), m_distrTable(distrTable) -{ - for (uint32_t i = 0; i < sizeof(m_codeValue); ++i) - { - m_codeValue <<= 8; - m_codeValue |= ReadCodeByte(); - } -} - -uint32_t ArithmeticDecoder::Decode() -{ - uint32_t l = 0, r = m_distrTable.size(), m = 0; - uint32_t shiftedSize = m_size >> DISTR_SHIFT; - while (r - l > 1) - { - m = (l + r) / 2; - uint32_t intervalBegin = shiftedSize * m_distrTable[m]; - if (intervalBegin <= m_codeValue) l = m; else r = m; - } - uint32_t symbol = l; - m_codeValue -= shiftedSize * m_distrTable[symbol]; - m_size = shiftedSize * (m_distrTable[symbol + 1] - m_distrTable[symbol]); - while (m_size < (uint32_t(1) << 24)) - { - m_codeValue <<= 8; - m_size <<= 8; - m_codeValue |= ReadCodeByte(); - } - return symbol; -} - -uint8_t ArithmeticDecoder::ReadCodeByte() -{ - if (m_serialCur >= m_serialEnd) return 0; - else - { - uint8_t result = 0; - m_reader.Read(m_serialCur, &result, 1); - ++m_serialCur; - return result; - } -} diff --git a/coding/arithmetic_codec.hpp b/coding/arithmetic_codec.hpp deleted file mode 100644 index 6220d1db71..0000000000 --- a/coding/arithmetic_codec.hpp +++ /dev/null @@ -1,76 +0,0 @@ -// Author: Artyom. -// Arithmetic Encoder/Decoder. -// See http://en.wikipedia.org/wiki/Arithmetic_coding -// Usage: -// // Compute freqs table by counting number of occurancies of each symbol. -// // Freqs table should have size equal to number of symbols in the alphabet. -// // Convert freqs table to distr table. -// vector distrTable = FreqsToDistrTable(freqs); -// ArithmeticEncoder arith_enc(distrTable); -// // Encode any number of symbols. -// arith_enc.Encode(10); arith_enc.Encode(17); arith_enc.Encode(0); arith_enc.Encode(4); -// // Get encoded bytes. -// vector encoded_data = arith_enc.Finalize(); -// // Decode encoded bytes. Number of symbols should be provided outside. -// MemReader reader(encoded_data.data(), encoded_data.size()); -// ArithmeticDecoder arith_dec(&reader, distrTable); -// uint32_t sym1 = arith_dec.Decode(); uint32_t sym2 = arith_dec.Decode(); -// uint32_t sym3 = arith_dec.Decode(); uint32_t sym4 = arith_dec.Decode(); - -#pragma once - -#include "std/cstdint.hpp" -#include "std/vector.hpp" - -// Forward declarations. -class Reader; - -// Default shift of distribution table, i.e. all distribution table frequencies are -// normalized by this shift, i.e. distr table upper bound equals (1 << DISTR_SHIFT). -uint32_t const DISTR_SHIFT = 16; -// Converts symbols frequencies table to distribution table, used in Arithmetic codecs. -vector FreqsToDistrTable(vector const & freqs); - -class ArithmeticEncoder -{ -public: - // Provided distribution table. - ArithmeticEncoder(vector const & distrTable); - // Encode symbol using given distribution table and add that symbol to output. - void Encode(uint32_t symbol); - // Finalize encoding, flushes remaining bytes from the buffer to output. - // Returns output vector of encoded bytes. - vector Finalize(); -private: - // Propagates carry in case of overflow. - void PropagateCarry(); -private: - uint32_t m_begin; - uint32_t m_size; - vector m_output; - vector const & m_distrTable; -}; - -class ArithmeticDecoder -{ -public: - // Decoder is given a reader to read input bytes, - // distrTable - distribution table to decode symbols. - ArithmeticDecoder(Reader & reader, vector const & distrTable); - // Decode next symbol from the encoded stream. - uint32_t Decode(); -private: - // Read next code byte from encoded stream. - uint8_t ReadCodeByte(); -private: - // Current most significant part of code value. - uint32_t m_codeValue; - // Current interval size. - uint32_t m_size; - // Reader and two bounds of encoded data within this Reader. - Reader & m_reader; - uint64_t m_serialCur; - uint64_t m_serialEnd; - - vector const & m_distrTable; -}; diff --git a/coding/coding.pro b/coding/coding.pro index 269badf17c..99f91663ab 100644 --- a/coding/coding.pro +++ b/coding/coding.pro @@ -11,7 +11,6 @@ INCLUDEPATH *= $$ROOT_DIR/3party/tomcrypt/src/headers SOURCES += \ $$ROOT_DIR/3party/lodepng/lodepng.cpp \ - arithmetic_codec.cpp \ base64.cpp \ # blob_indexer.cpp \ # blob_storage.cpp \ @@ -42,7 +41,6 @@ HEADERS += \ $$ROOT_DIR/3party/lodepng/lodepng.hpp \ $$ROOT_DIR/3party/lodepng/lodepng_io.hpp \ $$ROOT_DIR/3party/lodepng/lodepng_io_private.hpp \ - arithmetic_codec.hpp \ base64.hpp \ bit_streams.hpp \ # blob_indexer.hpp \ diff --git a/coding/coding_tests/arithmetic_codec_test.cpp b/coding/coding_tests/arithmetic_codec_test.cpp deleted file mode 100644 index 1539968e82..0000000000 --- a/coding/coding_tests/arithmetic_codec_test.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "testing/testing.hpp" - -#include "coding/arithmetic_codec.hpp" -#include "coding/reader.hpp" - -#include "std/random.hpp" - -UNIT_TEST(ArithmeticCodec) -{ - mt19937 rng(0); - - uint32_t const MAX_FREQ = 2048; - uint32_t const ALPHABET_SIZE = 256; - vector symbols; - vector freqs; - // Generate random freqs. - for (uint32_t i = 0; i < ALPHABET_SIZE; ++i) { - uint32_t freq = rng() % MAX_FREQ; - freqs.push_back(freq); - } - // Make at least one frequency zero for corner cases. - freqs[freqs.size() / 2] = 0; - // Generate symbols based on given freqs. - for (uint32_t i = 0; i < freqs.size(); ++i) { - uint32_t freq = freqs[i]; - for (uint32_t j = 0; j < freq; ++j) { - uint32_t pos = rng() % (symbols.size() + 1); - symbols.insert(symbols.begin() + pos, 1, i); - } - } - vector distrTable = FreqsToDistrTable(freqs); - // Encode symbols. - ArithmeticEncoder arithEnc(distrTable); - for (uint32_t i = 0; i < symbols.size(); ++i) arithEnc.Encode(symbols[i]); - vector encodedData = arithEnc.Finalize(); - // Decode symbols. - MemReader reader(encodedData.data(), encodedData.size()); - ArithmeticDecoder arithDec(reader, distrTable); - for (uint32_t i = 0; i < symbols.size(); ++i) { - uint32_t decodedSymbol = arithDec.Decode(); - TEST_EQUAL(symbols[i], decodedSymbol, ()); - } -} diff --git a/coding/coding_tests/coding_tests.pro b/coding/coding_tests/coding_tests.pro index c007b0a9a3..899ca4f740 100644 --- a/coding/coding_tests/coding_tests.pro +++ b/coding/coding_tests/coding_tests.pro @@ -13,7 +13,6 @@ include($$ROOT_DIR/common.pri) DEFINES += OMIM_UNIT_TEST_DISABLE_PLATFORM_INIT SOURCES += ../../testing/testingmain.cpp \ - arithmetic_codec_test.cpp \ base64_for_user_id_test.cpp \ base64_test.cpp \ bit_streams_test.cpp \