[coding] Move Varint encoding/decoding from compressed bit vector and varnum vector to varint_misc.hpp module.

This commit is contained in:
Artyom Polkovnikov 2014-11-19 10:55:29 +03:00 committed by Alex Zolotarev
parent fc21fb93ef
commit 23c12d01c7
3 changed files with 103 additions and 147 deletions

View file

@ -3,77 +3,12 @@
#include "arithmetic_codec.hpp"
#include "reader.hpp"
#include "writer.hpp"
#include "varint_misc.hpp"
#include "../base/assert.hpp"
#include "../base/bits.hpp"
namespace {
// Appends the base-128 (varint) encoding of n to dst, least significant
// 7-bit group first. Every byte except the last has its high bit (0x80) set.
// Zero is encoded as the single byte 0x00.
void VarintEncode(vector<uint8_t> & dst, uint64_t n)
{
  // The do-while form guarantees that at least one byte is emitted for n == 0.
  do
  {
    uint8_t group = static_cast<uint8_t>(n & 0x7F);
    n >>= 7;
    if (n != 0)
      group |= 0x80;  // Continuation flag: more groups follow.
    dst.push_back(group);
  }
  while (n != 0);
}
// Writes the base-128 (varint) encoding of n through writer, one byte per
// Write call, least significant 7-bit group first. Every byte except the last
// has its high bit (0x80) set. Zero is written as the single byte 0x00.
void VarintEncode(Writer & writer, uint64_t n)
{
  // The do-while form guarantees that at least one byte is written for n == 0.
  do
  {
    uint8_t group = static_cast<uint8_t>(n & 0x7F);
    n >>= 7;
    if (n != 0)
      group |= 0x80;  // Continuation flag: more groups follow.
    writer.Write(&group, 1);
  }
  while (n != 0);
}
// Decodes one varint that starts at byte position offset inside the buffer src.
// On return offset has been advanced past the last byte of the varint.
// Up to 10 bytes are accepted, which is what VarintEncode emits for values
// >= 2^63; the previous limit (shift <= 56) made such values fail the CHECK.
// No bounds checking is done on src: the caller must supply a complete varint.
uint64_t VarintDecode(void * src, uint64_t & offset)
{
  uint8_t const * const bytes = static_cast<uint8_t const *>(src);
  uint64_t n = 0;
  int shift = 0;
  while (true)
  {
    uint8_t const b = bytes[offset];
    // A 64-bit value needs at most 10 groups of 7 bits, so the last legal shift is 63.
    CHECK_LESS_OR_EQUAL(shift, 63, ());
    int const payload = b & 0x7F;
    if (shift == 63)
    {
      // Only the lowest bit of the 10th group fits into 64 bits.
      CHECK_LESS_OR_EQUAL(payload, 1, ());
    }
    n |= uint64_t(payload) << shift;
    ++offset;
    if ((b & 0x80) == 0)
      break;
    shift += 7;
  }
  return n;
}
// Decodes one varint read byte by byte from reader, starting at position offset.
// On return offset has been advanced past the last byte of the varint.
// Up to 10 bytes are accepted, which is what VarintEncode emits for values
// >= 2^63; the previous limit (shift <= 56) made such values fail the CHECK.
uint64_t VarintDecode(Reader & reader, uint64_t & offset)
{
  uint64_t n = 0;
  int shift = 0;
  while (true)
  {
    uint8_t b = 0;
    reader.Read(offset, &b, 1);
    // A 64-bit value needs at most 10 groups of 7 bits, so the last legal shift is 63.
    CHECK_LESS_OR_EQUAL(shift, 63, ());
    int const payload = b & 0x7F;
    if (shift == 63)
    {
      // Only the lowest bit of the 10th group fits into 64 bits.
      CHECK_LESS_OR_EQUAL(payload, 1, ());
    }
    n |= uint64_t(payload) << shift;
    ++offset;
    if ((b & 0x80) == 0)
      break;
    shift += 7;
  }
  return n;
}
vector<uint32_t> SerialFreqsToDistrTable(Reader & reader, uint64_t & decodeOffset, uint64_t cnt)
{
vector<uint32_t> freqs;

View file

@ -2,6 +2,7 @@
#include "compressed_varnum_vector.hpp"
#include "reader.hpp"
#include "writer.hpp"
#include "varint_misc.hpp"
#include "../base/bits.hpp"
#include "../std/algorithm.hpp"
@ -9,87 +10,6 @@
#include "../std/vector.hpp"
namespace {
// Appends the base-128 (varint) encoding of n to dst, least significant
// 7-bit group first. Every byte except the last has its high bit (0x80) set.
// Zero is encoded as the single byte 0x00.
void VarintEncode(vector<u8> & dst, u64 n)
{
  // The do-while form guarantees that at least one byte is emitted for n == 0.
  do
  {
    u8 group = n & 0x7F;
    n >>= 7;
    if (n != 0)
      group |= 0x80;  // Continuation flag: more groups follow.
    dst.push_back(group);
  }
  while (n != 0);
}
// Writes the base-128 (varint) encoding of n through writer, one byte per
// Write call, least significant 7-bit group first. Every byte except the last
// has its high bit (0x80) set. Zero is written as the single byte 0x00.
void VarintEncode(Writer & writer, u64 n)
{
  // The do-while form guarantees that at least one byte is written for n == 0.
  do
  {
    u8 group = n & 0x7F;
    n >>= 7;
    if (n != 0)
      group |= 0x80;  // Continuation flag: more groups follow.
    writer.Write(&group, 1);
  }
  while (n != 0);
}
// Decodes one varint that starts at byte position offset inside the buffer src.
// On return offset has been advanced past the last byte of the varint.
// Up to 10 bytes are accepted, which is what VarintEncode emits for values
// >= 2^63; the previous limit (shift <= 56) made such values fail the CHECK.
// No bounds checking is done on src: the caller must supply a complete varint.
u64 VarintDecode(void * src, u64 & offset)
{
  u8 const * const bytes = static_cast<u8 const *>(src);
  u64 n = 0;
  int shift = 0;
  while (true)
  {
    u8 const b = bytes[offset];
    // A 64-bit value needs at most 10 groups of 7 bits, so the last legal shift is 63.
    CHECK_LESS_OR_EQUAL(shift, 63, ());
    int const payload = b & 0x7F;
    if (shift == 63)
    {
      // Only the lowest bit of the 10th group fits into 64 bits.
      CHECK_LESS_OR_EQUAL(payload, 1, ());
    }
    n |= u64(payload) << shift;
    ++offset;
    if ((b & 0x80) == 0)
      break;
    shift += 7;
  }
  return n;
}
// Decodes one varint read byte by byte from reader, starting at position offset.
// On return offset has been advanced past the last byte of the varint.
// Up to 10 bytes are accepted, which is what VarintEncode emits for values
// >= 2^63; the previous limit (shift <= 56) made such values fail the CHECK.
u64 VarintDecode(Reader & reader, u64 & offset)
{
  u64 n = 0;
  int shift = 0;
  while (true)
  {
    u8 b = 0;
    reader.Read(offset, &b, 1);
    // A 64-bit value needs at most 10 groups of 7 bits, so the last legal shift is 63.
    CHECK_LESS_OR_EQUAL(shift, 63, ());
    int const payload = b & 0x7F;
    if (shift == 63)
    {
      // Only the lowest bit of the 10th group fits into 64 bits.
      CHECK_LESS_OR_EQUAL(payload, 1, ());
    }
    n |= u64(payload) << shift;
    ++offset;
    if ((b & 0x80) == 0)
      break;
    shift += 7;
  }
  return n;
}
// Decodes the varint whose last byte is at position offset by scanning backwards.
// The scan stops at the nearest earlier byte without the continuation bit (0x80),
// i.e. the last byte of the preceding varint, so such a byte must exist.
// On return offset is the position of the first byte of the decoded varint.
u64 VarintDecodeReverse(Reader & reader, u64 & offset)
{
  u8 byte = 0;
  for (;;)
  {
    --offset;
    reader.Read(offset, &byte, 1);
    if ((byte & 0x80) == 0)
      break;
  }
  // Step from the terminator of the preceding varint to the start of ours.
  ++offset;
  u64 const firstByteOffset = offset;
  u64 const value = VarintDecode(reader, offset);
  // Forward decoding moved offset past the varint; rewind to its first byte.
  offset = firstByteOffset;
  return value;
}
vector<u32> SerialFreqsToDistrTable(Reader & reader, u64 & decodeOffset, u64 cnt)
{
vector<u32> freqs;
@ -266,6 +186,8 @@ CompressedVarnumVectorReader::CompressedVarnumVectorReader(Reader & reader)
u64 tableSize = m_numsCnt == 0 ? 0 : ((m_numsCnt - 1) / m_numElemPerTableEntry) + 1;
u64 tableDecodeOffset = reader.Size() - 1;
u64 tableSizeEncodedSize = VarintDecodeReverse(reader, tableDecodeOffset);
// Advance offset to point to the first byte of table size encoded varint.
++tableDecodeOffset;
u64 tableEncodedBegin = tableDecodeOffset - tableSizeEncodedSize;
u64 tableEncodedEnd = tableDecodeOffset;
u64 prevPos = 0, prevSum = 0;

99
coding/varint_misc.hpp Normal file
View file

@ -0,0 +1,99 @@
// Author: Artyom Polkovnikov.
// Different variants of Varint encoding/decoding.
#pragma once
#include "reader.hpp"
#include "writer.hpp"
#include "../base/assert.hpp"
#include "../std/stdint.hpp"
#include "../std/vector.hpp"
// Appends the base-128 (varint) encoding of n to dst, least significant
// 7-bit group first. Every byte except the last has its high bit (0x80) set.
// Zero is encoded as the single byte 0x00.
inline void VarintEncode(vector<uint8_t> & dst, uint64_t n)
{
  // The do-while form guarantees that at least one byte is emitted for n == 0.
  do
  {
    uint8_t group = static_cast<uint8_t>(n & 0x7F);
    n >>= 7;
    if (n != 0)
      group |= 0x80;  // Continuation flag: more groups follow.
    dst.push_back(group);
  }
  while (n != 0);
}
// Writes the base-128 (varint) encoding of n through writer, one byte per
// Write call, least significant 7-bit group first. Every byte except the last
// has its high bit (0x80) set. Zero is written as the single byte 0x00.
inline void VarintEncode(Writer & writer, uint64_t n)
{
  // The do-while form guarantees that at least one byte is written for n == 0.
  do
  {
    uint8_t group = static_cast<uint8_t>(n & 0x7F);
    n >>= 7;
    if (n != 0)
      group |= 0x80;  // Continuation flag: more groups follow.
    writer.Write(&group, 1);
  }
  while (n != 0);
}
// Decode varint at given pointer and offset; offset is incremented after decoding
// so that it points just past the last byte of the varint.
// Up to 10 bytes are accepted, which is what VarintEncode emits for values
// >= 2^63; the previous limit (shift <= 56) made such values fail the CHECK.
// No bounds checking is done on src: the caller must supply a complete varint.
inline uint64_t VarintDecode(void * src, uint64_t & offset)
{
  uint8_t const * const bytes = static_cast<uint8_t const *>(src);
  uint64_t n = 0;
  int shift = 0;
  while (true)
  {
    uint8_t const b = bytes[offset];
    // A 64-bit value needs at most 10 groups of 7 bits, so the last legal shift is 63.
    CHECK_LESS_OR_EQUAL(shift, 63, ());
    int const payload = b & 0x7F;
    if (shift == 63)
    {
      // Only the lowest bit of the 10th group fits into 64 bits.
      CHECK_LESS_OR_EQUAL(payload, 1, ());
    }
    n |= uint64_t(payload) << shift;
    ++offset;
    if ((b & 0x80) == 0)
      break;
    shift += 7;
  }
  return n;
}
// Decode varint using bytes Reader; offset is incremented after decoding
// so that it points just past the last byte of the varint.
// Up to 10 bytes are accepted, which is what VarintEncode emits for values
// >= 2^63; the previous limit (shift <= 56) made such values fail the CHECK.
inline uint64_t VarintDecode(Reader & reader, uint64_t & offset)
{
  uint64_t n = 0;
  int shift = 0;
  while (true)
  {
    uint8_t b = 0;
    reader.Read(offset, &b, 1);
    // A 64-bit value needs at most 10 groups of 7 bits, so the last legal shift is 63.
    CHECK_LESS_OR_EQUAL(shift, 63, ());
    int const payload = b & 0x7F;
    if (shift == 63)
    {
      // Only the lowest bit of the 10th group fits into 64 bits.
      CHECK_LESS_OR_EQUAL(payload, 1, ());
    }
    n |= uint64_t(payload) << shift;
    ++offset;
    if ((b & 0x80) == 0)
      break;
    shift += 7;
  }
  return n;
}
// Decodes a varint by scanning backwards. On entry offset must point to the
// last byte of the varint to decode.
// The scan stops at the nearest earlier byte without the continuation bit (0x80),
// so at least one encoded varint must precede the one being decoded.
// On return offset points to the last byte of that preceding varint.
inline uint64_t VarintDecodeReverse(Reader & reader, uint64_t & offset)
{
  uint8_t byte = 0;
  for (;;)
  {
    --offset;
    reader.Read(offset, &byte, 1);
    if ((byte & 0x80) == 0)
      break;
  }
  // offset now rests on the terminating byte of the preceding varint.
  uint64_t const precedingLastByte = offset;
  // The varint to decode starts right after it.
  ++offset;
  uint64_t const value = VarintDecode(reader, offset);
  offset = precedingLastByte;
  return value;
}