diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp index 03d83ebddf..f63720b542 100644 --- a/coding/compressed_bit_vector.cpp +++ b/coding/compressed_bit_vector.cpp @@ -3,77 +3,12 @@ #include "arithmetic_codec.hpp" #include "reader.hpp" #include "writer.hpp" +#include "varint_misc.hpp" #include "../base/assert.hpp" #include "../base/bits.hpp" namespace { - void VarintEncode(vector & dst, uint64_t n) - { - if (n == 0) - { - dst.push_back(0); - } - else - { - while (n != 0) - { - uint8_t b = n & 0x7F; - n >>= 7; - b |= n == 0 ? 0 : 0x80; - dst.push_back(b); - } - } - } - void VarintEncode(Writer & writer, uint64_t n) - { - if (n == 0) - { - writer.Write(&n, 1); - } - else - { - while (n != 0) - { - uint8_t b = n & 0x7F; - n >>= 7; - b |= n == 0 ? 0 : 0x80; - writer.Write(&b, 1); - } - } - } - uint64_t VarintDecode(void * src, uint64_t & offset) - { - uint64_t n = 0; - int shift = 0; - while (1) - { - uint8_t b = *(((uint8_t*)src) + offset); - CHECK_LESS_OR_EQUAL(shift, 56, ()); - n |= uint64_t(b & 0x7F) << shift; - ++offset; - if ((b & 0x80) == 0) break; - shift += 7; - } - return n; - } - uint64_t VarintDecode(Reader & reader, uint64_t & offset) - { - uint64_t n = 0; - int shift = 0; - while (1) - { - uint8_t b = 0; - reader.Read(offset, &b, 1); - CHECK_LESS_OR_EQUAL(shift, 56, ()); - n |= uint64_t(b & 0x7F) << shift; - ++offset; - if ((b & 0x80) == 0) break; - shift += 7; - } - return n; - } - vector SerialFreqsToDistrTable(Reader & reader, uint64_t & decodeOffset, uint64_t cnt) { vector freqs; diff --git a/coding/compressed_varnum_vector.cpp b/coding/compressed_varnum_vector.cpp index f646ea1987..41acd61f4f 100644 --- a/coding/compressed_varnum_vector.cpp +++ b/coding/compressed_varnum_vector.cpp @@ -2,6 +2,7 @@ #include "compressed_varnum_vector.hpp" #include "reader.hpp" #include "writer.hpp" +#include "varint_misc.hpp" #include "../base/bits.hpp" #include "../std/algorithm.hpp" @@ -9,87 +10,6 @@ #include "../std/vector.hpp" 
namespace { - void VarintEncode(vector & dst, u64 n) - { - if (n == 0) - { - dst.push_back(0); - } - else - { - while (n != 0) - { - u8 b = n & 0x7F; - n >>= 7; - b |= n == 0 ? 0 : 0x80; - dst.push_back(b); - } - } - } - void VarintEncode(Writer & writer, u64 n) - { - if (n == 0) - { - writer.Write(&n, 1); - } - else - { - while (n != 0) - { - u8 b = n & 0x7F; - n >>= 7; - b |= n == 0 ? 0 : 0x80; - writer.Write(&b, 1); - } - } - } - u64 VarintDecode(void * src, u64 & offset) - { - u64 n = 0; - int shift = 0; - while (1) - { - u8 b = *(((u8*)src) + offset); - CHECK_LESS_OR_EQUAL(shift, 56, ()); - n |= u64(b & 0x7F) << shift; - ++offset; - if ((b & 0x80) == 0) break; - shift += 7; - } - return n; - } - u64 VarintDecode(Reader & reader, u64 & offset) - { - u64 n = 0; - int shift = 0; - while (1) - { - u8 b = 0; - reader.Read(offset, &b, 1); - CHECK_LESS_OR_EQUAL(shift, 56, ()); - n |= u64(b & 0x7F) << shift; - ++offset; - if ((b & 0x80) == 0) break; - shift += 7; - } - return n; - } - u64 VarintDecodeReverse(Reader & reader, u64 & offset) - { - u8 b = 0; - do - { - --offset; - reader.Read(offset, &b, 1); - } - while ((b & 0x80) != 0); - ++offset; - u64 beginOffset = offset; - u64 num = VarintDecode(reader, offset); - offset = beginOffset; - return num; - } - vector SerialFreqsToDistrTable(Reader & reader, u64 & decodeOffset, u64 cnt) { vector freqs; @@ -266,6 +186,8 @@ CompressedVarnumVectorReader::CompressedVarnumVectorReader(Reader & reader) u64 tableSize = m_numsCnt == 0 ? 0 : ((m_numsCnt - 1) / m_numElemPerTableEntry) + 1; u64 tableDecodeOffset = reader.Size() - 1; u64 tableSizeEncodedSize = VarintDecodeReverse(reader, tableDecodeOffset); + // Advance offset to point to the first byte of table size encoded varint. 
+ ++tableDecodeOffset; u64 tableEncodedBegin = tableDecodeOffset - tableSizeEncodedSize; u64 tableEncodedEnd = tableDecodeOffset; u64 prevPos = 0, prevSum = 0; diff --git a/coding/varint_misc.hpp b/coding/varint_misc.hpp new file mode 100644 index 0000000000..7a6ce9aaba --- /dev/null +++ b/coding/varint_misc.hpp @@ -0,0 +1,99 @@ +// Author: Artyom Polkovnikov. +// Different variants of Varint encoding/decoding. + +#pragma once + +#include "reader.hpp" +#include "writer.hpp" + +#include "../base/assert.hpp" +#include "../std/stdint.hpp" +#include "../std/vector.hpp" + +// Encode Varint by appending to vector of bytes. +inline void VarintEncode(vector & dst, uint64_t n) +{ + if (n == 0) + { + dst.push_back(0); + } + else + { + while (n != 0) + { + uint8_t b = n & 0x7F; + n >>= 7; + b |= n == 0 ? 0 : 0x80; + dst.push_back(b); + } + } +} +// Encode varint using bytes Writer. +inline void VarintEncode(Writer & writer, uint64_t n) +{ + if (n == 0) + { + writer.Write(&n, 1); + } + else + { + while (n != 0) + { + uint8_t b = n & 0x7F; + n >>= 7; + b |= n == 0 ? 0 : 0x80; + writer.Write(&b, 1); + } + } +} +// Decode varint at given pointer and offset, offset is incremented after decoding. +inline uint64_t VarintDecode(void * src, uint64_t & offset) +{ + uint64_t n = 0; + int shift = 0; + while (1) + { + uint8_t b = *(((uint8_t*)src) + offset); + CHECK_LESS_OR_EQUAL(shift, 56, ()); + n |= uint64_t(b & 0x7F) << shift; + ++offset; + if ((b & 0x80) == 0) break; + shift += 7; + } + return n; +} +// Decode varint using bytes Reader, offset is incremented after decoding. +inline uint64_t VarintDecode(Reader & reader, uint64_t & offset) +{ + uint64_t n = 0; + int shift = 0; + while (1) + { + uint8_t b = 0; + reader.Read(offset, &b, 1); + CHECK_LESS_OR_EQUAL(shift, 56, ()); + n |= uint64_t(b & 0x7F) << shift; + ++offset; + if ((b & 0x80) == 0) break; + shift += 7; + } + return n; +} +// Reverse decode varint. Offset should point to last byte of decoded varint. 
+// It is compulsory that there is at least one encoded varint before this varint. +// After decoding offset points to the last byte of previous varint. +inline uint64_t VarintDecodeReverse(Reader & reader, uint64_t & offset) +{ + uint8_t b = 0; + do + { + --offset; + reader.Read(offset, &b, 1); + } + while ((b & 0x80) != 0); + uint64_t prevLastEncodedByteOffset = offset; + ++offset; + uint64_t num = VarintDecode(reader, offset); + offset = prevLastEncodedByteOffset; + return num; +}