Add Bitset class and use it in IntervalIndex.

This commit is contained in:
Yury Melnichek 2011-04-28 21:12:36 +02:00 committed by Alex Zolotarev
parent 45bb508bad
commit b9d98b82f0
4 changed files with 68 additions and 28 deletions

View file

@ -63,3 +63,4 @@ HEADERS += \
path_utils.hpp \
array_adapters.hpp \
utf8_string.hpp \
bitset.hpp \

29
base/bitset.hpp Normal file
View file

@ -0,0 +1,29 @@
#pragma once
#include "assert.hpp"
#include "base.hpp"
#include "../std/memcpy.hpp"
// Fixed-size bit array backed by kBytes bytes of zero-initialized storage.
// The bit at |offset| lives in byte (offset >> 3) at position (offset & 7),
// counting from the least significant bit of that byte.
// NOTE(review): both accessors assert offset < kBytes even though the storage
// could address kBytes * 8 bits - confirm whether the template parameter is
// meant to be a bit count rather than a byte count.
template <unsigned int kBytes> class Bitset
{
public:
  // Creates a bitset with every bit cleared.
  Bitset() { memset(&m_Data, 0, sizeof(m_Data)); }

  // Returns 1 if the bit at |offset| is set and 0 otherwise.
  uint8_t Bit(uint32_t offset) const
  {
    ASSERT_LESS(offset, kBytes, ());
    return (m_Data[offset >> 3] >> (offset & 7)) & 1;
  }

  // Sets the bit at |offset| when bSet is true and clears it when bSet is false.
  // All other bits are left untouched.
  void SetBit(uint32_t offset, bool bSet = true)
  {
    ASSERT_LESS(offset, kBytes, ());
    if (bSet)
      m_Data[offset >> 3] |= (1 << (offset & 7));
    else
      // Bitwise complement (~), not logical negation (!): the mask must keep
      // the other seven bits of the byte and clear only the requested one.
      m_Data[offset >> 3] &= ~(1 << (offset & 7));
  }

private:
  uint8_t m_Data[kBytes];
};

View file

@ -5,6 +5,7 @@
#include "../base/assert.hpp"
#include "../base/base.hpp"
#include "../base/bits.hpp"
#include "../base/bitset.hpp"
#include "../base/buffer_vector.hpp"
#include "../base/macros.hpp"
#include "../std/memcpy.hpp"
@ -24,13 +25,19 @@ public:
struct Index
{
enum { MAX_BITS_PER_LEVEL = 256 };
inline uint32_t GetOffset() const { return m_Offset; }
inline uint32_t Bit(uint32_t i) const { return (m_BitMask >> i) & 1; }
inline uint32_t Bit(uint32_t i) const { return m_Bitset.Bit(i); }
uint32_t m_Offset;
uint32_t m_BitMask;
Bitset<MAX_BITS_PER_LEVEL> m_Bitset;
};
STATIC_ASSERT(sizeof(Index) == 8);
// Returns 2^(bitsPerLevel - 3), i.e. the number of bytes needed to store
// 2^bitsPerLevel bits. Asserts that bitsPerLevel is greater than 3.
static inline uint32_t BitsetSize(uint32_t bitsPerLevel)
{
  ASSERT_GREATER(bitsPerLevel, 3, ());
  // Dividing the bit count by 8 is the same as lowering the exponent by 3.
  uint32_t const byteShift = bitsPerLevel - 3;
  return 1 << byteShift;
}
};
template <class ReaderT>
@ -51,7 +58,7 @@ public:
: m_Reader(reader)
{
m_Reader.Read(0, &m_Header, sizeof(m_Header));
ASSERT_EQUAL(m_Header.m_BitsPerLevel, 5, ());
m_NodeSize = 4 + BitsetSize(m_Header.m_BitsPerLevel);
ReadIndex(sizeof(m_Header), m_Level0Index);
}
@ -64,7 +71,7 @@ public:
{
// end is inclusive in ForEachImpl().
--end;
ForEachImpl(f, beg, end, m_Level0Index, sizeof(m_Header) + sizeof(m_Level0Index),
ForEachImpl(f, beg, end, m_Level0Index, sizeof(m_Header) + m_NodeSize,
(m_Header.m_Levels - 1) * m_Header.m_BitsPerLevel, query);
}
}
@ -102,9 +109,9 @@ private:
uint64_t const e1 = (i == end0) ? (end & levelBytesFF) : levelBytesFF;
Index index1;
uint32_t const offset = baseOffset + index.GetOffset() + (cumCount * sizeof(Index));
uint32_t const offset = baseOffset + index.GetOffset() + (cumCount * m_NodeSize);
ReadIndex(offset, index1);
ForEachImpl(f, b1, e1, index1, offset + sizeof(Index),
ForEachImpl(f, b1, e1, index1, offset + m_NodeSize,
skipBits - m_Header.m_BitsPerLevel, query);
++cumCount;
}
@ -115,7 +122,7 @@ private:
Index nextIndex;
ReadIndex(baseOffset, nextIndex);
uint32_t const begOffset = baseOffset + index.GetOffset();
uint32_t const endOffset = baseOffset + sizeof(Index) + nextIndex.GetOffset();
uint32_t const endOffset = baseOffset + m_NodeSize + nextIndex.GetOffset();
ASSERT_LESS(begOffset, endOffset, (beg, end, baseOffset, skipBits));
buffer_vector<uint8_t, 256> data(endOffset - begOffset);
m_Reader.Read(begOffset, &data[0], data.size());
@ -142,13 +149,13 @@ private:
void ReadIndex(uint64_t pos, Index & index) const
{
m_Reader.Read(pos, &index, sizeof(Index));
m_Reader.Read(pos, &index, m_NodeSize);
index.m_Offset = SwapIfBigEndian(index.m_Offset);
index.m_BitMask = SwapIfBigEndian(index.m_BitMask);
}
ReaderT m_Reader;
Header m_Header;
uint32_t m_NodeSize;
Index m_Level0Index;
int m_CellIdBytes;
};

View file

@ -7,6 +7,7 @@
#include "../base/assert.hpp"
#include "../base/base.hpp"
#include "../base/bits.hpp"
#include "../base/bitset.hpp"
#include "../base/logging.hpp"
#include "../std/vector.hpp"
#include "../std/memcpy.hpp"
@ -15,16 +16,16 @@ namespace impl
{
template <class WriterT>
void WriteIntervalIndexNode(WriterT & writer, uint64_t offset, uint64_t bitMask)
void WriteIntervalIndexNode(WriterT & writer, uint64_t offset, uint32_t bitsPerLevel,
Bitset<IntervalIndexBase::Index::MAX_BITS_PER_LEVEL> const & bitMask)
{
// At the moment, uint32_t is used as a bitMask, but this can change in the future.
CHECK_EQUAL(static_cast<uint32_t>(bitMask), bitMask, (offset));
CHECK_GREATER_OR_EQUAL(offset, writer.Pos() + 8, ());
WriteToSink(writer, static_cast<uint32_t>(offset - writer.Pos() - 8));
WriteToSink(writer, static_cast<uint32_t>(bitMask));
int const bitsetSize = IntervalIndexBase::BitsetSize(bitsPerLevel);
CHECK_GREATER_OR_EQUAL(offset, writer.Pos() + 4 + bitsetSize, ());
WriteToSink(writer, static_cast<uint32_t>(offset - writer.Pos() - 4 - bitsetSize));
writer.Write(&bitMask, IntervalIndexBase::BitsetSize(bitsPerLevel));
}
template <class SinkT> void WriteIntervalIndexLeaf(SinkT & sink, int bitsPerLevel,
template <class SinkT> void WriteIntervalIndexLeaf(SinkT & sink, uint32_t bitsPerLevel,
uint64_t prevKey, uint64_t prevValue,
uint64_t key, uint64_t value)
{
@ -38,7 +39,7 @@ template <class SinkT> void WriteIntervalIndexLeaf(SinkT & sink, int bitsPerLeve
WriteVarUint(sink, code);
}
inline uint32_t IntervalIndexLeafSize(int bitsPerLevel,
inline uint32_t IntervalIndexLeafSize(uint32_t bitsPerLevel,
uint64_t prevKey, uint64_t prevValue,
uint64_t key, uint64_t value)
{
@ -93,8 +94,10 @@ void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & e
CHECK_LESS(keyBits, 63, ());
CHECK(impl::CheckIntervalIndexInputSequence(beg, end, keyBits), ());
typedef Bitset<IntervalIndexBase::Index::MAX_BITS_PER_LEVEL> BitsetType;
uint32_t const bitsPerLevel = 5;
uint32_t const lastBitsMask = (1 << bitsPerLevel) - 1;
uint32_t const nodeSize = 4 + IntervalIndexBase::BitsetSize(bitsPerLevel);
int const levelCount = (keyBits + bitsPerLevel - 1) / bitsPerLevel;
// Write header.
@ -109,20 +112,20 @@ void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & e
{
// Write empty index.
CHECK_GREATER(levelCount, 1, ());
impl::WriteIntervalIndexNode(writer, writer.Pos() + sizeof(IntervalIndexBase::Index), 0);
impl::WriteIntervalIndexNode(writer, writer.Pos() + nodeSize, bitsPerLevel, BitsetType());
LOG(LWARNING, ("Written empty index."));
return;
}
// Write internal nodes.
uint64_t childOffset = writer.Pos() + sizeof(IntervalIndexBase::Index);
uint64_t childOffset = writer.Pos() + nodeSize;
uint64_t nextChildOffset = childOffset;
for (int level = levelCount - 1; level >= 0; --level)
{
// LOG(LINFO, ("Building interval index, level", level));
uint64_t const initialLevelWriterPos = writer.Pos();
uint64_t bitMask = 0;
BitsetType bitMask = BitsetType();
uint64_t prevKey = 0;
uint64_t prevValue = 0;
for (CellIdValueIterT it = beg; it != end; ++it)
@ -133,28 +136,28 @@ void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & e
if (it != beg && (prevKey & ~lastBitsMask) != (key & ~lastBitsMask))
{
// Write node for the previous parent.
impl::WriteIntervalIndexNode(writer, childOffset, bitMask);
impl::WriteIntervalIndexNode(writer, childOffset, bitsPerLevel, bitMask);
childOffset = nextChildOffset;
bitMask = 0;
bitMask = BitsetType();
}
bitMask |= (1ULL << (key & lastBitsMask));
bitMask.SetBit(key & lastBitsMask);
if (level == 0)
nextChildOffset += impl::IntervalIndexLeafSize(bitsPerLevel,
prevKey, prevValue, key, value);
else if (it == beg || prevKey != key)
nextChildOffset += sizeof(IntervalIndexBase::Index);
nextChildOffset += nodeSize;
prevKey = key;
prevValue = value;
}
// Write the last node.
impl::WriteIntervalIndexNode(writer, childOffset, bitMask);
impl::WriteIntervalIndexNode(writer, childOffset, bitsPerLevel, bitMask);
if (level == 1)
nextChildOffset += sizeof(IntervalIndexBase::Index);
nextChildOffset += nodeSize;
childOffset = nextChildOffset;
@ -162,7 +165,7 @@ void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & e
}
// Write the dummy one-after-last node.
impl::WriteIntervalIndexNode(writer, nextChildOffset, 0);
impl::WriteIntervalIndexNode(writer, nextChildOffset, bitsPerLevel, BitsetType());
// Write leaves.
{