From b9d98b82f00bc677726a18abda5b6cc520e7394a Mon Sep 17 00:00:00 2001 From: Yury Melnichek Date: Thu, 28 Apr 2011 21:12:36 +0200 Subject: [PATCH] Add Bitset class and use in the IntervalIndex. --- base/base.pro | 1 + base/bitset.hpp | 29 ++++++++++++++++++++++ indexer/interval_index.hpp | 27 +++++++++++++-------- indexer/interval_index_builder.hpp | 39 ++++++++++++++++-------------- 4 files changed, 68 insertions(+), 28 deletions(-) create mode 100644 base/bitset.hpp diff --git a/base/base.pro b/base/base.pro index 3cc6a9c79c..46702103dc 100644 --- a/base/base.pro +++ b/base/base.pro @@ -63,3 +63,4 @@ HEADERS += \ path_utils.hpp \ array_adapters.hpp \ utf8_string.hpp \ + bitset.hpp \ diff --git a/base/bitset.hpp b/base/bitset.hpp new file mode 100644 index 0000000000..7b0867c013 --- /dev/null +++ b/base/bitset.hpp @@ -0,0 +1,29 @@ +#pragma once +#include "assert.hpp" +#include "base.hpp" +#include "../std/memcpy.hpp" + +template class Bitset +{ +public: + Bitset() { memset(&m_Data, 0, sizeof(m_Data)); } + + // Returns 1 if the bit at offset is set and 0 otherwise.
+ uint8_t Bit(uint32_t offset) const + { + ASSERT_LESS(offset, kBytes * 8, ()); + return (m_Data[offset >> 3] >> (offset & 7)) & 1; + } + + void SetBit(uint32_t offset, bool bSet = true) + { + ASSERT_LESS(offset, kBytes * 8, ()); + if (bSet) + m_Data[offset >> 3] |= (1 << (offset & 7)); + else + m_Data[offset >> 3] &= ~(1 << (offset & 7)); // Bitwise NOT: clears only this bit, the rest of the byte is kept. + } + +private: + uint8_t m_Data[kBytes]; +}; diff --git a/indexer/interval_index.hpp b/indexer/interval_index.hpp index 2894aae208..48c485f09f 100644 --- a/indexer/interval_index.hpp +++ b/indexer/interval_index.hpp @@ -5,6 +5,7 @@ #include "../base/assert.hpp" #include "../base/base.hpp" #include "../base/bits.hpp" +#include "../base/bitset.hpp" #include "../base/buffer_vector.hpp" #include "../base/macros.hpp" #include "../std/memcpy.hpp" @@ -24,13 +25,19 @@ public: struct Index { + enum { MAX_BITS_PER_LEVEL = 256 }; inline uint32_t GetOffset() const { return m_Offset; } - inline uint32_t Bit(uint32_t i) const { return (m_BitMask >> i) & 1; } + inline uint32_t Bit(uint32_t i) const { return m_Bitset.Bit(i); } uint32_t m_Offset; - uint32_t m_BitMask; + Bitset m_Bitset; }; - STATIC_ASSERT(sizeof(Index) == 8); + + static inline uint32_t BitsetSize(uint32_t bitsPerLevel) + { + ASSERT_GREATER(bitsPerLevel, 3, ()); + return 1 << (bitsPerLevel - 3); + } }; template @@ -51,7 +58,7 @@ public: : m_Reader(reader) { m_Reader.Read(0, &m_Header, sizeof(m_Header)); - ASSERT_EQUAL(m_Header.m_BitsPerLevel, 5, ()); + m_NodeSize = 4 + BitsetSize(m_Header.m_BitsPerLevel); ReadIndex(sizeof(m_Header), m_Level0Index); } @@ -64,7 +71,7 @@ public: { // end is inclusive in ForEachImpl(). --end; - ForEachImpl(f, beg, end, m_Level0Index, sizeof(m_Header) + sizeof(m_Level0Index), + ForEachImpl(f, beg, end, m_Level0Index, sizeof(m_Header) + m_NodeSize, (m_Header.m_Levels - 1) * m_Header.m_BitsPerLevel, query); } } @@ -102,9 +109,9 @@ private: uint64_t const e1 = (i == end0) ? 
(end & levelBytesFF) : levelBytesFF; Index index1; - uint32_t const offset = baseOffset + index.GetOffset() + (cumCount * sizeof(Index)); + uint32_t const offset = baseOffset + index.GetOffset() + (cumCount * m_NodeSize); ReadIndex(offset, index1); - ForEachImpl(f, b1, e1, index1, offset + sizeof(Index), + ForEachImpl(f, b1, e1, index1, offset + m_NodeSize, skipBits - m_Header.m_BitsPerLevel, query); ++cumCount; } @@ -115,7 +122,7 @@ private: Index nextIndex; ReadIndex(baseOffset, nextIndex); uint32_t const begOffset = baseOffset + index.GetOffset(); - uint32_t const endOffset = baseOffset + sizeof(Index) + nextIndex.GetOffset(); + uint32_t const endOffset = baseOffset + m_NodeSize + nextIndex.GetOffset(); ASSERT_LESS(begOffset, endOffset, (beg, end, baseOffset, skipBits)); buffer_vector data(endOffset - begOffset); m_Reader.Read(begOffset, &data[0], data.size()); @@ -142,13 +149,13 @@ private: void ReadIndex(uint64_t pos, Index & index) const { - m_Reader.Read(pos, &index, sizeof(Index)); + m_Reader.Read(pos, &index, m_NodeSize); index.m_Offset = SwapIfBigEndian(index.m_Offset); - index.m_BitMask = SwapIfBigEndian(index.m_BitMask); } ReaderT m_Reader; Header m_Header; + uint32_t m_NodeSize; Index m_Level0Index; int m_CellIdBytes; }; diff --git a/indexer/interval_index_builder.hpp b/indexer/interval_index_builder.hpp index 1802cbff35..4888e1251b 100644 --- a/indexer/interval_index_builder.hpp +++ b/indexer/interval_index_builder.hpp @@ -7,6 +7,7 @@ #include "../base/assert.hpp" #include "../base/base.hpp" #include "../base/bits.hpp" +#include "../base/bitset.hpp" #include "../base/logging.hpp" #include "../std/vector.hpp" #include "../std/memcpy.hpp" @@ -15,16 +16,16 @@ namespace impl { template -void WriteIntervalIndexNode(WriterT & writer, uint64_t offset, uint64_t bitMask) +void WriteIntervalIndexNode(WriterT & writer, uint64_t offset, uint32_t bitsPerLevel, + Bitset const & bitMask) { - // At the moment, uint32_t is used as a bitMask, but this can change in the 
future. - CHECK_EQUAL(static_cast(bitMask), bitMask, (offset)); - CHECK_GREATER_OR_EQUAL(offset, writer.Pos() + 8, ()); - WriteToSink(writer, static_cast(offset - writer.Pos() - 8)); - WriteToSink(writer, static_cast(bitMask)); + int const bitsetSize = IntervalIndexBase::BitsetSize(bitsPerLevel); + CHECK_GREATER_OR_EQUAL(offset, writer.Pos() + 4 + bitsetSize, ()); + WriteToSink(writer, static_cast(offset - writer.Pos() - 4 - bitsetSize)); + writer.Write(&bitMask, IntervalIndexBase::BitsetSize(bitsPerLevel)); } -template void WriteIntervalIndexLeaf(SinkT & sink, int bitsPerLevel, +template void WriteIntervalIndexLeaf(SinkT & sink, uint32_t bitsPerLevel, uint64_t prevKey, uint64_t prevValue, uint64_t key, uint64_t value) { @@ -38,7 +39,7 @@ template void WriteIntervalIndexLeaf(SinkT & sink, int bitsPerLeve WriteVarUint(sink, code); } -inline uint32_t IntervalIndexLeafSize(int bitsPerLevel, +inline uint32_t IntervalIndexLeafSize(uint32_t bitsPerLevel, uint64_t prevKey, uint64_t prevValue, uint64_t key, uint64_t value) { @@ -93,8 +94,10 @@ void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & e CHECK_LESS(keyBits, 63, ()); CHECK(impl::CheckIntervalIndexInputSequence(beg, end, keyBits), ()); + typedef Bitset BitsetType; uint32_t const bitsPerLevel = 5; uint32_t const lastBitsMask = (1 << bitsPerLevel) - 1; + uint32_t const nodeSize = 4 + IntervalIndexBase::BitsetSize(bitsPerLevel); int const levelCount = (keyBits + bitsPerLevel - 1) / bitsPerLevel; // Write header. @@ -109,20 +112,20 @@ void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & e { // Write empty index. CHECK_GREATER(levelCount, 1, ()); - impl::WriteIntervalIndexNode(writer, writer.Pos() + sizeof(IntervalIndexBase::Index), 0); + impl::WriteIntervalIndexNode(writer, writer.Pos() + nodeSize, bitsPerLevel, BitsetType()); LOG(LWARNING, ("Written empty index.")); return; } // Write internal nodes. 
- uint64_t childOffset = writer.Pos() + sizeof(IntervalIndexBase::Index); + uint64_t childOffset = writer.Pos() + nodeSize; uint64_t nextChildOffset = childOffset; for (int level = levelCount - 1; level >= 0; --level) { // LOG(LINFO, ("Building interval index, level", level)); uint64_t const initialLevelWriterPos = writer.Pos(); - uint64_t bitMask = 0; + BitsetType bitMask = BitsetType(); uint64_t prevKey = 0; uint64_t prevValue = 0; for (CellIdValueIterT it = beg; it != end; ++it) @@ -133,28 +136,28 @@ void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & e if (it != beg && (prevKey & ~lastBitsMask) != (key & ~lastBitsMask)) { // Write node for the previous parent. - impl::WriteIntervalIndexNode(writer, childOffset, bitMask); + impl::WriteIntervalIndexNode(writer, childOffset, bitsPerLevel, bitMask); childOffset = nextChildOffset; - bitMask = 0; + bitMask = BitsetType(); } - bitMask |= (1ULL << (key & lastBitsMask)); + bitMask.SetBit(key & lastBitsMask); if (level == 0) nextChildOffset += impl::IntervalIndexLeafSize(bitsPerLevel, prevKey, prevValue, key, value); else if (it == beg || prevKey != key) - nextChildOffset += sizeof(IntervalIndexBase::Index); + nextChildOffset += nodeSize; prevKey = key; prevValue = value; } // Write the last node. - impl::WriteIntervalIndexNode(writer, childOffset, bitMask); + impl::WriteIntervalIndexNode(writer, childOffset, bitsPerLevel, bitMask); if (level == 1) - nextChildOffset += sizeof(IntervalIndexBase::Index); + nextChildOffset += nodeSize; childOffset = nextChildOffset; @@ -162,7 +165,7 @@ void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & e } // Write the dummy one-after-last node. - impl::WriteIntervalIndexNode(writer, nextChildOffset, 0); + impl::WriteIntervalIndexNode(writer, nextChildOffset, bitsPerLevel, BitsetType()); // Write leaves. {