[indexer:interval_index] Add interval index version 2: 64 bit offset and size

This commit is contained in:
Anatoly Serdtcev 2019-06-11 11:47:04 +03:00 committed by mpimenov
parent 68ecbc850f
commit 9b149eef29
5 changed files with 194 additions and 73 deletions

View file

@ -48,8 +48,8 @@ UNIT_TEST(IntervalIndex_LevelCount)
UNIT_TEST(IntervalIndex_SerializedNodeBitmap)
{
uint32_t const offset = 350; // == 0x15E
uint32_t childSizes[8] = { 0, 0, 0, 10, 0, 0, 1000, 0 };
uint64_t const offset = 350; // == 0x15E
uint64_t childSizes[8] = { 0, 0, 0, 10, 0, 0, 1000, 0 };
char const expSerial [] =
"\xBD\x05" // (350 << 1) + 1 == 701 == 0x2BD - offset encoded as varuint.
"\x48" // (1 << 3) | (1 << 6) == 72 == 0x48 - bitmap.
@ -57,25 +57,56 @@ UNIT_TEST(IntervalIndex_SerializedNodeBitmap)
"\xE8\x07" // 1000 = 0x3E8 - childSizes[6] encoded as varuint.
"";
vector<uint8_t> serializedNode;
MemWriter<vector<uint8_t> > writer(serializedNode);
MemWriter<vector<uint8_t>> writer(serializedNode);
IntervalIndexBuilder(11, 1, 3).WriteNode(writer, offset, childSizes);
TEST_EQUAL(serializedNode, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
}
// Checks the V2 serialization of a bitmap node whose offset and one of whose
// child sizes exceed the uint32_t range.
UNIT_TEST(IntervalIndexV2_SerializedNodeBitmap)
{
uint64_t const offset = 5'547'468'350; // == 0x01'4A'A7'A6'3E
uint64_t childSizes[8] = { 0, 0, 0, 10, 0, 0, 6'200'000'000, 0 };
char const expSerial [] =
"\xFD\x98\xBD\xAA\x29" // (5'547'468'350 << 1) + 1 == 11'094'936'701 - offset encoded as varuint.
"\x48" // (1 << 3) | (1 << 6) == 72 == 0x48 - bitmap.
"\x0A" // 10 - childSizes[3] encoded as varuint.
"\x80\xFC\xB1\x8C\x17" // 6'200'000'000 - childSizes[6] encoded as varuint.
"";
vector<uint8_t> serializedNode;
MemWriter<vector<uint8_t>> writer(serializedNode);
IntervalIndexBuilder(IntervalIndexVersion::V2, 11, 1, 3).WriteNode(writer, offset, childSizes);
// ARRAY_SIZE(expSerial) - 1 drops the string literal's terminating NUL.
TEST_EQUAL(serializedNode, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
}
UNIT_TEST(IntervalIndex_SerializedNodeList)
{
uint32_t const offset = 350; // == 0x15E
uint32_t childSizes[16] = { 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
uint64_t const offset = 350; // == 0x15E
uint64_t childSizes[16] = { 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
char const expSerial [] =
"\xBC\x05" // (350 << 1) + 0 == 700 == 0x2BC - offset encoded as varuint.
"\x06" "\xE8\x07" // 6, 1000
"";
vector<uint8_t> serializedNode;
MemWriter<vector<uint8_t> > writer(serializedNode);
MemWriter<vector<uint8_t>> writer(serializedNode);
IntervalIndexBuilder(11, 1, 4).WriteNode(writer, offset, childSizes);
TEST_EQUAL(serializedNode, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
}
// Checks the V2 serialization of a list node whose offset and single child
// size exceed the uint32_t range.
UNIT_TEST(IntervalIndexV2_SerializedNodeList)
{
uint64_t const offset = 5'547'468'350; // == 0x01'4A'A7'A6'3E
uint64_t childSizes[16] = { 0, 0, 0, 0, 0, 0, 0, 6'200'000'000, 0, 0, 0, 0, 0, 0, 0, 0, };
char const expSerial [] =
"\xFC\x98\xBD\xAA\x29" // (5'547'468'350 << 1) + 0 == 11'094'936'700 - offset encoded as varuint.
"\x07" "\x80\xFC\xB1\x8C\x17" // 7, 6'200'000'000
"";
vector<uint8_t> serializedNode;
MemWriter<vector<uint8_t>> writer(serializedNode);
IntervalIndexBuilder(IntervalIndexVersion::V2, 11, 1, 4).WriteNode(writer, offset, childSizes);
// ARRAY_SIZE(expSerial) - 1 drops the string literal's terminating NUL.
TEST_EQUAL(serializedNode, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1),
(DebugPrint(serializedNode), DebugPrint(expSerial)));
}
UNIT_TEST(IntervalIndex_SerializedLeaves)
{
vector<CellIdFeaturePairForTest> data;
@ -83,13 +114,13 @@ UNIT_TEST(IntervalIndex_SerializedLeaves)
data.push_back(CellIdFeaturePairForTest(0x1538U, 1));
data.push_back(CellIdFeaturePairForTest(0x1637U, 2));
vector<uint8_t> serialLeaves;
MemWriter<vector<uint8_t> > writer(serialLeaves);
vector<uint32_t> sizes;
MemWriter<vector<uint8_t>> writer(serialLeaves);
vector<uint64_t> sizes;
IntervalIndexBuilder(16, 1, 4).BuildLeaves(writer, data.begin(), data.end(), sizes);
char const expSerial [] = "\x37\x00" "\x38\x02" "\x37\x04"; // 0x1537 0x1538 0x1637
uint32_t const expSizes [] = { 4, 2 };
TEST_EQUAL(serialLeaves, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
TEST_EQUAL(sizes, vector<uint32_t>(expSizes, expSizes + ARRAY_SIZE(expSizes)), ());
TEST_EQUAL(sizes, vector<uint64_t>(expSizes, expSizes + ARRAY_SIZE(expSizes)), ());
}
UNIT_TEST(IntervalIndex_SerializedNodes)
@ -98,17 +129,17 @@ UNIT_TEST(IntervalIndex_SerializedNodes)
data.push_back(CellIdFeaturePairForTest(0x1537U, 0));
data.push_back(CellIdFeaturePairForTest(0x1538U, 1));
data.push_back(CellIdFeaturePairForTest(0x1637U, 2));
uint32_t const leavesSizes [] = { 4, 2 };
uint64_t const leavesSizes [] = { 4, 2 };
vector<uint8_t> serialNodes;
MemWriter<vector<uint8_t> > writer(serialNodes);
vector<uint32_t> sizes;
MemWriter<vector<uint8_t>> writer(serialNodes);
vector<uint64_t> sizes;
IntervalIndexBuilder(16, 1, 4).BuildLevel(writer, data.begin(), data.end(), 1,
leavesSizes, leavesSizes + ARRAY_SIZE(leavesSizes),
sizes);
char const expSerial [] = "\x01\x60\x00\x04\x02";
uint32_t const expSizes [] = { ARRAY_SIZE(expSerial) - 1 };
TEST_EQUAL(serialNodes, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
TEST_EQUAL(sizes, vector<uint32_t>(expSizes, expSizes + ARRAY_SIZE(expSizes)), ());
TEST_EQUAL(sizes, vector<uint64_t>(expSizes, expSizes + ARRAY_SIZE(expSizes)), ());
}
UNIT_TEST(IntervalIndex_Serialized)
@ -118,7 +149,7 @@ UNIT_TEST(IntervalIndex_Serialized)
data.push_back(CellIdFeaturePairForTest(0x1538U, 1));
data.push_back(CellIdFeaturePairForTest(0x1637U, 2));
vector<uint8_t> serialIndex;
MemWriter<vector<uint8_t> > writer(serialIndex);
MemWriter<vector<uint8_t>> writer(serialIndex);
IntervalIndexBuilder(16, 1, 4).BuildIndex(writer, data.begin(), data.end());
char const expSerial [] =
@ -143,6 +174,38 @@ UNIT_TEST(IntervalIndex_Serialized)
TEST_EQUAL(values, vector<uint32_t>(expected, expected + ARRAY_SIZE(expected)), ());
}
// Builds a whole V2 index from three cells, compares it byte-for-byte with the
// expected layout (4-byte header followed by four 8-byte level offsets), then
// reads it back through IntervalIndex.
UNIT_TEST(IntervalIndexV2_Serialized)
{
vector<CellIdFeaturePairForTest> data;
data.push_back(CellIdFeaturePairForTest(0x1537U, 0));
data.push_back(CellIdFeaturePairForTest(0x1538U, 1));
data.push_back(CellIdFeaturePairForTest(0x1637U, 2));
vector<uint8_t> serialIndex;
MemWriter<vector<uint8_t>> writer(serialIndex);
IntervalIndexBuilder(IntervalIndexVersion::V2, 16, 1, 4).BuildIndex(writer, data.begin(), data.end());
char const expSerial [] =
"\x02\x02\x04\x01" // Header
"\x24\x00\x00\x00\x00\x00\x00\x00" // Leaves level offset: 0x24 == 4 + 4 * 8.
"\x2A\x00\x00\x00\x00\x00\x00\x00" // Level 1 offset
"\x2F\x00\x00\x00\x00\x00\x00\x00" // Root level offset
"\x32\x00\x00\x00\x00\x00\x00\x00" // End offset: position right after the root level (total size, 50 bytes).
"\x37\x00" "\x38\x02" "\x37\x04" // 0x1537 0x1538 0x1637
"\x01\x60\x00\x04\x02" // 0x15, 0x16 node
"\x00\x01\x05" // Root
"";
// ARRAY_SIZE(expSerial) - 1 drops the string literal's terminating NUL.
TEST_EQUAL(serialIndex, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
MemReader reader(&serialIndex[0], serialIndex.size());
IntervalIndex<MemReader, uint32_t> index(reader);
uint32_t expected [] = {0, 1, 2};
vector<uint32_t> values;
TEST_EQUAL(index.KeyEnd(), 0x10000, ());
index.ForEach(IndexValueInserter(values), 0, 0x10000);
TEST_EQUAL(values, vector<uint32_t>(expected, expected + ARRAY_SIZE(expected)), ());
}
UNIT_TEST(IntervalIndex_Simple)
{
vector<CellIdFeaturePairForTest> data;
@ -150,7 +213,7 @@ UNIT_TEST(IntervalIndex_Simple)
data.push_back(CellIdFeaturePairForTest(0xA0B1C2D200ULL, 1));
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D100ULL, 2));
vector<char> serialIndex;
MemWriter<vector<char> > writer(serialIndex);
MemWriter<vector<char>> writer(serialIndex);
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
MemReader reader(&serialIndex[0], serialIndex.size());
IntervalIndex<MemReader, uint32_t> index(reader);
@ -201,7 +264,7 @@ UNIT_TEST(IntervalIndex_Empty)
{
vector<CellIdFeaturePairForTest> data;
vector<char> serialIndex;
MemWriter<vector<char> > writer(serialIndex);
MemWriter<vector<char>> writer(serialIndex);
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
MemReader reader(&serialIndex[0], serialIndex.size());
IntervalIndex<MemReader, uint32_t> index(reader);
@ -220,7 +283,7 @@ UNIT_TEST(IntervalIndex_Simple2)
data.push_back(CellIdFeaturePairForTest(0xA0B1C2D200ULL, 3));
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D200ULL, 2));
vector<char> serialIndex;
MemWriter<vector<char> > writer(serialIndex);
MemWriter<vector<char>> writer(serialIndex);
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
MemReader reader(&serialIndex[0], serialIndex.size());
IntervalIndex<MemReader, uint32_t> index(reader);
@ -239,7 +302,7 @@ UNIT_TEST(IntervalIndex_Simple3)
data.push_back(CellIdFeaturePairForTest(0x0100ULL, 0));
data.push_back(CellIdFeaturePairForTest(0x0200ULL, 1));
vector<char> serialIndex;
MemWriter<vector<char> > writer(serialIndex);
MemWriter<vector<char>> writer(serialIndex);
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
MemReader reader(&serialIndex[0], serialIndex.size());
IntervalIndex<MemReader, uint32_t> index(reader);
@ -258,7 +321,7 @@ UNIT_TEST(IntervalIndex_Simple4)
data.push_back(CellIdFeaturePairForTest(0x01030400ULL, 0));
data.push_back(CellIdFeaturePairForTest(0x02030400ULL, 1));
vector<char> serialIndex;
MemWriter<vector<char> > writer(serialIndex);
MemWriter<vector<char>> writer(serialIndex);
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
MemReader reader(&serialIndex[0], serialIndex.size());
IntervalIndex<MemReader, uint32_t> index(reader);
@ -279,7 +342,7 @@ UNIT_TEST(IntervalIndex_Simple5)
data.push_back(CellIdFeaturePairForTest(0xA0B1C2D200ULL, 3));
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D200ULL, 2));
vector<char> serialIndex;
MemWriter<vector<char> > writer(serialIndex);
MemWriter<vector<char>> writer(serialIndex);
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
MemReader reader(&serialIndex[0], serialIndex.size());
IntervalIndex<MemReader, uint32_t> index(reader);

View file

@ -8,6 +8,13 @@
#include "base/buffer_vector.hpp"
#include <cstdint>
#include <string>
// Serialization format version of the interval index; the numeric value is
// what gets stored in the header's version byte.
enum class IntervalIndexVersion : uint8_t
{
// Level offsets are stored as 32-bit values; the builder checks that node
// offsets and child sizes fit into uint32_t.
V1 = 1,
// Level offsets are stored as 64-bit values; node offsets and child sizes
// may use the full 64-bit varuint range.
V2 = 2,
};
class IntervalIndexBase
{
@ -28,8 +35,6 @@ public:
ASSERT_GREATER(bitsPerLevel, 3, ());
return 1 << (bitsPerLevel - 3);
}
enum { kVersion = 1 };
};
template <class ReaderT, typename Value>
@ -42,10 +47,18 @@ public:
{
ReaderSource<ReaderT> src(reader);
src.Read(&m_Header, sizeof(Header));
CHECK_EQUAL(m_Header.m_Version, static_cast<uint8_t>(kVersion), ());
auto const version = static_cast<IntervalIndexVersion>(m_Header.m_Version);
CHECK(version == IntervalIndexVersion::V1 || version == IntervalIndexVersion::V2, ());
if (m_Header.m_Levels != 0)
{
for (int i = 0; i <= m_Header.m_Levels + 1; ++i)
m_LevelOffsets.push_back(ReadPrimitiveFromSource<uint32_t>(src));
{
uint64_t levelOffset =
version == IntervalIndexVersion::V1 ? ReadPrimitiveFromSource<uint32_t>(src)
: ReadPrimitiveFromSource<uint64_t>(src);
m_LevelOffsets.push_back(levelOffset);
}
}
}
uint64_t KeyEnd() const
@ -74,7 +87,7 @@ public:
private:
template <typename F>
void ForEachLeaf(F const & f, uint64_t const beg, uint64_t const end,
uint32_t const offset, uint32_t const size,
uint64_t const offset, uint64_t const size,
uint64_t keyBase /* discarded part of object key value in the parent nodes*/) const
{
buffer_vector<uint8_t, 1024> data;
@ -100,7 +113,7 @@ private:
template <typename F>
void ForEachNode(F const & f, uint64_t beg, uint64_t end, int level,
uint32_t offset, uint32_t size,
uint64_t offset, uint64_t size,
uint64_t keyBase /* discarded part of object key value in the parent nodes */) const
{
offset += m_LevelOffsets[level];
@ -125,8 +138,8 @@ private:
m_Reader.Read(offset, &data[0], size);
ArrayByteSource src(&data[0]);
uint32_t const offsetAndFlag = ReadVarUint<uint32_t>(src);
uint32_t childOffset = offsetAndFlag >> 1;
uint64_t const offsetAndFlag = ReadVarUint<uint64_t>(src);
uint64_t childOffset = offsetAndFlag >> 1;
if (offsetAndFlag & 1)
{
// Reading bitmap.
@ -136,7 +149,7 @@ private:
{
if (bits::GetBit(pBitmap, i))
{
uint32_t childSize = ReadVarUint<uint32_t>(src);
uint64_t childSize = ReadVarUint<uint64_t>(src);
if (i >= beg0)
{
uint64_t const beg1 = (i == beg0) ? (beg & levelBytesFF) : 0;
@ -147,7 +160,7 @@ private:
}
}
ASSERT(end0 != (static_cast<uint32_t>(1) << m_Header.m_BitsPerLevel) - 1 ||
static_cast<uint8_t const *>(src.Ptr()) - &data[0] == size,
static_cast<size_t>(static_cast<uint8_t const *>(src.Ptr()) - &data[0]) == size,
(beg, end, beg0, end0, offset, size, src.Ptr(), &data[0]));
}
else
@ -158,7 +171,7 @@ private:
uint8_t const i = src.ReadByte();
if (i > end0)
break;
uint32_t childSize = ReadVarUint<uint32_t>(src);
uint64_t childSize = ReadVarUint<uint64_t>(src);
if (i >= beg0)
{
uint64_t const beg1 = (i == beg0) ? (beg & levelBytesFF) : 0;
@ -172,5 +185,5 @@ private:
ReaderT m_Reader;
Header m_Header;
buffer_vector<uint32_t, 7> m_LevelOffsets;
buffer_vector<uint64_t, 7> m_LevelOffsets;
};

View file

@ -10,6 +10,7 @@
#include "base/assert.hpp"
#include "base/base.hpp"
#include "base/bits.hpp"
#include "base/checked_cast.hpp"
#include "base/logging.hpp"
#include <cstdint>
@ -39,8 +40,17 @@ class IntervalIndexBuilder
{
public:
IntervalIndexBuilder(uint32_t keyBits, uint32_t leafBytes, uint32_t bitsPerLevel = 8)
: m_BitsPerLevel(bitsPerLevel), m_LeafBytes(leafBytes)
: IntervalIndexBuilder(IntervalIndexVersion::V1, keyBits, leafBytes, bitsPerLevel)
{ }
IntervalIndexBuilder(IntervalIndexVersion version, uint32_t keyBits, uint32_t leafBytes,
uint32_t bitsPerLevel = 8)
: m_version{version}, m_BitsPerLevel(bitsPerLevel), m_LeafBytes(leafBytes)
{
CHECK_GREATER_OR_EQUAL(
static_cast<uint8_t>(version), static_cast<uint8_t>(IntervalIndexVersion::V1), ());
CHECK_LESS_OR_EQUAL(
static_cast<uint8_t>(version), static_cast<uint8_t>(IntervalIndexVersion::V2), ());
CHECK_GREATER(leafBytes, 0, ());
CHECK_LESS(keyBits, 63, ());
int const nodeKeyBits = keyBits - (m_LeafBytes << 3);
@ -59,7 +69,7 @@ public:
if (beg == end)
{
IntervalIndexBase::Header header;
header.m_Version = IntervalIndexBase::kVersion;
header.m_Version = static_cast<uint8_t>(m_version);
header.m_BitsPerLevel = 0;
header.m_Levels = 0;
header.m_LeafBytes = 0;
@ -69,21 +79,21 @@ public:
uint64_t const initialPos = writer.Pos();
WriteZeroesToSink(writer, sizeof(IntervalIndexBase::Header));
WriteZeroesToSink(writer, 4 * (m_Levels + 2));
WriteZeroesToSink(writer, (m_version == IntervalIndexVersion::V1 ? 4 : 8) * (m_Levels + 2));
uint64_t const afterHeaderPos = writer.Pos();
std::vector<uint32_t> levelOffset;
std::vector<uint64_t> levelOffset;
{
std::vector<uint32_t> offsets;
levelOffset.push_back(static_cast<uint32_t>(writer.Pos()));
std::vector<uint64_t> offsets;
levelOffset.push_back(writer.Pos());
BuildLeaves(writer, beg, end, offsets);
levelOffset.push_back(static_cast<uint32_t>(writer.Pos()));
levelOffset.push_back(writer.Pos());
for (int i = 1; i <= static_cast<int>(m_Levels); ++i)
{
std::vector<uint32_t> nextOffsets;
std::vector<uint64_t> nextOffsets;
BuildLevel(writer, beg, end, i, &offsets[0], &offsets[0] + offsets.size(), nextOffsets);
nextOffsets.swap(offsets);
levelOffset.push_back(static_cast<uint32_t>(writer.Pos()));
levelOffset.push_back(writer.Pos());
}
}
@ -93,7 +103,7 @@ public:
// Write header.
{
IntervalIndexBase::Header header;
header.m_Version = IntervalIndexBase::kVersion;
header.m_Version = static_cast<uint8_t>(m_version);
header.m_BitsPerLevel = static_cast<uint8_t>(m_BitsPerLevel);
ASSERT_EQUAL(header.m_BitsPerLevel, m_BitsPerLevel, ());
header.m_Levels = static_cast<uint8_t>(m_Levels);
@ -105,7 +115,12 @@ public:
// Write level offsets.
for (size_t i = 0; i < levelOffset.size(); ++i)
WriteToSink(writer, levelOffset[i]);
{
if (m_version == IntervalIndexVersion::V1)
WriteToSink(writer, base::checked_cast<uint32_t>(levelOffset[i]));
else
WriteToSink(writer, levelOffset[i]);
}
uint64_t const pos = writer.Pos();
CHECK_EQUAL(pos, afterHeaderPos, ());
@ -118,8 +133,8 @@ public:
// Check that [beg, end) is sorted and log most populous cell.
if (beg != end)
{
uint32_t count = 0;
uint32_t maxCount = 0;
uint64_t count = 0;
uint64_t maxCount = 0;
typename CellIdValueIter::value_type mostPopulousCell = *beg;
CellIdValueIter it = beg;
uint64_t prev = it->GetCell();
@ -154,7 +169,7 @@ public:
}
template <class SinkT>
uint32_t WriteNode(SinkT & sink, uint32_t offset, uint32_t * childSizes)
uint64_t WriteNode(SinkT & sink, uint64_t offset, uint64_t * childSizes)
{
std::vector<uint8_t> bitmapSerial, listSerial;
bitmapSerial.reserve(1024);
@ -166,28 +181,28 @@ public:
{
sink.Write(&bitmapSerial[0], bitmapSerial.size());
ASSERT_EQUAL(bitmapSerial.size(), static_cast<uint32_t>(bitmapSerial.size()), ());
return static_cast<uint32_t>(bitmapSerial.size());
return bitmapSerial.size();
}
else
{
sink.Write(&listSerial[0], listSerial.size());
ASSERT_EQUAL(listSerial.size(), static_cast<uint32_t>(listSerial.size()), ());
return static_cast<uint32_t>(listSerial.size());
return listSerial.size();
}
}
template <class Writer, typename CellIdValueIter>
void BuildLevel(Writer & writer, CellIdValueIter const & beg, CellIdValueIter const & end,
int level, uint32_t const * childSizesBeg, uint32_t const * childSizesEnd,
std::vector<uint32_t> & sizes)
int level, uint64_t const * childSizesBeg, uint64_t const * childSizesEnd,
std::vector<uint64_t> & sizes)
{
UNUSED_VALUE(childSizesEnd);
ASSERT_GREATER(level, 0, ());
uint32_t const skipBits = m_LeafBytes * 8 + (level - 1) * m_BitsPerLevel;
std::vector<uint32_t> expandedSizes(1 << m_BitsPerLevel);
std::vector<uint64_t> expandedSizes(1 << m_BitsPerLevel);
uint64_t prevKey = static_cast<uint64_t>(-1);
uint32_t childOffset = 0;
uint32_t nextChildOffset = 0;
uint64_t childOffset = 0;
uint64_t nextChildOffset = 0;
for (CellIdValueIter it = beg; it != end; ++it)
{
uint64_t const key = it->GetCell() >> skipBits;
@ -202,7 +217,8 @@ public:
}
nextChildOffset += *childSizesBeg;
expandedSizes[key & m_LastBitsMask] += *childSizesBeg;
CHECK_EQUAL(expandedSizes[key & m_LastBitsMask], 0, ());
expandedSizes[key & m_LastBitsMask] = *childSizesBeg;
++childSizesBeg;
prevKey = key;
}
@ -212,7 +228,7 @@ public:
template <class Writer, typename CellIdValueIter>
void BuildLeaves(Writer & writer, CellIdValueIter const & beg, CellIdValueIter const & end,
std::vector<uint32_t> & sizes)
std::vector<uint64_t> & sizes)
{
using Value = typename CellIdValueIter::value_type::ValueType;
@ -226,7 +242,7 @@ public:
Value const value = it->GetValue();
if (it != beg && (key >> skipBits) != (prevKey >> skipBits))
{
sizes.push_back(static_cast<uint32_t>(writer.Pos() - prevPos));
sizes.push_back(writer.Pos() - prevPos);
prevValue = 0;
prevPos = writer.Pos();
}
@ -236,46 +252,74 @@ public:
prevKey = key;
prevValue = value;
}
sizes.push_back(static_cast<uint32_t>(writer.Pos() - prevPos));
sizes.push_back(writer.Pos() - prevPos);
}
template <class SinkT>
void WriteBitmapNode(SinkT & sink, uint32_t offset, uint32_t * childSizes)
void WriteBitmapNode(SinkT & sink, uint64_t offset, uint64_t * childSizes)
{
ASSERT_GREATER_OR_EQUAL(m_BitsPerLevel, 3, ());
WriteVarUint(sink, (offset << 1) + 1);
if (m_version == IntervalIndexVersion::V1)
CHECK_LESS_OR_EQUAL(offset, std::numeric_limits<uint32_t>::max() >> 1, ());
else
CHECK_LESS_OR_EQUAL(offset, std::numeric_limits<uint64_t>::max() >> 1, ());
uint64_t const offsetAndFlag = (offset << 1) + 1;
WriteVarUint(sink, offsetAndFlag);
buffer_vector<uint8_t, 32> bitMask(1 << (m_BitsPerLevel - 3));
for (uint32_t i = 0; i < static_cast<uint32_t>(1 << m_BitsPerLevel); ++i)
if (childSizes[i])
bits::SetBitTo1(&bitMask[0], i);
sink.Write(&bitMask[0], bitMask.size());
for (uint32_t i = 0; i < static_cast<uint32_t>(1 << m_BitsPerLevel); ++i)
if (childSizes[i])
WriteVarUint(sink, childSizes[i]);
{
uint64_t size = childSizes[i];
if (!size)
continue;
if (m_version == IntervalIndexVersion::V1)
CHECK_LESS_OR_EQUAL(size, std::numeric_limits<uint32_t>::max(), ());
WriteVarUint(sink, size);
}
}
template <class SinkT>
void WriteListNode(SinkT & sink, uint32_t offset, uint32_t * childSizes)
void WriteListNode(SinkT & sink, uint64_t offset, uint64_t * childSizes)
{
ASSERT_LESS_OR_EQUAL(m_BitsPerLevel, 8, ());
WriteVarUint(sink, (offset << 1));
if (m_version == IntervalIndexVersion::V1)
CHECK_LESS_OR_EQUAL(offset, std::numeric_limits<uint32_t>::max() >> 1, ());
else
CHECK_LESS_OR_EQUAL(offset, std::numeric_limits<uint64_t>::max() >> 1, ());
uint64_t const offsetAndFlag = offset << 1;
WriteVarUint(sink, offsetAndFlag);
for (uint32_t i = 0; i < static_cast<uint32_t>(1 << m_BitsPerLevel); ++i)
{
if (childSizes[i])
{
WriteToSink(sink, static_cast<uint8_t>(i));
WriteVarUint(sink, childSizes[i]);
}
uint64_t size = childSizes[i];
if (!size)
continue;
WriteToSink(sink, static_cast<uint8_t>(i));
if (m_version == IntervalIndexVersion::V1)
CHECK_LESS_OR_EQUAL(size, std::numeric_limits<uint32_t>::max(), ());
WriteVarUint(sink, size);
}
}
private:
IntervalIndexVersion m_version;
uint32_t m_Levels, m_BitsPerLevel, m_LeafBytes, m_LastBitsMask;
};
template <class Writer, typename CellIdValueIter>
void BuildIntervalIndex(CellIdValueIter const & beg, CellIdValueIter const & end, Writer & writer,
uint32_t keyBits)
uint32_t keyBits,
IntervalIndexVersion version = IntervalIndexVersion::V1)
{
IntervalIndexBuilder(keyBits, 1).BuildIndex(writer, beg, end);
IntervalIndexBuilder(version, keyBits, 1).BuildIndex(writer, beg, end);
}

View file

@ -72,7 +72,7 @@ bool BuildLocalityIndexFromDataFile(string const & dataFile,
FileWriter writer(idxFileName);
covering::BuildLocalityIndex<LocalityVector<ModelReaderPtr>, FileWriter, DEPTH_LEVELS>(
localities.GetVector(), writer, coverLocality, outFileName);
localities.GetVector(), writer, coverLocality, outFileName, IntervalIndexVersion::V2);
}
FilesContainerW(outFileName, FileWriter::OP_WRITE_TRUNCATE)

View file

@ -28,7 +28,8 @@ using CoverLocality =
template <class ObjectsVector, class Writer, int DEPTH_LEVELS>
void BuildLocalityIndex(ObjectsVector const & objects, Writer & writer,
CoverLocality const & coverLocality, std::string const & tmpFilePrefix)
CoverLocality const & coverLocality, std::string const & tmpFilePrefix,
IntervalIndexVersion version = IntervalIndexVersion::V1)
{
std::string const cellsToValueFile = tmpFilePrefix + CELL2LOCALITY_SORTED_EXT + ".all";
SCOPE_GUARD(cellsToValueFileGuard, std::bind(&FileWriter::DeleteFileX, cellsToValueFile));
@ -51,7 +52,7 @@ void BuildLocalityIndex(ObjectsVector const & objects, Writer & writer,
DDVector<CellValuePair<uint64_t>, FileReader, uint64_t> cellsToValue(reader);
{
BuildIntervalIndex(cellsToValue.begin(), cellsToValue.end(), writer, DEPTH_LEVELS * 2 + 1);
BuildIntervalIndex(cellsToValue.begin(), cellsToValue.end(), writer, DEPTH_LEVELS * 2 + 1, version);
}
}
} // namespace covering