forked from organicmaps/organicmaps
[indexer:interval_index] Add interval index version 2: 64 bit offset and size
This commit is contained in:
parent
68ecbc850f
commit
9b149eef29
5 changed files with 194 additions and 73 deletions
|
@ -48,8 +48,8 @@ UNIT_TEST(IntervalIndex_LevelCount)
|
|||
|
||||
UNIT_TEST(IntervalIndex_SerializedNodeBitmap)
|
||||
{
|
||||
uint32_t const offset = 350; // == 0x15E
|
||||
uint32_t childSizes[8] = { 0, 0, 0, 10, 0, 0, 1000, 0 };
|
||||
uint64_t const offset = 350; // == 0x15E
|
||||
uint64_t childSizes[8] = { 0, 0, 0, 10, 0, 0, 1000, 0 };
|
||||
char const expSerial [] =
|
||||
"\xBD\x05" // (350 << 1) + 1 == 701 == 0x2BD - offset encoded as varuint.
|
||||
"\x48" // (1 << 3) | (1 << 6) == 72 == 0x48 - bitmap.
|
||||
|
@ -57,25 +57,56 @@ UNIT_TEST(IntervalIndex_SerializedNodeBitmap)
|
|||
"\xE8\x07" // 1000 = 0x3E8 - childSizes[6] encoded as varuint.
|
||||
"";
|
||||
vector<uint8_t> serializedNode;
|
||||
MemWriter<vector<uint8_t> > writer(serializedNode);
|
||||
MemWriter<vector<uint8_t>> writer(serializedNode);
|
||||
IntervalIndexBuilder(11, 1, 3).WriteNode(writer, offset, childSizes);
|
||||
TEST_EQUAL(serializedNode, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
|
||||
}
|
||||
|
||||
// Checks the bytes written by WriteNode() of a V2 builder for a bitmap node whose
// child offset and one child size are both too large for 32 bits.
UNIT_TEST(IntervalIndexV2_SerializedNodeBitmap)
|
||||
{
|
||||
uint64_t const offset = 5'547'468'350; // == 0x01'4A'A7'A6'3E, larger than 32 bits can hold.
|
||||
uint64_t childSizes[8] = { 0, 0, 0, 10, 0, 0, 6'200'000'000, 0 };
|
||||
char const expSerial [] =
|
||||
"\xFD\x98\xBD\xAA\x29" // (5'547'468'350 << 1) + 1 == 11'094'936'701 - offset encoded as varuint, low bit 1.
|
||||
"\x48" // (1 << 3) | (1 << 6) == 72 == 0x48 - bitmap.
|
||||
"\x0A" // 10 - childSizes[3] encoded as varuint.
|
||||
"\x80\xFC\xB1\x8C\x17" // 6'200'000'000 - childSizes[6] encoded as varuint.
|
||||
"";
|
||||
vector<uint8_t> serializedNode;
|
||||
MemWriter<vector<uint8_t>> writer(serializedNode);
|
||||
IntervalIndexBuilder(IntervalIndexVersion::V2, 11, 1, 3).WriteNode(writer, offset, childSizes);
|
||||
// ARRAY_SIZE(expSerial) - 1 drops the terminating '\0' of the string literal.
TEST_EQUAL(serializedNode, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
|
||||
}
|
||||
|
||||
// Checks the bytes written by WriteNode() for a node with a single non-empty child
// out of 16, using the builder constructor that takes no explicit version.
// NOTE(review): this view appears to show the pre-change uint32_t / "> >" lines next
// to their uint64_t / ">>" replacements - confirm against the actual file.
UNIT_TEST(IntervalIndex_SerializedNodeList)
|
||||
{
|
||||
uint32_t const offset = 350; // == 0x15E
|
||||
uint32_t childSizes[16] = { 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
uint64_t const offset = 350; // == 0x15E
|
||||
uint64_t childSizes[16] = { 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
char const expSerial [] =
|
||||
"\xBC\x05" // (350 << 1) + 0 == 700 == 0x2BC - offset encoded as varuint.
|
||||
"\x06" "\xE8\x07" // 6 - child index as one byte, 1000 == 0x3E8 - childSizes[6] encoded as varuint.
|
||||
"";
|
||||
vector<uint8_t> serializedNode;
|
||||
MemWriter<vector<uint8_t> > writer(serializedNode);
|
||||
MemWriter<vector<uint8_t>> writer(serializedNode);
|
||||
IntervalIndexBuilder(11, 1, 4).WriteNode(writer, offset, childSizes);
|
||||
// ARRAY_SIZE(expSerial) - 1 drops the terminating '\0' of the string literal.
TEST_EQUAL(serializedNode, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
|
||||
}
|
||||
|
||||
// Checks the bytes written by WriteNode() of a V2 builder for a list node whose
// child offset and only child size are both too large for 32 bits.
UNIT_TEST(IntervalIndexV2_SerializedNodeList)
|
||||
{
|
||||
uint64_t const offset = 5'547'468'350; // == 0x01'4A'A7'A6'3E, larger than 32 bits can hold.
|
||||
uint64_t childSizes[16] = { 0, 0, 0, 0, 0, 0, 0, 6'200'000'000, 0, 0, 0, 0, 0, 0, 0, 0, };
|
||||
char const expSerial [] =
|
||||
"\xFC\x98\xBD\xAA\x29" // (5'547'468'350 << 1) + 0 == 11'094'936'700 - offset encoded as varuint, low bit 0.
|
||||
"\x07" "\x80\xFC\xB1\x8C\x17" // 7 - child index as one byte, 6'200'000'000 - childSizes[7] encoded as varuint.
|
||||
"";
|
||||
vector<uint8_t> serializedNode;
|
||||
MemWriter<vector<uint8_t>> writer(serializedNode);
|
||||
IntervalIndexBuilder(IntervalIndexVersion::V2, 11, 1, 4).WriteNode(writer, offset, childSizes);
|
||||
// ARRAY_SIZE(expSerial) - 1 drops the terminating '\0' of the string literal.
TEST_EQUAL(serializedNode, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1),
|
||||
(DebugPrint(serializedNode), DebugPrint(expSerial)));
|
||||
}
|
||||
|
||||
UNIT_TEST(IntervalIndex_SerializedLeaves)
|
||||
{
|
||||
vector<CellIdFeaturePairForTest> data;
|
||||
|
@ -83,13 +114,13 @@ UNIT_TEST(IntervalIndex_SerializedLeaves)
|
|||
data.push_back(CellIdFeaturePairForTest(0x1538U, 1));
|
||||
data.push_back(CellIdFeaturePairForTest(0x1637U, 2));
|
||||
vector<uint8_t> serialLeaves;
|
||||
MemWriter<vector<uint8_t> > writer(serialLeaves);
|
||||
vector<uint32_t> sizes;
|
||||
MemWriter<vector<uint8_t>> writer(serialLeaves);
|
||||
vector<uint64_t> sizes;
|
||||
IntervalIndexBuilder(16, 1, 4).BuildLeaves(writer, data.begin(), data.end(), sizes);
|
||||
char const expSerial [] = "\x37\x00" "\x38\x02" "\x37\x04"; // 0x1537 0x1538 0x1637
|
||||
uint32_t const expSizes [] = { 4, 2 };
|
||||
TEST_EQUAL(serialLeaves, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
|
||||
TEST_EQUAL(sizes, vector<uint32_t>(expSizes, expSizes + ARRAY_SIZE(expSizes)), ());
|
||||
TEST_EQUAL(sizes, vector<uint64_t>(expSizes, expSizes + ARRAY_SIZE(expSizes)), ());
|
||||
}
|
||||
|
||||
UNIT_TEST(IntervalIndex_SerializedNodes)
|
||||
|
@ -98,17 +129,17 @@ UNIT_TEST(IntervalIndex_SerializedNodes)
|
|||
data.push_back(CellIdFeaturePairForTest(0x1537U, 0));
|
||||
data.push_back(CellIdFeaturePairForTest(0x1538U, 1));
|
||||
data.push_back(CellIdFeaturePairForTest(0x1637U, 2));
|
||||
uint32_t const leavesSizes [] = { 4, 2 };
|
||||
uint64_t const leavesSizes [] = { 4, 2 };
|
||||
vector<uint8_t> serialNodes;
|
||||
MemWriter<vector<uint8_t> > writer(serialNodes);
|
||||
vector<uint32_t> sizes;
|
||||
MemWriter<vector<uint8_t>> writer(serialNodes);
|
||||
vector<uint64_t> sizes;
|
||||
IntervalIndexBuilder(16, 1, 4).BuildLevel(writer, data.begin(), data.end(), 1,
|
||||
leavesSizes, leavesSizes + ARRAY_SIZE(leavesSizes),
|
||||
sizes);
|
||||
char const expSerial [] = "\x01\x60\x00\x04\x02";
|
||||
uint32_t const expSizes [] = { ARRAY_SIZE(expSerial) - 1 };
|
||||
TEST_EQUAL(serialNodes, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
|
||||
TEST_EQUAL(sizes, vector<uint32_t>(expSizes, expSizes + ARRAY_SIZE(expSizes)), ());
|
||||
TEST_EQUAL(sizes, vector<uint64_t>(expSizes, expSizes + ARRAY_SIZE(expSizes)), ());
|
||||
}
|
||||
|
||||
UNIT_TEST(IntervalIndex_Serialized)
|
||||
|
@ -118,7 +149,7 @@ UNIT_TEST(IntervalIndex_Serialized)
|
|||
data.push_back(CellIdFeaturePairForTest(0x1538U, 1));
|
||||
data.push_back(CellIdFeaturePairForTest(0x1637U, 2));
|
||||
vector<uint8_t> serialIndex;
|
||||
MemWriter<vector<uint8_t> > writer(serialIndex);
|
||||
MemWriter<vector<uint8_t>> writer(serialIndex);
|
||||
IntervalIndexBuilder(16, 1, 4).BuildIndex(writer, data.begin(), data.end());
|
||||
|
||||
char const expSerial [] =
|
||||
|
@ -143,6 +174,38 @@ UNIT_TEST(IntervalIndex_Serialized)
|
|||
TEST_EQUAL(values, vector<uint32_t>(expected, expected + ARRAY_SIZE(expected)), ());
|
||||
}
|
||||
|
||||
// Builds a V2 index from three cell/value pairs, compares the whole serialized byte
// stream (level offsets are 8 bytes each) and then reads the values back through
// IntervalIndex.
UNIT_TEST(IntervalIndexV2_Serialized)
|
||||
{
|
||||
vector<CellIdFeaturePairForTest> data;
|
||||
data.push_back(CellIdFeaturePairForTest(0x1537U, 0));
|
||||
data.push_back(CellIdFeaturePairForTest(0x1538U, 1));
|
||||
data.push_back(CellIdFeaturePairForTest(0x1637U, 2));
|
||||
vector<uint8_t> serialIndex;
|
||||
MemWriter<vector<uint8_t>> writer(serialIndex);
|
||||
IntervalIndexBuilder(IntervalIndexVersion::V2, 16, 1, 4).BuildIndex(writer, data.begin(), data.end());
|
||||
|
||||
// Expected layout: 4 header bytes + 4 offsets * 8 bytes == 0x24 bytes before the leaves.
char const expSerial [] =
|
||||
"\x02\x02\x04\x01" // Header
|
||||
"\x24\x00\x00\x00\x00\x00\x00\x00" // Leaves level offset
|
||||
"\x2A\x00\x00\x00\x00\x00\x00\x00" // Level 1 offset
|
||||
"\x2F\x00\x00\x00\x00\x00\x00\x00" // Root level offset
|
||||
"\x32\x00\x00\x00\x00\x00\x00\x00" // End offset: position right after the root level (== total size, 50 bytes)
|
||||
"\x37\x00" "\x38\x02" "\x37\x04" // 0x1537 0x1538 0x1637
|
||||
"\x01\x60\x00\x04\x02" // 0x15, 0x16 node
|
||||
"\x00\x01\x05" // Root
|
||||
"";
|
||||
|
||||
TEST_EQUAL(serialIndex, vector<uint8_t>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
|
||||
|
||||
// Round trip: the reader must accept the V2 header and return all three values.
MemReader reader(&serialIndex[0], serialIndex.size());
|
||||
IntervalIndex<MemReader, uint32_t> index(reader);
|
||||
uint32_t expected [] = {0, 1, 2};
|
||||
vector<uint32_t> values;
|
||||
TEST_EQUAL(index.KeyEnd(), 0x10000, ());
|
||||
index.ForEach(IndexValueInserter(values), 0, 0x10000);
|
||||
TEST_EQUAL(values, vector<uint32_t>(expected, expected + ARRAY_SIZE(expected)), ());
|
||||
}
|
||||
|
||||
UNIT_TEST(IntervalIndex_Simple)
|
||||
{
|
||||
vector<CellIdFeaturePairForTest> data;
|
||||
|
@ -150,7 +213,7 @@ UNIT_TEST(IntervalIndex_Simple)
|
|||
data.push_back(CellIdFeaturePairForTest(0xA0B1C2D200ULL, 1));
|
||||
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D100ULL, 2));
|
||||
vector<char> serialIndex;
|
||||
MemWriter<vector<char> > writer(serialIndex);
|
||||
MemWriter<vector<char>> writer(serialIndex);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
|
||||
MemReader reader(&serialIndex[0], serialIndex.size());
|
||||
IntervalIndex<MemReader, uint32_t> index(reader);
|
||||
|
@ -201,7 +264,7 @@ UNIT_TEST(IntervalIndex_Empty)
|
|||
{
|
||||
vector<CellIdFeaturePairForTest> data;
|
||||
vector<char> serialIndex;
|
||||
MemWriter<vector<char> > writer(serialIndex);
|
||||
MemWriter<vector<char>> writer(serialIndex);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
|
||||
MemReader reader(&serialIndex[0], serialIndex.size());
|
||||
IntervalIndex<MemReader, uint32_t> index(reader);
|
||||
|
@ -220,7 +283,7 @@ UNIT_TEST(IntervalIndex_Simple2)
|
|||
data.push_back(CellIdFeaturePairForTest(0xA0B1C2D200ULL, 3));
|
||||
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D200ULL, 2));
|
||||
vector<char> serialIndex;
|
||||
MemWriter<vector<char> > writer(serialIndex);
|
||||
MemWriter<vector<char>> writer(serialIndex);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
|
||||
MemReader reader(&serialIndex[0], serialIndex.size());
|
||||
IntervalIndex<MemReader, uint32_t> index(reader);
|
||||
|
@ -239,7 +302,7 @@ UNIT_TEST(IntervalIndex_Simple3)
|
|||
data.push_back(CellIdFeaturePairForTest(0x0100ULL, 0));
|
||||
data.push_back(CellIdFeaturePairForTest(0x0200ULL, 1));
|
||||
vector<char> serialIndex;
|
||||
MemWriter<vector<char> > writer(serialIndex);
|
||||
MemWriter<vector<char>> writer(serialIndex);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
|
||||
MemReader reader(&serialIndex[0], serialIndex.size());
|
||||
IntervalIndex<MemReader, uint32_t> index(reader);
|
||||
|
@ -258,7 +321,7 @@ UNIT_TEST(IntervalIndex_Simple4)
|
|||
data.push_back(CellIdFeaturePairForTest(0x01030400ULL, 0));
|
||||
data.push_back(CellIdFeaturePairForTest(0x02030400ULL, 1));
|
||||
vector<char> serialIndex;
|
||||
MemWriter<vector<char> > writer(serialIndex);
|
||||
MemWriter<vector<char>> writer(serialIndex);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
|
||||
MemReader reader(&serialIndex[0], serialIndex.size());
|
||||
IntervalIndex<MemReader, uint32_t> index(reader);
|
||||
|
@ -279,7 +342,7 @@ UNIT_TEST(IntervalIndex_Simple5)
|
|||
data.push_back(CellIdFeaturePairForTest(0xA0B1C2D200ULL, 3));
|
||||
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D200ULL, 2));
|
||||
vector<char> serialIndex;
|
||||
MemWriter<vector<char> > writer(serialIndex);
|
||||
MemWriter<vector<char>> writer(serialIndex);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
|
||||
MemReader reader(&serialIndex[0], serialIndex.size());
|
||||
IntervalIndex<MemReader, uint32_t> index(reader);
|
||||
|
|
|
@ -8,6 +8,13 @@
|
|||
#include "base/buffer_vector.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
// Format version of a serialized interval index. The builder stores the value,
// cast to uint8_t, in the header's m_Version field.
enum class IntervalIndexVersion : uint8_t
|
||||
{
|
||||
// Level offsets are read and written as uint32_t.
V1 = 1,
|
||||
// Level offsets are read and written as uint64_t.
V2 = 2,
|
||||
};
|
||||
|
||||
class IntervalIndexBase
|
||||
{
|
||||
|
@ -28,8 +35,6 @@ public:
|
|||
ASSERT_GREATER(bitsPerLevel, 3, ());
|
||||
return 1 << (bitsPerLevel - 3);
|
||||
}
|
||||
|
||||
enum { kVersion = 1 };
|
||||
};
|
||||
|
||||
template <class ReaderT, typename Value>
|
||||
|
@ -42,10 +47,18 @@ public:
|
|||
{
|
||||
ReaderSource<ReaderT> src(reader);
|
||||
src.Read(&m_Header, sizeof(Header));
|
||||
CHECK_EQUAL(m_Header.m_Version, static_cast<uint8_t>(kVersion), ());
|
||||
auto const version = static_cast<IntervalIndexVersion>(m_Header.m_Version);
|
||||
CHECK(version == IntervalIndexVersion::V1 || version == IntervalIndexVersion::V2, ());
|
||||
if (m_Header.m_Levels != 0)
|
||||
{
|
||||
for (int i = 0; i <= m_Header.m_Levels + 1; ++i)
|
||||
m_LevelOffsets.push_back(ReadPrimitiveFromSource<uint32_t>(src));
|
||||
{
|
||||
uint64_t levelOffset =
|
||||
version == IntervalIndexVersion::V1 ? ReadPrimitiveFromSource<uint32_t>(src)
|
||||
: ReadPrimitiveFromSource<uint64_t>(src);
|
||||
m_LevelOffsets.push_back(levelOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t KeyEnd() const
|
||||
|
@ -74,7 +87,7 @@ public:
|
|||
private:
|
||||
template <typename F>
|
||||
void ForEachLeaf(F const & f, uint64_t const beg, uint64_t const end,
|
||||
uint32_t const offset, uint32_t const size,
|
||||
uint64_t const offset, uint64_t const size,
|
||||
uint64_t keyBase /* discarded part of object key value in the parent nodes*/) const
|
||||
{
|
||||
buffer_vector<uint8_t, 1024> data;
|
||||
|
@ -100,7 +113,7 @@ private:
|
|||
|
||||
template <typename F>
|
||||
void ForEachNode(F const & f, uint64_t beg, uint64_t end, int level,
|
||||
uint32_t offset, uint32_t size,
|
||||
uint64_t offset, uint64_t size,
|
||||
uint64_t keyBase /* discarded part of object key value in the parent nodes */) const
|
||||
{
|
||||
offset += m_LevelOffsets[level];
|
||||
|
@ -125,8 +138,8 @@ private:
|
|||
m_Reader.Read(offset, &data[0], size);
|
||||
ArrayByteSource src(&data[0]);
|
||||
|
||||
uint32_t const offsetAndFlag = ReadVarUint<uint32_t>(src);
|
||||
uint32_t childOffset = offsetAndFlag >> 1;
|
||||
uint64_t const offsetAndFlag = ReadVarUint<uint64_t>(src);
|
||||
uint64_t childOffset = offsetAndFlag >> 1;
|
||||
if (offsetAndFlag & 1)
|
||||
{
|
||||
// Reading bitmap.
|
||||
|
@ -136,7 +149,7 @@ private:
|
|||
{
|
||||
if (bits::GetBit(pBitmap, i))
|
||||
{
|
||||
uint32_t childSize = ReadVarUint<uint32_t>(src);
|
||||
uint64_t childSize = ReadVarUint<uint64_t>(src);
|
||||
if (i >= beg0)
|
||||
{
|
||||
uint64_t const beg1 = (i == beg0) ? (beg & levelBytesFF) : 0;
|
||||
|
@ -147,7 +160,7 @@ private:
|
|||
}
|
||||
}
|
||||
ASSERT(end0 != (static_cast<uint32_t>(1) << m_Header.m_BitsPerLevel) - 1 ||
|
||||
static_cast<uint8_t const *>(src.Ptr()) - &data[0] == size,
|
||||
static_cast<size_t>(static_cast<uint8_t const *>(src.Ptr()) - &data[0]) == size,
|
||||
(beg, end, beg0, end0, offset, size, src.Ptr(), &data[0]));
|
||||
}
|
||||
else
|
||||
|
@ -158,7 +171,7 @@ private:
|
|||
uint8_t const i = src.ReadByte();
|
||||
if (i > end0)
|
||||
break;
|
||||
uint32_t childSize = ReadVarUint<uint32_t>(src);
|
||||
uint64_t childSize = ReadVarUint<uint64_t>(src);
|
||||
if (i >= beg0)
|
||||
{
|
||||
uint64_t const beg1 = (i == beg0) ? (beg & levelBytesFF) : 0;
|
||||
|
@ -172,5 +185,5 @@ private:
|
|||
|
||||
ReaderT m_Reader;
|
||||
Header m_Header;
|
||||
buffer_vector<uint32_t, 7> m_LevelOffsets;
|
||||
buffer_vector<uint64_t, 7> m_LevelOffsets;
|
||||
};
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "base/assert.hpp"
|
||||
#include "base/base.hpp"
|
||||
#include "base/bits.hpp"
|
||||
#include "base/checked_cast.hpp"
|
||||
#include "base/logging.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
|
@ -39,8 +40,17 @@ class IntervalIndexBuilder
|
|||
{
|
||||
public:
|
||||
IntervalIndexBuilder(uint32_t keyBits, uint32_t leafBytes, uint32_t bitsPerLevel = 8)
|
||||
: m_BitsPerLevel(bitsPerLevel), m_LeafBytes(leafBytes)
|
||||
: IntervalIndexBuilder(IntervalIndexVersion::V1, keyBits, leafBytes, bitsPerLevel)
|
||||
{ }
|
||||
|
||||
IntervalIndexBuilder(IntervalIndexVersion version, uint32_t keyBits, uint32_t leafBytes,
|
||||
uint32_t bitsPerLevel = 8)
|
||||
: m_version{version}, m_BitsPerLevel(bitsPerLevel), m_LeafBytes(leafBytes)
|
||||
{
|
||||
CHECK_GREATER_OR_EQUAL(
|
||||
static_cast<uint8_t>(version), static_cast<uint8_t>(IntervalIndexVersion::V1), ());
|
||||
CHECK_LESS_OR_EQUAL(
|
||||
static_cast<uint8_t>(version), static_cast<uint8_t>(IntervalIndexVersion::V2), ());
|
||||
CHECK_GREATER(leafBytes, 0, ());
|
||||
CHECK_LESS(keyBits, 63, ());
|
||||
int const nodeKeyBits = keyBits - (m_LeafBytes << 3);
|
||||
|
@ -59,7 +69,7 @@ public:
|
|||
if (beg == end)
|
||||
{
|
||||
IntervalIndexBase::Header header;
|
||||
header.m_Version = IntervalIndexBase::kVersion;
|
||||
header.m_Version = static_cast<uint8_t>(m_version);
|
||||
header.m_BitsPerLevel = 0;
|
||||
header.m_Levels = 0;
|
||||
header.m_LeafBytes = 0;
|
||||
|
@ -69,21 +79,21 @@ public:
|
|||
|
||||
uint64_t const initialPos = writer.Pos();
|
||||
WriteZeroesToSink(writer, sizeof(IntervalIndexBase::Header));
|
||||
WriteZeroesToSink(writer, 4 * (m_Levels + 2));
|
||||
WriteZeroesToSink(writer, (m_version == IntervalIndexVersion::V1 ? 4 : 8) * (m_Levels + 2));
|
||||
uint64_t const afterHeaderPos = writer.Pos();
|
||||
|
||||
std::vector<uint32_t> levelOffset;
|
||||
std::vector<uint64_t> levelOffset;
|
||||
{
|
||||
std::vector<uint32_t> offsets;
|
||||
levelOffset.push_back(static_cast<uint32_t>(writer.Pos()));
|
||||
std::vector<uint64_t> offsets;
|
||||
levelOffset.push_back(writer.Pos());
|
||||
BuildLeaves(writer, beg, end, offsets);
|
||||
levelOffset.push_back(static_cast<uint32_t>(writer.Pos()));
|
||||
levelOffset.push_back(writer.Pos());
|
||||
for (int i = 1; i <= static_cast<int>(m_Levels); ++i)
|
||||
{
|
||||
std::vector<uint32_t> nextOffsets;
|
||||
std::vector<uint64_t> nextOffsets;
|
||||
BuildLevel(writer, beg, end, i, &offsets[0], &offsets[0] + offsets.size(), nextOffsets);
|
||||
nextOffsets.swap(offsets);
|
||||
levelOffset.push_back(static_cast<uint32_t>(writer.Pos()));
|
||||
levelOffset.push_back(writer.Pos());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -93,7 +103,7 @@ public:
|
|||
// Write header.
|
||||
{
|
||||
IntervalIndexBase::Header header;
|
||||
header.m_Version = IntervalIndexBase::kVersion;
|
||||
header.m_Version = static_cast<uint8_t>(m_version);
|
||||
header.m_BitsPerLevel = static_cast<uint8_t>(m_BitsPerLevel);
|
||||
ASSERT_EQUAL(header.m_BitsPerLevel, m_BitsPerLevel, ());
|
||||
header.m_Levels = static_cast<uint8_t>(m_Levels);
|
||||
|
@ -105,7 +115,12 @@ public:
|
|||
|
||||
// Write level offsets.
|
||||
for (size_t i = 0; i < levelOffset.size(); ++i)
|
||||
WriteToSink(writer, levelOffset[i]);
|
||||
{
|
||||
if (m_version == IntervalIndexVersion::V1)
|
||||
WriteToSink(writer, base::checked_cast<uint32_t>(levelOffset[i]));
|
||||
else
|
||||
WriteToSink(writer, levelOffset[i]);
|
||||
}
|
||||
|
||||
uint64_t const pos = writer.Pos();
|
||||
CHECK_EQUAL(pos, afterHeaderPos, ());
|
||||
|
@ -118,8 +133,8 @@ public:
|
|||
// Check that [beg, end) is sorted and log most populous cell.
|
||||
if (beg != end)
|
||||
{
|
||||
uint32_t count = 0;
|
||||
uint32_t maxCount = 0;
|
||||
uint64_t count = 0;
|
||||
uint64_t maxCount = 0;
|
||||
typename CellIdValueIter::value_type mostPopulousCell = *beg;
|
||||
CellIdValueIter it = beg;
|
||||
uint64_t prev = it->GetCell();
|
||||
|
@ -154,7 +169,7 @@ public:
|
|||
}
|
||||
|
||||
template <class SinkT>
|
||||
uint32_t WriteNode(SinkT & sink, uint32_t offset, uint32_t * childSizes)
|
||||
uint64_t WriteNode(SinkT & sink, uint64_t offset, uint64_t * childSizes)
|
||||
{
|
||||
std::vector<uint8_t> bitmapSerial, listSerial;
|
||||
bitmapSerial.reserve(1024);
|
||||
|
@ -166,28 +181,28 @@ public:
|
|||
{
|
||||
sink.Write(&bitmapSerial[0], bitmapSerial.size());
|
||||
ASSERT_EQUAL(bitmapSerial.size(), static_cast<uint32_t>(bitmapSerial.size()), ());
|
||||
return static_cast<uint32_t>(bitmapSerial.size());
|
||||
return bitmapSerial.size();
|
||||
}
|
||||
else
|
||||
{
|
||||
sink.Write(&listSerial[0], listSerial.size());
|
||||
ASSERT_EQUAL(listSerial.size(), static_cast<uint32_t>(listSerial.size()), ());
|
||||
return static_cast<uint32_t>(listSerial.size());
|
||||
return listSerial.size();
|
||||
}
|
||||
}
|
||||
|
||||
template <class Writer, typename CellIdValueIter>
|
||||
void BuildLevel(Writer & writer, CellIdValueIter const & beg, CellIdValueIter const & end,
|
||||
int level, uint32_t const * childSizesBeg, uint32_t const * childSizesEnd,
|
||||
std::vector<uint32_t> & sizes)
|
||||
int level, uint64_t const * childSizesBeg, uint64_t const * childSizesEnd,
|
||||
std::vector<uint64_t> & sizes)
|
||||
{
|
||||
UNUSED_VALUE(childSizesEnd);
|
||||
ASSERT_GREATER(level, 0, ());
|
||||
uint32_t const skipBits = m_LeafBytes * 8 + (level - 1) * m_BitsPerLevel;
|
||||
std::vector<uint32_t> expandedSizes(1 << m_BitsPerLevel);
|
||||
std::vector<uint64_t> expandedSizes(1 << m_BitsPerLevel);
|
||||
uint64_t prevKey = static_cast<uint64_t>(-1);
|
||||
uint32_t childOffset = 0;
|
||||
uint32_t nextChildOffset = 0;
|
||||
uint64_t childOffset = 0;
|
||||
uint64_t nextChildOffset = 0;
|
||||
for (CellIdValueIter it = beg; it != end; ++it)
|
||||
{
|
||||
uint64_t const key = it->GetCell() >> skipBits;
|
||||
|
@ -202,7 +217,8 @@ public:
|
|||
}
|
||||
|
||||
nextChildOffset += *childSizesBeg;
|
||||
expandedSizes[key & m_LastBitsMask] += *childSizesBeg;
|
||||
CHECK_EQUAL(expandedSizes[key & m_LastBitsMask], 0, ());
|
||||
expandedSizes[key & m_LastBitsMask] = *childSizesBeg;
|
||||
++childSizesBeg;
|
||||
prevKey = key;
|
||||
}
|
||||
|
@ -212,7 +228,7 @@ public:
|
|||
|
||||
template <class Writer, typename CellIdValueIter>
|
||||
void BuildLeaves(Writer & writer, CellIdValueIter const & beg, CellIdValueIter const & end,
|
||||
std::vector<uint32_t> & sizes)
|
||||
std::vector<uint64_t> & sizes)
|
||||
{
|
||||
using Value = typename CellIdValueIter::value_type::ValueType;
|
||||
|
||||
|
@ -226,7 +242,7 @@ public:
|
|||
Value const value = it->GetValue();
|
||||
if (it != beg && (key >> skipBits) != (prevKey >> skipBits))
|
||||
{
|
||||
sizes.push_back(static_cast<uint32_t>(writer.Pos() - prevPos));
|
||||
sizes.push_back(writer.Pos() - prevPos);
|
||||
prevValue = 0;
|
||||
prevPos = writer.Pos();
|
||||
}
|
||||
|
@ -236,46 +252,74 @@ public:
|
|||
prevKey = key;
|
||||
prevValue = value;
|
||||
}
|
||||
sizes.push_back(static_cast<uint32_t>(writer.Pos() - prevPos));
|
||||
sizes.push_back(writer.Pos() - prevPos);
|
||||
}
|
||||
|
||||
// Writes one node in bitmap form to |sink|:
//   1. (offset << 1) + 1 as a varuint;
//   2. a bitmap of 1 << (m_BitsPerLevel - 3) bytes with a bit set for every index i
//      whose childSizes[i] is non-zero;
//   3. every non-zero childSizes[i] as a varuint, in increasing order of i.
// |childSizes| is indexed from 0 to (1 << m_BitsPerLevel) - 1.
// For IntervalIndexVersion::V1 the CHECKs require offset <= (max uint32_t >> 1) and
// every written size <= max uint32_t.
// NOTE(review): this view appears to show the pre-change lines (uint32_t signature,
// direct WriteVarUint calls) next to their replacements - confirm against the actual file.
// NOTE(review): std::numeric_limits needs <limits>; that include is not visible in
// this view - confirm it is present.
template <class SinkT>
|
||||
void WriteBitmapNode(SinkT & sink, uint32_t offset, uint32_t * childSizes)
|
||||
void WriteBitmapNode(SinkT & sink, uint64_t offset, uint64_t * childSizes)
|
||||
{
|
||||
ASSERT_GREATER_OR_EQUAL(m_BitsPerLevel, 3, ());
|
||||
WriteVarUint(sink, (offset << 1) + 1);
|
||||
|
||||
// The offset is shifted left by one below, so one bit of the value range is reserved.
if (m_version == IntervalIndexVersion::V1)
|
||||
CHECK_LESS_OR_EQUAL(offset, std::numeric_limits<uint32_t>::max() >> 1, ());
|
||||
else
|
||||
CHECK_LESS_OR_EQUAL(offset, std::numeric_limits<uint64_t>::max() >> 1, ());
|
||||
uint64_t const offsetAndFlag = (offset << 1) + 1;
|
||||
WriteVarUint(sink, offsetAndFlag);
|
||||
|
||||
buffer_vector<uint8_t, 32> bitMask(1 << (m_BitsPerLevel - 3));
|
||||
for (uint32_t i = 0; i < static_cast<uint32_t>(1 << m_BitsPerLevel); ++i)
|
||||
if (childSizes[i])
|
||||
bits::SetBitTo1(&bitMask[0], i);
|
||||
sink.Write(&bitMask[0], bitMask.size());
|
||||
|
||||
for (uint32_t i = 0; i < static_cast<uint32_t>(1 << m_BitsPerLevel); ++i)
|
||||
if (childSizes[i])
|
||||
WriteVarUint(sink, childSizes[i]);
|
||||
{
|
||||
uint64_t size = childSizes[i];
|
||||
if (!size)
|
||||
continue;
|
||||
|
||||
if (m_version == IntervalIndexVersion::V1)
|
||||
CHECK_LESS_OR_EQUAL(size, std::numeric_limits<uint32_t>::max(), ());
|
||||
WriteVarUint(sink, size);
|
||||
}
|
||||
}
|
||||
|
||||
// Writes one node in list form to |sink|:
//   1. offset << 1 as a varuint (low bit 0);
//   2. for every index i with a non-zero childSizes[i], in increasing order of i:
//      the index as a single byte followed by the size as a varuint.
// |childSizes| is indexed from 0 to (1 << m_BitsPerLevel) - 1; the assert on
// m_BitsPerLevel <= 8 keeps every index within one byte.
// For IntervalIndexVersion::V1 the CHECKs require offset <= (max uint32_t >> 1) and
// every written size <= max uint32_t.
// NOTE(review): this view appears to show the pre-change lines (uint32_t signature,
// direct WriteVarUint calls) next to their replacements - confirm against the actual file.
// NOTE(review): std::numeric_limits needs <limits>; that include is not visible in
// this view - confirm it is present.
template <class SinkT>
|
||||
void WriteListNode(SinkT & sink, uint32_t offset, uint32_t * childSizes)
|
||||
void WriteListNode(SinkT & sink, uint64_t offset, uint64_t * childSizes)
|
||||
{
|
||||
ASSERT_LESS_OR_EQUAL(m_BitsPerLevel, 8, ());
|
||||
WriteVarUint(sink, (offset << 1));
|
||||
|
||||
// The offset is shifted left by one below, so one bit of the value range is reserved.
if (m_version == IntervalIndexVersion::V1)
|
||||
CHECK_LESS_OR_EQUAL(offset, std::numeric_limits<uint32_t>::max() >> 1, ());
|
||||
else
|
||||
CHECK_LESS_OR_EQUAL(offset, std::numeric_limits<uint64_t>::max() >> 1, ());
|
||||
uint64_t const offsetAndFlag = offset << 1;
|
||||
WriteVarUint(sink, offsetAndFlag);
|
||||
|
||||
for (uint32_t i = 0; i < static_cast<uint32_t>(1 << m_BitsPerLevel); ++i)
|
||||
{
|
||||
if (childSizes[i])
|
||||
{
|
||||
WriteToSink(sink, static_cast<uint8_t>(i));
|
||||
WriteVarUint(sink, childSizes[i]);
|
||||
}
|
||||
uint64_t size = childSizes[i];
|
||||
if (!size)
|
||||
continue;
|
||||
|
||||
WriteToSink(sink, static_cast<uint8_t>(i));
|
||||
|
||||
if (m_version == IntervalIndexVersion::V1)
|
||||
CHECK_LESS_OR_EQUAL(size, std::numeric_limits<uint32_t>::max(), ());
|
||||
WriteVarUint(sink, size);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
IntervalIndexVersion m_version;
|
||||
uint32_t m_Levels, m_BitsPerLevel, m_LeafBytes, m_LastBitsMask;
|
||||
};
|
||||
|
||||
// Convenience wrapper: constructs an IntervalIndexBuilder with leafBytes == 1 and the
// default bitsPerLevel, then writes the index for [beg, end) to |writer|.
// |version| selects the serialized format and defaults to IntervalIndexVersion::V1.
// NOTE(review): this view appears to show the pre-change parameter list and builder
// call next to their replacements - confirm against the actual file.
template <class Writer, typename CellIdValueIter>
|
||||
void BuildIntervalIndex(CellIdValueIter const & beg, CellIdValueIter const & end, Writer & writer,
|
||||
uint32_t keyBits)
|
||||
uint32_t keyBits,
|
||||
IntervalIndexVersion version = IntervalIndexVersion::V1)
|
||||
{
|
||||
IntervalIndexBuilder(keyBits, 1).BuildIndex(writer, beg, end);
|
||||
IntervalIndexBuilder(version, keyBits, 1).BuildIndex(writer, beg, end);
|
||||
}
|
||||
|
|
|
@ -72,7 +72,7 @@ bool BuildLocalityIndexFromDataFile(string const & dataFile,
|
|||
FileWriter writer(idxFileName);
|
||||
|
||||
covering::BuildLocalityIndex<LocalityVector<ModelReaderPtr>, FileWriter, DEPTH_LEVELS>(
|
||||
localities.GetVector(), writer, coverLocality, outFileName);
|
||||
localities.GetVector(), writer, coverLocality, outFileName, IntervalIndexVersion::V2);
|
||||
}
|
||||
|
||||
FilesContainerW(outFileName, FileWriter::OP_WRITE_TRUNCATE)
|
||||
|
|
|
@ -28,7 +28,8 @@ using CoverLocality =
|
|||
|
||||
template <class ObjectsVector, class Writer, int DEPTH_LEVELS>
|
||||
void BuildLocalityIndex(ObjectsVector const & objects, Writer & writer,
|
||||
CoverLocality const & coverLocality, std::string const & tmpFilePrefix)
|
||||
CoverLocality const & coverLocality, std::string const & tmpFilePrefix,
|
||||
IntervalIndexVersion version = IntervalIndexVersion::V1)
|
||||
{
|
||||
std::string const cellsToValueFile = tmpFilePrefix + CELL2LOCALITY_SORTED_EXT + ".all";
|
||||
SCOPE_GUARD(cellsToValueFileGuard, std::bind(&FileWriter::DeleteFileX, cellsToValueFile));
|
||||
|
@ -51,7 +52,7 @@ void BuildLocalityIndex(ObjectsVector const & objects, Writer & writer,
|
|||
DDVector<CellValuePair<uint64_t>, FileReader, uint64_t> cellsToValue(reader);
|
||||
|
||||
{
|
||||
BuildIntervalIndex(cellsToValue.begin(), cellsToValue.end(), writer, DEPTH_LEVELS * 2 + 1);
|
||||
BuildIntervalIndex(cellsToValue.begin(), cellsToValue.end(), writer, DEPTH_LEVELS * 2 + 1, version);
|
||||
}
|
||||
}
|
||||
} // namespace covering
|
||||
|
|
Loading…
Add table
Reference in a new issue