diff --git a/indexer/indexer_tests/interval_index_test.cpp b/indexer/indexer_tests/interval_index_test.cpp index 2e898346c6..120c12d739 100644 --- a/indexer/indexer_tests/interval_index_test.cpp +++ b/indexer/indexer_tests/interval_index_test.cpp @@ -48,8 +48,8 @@ UNIT_TEST(IntervalIndex_LevelCount) UNIT_TEST(IntervalIndex_SerializedNodeBitmap) { - uint32_t const offset = 350; // == 0x15E - uint32_t childSizes[8] = { 0, 0, 0, 10, 0, 0, 1000, 0 }; + uint64_t const offset = 350; // == 0x15E + uint64_t childSizes[8] = { 0, 0, 0, 10, 0, 0, 1000, 0 }; char const expSerial [] = "\xBD\x05" // (350 << 1) + 1 == 701 == 0x2BD - offset encoded as varuint. "\x48" // (1 << 3) | (1 << 6) == 72 == 0x48 - bitmap. @@ -57,25 +57,56 @@ UNIT_TEST(IntervalIndex_SerializedNodeBitmap) "\xE8\x07" // 1000 = 0x3E8 - childSizes[6] encoded as varuint. ""; vector serializedNode; - MemWriter > writer(serializedNode); + MemWriter> writer(serializedNode); IntervalIndexBuilder(11, 1, 3).WriteNode(writer, offset, childSizes); TEST_EQUAL(serializedNode, vector(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ()); } +UNIT_TEST(IntervalIndexV2_SerializedNodeBitmap) +{ + uint64_t const offset = 5'547'468'350; // == 0x01'4A'A7'A6'3E + uint64_t childSizes[8] = { 0, 0, 0, 10, 0, 0, 6'200'000'000, 0 }; + char const expSerial [] = + "\xFD\x98\xBD\xAA\x29" // (5'547'468'350 << 1) + 1 - offset encoded as varuint. + "\x48" // (1 << 3) | (1 << 6) == 72 == 0x48 - bitmap. + "\x0A" // 10 - childSizes[3] encoded as varuint. + "\x80\xFC\xB1\x8C\x17" // 6'200'000'000 - childSizes[6] encoded as varuint. 
+ ""; + vector serializedNode; + MemWriter> writer(serializedNode); + IntervalIndexBuilder(IntervalIndexVersion::V2, 11, 1, 3).WriteNode(writer, offset, childSizes); + TEST_EQUAL(serializedNode, vector(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ()); +} + UNIT_TEST(IntervalIndex_SerializedNodeList) { - uint32_t const offset = 350; // == 0x15E - uint32_t childSizes[16] = { 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + uint64_t const offset = 350; // == 0x15E + uint64_t childSizes[16] = { 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; char const expSerial [] = "\xBC\x05" // (350 << 1) + 0 == 700 == 0x2BC - offset encoded as varuint. "\x06" "\xE8\x07" // 6, 1000 ""; vector serializedNode; - MemWriter > writer(serializedNode); + MemWriter> writer(serializedNode); IntervalIndexBuilder(11, 1, 4).WriteNode(writer, offset, childSizes); TEST_EQUAL(serializedNode, vector(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ()); } +UNIT_TEST(IntervalIndexV2_SerializedNodeList) +{ + uint64_t const offset = 5'547'468'350; // == 0x01'4A'A7'A6'3E + uint64_t childSizes[16] = { 0, 0, 0, 0, 0, 0, 0, 6'200'000'000, 0, 0, 0, 0, 0, 0, 0, 0, }; + char const expSerial [] = + "\xFC\x98\xBD\xAA\x29" // (5'547'468'350 << 1) + 0 - offset encoded as varuint. 
+ "\x07" "\x80\xFC\xB1\x8C\x17" // 7, 6'200'000'000 + ""; + vector serializedNode; + MemWriter> writer(serializedNode); + IntervalIndexBuilder(IntervalIndexVersion::V2, 11, 1, 4).WriteNode(writer, offset, childSizes); + TEST_EQUAL(serializedNode, vector(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), + (DebugPrint(serializedNode), DebugPrint(expSerial))); +} + UNIT_TEST(IntervalIndex_SerializedLeaves) { vector data; @@ -83,13 +114,13 @@ UNIT_TEST(IntervalIndex_SerializedLeaves) data.push_back(CellIdFeaturePairForTest(0x1538U, 1)); data.push_back(CellIdFeaturePairForTest(0x1637U, 2)); vector serialLeaves; - MemWriter > writer(serialLeaves); - vector sizes; + MemWriter> writer(serialLeaves); + vector sizes; IntervalIndexBuilder(16, 1, 4).BuildLeaves(writer, data.begin(), data.end(), sizes); char const expSerial [] = "\x37\x00" "\x38\x02" "\x37\x04"; // 0x1537 0x1538 0x1637 uint32_t const expSizes [] = { 4, 2 }; TEST_EQUAL(serialLeaves, vector(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ()); - TEST_EQUAL(sizes, vector(expSizes, expSizes + ARRAY_SIZE(expSizes)), ()); + TEST_EQUAL(sizes, vector(expSizes, expSizes + ARRAY_SIZE(expSizes)), ()); } UNIT_TEST(IntervalIndex_SerializedNodes) @@ -98,17 +129,17 @@ UNIT_TEST(IntervalIndex_SerializedNodes) data.push_back(CellIdFeaturePairForTest(0x1537U, 0)); data.push_back(CellIdFeaturePairForTest(0x1538U, 1)); data.push_back(CellIdFeaturePairForTest(0x1637U, 2)); - uint32_t const leavesSizes [] = { 4, 2 }; + uint64_t const leavesSizes [] = { 4, 2 }; vector serialNodes; - MemWriter > writer(serialNodes); - vector sizes; + MemWriter> writer(serialNodes); + vector sizes; IntervalIndexBuilder(16, 1, 4).BuildLevel(writer, data.begin(), data.end(), 1, leavesSizes, leavesSizes + ARRAY_SIZE(leavesSizes), sizes); char const expSerial [] = "\x01\x60\x00\x04\x02"; uint32_t const expSizes [] = { ARRAY_SIZE(expSerial) - 1 }; TEST_EQUAL(serialNodes, vector(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ()); - TEST_EQUAL(sizes, 
vector(expSizes, expSizes + ARRAY_SIZE(expSizes)), ()); + TEST_EQUAL(sizes, vector(expSizes, expSizes + ARRAY_SIZE(expSizes)), ()); } UNIT_TEST(IntervalIndex_Serialized) @@ -118,7 +149,7 @@ UNIT_TEST(IntervalIndex_Serialized) data.push_back(CellIdFeaturePairForTest(0x1538U, 1)); data.push_back(CellIdFeaturePairForTest(0x1637U, 2)); vector serialIndex; - MemWriter > writer(serialIndex); + MemWriter> writer(serialIndex); IntervalIndexBuilder(16, 1, 4).BuildIndex(writer, data.begin(), data.end()); char const expSerial [] = @@ -143,6 +174,38 @@ UNIT_TEST(IntervalIndex_Serialized) TEST_EQUAL(values, vector(expected, expected + ARRAY_SIZE(expected)), ()); } +UNIT_TEST(IntervalIndexV2_Serialized) +{ + vector data; + data.push_back(CellIdFeaturePairForTest(0x1537U, 0)); + data.push_back(CellIdFeaturePairForTest(0x1538U, 1)); + data.push_back(CellIdFeaturePairForTest(0x1637U, 2)); + vector serialIndex; + MemWriter> writer(serialIndex); + IntervalIndexBuilder(IntervalIndexVersion::V2, 16, 1, 4).BuildIndex(writer, data.begin(), data.end()); + + char const expSerial [] = + "\x02\x02\x04\x01" // Header + "\x24\x00\x00\x00\x00\x00\x00\x00" // Leaves level offset + "\x2A\x00\x00\x00\x00\x00\x00\x00" // Level 1 offset + "\x2F\x00\x00\x00\x00\x00\x00\x00" // Root level offset + "\x32\x00\x00\x00\x00\x00\x00\x00" // End of root level offset + "\x37\x00" "\x38\x02" "\x37\x04" // 0x1537 0x1538 0x1637 + "\x01\x60\x00\x04\x02" // 0x15, 0x16 node + "\x00\x01\x05" // Root + ""; + + TEST_EQUAL(serialIndex, vector(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ()); + + MemReader reader(&serialIndex[0], serialIndex.size()); + IntervalIndex index(reader); + uint32_t expected [] = {0, 1, 2}; + vector values; + TEST_EQUAL(index.KeyEnd(), 0x10000, ()); + index.ForEach(IndexValueInserter(values), 0, 0x10000); + TEST_EQUAL(values, vector(expected, expected + ARRAY_SIZE(expected)), ()); +} + UNIT_TEST(IntervalIndex_Simple) { vector data; @@ -150,7 +213,7 @@ UNIT_TEST(IntervalIndex_Simple) 
data.push_back(CellIdFeaturePairForTest(0xA0B1C2D200ULL, 1)); data.push_back(CellIdFeaturePairForTest(0xA0B2C2D100ULL, 2)); vector serialIndex; - MemWriter > writer(serialIndex); + MemWriter> writer(serialIndex); BuildIntervalIndex(data.begin(), data.end(), writer, 40); MemReader reader(&serialIndex[0], serialIndex.size()); IntervalIndex index(reader); @@ -201,7 +264,7 @@ UNIT_TEST(IntervalIndex_Empty) { vector data; vector serialIndex; - MemWriter > writer(serialIndex); + MemWriter> writer(serialIndex); BuildIntervalIndex(data.begin(), data.end(), writer, 40); MemReader reader(&serialIndex[0], serialIndex.size()); IntervalIndex index(reader); @@ -220,7 +283,7 @@ UNIT_TEST(IntervalIndex_Simple2) data.push_back(CellIdFeaturePairForTest(0xA0B1C2D200ULL, 3)); data.push_back(CellIdFeaturePairForTest(0xA0B2C2D200ULL, 2)); vector serialIndex; - MemWriter > writer(serialIndex); + MemWriter> writer(serialIndex); BuildIntervalIndex(data.begin(), data.end(), writer, 40); MemReader reader(&serialIndex[0], serialIndex.size()); IntervalIndex index(reader); @@ -239,7 +302,7 @@ UNIT_TEST(IntervalIndex_Simple3) data.push_back(CellIdFeaturePairForTest(0x0100ULL, 0)); data.push_back(CellIdFeaturePairForTest(0x0200ULL, 1)); vector serialIndex; - MemWriter > writer(serialIndex); + MemWriter> writer(serialIndex); BuildIntervalIndex(data.begin(), data.end(), writer, 40); MemReader reader(&serialIndex[0], serialIndex.size()); IntervalIndex index(reader); @@ -258,7 +321,7 @@ UNIT_TEST(IntervalIndex_Simple4) data.push_back(CellIdFeaturePairForTest(0x01030400ULL, 0)); data.push_back(CellIdFeaturePairForTest(0x02030400ULL, 1)); vector serialIndex; - MemWriter > writer(serialIndex); + MemWriter> writer(serialIndex); BuildIntervalIndex(data.begin(), data.end(), writer, 40); MemReader reader(&serialIndex[0], serialIndex.size()); IntervalIndex index(reader); @@ -279,7 +342,7 @@ UNIT_TEST(IntervalIndex_Simple5) data.push_back(CellIdFeaturePairForTest(0xA0B1C2D200ULL, 3)); 
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D200ULL, 2)); vector serialIndex; - MemWriter > writer(serialIndex); + MemWriter> writer(serialIndex); BuildIntervalIndex(data.begin(), data.end(), writer, 40); MemReader reader(&serialIndex[0], serialIndex.size()); IntervalIndex index(reader); diff --git a/indexer/interval_index.hpp b/indexer/interval_index.hpp index 7c7c200e51..0f8b6b94ca 100644 --- a/indexer/interval_index.hpp +++ b/indexer/interval_index.hpp @@ -8,6 +8,13 @@ #include "base/buffer_vector.hpp" #include +#include + +enum class IntervalIndexVersion : uint8_t +{ + V1 = 1, + V2 = 2, +}; class IntervalIndexBase { @@ -28,8 +35,6 @@ public: ASSERT_GREATER(bitsPerLevel, 3, ()); return 1 << (bitsPerLevel - 3); } - - enum { kVersion = 1 }; }; template @@ -42,10 +47,18 @@ public: { ReaderSource src(reader); src.Read(&m_Header, sizeof(Header)); - CHECK_EQUAL(m_Header.m_Version, static_cast(kVersion), ()); + auto const version = static_cast(m_Header.m_Version); + CHECK(version == IntervalIndexVersion::V1 || version == IntervalIndexVersion::V2, ()); if (m_Header.m_Levels != 0) + { for (int i = 0; i <= m_Header.m_Levels + 1; ++i) - m_LevelOffsets.push_back(ReadPrimitiveFromSource(src)); + { + uint64_t levelOffset = + version == IntervalIndexVersion::V1 ? 
ReadPrimitiveFromSource(src) + : ReadPrimitiveFromSource(src); + m_LevelOffsets.push_back(levelOffset); + } + } } uint64_t KeyEnd() const @@ -74,7 +87,7 @@ public: private: template void ForEachLeaf(F const & f, uint64_t const beg, uint64_t const end, - uint32_t const offset, uint32_t const size, + uint64_t const offset, uint64_t const size, uint64_t keyBase /* discarded part of object key value in the parent nodes*/) const { buffer_vector data; @@ -100,7 +113,7 @@ private: template void ForEachNode(F const & f, uint64_t beg, uint64_t end, int level, - uint32_t offset, uint32_t size, + uint64_t offset, uint64_t size, uint64_t keyBase /* discarded part of object key value in the parent nodes */) const { offset += m_LevelOffsets[level]; @@ -125,8 +138,8 @@ private: m_Reader.Read(offset, &data[0], size); ArrayByteSource src(&data[0]); - uint32_t const offsetAndFlag = ReadVarUint(src); - uint32_t childOffset = offsetAndFlag >> 1; + uint64_t const offsetAndFlag = ReadVarUint(src); + uint64_t childOffset = offsetAndFlag >> 1; if (offsetAndFlag & 1) { // Reading bitmap. @@ -136,7 +149,7 @@ private: { if (bits::GetBit(pBitmap, i)) { - uint32_t childSize = ReadVarUint(src); + uint64_t childSize = ReadVarUint(src); if (i >= beg0) { uint64_t const beg1 = (i == beg0) ? (beg & levelBytesFF) : 0; @@ -147,7 +160,7 @@ private: } } ASSERT(end0 != (static_cast(1) << m_Header.m_BitsPerLevel) - 1 || - static_cast(src.Ptr()) - &data[0] == size, + static_cast(static_cast(src.Ptr()) - &data[0]) == size, (beg, end, beg0, end0, offset, size, src.Ptr(), &data[0])); } else @@ -158,7 +171,7 @@ private: uint8_t const i = src.ReadByte(); if (i > end0) break; - uint32_t childSize = ReadVarUint(src); + uint64_t childSize = ReadVarUint(src); if (i >= beg0) { uint64_t const beg1 = (i == beg0) ? 
(beg & levelBytesFF) : 0; @@ -172,5 +185,5 @@ private: ReaderT m_Reader; Header m_Header; - buffer_vector m_LevelOffsets; + buffer_vector m_LevelOffsets; }; diff --git a/indexer/interval_index_builder.hpp b/indexer/interval_index_builder.hpp index d3ca2b1d32..043306dd35 100644 --- a/indexer/interval_index_builder.hpp +++ b/indexer/interval_index_builder.hpp @@ -10,6 +10,7 @@ #include "base/assert.hpp" #include "base/base.hpp" #include "base/bits.hpp" +#include "base/checked_cast.hpp" #include "base/logging.hpp" #include @@ -39,8 +40,17 @@ class IntervalIndexBuilder { public: IntervalIndexBuilder(uint32_t keyBits, uint32_t leafBytes, uint32_t bitsPerLevel = 8) - : m_BitsPerLevel(bitsPerLevel), m_LeafBytes(leafBytes) + : IntervalIndexBuilder(IntervalIndexVersion::V1, keyBits, leafBytes, bitsPerLevel) + { } + + IntervalIndexBuilder(IntervalIndexVersion version, uint32_t keyBits, uint32_t leafBytes, + uint32_t bitsPerLevel = 8) + : m_version{version}, m_BitsPerLevel(bitsPerLevel), m_LeafBytes(leafBytes) { + CHECK_GREATER_OR_EQUAL( + static_cast(version), static_cast(IntervalIndexVersion::V1), ()); + CHECK_LESS_OR_EQUAL( + static_cast(version), static_cast(IntervalIndexVersion::V2), ()); CHECK_GREATER(leafBytes, 0, ()); CHECK_LESS(keyBits, 63, ()); int const nodeKeyBits = keyBits - (m_LeafBytes << 3); @@ -59,7 +69,7 @@ public: if (beg == end) { IntervalIndexBase::Header header; - header.m_Version = IntervalIndexBase::kVersion; + header.m_Version = static_cast(m_version); header.m_BitsPerLevel = 0; header.m_Levels = 0; header.m_LeafBytes = 0; @@ -69,21 +79,21 @@ public: uint64_t const initialPos = writer.Pos(); WriteZeroesToSink(writer, sizeof(IntervalIndexBase::Header)); - WriteZeroesToSink(writer, 4 * (m_Levels + 2)); + WriteZeroesToSink(writer, (m_version == IntervalIndexVersion::V1 ? 
4 : 8) * (m_Levels + 2)); uint64_t const afterHeaderPos = writer.Pos(); - std::vector levelOffset; + std::vector levelOffset; { - std::vector offsets; - levelOffset.push_back(static_cast(writer.Pos())); + std::vector offsets; + levelOffset.push_back(writer.Pos()); BuildLeaves(writer, beg, end, offsets); - levelOffset.push_back(static_cast(writer.Pos())); + levelOffset.push_back(writer.Pos()); for (int i = 1; i <= static_cast(m_Levels); ++i) { - std::vector nextOffsets; + std::vector nextOffsets; BuildLevel(writer, beg, end, i, &offsets[0], &offsets[0] + offsets.size(), nextOffsets); nextOffsets.swap(offsets); - levelOffset.push_back(static_cast(writer.Pos())); + levelOffset.push_back(writer.Pos()); } } @@ -93,7 +103,7 @@ public: // Write header. { IntervalIndexBase::Header header; - header.m_Version = IntervalIndexBase::kVersion; + header.m_Version = static_cast(m_version); header.m_BitsPerLevel = static_cast(m_BitsPerLevel); ASSERT_EQUAL(header.m_BitsPerLevel, m_BitsPerLevel, ()); header.m_Levels = static_cast(m_Levels); @@ -105,7 +115,12 @@ public: // Write level offsets. for (size_t i = 0; i < levelOffset.size(); ++i) - WriteToSink(writer, levelOffset[i]); + { + if (m_version == IntervalIndexVersion::V1) + WriteToSink(writer, base::checked_cast(levelOffset[i])); + else + WriteToSink(writer, levelOffset[i]); + } uint64_t const pos = writer.Pos(); CHECK_EQUAL(pos, afterHeaderPos, ()); @@ -118,8 +133,8 @@ public: // Check that [beg, end) is sorted and log most populous cell. 
if (beg != end) { - uint32_t count = 0; - uint32_t maxCount = 0; + uint64_t count = 0; + uint64_t maxCount = 0; typename CellIdValueIter::value_type mostPopulousCell = *beg; CellIdValueIter it = beg; uint64_t prev = it->GetCell(); @@ -154,7 +169,7 @@ public: } template - uint32_t WriteNode(SinkT & sink, uint32_t offset, uint32_t * childSizes) + uint64_t WriteNode(SinkT & sink, uint64_t offset, uint64_t * childSizes) { std::vector bitmapSerial, listSerial; bitmapSerial.reserve(1024); @@ -166,28 +181,28 @@ public: { sink.Write(&bitmapSerial[0], bitmapSerial.size()); ASSERT_EQUAL(bitmapSerial.size(), static_cast(bitmapSerial.size()), ()); - return static_cast(bitmapSerial.size()); + return bitmapSerial.size(); } else { sink.Write(&listSerial[0], listSerial.size()); ASSERT_EQUAL(listSerial.size(), static_cast(listSerial.size()), ()); - return static_cast(listSerial.size()); + return listSerial.size(); } } template void BuildLevel(Writer & writer, CellIdValueIter const & beg, CellIdValueIter const & end, - int level, uint32_t const * childSizesBeg, uint32_t const * childSizesEnd, - std::vector & sizes) + int level, uint64_t const * childSizesBeg, uint64_t const * childSizesEnd, + std::vector & sizes) { UNUSED_VALUE(childSizesEnd); ASSERT_GREATER(level, 0, ()); uint32_t const skipBits = m_LeafBytes * 8 + (level - 1) * m_BitsPerLevel; - std::vector expandedSizes(1 << m_BitsPerLevel); + std::vector expandedSizes(1 << m_BitsPerLevel); uint64_t prevKey = static_cast(-1); - uint32_t childOffset = 0; - uint32_t nextChildOffset = 0; + uint64_t childOffset = 0; + uint64_t nextChildOffset = 0; for (CellIdValueIter it = beg; it != end; ++it) { uint64_t const key = it->GetCell() >> skipBits; @@ -202,7 +217,8 @@ public: } nextChildOffset += *childSizesBeg; - expandedSizes[key & m_LastBitsMask] += *childSizesBeg; + CHECK_EQUAL(expandedSizes[key & m_LastBitsMask], 0, ()); + expandedSizes[key & m_LastBitsMask] = *childSizesBeg; ++childSizesBeg; prevKey = key; } @@ -212,7 +228,7 @@ 
public: template void BuildLeaves(Writer & writer, CellIdValueIter const & beg, CellIdValueIter const & end, - std::vector & sizes) + std::vector & sizes) { using Value = typename CellIdValueIter::value_type::ValueType; @@ -226,7 +242,7 @@ public: Value const value = it->GetValue(); if (it != beg && (key >> skipBits) != (prevKey >> skipBits)) { - sizes.push_back(static_cast(writer.Pos() - prevPos)); + sizes.push_back(writer.Pos() - prevPos); prevValue = 0; prevPos = writer.Pos(); } @@ -236,46 +252,74 @@ public: prevKey = key; prevValue = value; } - sizes.push_back(static_cast(writer.Pos() - prevPos)); + sizes.push_back(writer.Pos() - prevPos); } template - void WriteBitmapNode(SinkT & sink, uint32_t offset, uint32_t * childSizes) + void WriteBitmapNode(SinkT & sink, uint64_t offset, uint64_t * childSizes) { ASSERT_GREATER_OR_EQUAL(m_BitsPerLevel, 3, ()); - WriteVarUint(sink, (offset << 1) + 1); + + if (m_version == IntervalIndexVersion::V1) + CHECK_LESS_OR_EQUAL(offset, std::numeric_limits::max() >> 1, ()); + else + CHECK_LESS_OR_EQUAL(offset, std::numeric_limits::max() >> 1, ()); + uint64_t const offsetAndFlag = (offset << 1) + 1; + WriteVarUint(sink, offsetAndFlag); + buffer_vector bitMask(1 << (m_BitsPerLevel - 3)); for (uint32_t i = 0; i < static_cast(1 << m_BitsPerLevel); ++i) if (childSizes[i]) bits::SetBitTo1(&bitMask[0], i); sink.Write(&bitMask[0], bitMask.size()); + for (uint32_t i = 0; i < static_cast(1 << m_BitsPerLevel); ++i) - if (childSizes[i]) - WriteVarUint(sink, childSizes[i]); + { + uint64_t size = childSizes[i]; + if (!size) + continue; + + if (m_version == IntervalIndexVersion::V1) + CHECK_LESS_OR_EQUAL(size, std::numeric_limits::max(), ()); + WriteVarUint(sink, size); + } } template - void WriteListNode(SinkT & sink, uint32_t offset, uint32_t * childSizes) + void WriteListNode(SinkT & sink, uint64_t offset, uint64_t * childSizes) { ASSERT_LESS_OR_EQUAL(m_BitsPerLevel, 8, ()); - WriteVarUint(sink, (offset << 1)); + + if (m_version == 
IntervalIndexVersion::V1) + CHECK_LESS_OR_EQUAL(offset, std::numeric_limits::max() >> 1, ()); + else + CHECK_LESS_OR_EQUAL(offset, std::numeric_limits::max() >> 1, ()); + uint64_t const offsetAndFlag = offset << 1; + WriteVarUint(sink, offsetAndFlag); + for (uint32_t i = 0; i < static_cast(1 << m_BitsPerLevel); ++i) { - if (childSizes[i]) - { - WriteToSink(sink, static_cast(i)); - WriteVarUint(sink, childSizes[i]); - } + uint64_t size = childSizes[i]; + if (!size) + continue; + + WriteToSink(sink, static_cast(i)); + + if (m_version == IntervalIndexVersion::V1) + CHECK_LESS_OR_EQUAL(size, std::numeric_limits::max(), ()); + WriteVarUint(sink, size); } } private: + IntervalIndexVersion m_version; uint32_t m_Levels, m_BitsPerLevel, m_LeafBytes, m_LastBitsMask; }; template void BuildIntervalIndex(CellIdValueIter const & beg, CellIdValueIter const & end, Writer & writer, - uint32_t keyBits) + uint32_t keyBits, + IntervalIndexVersion version = IntervalIndexVersion::V1) { - IntervalIndexBuilder(keyBits, 1).BuildIndex(writer, beg, end); + IntervalIndexBuilder(version, keyBits, 1).BuildIndex(writer, beg, end); } diff --git a/indexer/locality_index_builder.cpp b/indexer/locality_index_builder.cpp index a8a4ca4d75..112148e133 100644 --- a/indexer/locality_index_builder.cpp +++ b/indexer/locality_index_builder.cpp @@ -72,7 +72,7 @@ bool BuildLocalityIndexFromDataFile(string const & dataFile, FileWriter writer(idxFileName); covering::BuildLocalityIndex, FileWriter, DEPTH_LEVELS>( - localities.GetVector(), writer, coverLocality, outFileName); + localities.GetVector(), writer, coverLocality, outFileName, IntervalIndexVersion::V2); } FilesContainerW(outFileName, FileWriter::OP_WRITE_TRUNCATE) diff --git a/indexer/locality_index_builder.hpp b/indexer/locality_index_builder.hpp index 0eaf15d8e5..88857a4eca 100644 --- a/indexer/locality_index_builder.hpp +++ b/indexer/locality_index_builder.hpp @@ -28,7 +28,8 @@ using CoverLocality = template void BuildLocalityIndex(ObjectsVector 
const & objects, Writer & writer, - CoverLocality const & coverLocality, std::string const & tmpFilePrefix) + CoverLocality const & coverLocality, std::string const & tmpFilePrefix, + IntervalIndexVersion version = IntervalIndexVersion::V1) { std::string const cellsToValueFile = tmpFilePrefix + CELL2LOCALITY_SORTED_EXT + ".all"; SCOPE_GUARD(cellsToValueFileGuard, std::bind(&FileWriter::DeleteFileX, cellsToValueFile)); @@ -51,7 +52,7 @@ void BuildLocalityIndex(ObjectsVector const & objects, Writer & writer, DDVector, FileReader, uint64_t> cellsToValue(reader); { - BuildIntervalIndex(cellsToValue.begin(), cellsToValue.end(), writer, DEPTH_LEVELS * 2 + 1); + BuildIntervalIndex(cellsToValue.begin(), cellsToValue.end(), writer, DEPTH_LEVELS * 2 + 1, version); } } } // namespace covering