forked from organicmaps/organicmaps-tmp
New smaller index format.
This commit is contained in:
parent
9179f04bfe
commit
45bb508bad
5 changed files with 253 additions and 164 deletions
|
@ -20,6 +20,35 @@ struct CellIdFeaturePairForTest
|
|||
};
|
||||
}
|
||||
|
||||
UNIT_TEST(IntervalIndex_Serialized)
|
||||
{
|
||||
vector<CellIdFeaturePairForTest> data;
|
||||
data.push_back(CellIdFeaturePairForTest(0x21U, 0));
|
||||
data.push_back(CellIdFeaturePairForTest(0x22U, 1));
|
||||
data.push_back(CellIdFeaturePairForTest(0x41U, 2));
|
||||
vector<char> serializedIndex;
|
||||
MemWriter<vector<char> > writer(serializedIndex);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 10);
|
||||
|
||||
char const expSerial [] =
|
||||
"\x02\x05" // Header
|
||||
"\x00\x00\x00\x00" "\x06\x00\x00\x00" // Root
|
||||
"\x10\x00\x00\x00" "\x06\x00\x00\x00" // 0x21 and 0x22
|
||||
"\x0A\x00\x00\x00" "\x02\x00\x00\x00" // 0x41
|
||||
"\x03\x00\x00\x00" "\x00\x00\x00\x00" // Dummy last internal node
|
||||
"\x01" "\x05" "\x09" // (0x21, 0), (0x22, 1), (0x31, 2)
|
||||
"";
|
||||
|
||||
TEST_EQUAL(serializedIndex, vector<char>(expSerial, expSerial + ARRAY_SIZE(expSerial) - 1), ());
|
||||
|
||||
MemReader reader(&serializedIndex[0], serializedIndex.size());
|
||||
IntervalIndex<MemReader> index(reader);
|
||||
uint32_t expected [] = {0, 1, 2};
|
||||
vector<uint32_t> values;
|
||||
index.ForEach(MakeBackInsertFunctor(values), 0, 0xFF);
|
||||
TEST_EQUAL(values, vector<uint32_t>(expected, expected + ARRAY_SIZE(expected)), ());
|
||||
}
|
||||
|
||||
UNIT_TEST(IntervalIndex_Simple)
|
||||
{
|
||||
vector<CellIdFeaturePairForTest> data;
|
||||
|
@ -28,9 +57,9 @@ UNIT_TEST(IntervalIndex_Simple)
|
|||
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D100ULL, 2));
|
||||
vector<char> serializedIndex;
|
||||
MemWriter<vector<char> > writer(serializedIndex);
|
||||
BuildIntervalIndex<5>(data.begin(), data.end(), writer, 2);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
|
||||
MemReader reader(&serializedIndex[0], serializedIndex.size());
|
||||
IntervalIndex<uint32_t, MemReader> index(reader, 5);
|
||||
IntervalIndex<MemReader> index(reader);
|
||||
{
|
||||
uint32_t expected [] = {0, 1, 2};
|
||||
vector<uint32_t> values;
|
||||
|
@ -78,9 +107,9 @@ UNIT_TEST(IntervalIndex_Empty)
|
|||
vector<CellIdFeaturePairForTest> data;
|
||||
vector<char> serializedIndex;
|
||||
MemWriter<vector<char> > writer(serializedIndex);
|
||||
BuildIntervalIndex<5>(data.begin(), data.end(), writer, 2);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
|
||||
MemReader reader(&serializedIndex[0], serializedIndex.size());
|
||||
IntervalIndex<uint32_t, MemReader> index(reader, 5);
|
||||
IntervalIndex<MemReader> index(reader);
|
||||
{
|
||||
vector<uint32_t> values;
|
||||
index.ForEach(MakeBackInsertFunctor(values), 0ULL, 0xFFFFFFFFFFULL);
|
||||
|
@ -97,9 +126,9 @@ UNIT_TEST(IntervalIndex_Simple2)
|
|||
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D200ULL, 2));
|
||||
vector<char> serializedIndex;
|
||||
MemWriter<vector<char> > writer(serializedIndex);
|
||||
BuildIntervalIndex<5>(data.begin(), data.end(), writer, 2);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
|
||||
MemReader reader(&serializedIndex[0], serializedIndex.size());
|
||||
IntervalIndex<uint32_t, MemReader> index(reader, 5);
|
||||
IntervalIndex<MemReader> index(reader);
|
||||
{
|
||||
uint32_t expected [] = {0, 1, 2, 3};
|
||||
vector<uint32_t> values;
|
||||
|
@ -116,9 +145,9 @@ UNIT_TEST(IntervalIndex_Simple3)
|
|||
data.push_back(CellIdFeaturePairForTest(0x0200ULL, 1));
|
||||
vector<char> serializedIndex;
|
||||
MemWriter<vector<char> > writer(serializedIndex);
|
||||
BuildIntervalIndex<2>(data.begin(), data.end(), writer, 1);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
|
||||
MemReader reader(&serializedIndex[0], serializedIndex.size());
|
||||
IntervalIndex<uint32_t, MemReader> index(reader, 2);
|
||||
IntervalIndex<MemReader> index(reader);
|
||||
{
|
||||
uint32_t expected [] = {0, 1};
|
||||
vector<uint32_t> values;
|
||||
|
@ -135,9 +164,9 @@ UNIT_TEST(IntervalIndex_Simple4)
|
|||
data.push_back(CellIdFeaturePairForTest(0x02030400ULL, 1));
|
||||
vector<char> serializedIndex;
|
||||
MemWriter<vector<char> > writer(serializedIndex);
|
||||
BuildIntervalIndex<4>(data.begin(), data.end(), writer, 1);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
|
||||
MemReader reader(&serializedIndex[0], serializedIndex.size());
|
||||
IntervalIndex<uint32_t, MemReader> index(reader, 4);
|
||||
IntervalIndex<MemReader> index(reader);
|
||||
{
|
||||
uint32_t expected [] = {0, 1};
|
||||
vector<uint32_t> values;
|
||||
|
@ -156,9 +185,9 @@ UNIT_TEST(IntervalIndex_Simple5)
|
|||
data.push_back(CellIdFeaturePairForTest(0xA0B2C2D200ULL, 2));
|
||||
vector<char> serializedIndex;
|
||||
MemWriter<vector<char> > writer(serializedIndex);
|
||||
BuildIntervalIndex<5>(data.begin(), data.end(), writer, 1);
|
||||
BuildIntervalIndex(data.begin(), data.end(), writer, 40);
|
||||
MemReader reader(&serializedIndex[0], serializedIndex.size());
|
||||
IntervalIndex<uint32_t, MemReader> index(reader, 5);
|
||||
IntervalIndex<MemReader> index(reader);
|
||||
{
|
||||
uint32_t expected [] = {0, 1, 2, 3};
|
||||
vector<uint32_t> values;
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
#pragma once
|
||||
#include "../coding/endianness.hpp"
|
||||
#include "../coding/byte_stream.hpp"
|
||||
#include "../coding/varint.hpp"
|
||||
#include "../base/assert.hpp"
|
||||
#include "../base/base.hpp"
|
||||
#include "../base/bits.hpp"
|
||||
#include "../base/buffer_vector.hpp"
|
||||
#include "../base/macros.hpp"
|
||||
|
||||
#include "../std/memcpy.hpp"
|
||||
#include "../std/static_assert.hpp"
|
||||
|
||||
class IntervalIndexBase
|
||||
{
|
||||
|
@ -12,31 +16,24 @@ public:
|
|||
#pragma pack(push, 1)
|
||||
struct Header
|
||||
{
|
||||
uint8_t m_CellIdLeafBytes;
|
||||
uint8_t m_Levels;
|
||||
uint8_t m_BitsPerLevel;
|
||||
};
|
||||
#pragma pack(pop)
|
||||
STATIC_ASSERT(sizeof(Header) == 2);
|
||||
|
||||
class Index
|
||||
struct Index
|
||||
{
|
||||
public:
|
||||
uint32_t GetBaseOffset() const { return UINT32_FROM_UINT16(m_BaseOffsetHi, m_BaseOffsetLo); }
|
||||
void SetBaseOffset(uint32_t baseOffset)
|
||||
{
|
||||
m_BaseOffsetLo = UINT32_LO(baseOffset);
|
||||
m_BaseOffsetHi = UINT32_HI(baseOffset);
|
||||
}
|
||||
inline uint32_t GetOffset() const { return m_Offset; }
|
||||
inline uint32_t Bit(uint32_t i) const { return (m_BitMask >> i) & 1; }
|
||||
|
||||
private:
|
||||
uint16_t m_BaseOffsetLo;
|
||||
uint16_t m_BaseOffsetHi;
|
||||
public:
|
||||
uint16_t m_Count[256];
|
||||
uint32_t m_Offset;
|
||||
uint32_t m_BitMask;
|
||||
};
|
||||
STATIC_ASSERT(sizeof(Index) == 2 * 258);
|
||||
STATIC_ASSERT(sizeof(Index) == 8);
|
||||
};
|
||||
|
||||
// TODO: IntervalIndex shouldn't do SwapIfBigEndian for ValueT.
|
||||
template <typename ValueT, class ReaderT>
|
||||
template <class ReaderT>
|
||||
class IntervalIndex : public IntervalIndexBase
|
||||
{
|
||||
public:
|
||||
|
@ -50,21 +47,26 @@ public:
|
|||
vector<char> m_IntervalIndexCache;
|
||||
};
|
||||
|
||||
IntervalIndex(ReaderT const & reader, int cellIdBytes = 5)
|
||||
: m_Reader(reader), m_CellIdBytes(cellIdBytes)
|
||||
explicit IntervalIndex(ReaderT const & reader)
|
||||
: m_Reader(reader)
|
||||
{
|
||||
m_Reader.Read(0, &m_Header, sizeof(m_Header));
|
||||
ASSERT_EQUAL(m_Header.m_BitsPerLevel, 5, ());
|
||||
ReadIndex(sizeof(m_Header), m_Level0Index);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void ForEach(F const & f, uint64_t beg, uint64_t end, Query & query) const
|
||||
{
|
||||
ASSERT_LESS(beg, 1ULL << 8 * m_CellIdBytes, (beg, end));
|
||||
ASSERT_LESS_OR_EQUAL(end, 1ULL << 8 * m_CellIdBytes, (beg, end));
|
||||
// end is inclusive in ForEachImpl().
|
||||
--end;
|
||||
ForEachImpl(f, beg, end, m_Level0Index, m_CellIdBytes - 1, query);
|
||||
ASSERT_LESS(beg, 1ULL << m_Header.m_Levels * m_Header.m_BitsPerLevel, (beg, end));
|
||||
ASSERT_LESS_OR_EQUAL(end, 1ULL << m_Header.m_Levels * m_Header.m_BitsPerLevel, (beg, end));
|
||||
if (beg != end)
|
||||
{
|
||||
// end is inclusive in ForEachImpl().
|
||||
--end;
|
||||
ForEachImpl(f, beg, end, m_Level0Index, sizeof(m_Header) + sizeof(m_Level0Index),
|
||||
(m_Header.m_Levels - 1) * m_Header.m_BitsPerLevel, query);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
|
@ -76,52 +78,64 @@ public:
|
|||
|
||||
private:
|
||||
template <typename F>
|
||||
void ForEachImpl(F const & f, uint64_t beg, uint64_t end, Index const & index, int level,
|
||||
Query & query) const
|
||||
void ForEachImpl(F const & f, uint64_t beg, uint64_t end, Index const & index,
|
||||
uint32_t baseOffset, int skipBits, Query & query) const
|
||||
{
|
||||
uint32_t const beg0 = static_cast<uint32_t>(beg >> (8 * level));
|
||||
uint32_t const end0 = static_cast<uint32_t>(end >> (8 * level));
|
||||
uint32_t cumCount = 0;
|
||||
for (uint32_t i = 0; i < beg0; ++i)
|
||||
cumCount += index.m_Count[i];
|
||||
for (uint32_t i = beg0; i <= end0; ++i)
|
||||
uint32_t const beg0 = static_cast<uint32_t>(beg >> skipBits);
|
||||
uint32_t const end0 = static_cast<uint32_t>(end >> skipBits);
|
||||
ASSERT_GREATER_OR_EQUAL(skipBits, 0, (beg, end, baseOffset));
|
||||
ASSERT_LESS_OR_EQUAL(beg, end, (baseOffset, skipBits));
|
||||
ASSERT_LESS_OR_EQUAL(beg0, end0, (beg, end, baseOffset, skipBits));
|
||||
ASSERT_LESS(end0, (1 << m_Header.m_BitsPerLevel), (beg0, beg, end, baseOffset, skipBits));
|
||||
|
||||
if (skipBits > 0)
|
||||
{
|
||||
ASSERT_LESS(i, 256, ());
|
||||
if (index.m_Count[i] != 0)
|
||||
uint32_t cumCount = 0;
|
||||
for (uint32_t i = 0; i < beg0; ++i)
|
||||
cumCount += index.Bit(i);
|
||||
for (uint32_t i = beg0; i <= end0; ++i)
|
||||
{
|
||||
uint64_t const levelBytesFF = (1ULL << 8 * level) - 1;
|
||||
uint64_t const b1 = (i == beg0) ? (beg & levelBytesFF) : 0;
|
||||
uint64_t const e1 = (i == end0) ? (end & levelBytesFF) : levelBytesFF;
|
||||
if (level > m_Header.m_CellIdLeafBytes)
|
||||
if (index.Bit(i))
|
||||
{
|
||||
uint64_t const levelBytesFF = (1ULL << skipBits) - 1;
|
||||
uint64_t const b1 = (i == beg0) ? (beg & levelBytesFF) : 0;
|
||||
uint64_t const e1 = (i == end0) ? (end & levelBytesFF) : levelBytesFF;
|
||||
|
||||
Index index1;
|
||||
ReadIndex(index.GetBaseOffset() + (cumCount * sizeof(Index)), index1);
|
||||
ForEachImpl(f, b1, e1, index1, level - 1, query);
|
||||
uint32_t const offset = baseOffset + index.GetOffset() + (cumCount * sizeof(Index));
|
||||
ReadIndex(offset, index1);
|
||||
ForEachImpl(f, b1, e1, index1, offset + sizeof(Index),
|
||||
skipBits - m_Header.m_BitsPerLevel, query);
|
||||
++cumCount;
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: Use binary search here if count is very large.
|
||||
uint32_t const step = sizeof(ValueT) + m_Header.m_CellIdLeafBytes;
|
||||
uint32_t const count = index.m_Count[i];
|
||||
uint32_t pos = index.GetBaseOffset() + (cumCount * step);
|
||||
size_t const readSize = step * count;
|
||||
query.m_IntervalIndexCache.assign(readSize, 0);
|
||||
char * pData = &query.m_IntervalIndexCache[0];
|
||||
m_Reader.Read(pos, pData, readSize);
|
||||
for (uint32_t j = 0; j < count; ++j, pData += step)
|
||||
// for (uint32_t j = 0; j < count; ++j, pos += step)
|
||||
{
|
||||
ValueT value;
|
||||
uint32_t cellIdOnDisk = 0;
|
||||
memcpy(&value, pData, sizeof(ValueT));
|
||||
memcpy(&cellIdOnDisk, pData + sizeof(ValueT), m_Header.m_CellIdLeafBytes);
|
||||
// m_Reader.Read(pos, &value, step);
|
||||
uint32_t const cellId = SwapIfBigEndian(cellIdOnDisk);
|
||||
if (b1 <= cellId && cellId <= e1)
|
||||
f(SwapIfBigEndian(value));
|
||||
}
|
||||
}
|
||||
cumCount += index.m_Count[i];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Index nextIndex;
|
||||
ReadIndex(baseOffset, nextIndex);
|
||||
uint32_t const begOffset = baseOffset + index.GetOffset();
|
||||
uint32_t const endOffset = baseOffset + sizeof(Index) + nextIndex.GetOffset();
|
||||
ASSERT_LESS(begOffset, endOffset, (beg, end, baseOffset, skipBits));
|
||||
buffer_vector<uint8_t, 256> data(endOffset - begOffset);
|
||||
m_Reader.Read(begOffset, &data[0], data.size());
|
||||
ArrayByteSource src(&data[0]);
|
||||
void const * const pEnd = &data[0] + data.size();
|
||||
uint32_t key = -1;
|
||||
uint32_t value = 0;
|
||||
while (src.Ptr() < pEnd)
|
||||
{
|
||||
uint32_t const x = ReadVarUint<uint32_t>(src);
|
||||
int32_t const delta = bits::ZigZagDecode(x >> 1);
|
||||
ASSERT_GREATER_OR_EQUAL(static_cast<int32_t>(value) + delta, 0, ());
|
||||
value += delta;
|
||||
if (x & 1)
|
||||
for (++key; key < (1 << m_Header.m_BitsPerLevel) && !index.Bit(key); ++key) ;
|
||||
ASSERT_LESS(key, 1 << m_Header.m_BitsPerLevel, (key));
|
||||
if (key > end0)
|
||||
break;
|
||||
if (key >= beg0)
|
||||
f(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -129,11 +143,8 @@ private:
|
|||
void ReadIndex(uint64_t pos, Index & index) const
|
||||
{
|
||||
m_Reader.Read(pos, &index, sizeof(Index));
|
||||
if (IsBigEndian())
|
||||
{
|
||||
for (uint32_t i = 0; i < 256; ++i)
|
||||
index.m_Count[i] = SwapIfBigEndian(index.m_Count[i]);
|
||||
}
|
||||
index.m_Offset = SwapIfBigEndian(index.m_Offset);
|
||||
index.m_BitMask = SwapIfBigEndian(index.m_BitMask);
|
||||
}
|
||||
|
||||
ReaderT m_Reader;
|
||||
|
|
|
@ -1,19 +1,57 @@
|
|||
#pragma once
|
||||
#include "interval_index.hpp"
|
||||
#include "../coding/byte_stream.hpp"
|
||||
#include "../coding/endianness.hpp"
|
||||
#include "../coding/varint.hpp"
|
||||
#include "../coding/write_to_sink.hpp"
|
||||
#include "../base/assert.hpp"
|
||||
#include "../base/base.hpp"
|
||||
#include "../base/bits.hpp"
|
||||
#include "../base/logging.hpp"
|
||||
#include "../std/vector.hpp"
|
||||
#include "../std/memcpy.hpp"
|
||||
|
||||
// TODO: BuildIntervalIndex() shouldn't rely on indexing cellid-feature pairs.
|
||||
template <int kCellIdBytes, typename CellIdValueIterT, class SinkT>
|
||||
void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & end,
|
||||
SinkT & writer, int const leafBytes = 1)
|
||||
namespace impl
|
||||
{
|
||||
|
||||
template <class WriterT>
|
||||
void WriteIntervalIndexNode(WriterT & writer, uint64_t offset, uint64_t bitMask)
|
||||
{
|
||||
// At the moment, uint32_t is used as a bitMask, but this can change in the future.
|
||||
CHECK_EQUAL(static_cast<uint32_t>(bitMask), bitMask, (offset));
|
||||
CHECK_GREATER_OR_EQUAL(offset, writer.Pos() + 8, ());
|
||||
WriteToSink(writer, static_cast<uint32_t>(offset - writer.Pos() - 8));
|
||||
WriteToSink(writer, static_cast<uint32_t>(bitMask));
|
||||
}
|
||||
|
||||
template <class SinkT> void WriteIntervalIndexLeaf(SinkT & sink, int bitsPerLevel,
|
||||
uint64_t prevKey, uint64_t prevValue,
|
||||
uint64_t key, uint64_t value)
|
||||
{
|
||||
uint64_t const lastBitsZeroMask = (1ULL << bitsPerLevel) - 1;
|
||||
if ((key & ~lastBitsZeroMask) != (prevKey & ~lastBitsZeroMask))
|
||||
prevValue = 0;
|
||||
|
||||
int64_t const delta = static_cast<int64_t>(value) - static_cast<int64_t>(prevValue);
|
||||
uint64_t const encodedDelta = bits::ZigZagEncode(delta);
|
||||
uint64_t const code = (encodedDelta << 1) + (key == prevKey ? 0 : 1);
|
||||
WriteVarUint(sink, code);
|
||||
}
|
||||
|
||||
inline uint32_t IntervalIndexLeafSize(int bitsPerLevel,
|
||||
uint64_t prevKey, uint64_t prevValue,
|
||||
uint64_t key, uint64_t value)
|
||||
{
|
||||
CountingSink sink;
|
||||
WriteIntervalIndexLeaf(sink, bitsPerLevel, prevKey, prevValue, key, value);
|
||||
return sink.GetCount();
|
||||
}
|
||||
|
||||
template <typename CellIdValueIterT>
|
||||
bool CheckIntervalIndexInputSequence(CellIdValueIterT const & beg,
|
||||
CellIdValueIterT const & end,
|
||||
uint32_t keyBits)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
// Check that [beg, end) is sorted and log most populous cell.
|
||||
if (beg != end)
|
||||
{
|
||||
|
@ -24,8 +62,8 @@ void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & e
|
|||
uint64_t prev = it->GetCell();
|
||||
for (++it; it != end; ++it)
|
||||
{
|
||||
ASSERT_GREATER(it->GetCell(), 0, ());
|
||||
ASSERT_LESS_OR_EQUAL(prev, it->GetCell(), ());
|
||||
CHECK_GREATER(it->GetCell(), 0, ());
|
||||
CHECK_LESS_OR_EQUAL(prev, it->GetCell(), ());
|
||||
count = (prev == it->GetCell() ? count + 1 : 0);
|
||||
if (count > maxCount)
|
||||
{
|
||||
|
@ -41,98 +79,108 @@ void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & e
|
|||
}
|
||||
}
|
||||
for (CellIdValueIterT it = beg; it != end; ++it)
|
||||
ASSERT_LESS(it->GetCell(), 1ULL << 8 * kCellIdBytes, ());
|
||||
#endif
|
||||
CHECK_LESS(it->GetCell(), 1ULL << keyBits, ());
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// TODO: BuildIntervalIndex() shouldn't rely on indexing cellid-feature pairs.
|
||||
template <typename CellIdValueIterT, class SinkT>
|
||||
void BuildIntervalIndex(CellIdValueIterT const & beg, CellIdValueIterT const & end,
|
||||
SinkT & writer, uint8_t const keyBits)
|
||||
{
|
||||
CHECK_LESS(keyBits, 63, ());
|
||||
CHECK(impl::CheckIntervalIndexInputSequence(beg, end, keyBits), ());
|
||||
|
||||
uint32_t const bitsPerLevel = 5;
|
||||
uint32_t const lastBitsMask = (1 << bitsPerLevel) - 1;
|
||||
int const levelCount = (keyBits + bitsPerLevel - 1) / bitsPerLevel;
|
||||
|
||||
// Write header.
|
||||
{
|
||||
IntervalIndexBase::Header header;
|
||||
header.m_CellIdLeafBytes = leafBytes;
|
||||
header.m_BitsPerLevel = bitsPerLevel;
|
||||
header.m_Levels = levelCount;
|
||||
writer.Write(&header, sizeof(header));
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
vector<uint32_t> childOffsets;
|
||||
childOffsets.push_back(static_cast<uint32_t>(writer.Pos()));
|
||||
#endif
|
||||
uint32_t childOffset = static_cast<uint32_t>(writer.Pos()) + sizeof(IntervalIndexBase::Index);
|
||||
uint32_t thisLevelCount = 1;
|
||||
for (int level = kCellIdBytes - 1; level >= leafBytes; --level)
|
||||
if (beg == end)
|
||||
{
|
||||
LOG(LINFO, ("Building interval index, level", level));
|
||||
#ifdef DEBUG
|
||||
ASSERT_EQUAL(childOffsets.back(), writer.Pos(), ());
|
||||
childOffsets.push_back(childOffset);
|
||||
#endif
|
||||
uint64_t const initialWriterPos = writer.Pos();
|
||||
uint32_t childCount = 0, totalChildCount = 0;
|
||||
IntervalIndexBase::Index index;
|
||||
memset(&index, 0, sizeof(index));
|
||||
uint64_t prevParentBytes = 0;
|
||||
uint8_t prevByte = 0;
|
||||
// Write empty index.
|
||||
CHECK_GREATER(levelCount, 1, ());
|
||||
impl::WriteIntervalIndexNode(writer, writer.Pos() + sizeof(IntervalIndexBase::Index), 0);
|
||||
LOG(LWARNING, ("Written empty index."));
|
||||
return;
|
||||
}
|
||||
|
||||
// Write internal nodes.
|
||||
uint64_t childOffset = writer.Pos() + sizeof(IntervalIndexBase::Index);
|
||||
uint64_t nextChildOffset = childOffset;
|
||||
for (int level = levelCount - 1; level >= 0; --level)
|
||||
{
|
||||
// LOG(LINFO, ("Building interval index, level", level));
|
||||
uint64_t const initialLevelWriterPos = writer.Pos();
|
||||
|
||||
uint64_t bitMask = 0;
|
||||
uint64_t prevKey = 0;
|
||||
uint64_t prevValue = 0;
|
||||
for (CellIdValueIterT it = beg; it != end; ++it)
|
||||
{
|
||||
uint64_t id = it->GetCell();
|
||||
uint64_t const thisParentBytes = id >> 8 * (level + 1);
|
||||
uint8_t const thisByte = static_cast<uint8_t>(0xFF & (id >> 8 * level));
|
||||
if (it != beg && prevParentBytes != thisParentBytes)
|
||||
uint64_t const key = it->GetCell() >> (level * bitsPerLevel);
|
||||
uint32_t const value = it->GetFeature();
|
||||
|
||||
if (it != beg && (prevKey & ~lastBitsMask) != (key & ~lastBitsMask))
|
||||
{
|
||||
// Writing index for previous parent.
|
||||
index.SetBaseOffset(childOffset);
|
||||
for (uint32_t i = 0; i < 256; ++i)
|
||||
index.m_Count[i] = SwapIfBigEndian(index.m_Count[i]);
|
||||
writer.Write(&index, sizeof(index));
|
||||
memset(&index, 0, sizeof(index));
|
||||
if (level != leafBytes)
|
||||
childOffset += childCount * sizeof(index);
|
||||
else
|
||||
childOffset += (sizeof(beg->GetFeature()) + leafBytes) * childCount;
|
||||
childCount = 0;
|
||||
--thisLevelCount;
|
||||
// Write node for the previous parent.
|
||||
impl::WriteIntervalIndexNode(writer, childOffset, bitMask);
|
||||
childOffset = nextChildOffset;
|
||||
bitMask = 0;
|
||||
}
|
||||
|
||||
if (level == leafBytes || it == beg ||
|
||||
prevByte != thisByte || prevParentBytes != thisParentBytes)
|
||||
{
|
||||
++childCount;
|
||||
++totalChildCount;
|
||||
++index.m_Count[thisByte];
|
||||
CHECK_LESS(index.m_Count[thisByte], 65535, (level, leafBytes, prevByte, thisByte, \
|
||||
prevParentBytes, thisParentBytes, \
|
||||
it->GetCell()));
|
||||
}
|
||||
bitMask |= (1ULL << (key & lastBitsMask));
|
||||
|
||||
prevParentBytes = thisParentBytes;
|
||||
prevByte = thisByte;
|
||||
if (level == 0)
|
||||
nextChildOffset += impl::IntervalIndexLeafSize(bitsPerLevel,
|
||||
prevKey, prevValue, key, value);
|
||||
else if (it == beg || prevKey != key)
|
||||
nextChildOffset += sizeof(IntervalIndexBase::Index);
|
||||
|
||||
prevKey = key;
|
||||
prevValue = value;
|
||||
}
|
||||
index.SetBaseOffset(childOffset);
|
||||
for (uint32_t i = 0; i < 256; ++i)
|
||||
index.m_Count[i] = SwapIfBigEndian(index.m_Count[i]);
|
||||
writer.Write(&index, sizeof(index));
|
||||
memset(&index, 0, sizeof(index));
|
||||
// if level == leafBytes, this is wrong, but childOffset is not needed any more.
|
||||
childOffset += childCount * sizeof(IntervalIndexBase::Index);
|
||||
--thisLevelCount;
|
||||
CHECK_EQUAL(thisLevelCount, 0, (kCellIdBytes, leafBytes));
|
||||
thisLevelCount = totalChildCount;
|
||||
|
||||
LOG(LINFO, ("Level size:", writer.Pos() - initialWriterPos));
|
||||
// Write the last node.
|
||||
impl::WriteIntervalIndexNode(writer, childOffset, bitMask);
|
||||
|
||||
if (beg == end)
|
||||
break;
|
||||
if (level == 1)
|
||||
nextChildOffset += sizeof(IntervalIndexBase::Index);
|
||||
|
||||
childOffset = nextChildOffset;
|
||||
|
||||
LOG(LINFO, ("Level:", level, "size:", writer.Pos() - initialLevelWriterPos));
|
||||
}
|
||||
|
||||
// Writing values.
|
||||
ASSERT_EQUAL(childOffsets.back(), writer.Pos(), ());
|
||||
LOG(LINFO, ("Building interval, leaves."));
|
||||
uint64_t const initialWriterPos = writer.Pos();
|
||||
uint32_t const mask = (1ULL << 8 * leafBytes) - 1;
|
||||
for (CellIdValueIterT it = beg; it != end; ++it)
|
||||
// Write the dummy one-after-last node.
|
||||
impl::WriteIntervalIndexNode(writer, nextChildOffset, 0);
|
||||
|
||||
// Write leaves.
|
||||
{
|
||||
WriteToSink(writer, it->GetFeature());
|
||||
uint32_t cellId = static_cast<uint32_t>(it->GetCell() & mask);
|
||||
cellId = SwapIfBigEndian(cellId);
|
||||
writer.Write(&cellId, leafBytes);
|
||||
uint64_t const initialLevelWriterPos = writer.Pos();
|
||||
|
||||
uint64_t prevKey = -1;
|
||||
uint32_t prevValue = 0;
|
||||
for (CellIdValueIterT it = beg; it != end; ++it)
|
||||
{
|
||||
uint64_t const key = it->GetCell();
|
||||
uint32_t const value = it->GetFeature();
|
||||
impl::WriteIntervalIndexLeaf(writer, bitsPerLevel, prevKey, prevValue, key, value);
|
||||
prevKey = key;
|
||||
prevValue = value;
|
||||
}
|
||||
|
||||
LOG(LINFO, ("Leaves size:", writer.Pos() - initialLevelWriterPos));
|
||||
}
|
||||
LOG(LINFO, ("Level size:", writer.Pos() - initialWriterPos));
|
||||
|
||||
LOG(LINFO, ("Interval index building done."));
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ class ScaleIndex : public ScaleIndexBase
|
|||
{
|
||||
public:
|
||||
typedef ReaderT ReaderType;
|
||||
typedef IntervalIndex<uint32_t, ReaderT> IntervalIndexType;
|
||||
typedef IntervalIndex<ReaderT> IntervalIndexType;
|
||||
typedef typename IntervalIndexType::Query Query;
|
||||
|
||||
ScaleIndex() {}
|
||||
|
|
|
@ -145,7 +145,8 @@ inline void IndexScales(uint32_t bucketsCount,
|
|||
LOG(LINFO, ("Being indexed", "features:", numFeatures, "cells:", numCells,
|
||||
"cells per feature:", (numCells + 1.0) / (numFeatures + 1.0)));
|
||||
SubWriter<WriterT> subWriter(writer);
|
||||
BuildIntervalIndex<5>(cellsToFeatures.begin(), cellsToFeatures.end(), subWriter);
|
||||
BuildIntervalIndex(cellsToFeatures.begin(), cellsToFeatures.end(), subWriter,
|
||||
RectId::DEPTH_LEVELS * 2 + 1);
|
||||
}
|
||||
FileWriter::DeleteFileX(tmpFilePrefix + ".c2f.sorted");
|
||||
// LOG(LINFO, ("Indexing done."));
|
||||
|
|
Loading…
Add table
Reference in a new issue