[indexer] Do not reuse FeaturesVector buffer for different features. Store buffer inside FeatureType.

This commit is contained in:
tatiana-yan 2020-03-23 12:48:36 +03:00 committed by mpimenov
parent 0b17f2b893
commit ea9f99cdf6
8 changed files with 101 additions and 116 deletions

View file

@ -20,10 +20,12 @@ namespace
struct SaveForEachParams
{
explicit SaveForEachParams(vector<pair<uint64_t, string> > & data) : m_Data(data) {}
void operator () (uint64_t pos, char const * pData, uint32_t size) const
void operator()(uint64_t pos, vector<char> && data) const
{
m_Data.push_back(make_pair(pos, string(pData, pData + size)));
m_Data.emplace_back(pos, string(data.begin(), data.end()));
}
vector<pair<uint64_t, string> > & m_Data;
};
@ -37,7 +39,7 @@ UNIT_TEST(VarRecordReader_Simple)
size_t const longStringSize = sizeof(longString) - 1;
TEST_GREATER(longStringSize, 128, ());
{
MemWriter<vector<char> > writer(data);
MemWriter<vector<char>> writer(data);
WriteVarUint(writer, 3U); // 0
writer.Write("abc", 3); // 1
WriteVarUint(writer, longStringSize); // 4
@ -47,34 +49,22 @@ UNIT_TEST(VarRecordReader_Simple)
// 11 + longStringSize
}
uint32_t chunkSizes[] = {4, 5, 63, 64, 65, 1000};
for (uint32_t chunkSize = 0; chunkSize < ARRAY_SIZE(chunkSizes); ++chunkSize)
{
MemReader reader(&data[0], data.size());
VarRecordReader<MemReader, &VarRecordSizeReaderVarint> recordReader(
reader, chunkSizes[chunkSize]);
MemReader reader(&data[0], data.size());
VarRecordReader<MemReader> recordReader(reader);
vector<char> r;
uint32_t offset, size;
auto r = recordReader.ReadRecord(0);
TEST_EQUAL(string(r.begin(), r.end()), "abc", ());
TEST_EQUAL(4, recordReader.ReadRecord(0, r, offset, size), ());
r.resize(size);
TEST_EQUAL(string(r.begin() + offset, r.end()), "abc", ());
r = recordReader.ReadRecord(6 + longStringSize);
TEST_EQUAL(string(r.begin(), r.end()), "defg", ());
TEST_EQUAL(11 + longStringSize, recordReader.ReadRecord(6 + longStringSize, r, offset, size), ());
r.resize(size);
TEST_EQUAL(string(r.begin() + offset, r.end()), "defg", ());
r = recordReader.ReadRecord(4);
TEST_EQUAL(string(r.begin(), r.end()), longString, ());
TEST_EQUAL(6 + longStringSize, recordReader.ReadRecord(4, r, offset, size), ());
r.resize(size);
TEST_EQUAL(string(r.begin() + offset, r.end()), longString, ());
vector<pair<uint64_t, string> > forEachCalls;
recordReader.ForEachRecord(SaveForEachParams(forEachCalls));
vector<pair<uint64_t, string> > expectedForEachCalls;
expectedForEachCalls.push_back(pair<uint64_t, string>(0, "abc"));
expectedForEachCalls.push_back(pair<uint64_t, string>(4, longString));
expectedForEachCalls.push_back(pair<uint64_t, string>(6 + longStringSize, "defg"));
TEST_EQUAL(forEachCalls, expectedForEachCalls, ());
}
vector<pair<uint64_t, string>> forEachCalls;
recordReader.ForEachRecord(SaveForEachParams(forEachCalls));
vector<pair<uint64_t, string>> expectedForEachCalls = {{0, "abc"},
{4, longString},
{6 + longStringSize, "defg"}};
TEST_EQUAL(forEachCalls, expectedForEachCalls, ());
}

View file

@ -11,77 +11,40 @@
#include <string>
#include <vector>
inline uint32_t VarRecordSizeReaderVarint(ArrayByteSource & source)
{
return ReadVarUint<uint32_t>(source);
}
inline uint32_t VarRecordSizeReaderFixed(ArrayByteSource & source)
{
return ReadPrimitiveFromSource<uint32_t>(source);
}
// Efficiently reads records, encoded as [VarUint size] [Data] .. [VarUint size] [Data].
// If size of a record is less than expectedRecordSize, exactly 1 Reader.Read() call is made,
// otherwise exactly 2 Reader.Read() calls are made.
// Second template parameter is strategy for reading record size,
// either &VarRecordSizeReaderVarint or &VarRecordSizeReaderFixed.
template <class ReaderT, uint32_t (*VarRecordSizeReaderFn)(ArrayByteSource &)>
// Reads records, encoded as [VarUint size] [Data] .. [VarUint size] [Data].
template <class ReaderT>
class VarRecordReader
{
public:
VarRecordReader(ReaderT const & reader, uint32_t expectedRecordSize)
: m_Reader(reader), m_ReaderSize(reader.Size()), m_ExpectedRecordSize(expectedRecordSize)
VarRecordReader(ReaderT const & reader) : m_reader(reader), m_readerSize(reader.Size()) {}
std::vector<char> ReadRecord(uint64_t const pos) const
{
ASSERT_GREATER_OR_EQUAL(expectedRecordSize, 4, ());
ASSERT_LESS(pos, m_readerSize, ());
ReaderSource source(m_reader);
source.Skip(pos);
uint32_t const recordSize = ReadVarUint<uint32_t>(source);
std::vector<char> buffer(recordSize);
source.Read(buffer.data(), recordSize);
return buffer;
}
uint64_t ReadRecord(uint64_t const pos, std::vector<char> & buffer, uint32_t & recordOffset,
uint32_t & actualSize) const
{
ASSERT_LESS(pos, m_ReaderSize, ());
uint32_t const initialSize = static_cast<uint32_t>(
std::min(static_cast<uint64_t>(m_ExpectedRecordSize), m_ReaderSize - pos));
if (buffer.size() < initialSize)
buffer.resize(initialSize);
m_Reader.Read(pos, &buffer[0], initialSize);
ArrayByteSource source(&buffer[0]);
uint32_t const recordSize = VarRecordSizeReaderFn(source);
uint32_t const recordSizeSize = static_cast<uint32_t>(source.PtrC() - &buffer[0]);
uint32_t const fullSize = recordSize + recordSizeSize;
ASSERT_LESS_OR_EQUAL(pos + fullSize, m_ReaderSize, ());
if (buffer.size() < fullSize)
buffer.resize(fullSize);
if (initialSize < fullSize)
m_Reader.Read(pos + initialSize, &buffer[initialSize], fullSize - initialSize);
recordOffset = recordSizeSize;
actualSize = fullSize;
return pos + fullSize;
}
template <typename F>
void ForEachRecord(F const & f) const
void ForEachRecord(std::function<void(uint32_t, std::vector<char> &&)> const & f) const
{
uint64_t pos = 0;
std::vector<char> buffer;
while (pos < m_ReaderSize)
ReaderSource source(m_reader);
while (pos < m_readerSize)
{
uint32_t offset = 0, size = 0;
uint64_t nextPos = ReadRecord(pos, buffer, offset, size);
// uint64_t -> uint32_t : assume that feature dat file not more than 4Gb
f(static_cast<uint32_t>(pos), &buffer[offset], static_cast<uint32_t>(size - offset));
pos = nextPos;
uint32_t const recordSize = ReadVarUint<uint32_t>(source);
std::vector<char> buffer(recordSize);
source.Read(buffer.data(), recordSize);
f(static_cast<uint32_t>(pos), std::move(buffer));
pos = source.Pos();
}
ASSERT_EQUAL(pos, m_ReaderSize, ());
}
bool IsEqual(std::string const & fName) const { return m_Reader.IsEqual(fName); }
protected:
ReaderT m_Reader;
uint64_t m_ReaderSize;
uint32_t m_ExpectedRecordSize; // Expected size of a record.
ReaderT m_reader;
uint64_t m_readerSize;
};

View file

@ -109,13 +109,13 @@ int GetScaleIndex(SharedLoadInfo const & loadInfo, int scale,
return -1;
}
uint32_t CalcOffset(ArrayByteSource const & source, FeatureType::Buffer const data)
uint32_t CalcOffset(ArrayByteSource const & source, const char * start)
{
ASSERT_GREATER_OR_EQUAL(source.PtrC(), data, ());
return static_cast<uint32_t>(source.PtrC() - data);
ASSERT_GREATER_OR_EQUAL(source.PtrC(), start, ());
return static_cast<uint32_t>(source.PtrC() - start);
}
uint8_t Header(FeatureType::Buffer const data) { return static_cast<uint8_t>(*data); }
uint8_t Header(FeatureType::Buffer const & data) { return static_cast<uint8_t>(data[0]); }
void ReadOffsets(SharedLoadInfo const & loadInfo, ArrayByteSource & src, uint8_t mask,
FeatureType::GeometryOffsets & offsets)
@ -181,11 +181,10 @@ uint8_t ReadByte(TSource & src)
}
} // namespace
FeatureType::FeatureType(SharedLoadInfo const * loadInfo, Buffer buffer)
FeatureType::FeatureType(SharedLoadInfo const * loadInfo, Buffer && buffer) : m_data(buffer)
{
CHECK(loadInfo, ());
m_loadInfo = loadInfo;
m_data = buffer;
m_header = Header(m_data);
m_offsets.Reset();
@ -270,7 +269,7 @@ void FeatureType::ParseTypes()
auto const typesOffset = sizeof(m_header);
Classificator & c = classif();
ArrayByteSource source(m_data + typesOffset);
ArrayByteSource source(m_data.data() + typesOffset);
size_t const count = GetTypesCount();
uint32_t index = 0;
@ -290,7 +289,7 @@ void FeatureType::ParseTypes()
throw;
}
m_offsets.m_common = CalcOffset(source, m_data);
m_offsets.m_common = CalcOffset(source, m_data.data());
m_parsed.m_types = true;
}
@ -302,7 +301,7 @@ void FeatureType::ParseCommon()
CHECK(m_loadInfo, ());
ParseTypes();
ArrayByteSource source(m_data + m_offsets.m_common);
ArrayByteSource source(m_data.data() + m_offsets.m_common);
uint8_t const h = Header(m_data);
m_params.Read(source, h);
@ -312,7 +311,7 @@ void FeatureType::ParseCommon()
m_limitRect.Add(m_center);
}
m_offsets.m_header2 = CalcOffset(source, m_data);
m_offsets.m_header2 = CalcOffset(source, m_data.data());
m_parsed.m_common = true;
}
@ -341,7 +340,7 @@ void FeatureType::ParseHeader2()
ParseCommon();
uint8_t ptsCount = 0, ptsMask = 0, trgCount = 0, trgMask = 0;
BitSource bitSource(m_data + m_offsets.m_header2);
BitSource bitSource(m_data.data() + m_offsets.m_header2);
auto const headerGeomType = static_cast<HeaderGeomType>(Header(m_data) & HEADER_MASK_GEOMTYPE);
if (headerGeomType == HeaderGeomType::Line)
@ -400,7 +399,7 @@ void FeatureType::ParseHeader2()
ReadOffsets(*m_loadInfo, src, trgMask, m_offsets.m_trg);
}
}
m_innerStats.m_size = CalcOffset(src, m_data);
m_innerStats.m_size = CalcOffset(src, m_data.data());
m_parsed.m_header2 = true;
}

View file

@ -31,10 +31,10 @@ class MapObject;
class FeatureType
{
public:
using Buffer = char const *;
using Buffer = std::vector<char>;
using GeometryOffsets = buffer_vector<uint32_t, feature::DataHeader::kMaxScalesCount>;
FeatureType(feature::SharedLoadInfo const * loadInfo, Buffer buffer);
FeatureType(feature::SharedLoadInfo const * loadInfo, Buffer && buffer);
FeatureType(osm::MapObject const & emo);
feature::GeomType GetGeomType() const;
@ -244,8 +244,7 @@ private:
// Non-owning pointer to shared load info. SharedLoadInfo created once per FeaturesVector.
feature::SharedLoadInfo const * m_loadInfo = nullptr;
// Serialized feature data, owned by this object (Buffer is std::vector<char>).
Buffer m_data = nullptr;
Buffer m_data;
ParsedFlags m_parsed;
Offsets m_offsets;

View file

@ -7,10 +7,8 @@
std::unique_ptr<FeatureType> FeaturesVector::GetByIndex(uint32_t index) const
{
uint32_t offset = 0, size = 0;
auto const ftOffset = m_table ? m_table->GetFeatureOffset(index) : index;
m_recordReader.ReadRecord(ftOffset, m_buffer, offset, size);
return std::make_unique<FeatureType>(&m_loadInfo, &m_buffer[offset]);
return std::make_unique<FeatureType>(&m_loadInfo, m_recordReader.ReadRecord(ftOffset));
}
size_t FeaturesVector::GetNumFeatures() const

View file

@ -20,7 +20,7 @@ class FeaturesVector
public:
FeaturesVector(FilesContainerR const & cont, feature::DataHeader const & header,
feature::FeaturesOffsetsTable const * table)
: m_loadInfo(cont, header), m_recordReader(m_loadInfo.GetDataReader(), 256), m_table(table)
: m_loadInfo(cont, header), m_recordReader(m_loadInfo.GetDataReader()), m_table(table)
{
}
@ -31,8 +31,8 @@ public:
template <class ToDo> void ForEach(ToDo && toDo) const
{
uint32_t index = 0;
m_recordReader.ForEachRecord([&](uint32_t pos, char const * data, uint32_t /*size*/) {
FeatureType ft(&m_loadInfo, data);
m_recordReader.ForEachRecord([&](uint32_t pos, std::vector<char> && data) {
FeatureType ft(&m_loadInfo, std::move(data));
// We can't properly set MwmId here, because FeaturesVector
// works with FileContainerR, not with MwmId/MwmHandle/MwmValue.
@ -45,18 +45,15 @@ public:
template <class ToDo> static void ForEachOffset(ModelReaderPtr reader, ToDo && toDo)
{
VarRecordReader<ModelReaderPtr, &VarRecordSizeReaderVarint> recordReader(reader, 256);
recordReader.ForEachRecord([&] (uint32_t pos, char const * /*data*/, uint32_t /*size*/)
{
toDo(pos);
});
VarRecordReader<ModelReaderPtr> recordReader(reader);
recordReader.ForEachRecord([&](uint32_t pos, std::vector<char> && /* data */) { toDo(pos); });
}
private:
friend class FeaturesVectorTest;
feature::SharedLoadInfo m_loadInfo;
VarRecordReader<FilesContainerR::TReader, &VarRecordSizeReaderVarint> m_recordReader;
VarRecordReader<FilesContainerR::TReader> m_recordReader;
mutable std::vector<char> m_buffer;
feature::FeaturesOffsetsTable const * m_table;
};

View file

@ -27,6 +27,7 @@ set(
postcodes_matcher_tests.cpp
postcodes_tests.cpp
rank_table_test.cpp
read_features_test.cpp
scale_index_reading_tests.cpp
scales_test.cpp
search_string_utils_test.cpp

View file

@ -0,0 +1,38 @@
#include "testing/testing.hpp"
#include "indexer/classificator_loader.hpp"
#include "indexer/data_source.hpp"
#include "platform/local_country_file.hpp"
#include <cstdint>
#include <memory>
#include <vector>
using namespace std;
// Smoke test: registers the "minsk-pass" test map and, for every pair of
// adjacent feature indices (i, i + 1), loads both features and then walks the
// types of the second one followed by the first one.
//
// Reading the earlier-loaded feature *after* a later one has been loaded is the
// point of the test: it checks that a feature stays readable once another
// feature has been fetched through the same guard (see the commit description:
// the data buffer is stored inside FeatureType instead of being shared).
UNIT_TEST(ReadFeatures_Smoke)
{
  classificator::Load();

  FrozenDataSource dataSource;
  dataSource.RegisterMap(platform::LocalCountryFile::MakeForTesting("minsk-pass"));

  vector<shared_ptr<MwmInfo>> infos;
  dataSource.GetMwmsInfo(infos);
  CHECK_EQUAL(infos.size(), 1, ());

  auto handle = dataSource.GetMwmHandleById(MwmSet::MwmId(infos[0]));
  FeaturesLoaderGuard const guard(dataSource, handle.GetId());

  // Query the count once; it is used both for logging and as the loop bound.
  auto const numFeatures = guard.GetNumFeatures();
  LOG(LINFO, (numFeatures));

  // The bound is written as |i + 1 < numFeatures| rather than
  // |i < numFeatures - 1|: the count is unsigned, so subtracting one from an
  // empty map's count would wrap around and the loop would run out of range.
  for (uint32_t i = 0; i + 1 < numFeatures; ++i)
  {
    LOG(LINFO, ("Trying", i, i + 1));
    auto ft1 = guard.GetFeatureByIndex(i);
    auto ft2 = guard.GetFeatureByIndex(i + 1);

    // Touch the newer feature first, then the older one. The callbacks are
    // intentionally empty: only the parsing of each feature's data matters.
    ft2->ForEachType([](auto const /* type */) {});
    ft1->ForEachType([](auto const /* type */) {});
  }
}