Added the ability to load all intermediate data into memory

This commit is contained in:
Sergey Yershov 2015-08-24 13:46:05 +03:00 committed by Alex Zolotarev
parent 8a05999c2e
commit 7fcd6ec8c4
5 changed files with 337 additions and 40 deletions

View file

@ -9,6 +9,7 @@
#include "base/logging.hpp"
#include "std/algorithm.hpp"
#include "std/deque.hpp"
#include "std/exception.hpp"
#include "std/limits.hpp"
#include "std/utility.hpp"
@ -35,6 +36,7 @@ class IndexFile
TContainer m_elements;
TFile m_file;
uint64_t m_fileSize = 0;
static size_t constexpr kFlushCount = 1024;
@ -71,23 +73,23 @@ public:
void ReadAll()
{
m_elements.clear();
uint64_t const fileSize = m_file.Size();
if (fileSize == 0)
m_fileSize = m_file.Size();
if (m_fileSize == 0)
return;
LOG_SHORT(LINFO, ("Offsets reading is started for file ", GetFileName()));
CHECK_EQUAL(0, fileSize % sizeof(TElement), ("Damaged file."));
CHECK_EQUAL(0, m_fileSize % sizeof(TElement), ("Damaged file."));
try
{
m_elements.resize(CheckedCast(fileSize / sizeof(TElement)));
m_elements.resize(CheckedCast(m_fileSize / sizeof(TElement)));
}
catch (exception const &) // bad_alloc
{
LOG(LCRITICAL, ("Insufficient memory for required offset map"));
}
m_file.Read(0, &m_elements[0], CheckedCast(fileSize));
m_file.Read(0, &m_elements[0], CheckedCast(m_fileSize));
sort(m_elements.begin(), m_elements.end(), ElementComparator());
@ -102,12 +104,12 @@ public:
m_elements.push_back(make_pair(k, v));
}
bool GetValueByKey(TKey k, TValue & v) const
bool GetValueByKey(TKey key, TValue & value) const
{
auto it = lower_bound(m_elements.begin(), m_elements.end(), k, ElementComparator());
if ((it != m_elements.end()) && ((*it).first == k))
auto it = lower_bound(m_elements.begin(), m_elements.end(), key, ElementComparator());
if ((it != m_elements.end()) && ((*it).first == key))
{
v = (*it).second;
value = (*it).second;
return true;
}
return false;
@ -132,41 +134,95 @@ class OSMElementCache
{
public:
using TKey = uint64_t;
using TStream = typename conditional<TMode == EMode::Write, FileWriterStream, FileReaderStream>::type;
using TStorage = typename conditional<TMode == EMode::Write, FileWriter, FileReader>::type;
using TOffsetFile = typename conditional<TMode == EMode::Write, FileWriter, FileReader>::type;
protected:
TStream m_stream;
using TBuffer = vector<uint8_t>;
TStorage m_storage;
detail::IndexFile<TOffsetFile, uint64_t> m_offsets;
string m_name;
TBuffer m_data;
bool m_preload = false;
public:
OSMElementCache(string const & name) : m_stream(name), m_offsets(name + OFFSET_EXT), m_name(name) {}
template <class TValue>
void Write(TKey id, TValue const & value)
OSMElementCache(string const & name, bool preload = false)
: m_storage(name)
, m_offsets(name + OFFSET_EXT)
, m_name(name)
, m_preload(preload)
{
m_offsets.Add(id, m_stream.Pos());
value.Write(m_stream);
std::ofstream ff((m_name+".wlog").c_str(), std::ios::binary | std::ios::app);
ff << id << " " << value.ToString() << std::endl;
InitStorage<TMode>();
}
template <class TValue>
bool Read(TKey id, TValue & value)
// Write mode: nothing has to be prepared before the first Write() call.
template <EMode T>
typename enable_if<T == EMode::Write, void>::type InitStorage()
{
}
// Read mode: when preloading was requested, pull the whole storage into m_data
// so that subsequent reads can be served from memory.
template <EMode T>
typename enable_if<T == EMode::Read, void>::type InitStorage()
{
  if (!m_preload)
    return;
  size_t storageSize = m_storage.Size();
  m_data.resize(storageSize);
  m_storage.Read(0, m_data.data(), storageSize);
}
// Serializes |value| into the scratch buffer m_data and writes it to the storage as a
// length-prefixed record: a uint32_t byte count followed by the payload bytes.
// The storage position at which the record starts is registered in m_offsets under |id|.
// Only available when the cache is instantiated in EMode::Write.
template <class TValue, EMode T = TMode>
typename enable_if<T == EMode::Write, void>::type Write(TKey id, TValue const & value)
{
  m_offsets.Add(id, m_storage.Pos());
  m_data.clear();
  MemWriter<TBuffer> w(m_data);
  value.Write(w);
  // The length prefix is 32 bits wide. The limit is enforced in every build type:
  // a debug-only assert would let the cast below truncate silently and produce
  // a record whose prefix does not match its payload.
  CHECK_LESS(m_data.size(), numeric_limits<uint32_t>::max(),
             ("Serialized element does not fit a 32-bit length prefix, id:", id));
  uint32_t const sz = static_cast<uint32_t>(m_data.size());
  m_storage.Write(&sz, sizeof(sz));
  m_storage.Write(m_data.data(), sz * sizeof(TBuffer::value_type));
}
template <class TValue, EMode T = TMode>
typename enable_if<T == EMode::Read, bool>::type Read(TKey id, TValue & value)
{
uint64_t pos;
if (m_offsets.GetValueByKey(id, pos))
{
m_stream.Seek(pos);
value.Read(m_stream);
std::ofstream ff((m_name+".rlog").c_str(), std::ios::binary | std::ios::app);
ff << id << " " << value.ToString() << std::endl;
uint32_t valueSize = m_preload ? *((uint32_t *)(m_data.data() + pos)) : 0;
size_t offset = pos + sizeof(uint32_t);
if (!m_preload)
{
// in case not in memory work we read buffer
m_storage.Read(pos, &valueSize, sizeof(valueSize));
m_data.resize(valueSize);
m_storage.Read(pos + sizeof(valueSize), m_data.data(), valueSize);
offset = 0;
}
// prepare correct reader
MemReader reader(m_data.data() + offset, valueSize);
value.Read(reader);
// std::ofstream ff((m_name+".rlog").c_str(), std::ios::binary | std::ios::app);
// ff << id << " " << value.ToString() << std::endl;
// if (id == 1942060)
// ff << id << " " << value.Dump() << std::endl;
return true;
}
else
{
LOG_SHORT(LWARNING, ("Can't find offset in file ", m_offsets.GetFileName(), " by id ", id));
LOG_SHORT(LWARNING, ("Can't find offset in file", m_offsets.GetFileName(), "by id", id));
return false;
}
}

View file

@ -0,0 +1,135 @@
//
// intermediate_data_test.cpp
// generator_tool
//
// Created by Sergey Yershov on 20.08.15.
// Copyright (c) 2015 maps.me. All rights reserved.
//
#include "testing/testing.hpp"
#include "generator/osm_decl.hpp"
// A way without nodes must come back empty after a write/read round trip through memory.
UNIT_TEST(Intermediate_Data_empty_way_element_save_load_test)
{
  WayElement original(1 /* fake osm id */);
  vector<uint8_t> bytes;
  MemWriter<vector<uint8_t>> sink(bytes);
  original.Write(sink);
  MemReader source(bytes.data(), bytes.size());
  WayElement restored(1 /* fake osm id */);
  restored.Read(source);
  TEST_EQUAL(restored.nodes.size(), 0, ());
}
// Node ids of a way, including 32-bit and 64-bit boundary values, must be
// restored in the same order after a write/read round trip through memory.
UNIT_TEST(Intermediate_Data_way_element_save_load_test)
{
  vector<uint64_t> const expectedIds = {0, 1, 2, 3, 0xFFFFFFFF, 0xFFFFFFFFFFFFFFFF};
  WayElement original(1 /* fake osm id */);
  for (uint64_t const id : expectedIds)
    original.nodes.push_back(id);
  vector<uint8_t> bytes;
  MemWriter<vector<uint8_t>> sink(bytes);
  original.Write(sink);
  MemReader source(bytes.data(), bytes.size());
  WayElement restored(1 /* fake osm id */);
  restored.Read(source);
  TEST_EQUAL(restored.nodes.size(), expectedIds.size(), ());
  for (size_t i = 0; i < expectedIds.size(); ++i)
    TEST_EQUAL(restored.nodes[i], expectedIds[i], ());
}
// A relation's node members, way members and tags must survive a write/read
// round trip, and Read() must replace whatever the target element held before.
UNIT_TEST(Intermediate_Data_relation_element_save_load_test)
{
  // Element that gets serialized.
  RelationElement original;
  original.nodes.emplace_back(1, "inner");
  original.nodes.emplace_back(2, "outer");
  original.nodes.emplace_back(3, "unknown");
  original.nodes.emplace_back(4, "inner role");
  original.ways.emplace_back(1, "inner");
  original.ways.emplace_back(2, "outer");
  original.ways.emplace_back(3, "unknown");
  original.ways.emplace_back(4, "inner role");
  original.tags.emplace("key1","value1");
  original.tags.emplace("key2","value2");
  original.tags.emplace("key3","value3");
  original.tags.emplace("key4","value4");
  vector<uint8_t> bytes;
  MemWriter<vector<uint8_t>> sink(bytes);
  original.Write(sink);
  MemReader source(bytes.data(), bytes.size());
  // Target element is pre-filled with stale data that Read() is expected to discard.
  RelationElement restored;
  restored.nodes.emplace_back(30, "000unknown");
  restored.nodes.emplace_back(40, "000inner role");
  restored.ways.emplace_back(10, "000inner");
  restored.ways.emplace_back(20, "000outer");
  restored.tags.emplace("key1old","value1old");
  restored.tags.emplace("key2old","value2old");
  restored.Read(source);
  // Container sizes.
  TEST_EQUAL(restored.nodes.size(), 4, ());
  TEST_EQUAL(restored.ways.size(), 4, ());
  TEST_EQUAL(restored.tags.size(), 4, ());
  // Node members: ids, then roles.
  TEST_EQUAL(restored.nodes[0].first, 1, ());
  TEST_EQUAL(restored.nodes[1].first, 2, ());
  TEST_EQUAL(restored.nodes[2].first, 3, ());
  TEST_EQUAL(restored.nodes[3].first, 4, ());
  TEST_EQUAL(restored.nodes[0].second, "inner", ());
  TEST_EQUAL(restored.nodes[1].second, "outer", ());
  TEST_EQUAL(restored.nodes[2].second, "unknown", ());
  TEST_EQUAL(restored.nodes[3].second, "inner role", ());
  // Way members: ids, then roles.
  TEST_EQUAL(restored.ways[0].first, 1, ());
  TEST_EQUAL(restored.ways[1].first, 2, ());
  TEST_EQUAL(restored.ways[2].first, 3, ());
  TEST_EQUAL(restored.ways[3].first, 4, ());
  TEST_EQUAL(restored.ways[0].second, "inner", ());
  TEST_EQUAL(restored.ways[1].second, "outer", ());
  TEST_EQUAL(restored.ways[2].second, "unknown", ());
  TEST_EQUAL(restored.ways[3].second, "inner role", ());
  // Tags written by the original element.
  TEST_EQUAL(restored.tags["key1"], "value1", ());
  TEST_EQUAL(restored.tags["key2"], "value2", ());
  TEST_EQUAL(restored.tags["key3"], "value3", ());
  TEST_EQUAL(restored.tags["key4"], "value4", ());
  // Stale tags must not keep their old values.
  TEST_NOT_EQUAL(restored.tags["key1old"], "value1old", ());
  TEST_NOT_EQUAL(restored.tags["key2old"], "value2old", ());
}

View file

@ -3,8 +3,13 @@
#include "base/assert.hpp"
#include "base/std_serialization.hpp"
#include "coding/reader.hpp"
#include "coding/varint.hpp"
#include "coding/writer.hpp"
#include "std/algorithm.hpp"
#include "std/bind.hpp"
#include "std/limits.hpp"
#include "std/string.hpp"
#include "std/utility.hpp"
#include "std/vector.hpp"
@ -49,16 +54,23 @@ struct WayElement
for_each(nodes.rbegin(), nodes.rend(), ref(toDo));
}
template <class TArchive>
void Write(TArchive & ar) const
template <class TWriter>
void Write(TWriter & writer) const
{
ar << nodes;
uint64_t count = nodes.size();
WriteVarUint(writer, count);
for (uint64_t e : nodes)
WriteVarUint(writer, e);
}
template <class TArchive>
void Read(TArchive & ar)
template <class TReader>
void Read(TReader & reader)
{
ar >> nodes;
ReaderSource<MemReader> r(reader);
uint64_t count = ReadVarUint<uint64_t>(r);
nodes.resize(count);
for (uint64_t & e : nodes)
e = ReadVarUint<uint64_t>(r);
}
string ToString() const
@ -67,6 +79,14 @@ struct WayElement
ss << nodes.size() << " " << m_wayOsmId;
return ss.str();
}
// Returns every entry of |nodes| in order, each followed by ';'.
string Dump() const
{
  stringstream out;
  for (auto const & nodeId : nodes)
  {
    out << nodeId << ";";
  }
  return out.str();
}
};
class RelationElement
@ -104,16 +124,86 @@ public:
tags.swap(rhs.tags);
}
template <class TArchive>
void Write(TArchive & ar) const
template <class TWriter>
void Write(TWriter & writer) const
{
ar << nodes << ways << tags;
auto StringWriter = [&writer, this](string const & str)
{
CHECK_LESS(str.size(), numeric_limits<uint16_t>::max(),
("Can't store string greater then 65535 bytes", Dump()));
uint16_t sz = static_cast<uint16_t>(str.size());
writer.Write(&sz, sizeof(sz));
writer.Write(str.data(), sz);
};
auto MembersWriter = [&writer, &StringWriter](TMembers const & members)
{
uint64_t count = members.size();
WriteVarUint(writer, count);
for (auto const & e : members)
{
// write id
WriteVarUint(writer, e.first);
// write role
StringWriter(e.second);
}
};
MembersWriter(nodes);
MembersWriter(ways);
uint64_t count = tags.size();
WriteVarUint(writer, count);
for (auto const & e : tags)
{
// write key
StringWriter(e.first);
// write value
StringWriter(e.second);
}
}
template <class TArchive>
void Read(TArchive & ar)
template <class TReader>
void Read(TReader & reader)
{
ar >> nodes >> ways >> tags;
ReaderSource<TReader> r(reader);
auto StringReader = [&r](string & str)
{
uint16_t sz = 0;
r.Read(&sz, sizeof(sz));
str.resize(sz);
r.Read(&str[0], sz);
};
auto MembersReader = [&r, &StringReader](TMembers & members)
{
uint64_t count = ReadVarUint<uint64_t>(r);
members.resize(count);
for (auto & e : members)
{
// decode id
e.first = ReadVarUint<uint64_t>(r);
// decode role
StringReader(e.second);
}
};
MembersReader(nodes);
MembersReader(ways);
// decode tags
tags.clear();
uint64_t count = ReadVarUint<uint64_t>(r);
for (uint64_t i = 0; i < count; ++i)
{
pair<string, string> kv;
// decode key
StringReader(kv.first);
// decode value
StringReader(kv.second);
tags.emplace(kv);
}
}
string ToString() const
@ -123,6 +213,18 @@ public:
return ss.str();
}
// Returns a textual listing of the relation: node members as "n{id,role};",
// way members as "w{id,role};" and tags as "t{key,value};", in that order.
string Dump() const
{
  stringstream out;
  for (auto const & node : nodes)
  {
    out << "n{" << node.first << "," << node.second << "};";
  }
  for (auto const & way : ways)
  {
    out << "w{" << way.first << "," << way.second << "};";
  }
  for (auto const & tag : tags)
  {
    out << "t{" << tag.first << "," << tag.second << "};";
  }
  return out.str();
}
protected:
bool FindRoleImpl(TMembers const & container, uint64_t id, string & role) const
{

View file

@ -111,8 +111,8 @@ class IntermediateData
public:
IntermediateData(TNodesHolder & nodes, string const & dir)
: m_nodes(nodes)
, m_ways(my::JoinFoldersToPath(dir, WAYS_FILE))
, m_relations(my::JoinFoldersToPath(dir, RELATIONS_FILE))
, m_ways(my::JoinFoldersToPath(dir, WAYS_FILE), true)
, m_relations(my::JoinFoldersToPath(dir, RELATIONS_FILE), true)
, m_nodeToRelations(my::JoinFoldersToPath(dir, string(NODES_FILE) + ID2REL_EXT))
, m_wayToRelations(my::JoinFoldersToPath(dir, string(WAYS_FILE) + ID2REL_EXT))
{

View file

@ -7,6 +7,7 @@
objects = {
/* Begin PBXBuildFile section */
671F58B91B8611360032311E /* intermediate_data_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 671F58B71B86109B0032311E /* intermediate_data_test.cpp */; };
6726C2231A4C2BBD005EEA39 /* IOKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 675343E01A3F600D00A0A8C3 /* IOKit.framework */; };
6726C2261A4C2BBD005EEA39 /* Cocoa.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 675343E21A3F607600A0A8C3 /* Cocoa.framework */; };
6726C2411A4C2D9F005EEA39 /* testingmain.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6726C2401A4C2D9F005EEA39 /* testingmain.cpp */; };
@ -134,6 +135,7 @@
670B84FF1A9F73C700CE4492 /* weak_ptr.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = weak_ptr.hpp; sourceTree = "<group>"; };
670B85001A9F73C700CE4492 /* windows.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = windows.hpp; sourceTree = "<group>"; };
670D05AD1B0E08260013A7AC /* defaults.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = defaults.xcconfig; path = ../defaults.xcconfig; sourceTree = "<group>"; };
671F58B71B86109B0032311E /* intermediate_data_test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = intermediate_data_test.cpp; sourceTree = "<group>"; };
6726C1E31A4C28D5005EEA39 /* check_mwms.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = check_mwms.cpp; sourceTree = "<group>"; };
6726C1E41A4C28D5005EEA39 /* classificator_tests.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classificator_tests.cpp; sourceTree = "<group>"; };
6726C1E51A4C28D5005EEA39 /* coasts_test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = coasts_test.cpp; sourceTree = "<group>"; };
@ -311,6 +313,7 @@
674A39D21B727589001DDB91 /* source_to_element_test.cpp */,
67AB92B31B738DE800AB5194 /* source_data.cpp */,
67AB92B41B738DE800AB5194 /* source_data.hpp */,
671F58B71B86109B0032311E /* intermediate_data_test.cpp */,
);
name = Tests;
path = ../../generator/generator_tests;
@ -451,6 +454,7 @@
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
671F58B91B8611360032311E /* intermediate_data_test.cpp in Sources */,
67AB92D51B75157400AB5194 /* osm_type_test.cpp in Sources */,
67AB92D41B75157100AB5194 /* osm_parser_test.cpp in Sources */,
67AB92D81B75157D00AB5194 /* metadata_test.cpp in Sources */,