From e2cc64f2db2a825aabbee7d78f2761164d913586 Mon Sep 17 00:00:00 2001
From: Maksim Andrianov
Date: Tue, 2 Jul 2019 16:28:18 +0300
Subject: [PATCH] Optimization step 'generate_features': Added new serialization type.

---
 generator/feature_builder.cpp |  68 ++++++++++++++++++
 generator/feature_builder.hpp | 129 ++++++++++++++++++++++++++++++----
 2 files changed, 183 insertions(+), 14 deletions(-)

diff --git a/generator/feature_builder.cpp b/generator/feature_builder.cpp
index daea2cc762..61eaecb74c 100644
--- a/generator/feature_builder.cpp
+++ b/generator/feature_builder.cpp
@@ -444,6 +444,66 @@ void FeatureBuilder::DeserializeFromIntermediate(Buffer & data)
   Check(*this);
 }
 
+void FeatureBuilder::SerializeAccuratelyForIntermediate(Buffer & data) const
+{
+  Check(*this);
+  data.clear();
+  PushBackByteSink<Buffer> sink(data);
+  m_params.Write(sink, true /* store additional info from FeatureParams */);
+  if (IsPoint())
+  {
+    rw::WritePOD(sink, m_center);
+  }
+  else
+  {
+    WriteVarUint(sink, static_cast<uint32_t>(m_polygons.size()));
+    for (PointSeq const & points : m_polygons)
+      rw::WriteVectorOfPOD(sink, points);
+
+    WriteVarInt(sink, m_coastCell);
+  }
+
+  // Save OSM IDs to link meta information with sorted features later.
+  rw::WriteVectorOfPOD(sink, m_osmIds);
+  // Debug builds only: deserialize a copy of the buffer and compare it with the source feature.
+#ifdef DEBUG
+  Buffer tmp(data);
+  FeatureBuilder fb;
+  fb.DeserializeAccuratelyFromIntermediate(tmp);
+  ASSERT(fb == *this, ("Source feature: ", *this, "Deserialized feature: ", fb));
+#endif
+
+}
+
+void FeatureBuilder::DeserializeAccuratelyFromIntermediate(Buffer & data)
+{
+  ArrayByteSource source(data.data());
+  m_params.Read(source);
+  m_limitRect.MakeEmpty();
+  if (IsPoint())
+  {
+    rw::ReadPOD(source, m_center);
+    m_limitRect.Add(m_center);
+  }
+  else
+  {
+    m_polygons.clear();
+    uint32_t const count = ReadVarUint<uint32_t>(source);
+    ASSERT_GREATER(count, 0, (*this));
+    for (uint32_t i = 0; i < count; ++i)
+    {
+      m_polygons.emplace_back();
+      rw::ReadVectorOfPOD(source, m_polygons.back());
+      CalcRect(m_polygons.back(), m_limitRect);
+    }
+
+    m_coastCell = ReadVarInt<int64_t>(source);
+  }
+
+  rw::ReadVectorOfPOD(source, m_osmIds);
+  Check(*this);
+}
+
 void FeatureBuilder::AddOsmId(base::GeoObjectId id) { m_osmIds.push_back(id); }
 
 void FeatureBuilder::SetOsmId(base::GeoObjectId id) { m_osmIds.assign(1, id); }
@@ -691,4 +751,12 @@ string DebugPrint(FeatureBuilder const & fb)
       << " " << ::DebugPrint(fb.GetOsmIds());
   return out.str();
 }
+
+namespace serialization_policy
+{
+// static
+TypeSerializationVersion const MinSize::kSerializationVersion;
+// static
+TypeSerializationVersion const MaxAccuracy::kSerializationVersion;
+} // namespace serialization_policy
 } // namespace feature
diff --git a/generator/feature_builder.hpp b/generator/feature_builder.hpp
index 8ba3ad9ea1..f9b47a6143 100644
--- a/generator/feature_builder.hpp
+++ b/generator/feature_builder.hpp
@@ -3,6 +3,7 @@
 #include "indexer/feature_data.hpp"
 
 #include "coding/file_reader.hpp"
+#include "coding/file_writer.hpp"
 #include "coding/read_write_utils.hpp"
 
 #include "base/geo_object_id.hpp"
@@ -134,6 +135,12 @@ public:
     return m_params.m_types.empty();
   }
 
+  template <class FnT>
+  bool HasTypesIf(FnT fn) const
+  {
+    return std::any_of(std::begin(m_params.m_types), std::end(m_params.m_types), fn);
+  }
+
   bool HasType(uint32_t t) const { return m_params.IsTypeExist(t); }
   bool HasType(uint32_t t, uint8_t level) const { return m_params.IsTypeExist(t, level); }
   uint32_t FindType(uint32_t comp, uint8_t level) const { return m_params.FindType(comp, level); }
@@ -180,6 +187,10 @@ public:
                                       Buffer & data) const;
   void DeserializeFromIntermediate(Buffer & data);
 
+  // These methods use geometry without loss of accuracy.
+  void SerializeAccuratelyForIntermediate(Buffer & data) const;
+  void DeserializeAccuratelyFromIntermediate(Buffer & data);
+
   bool PreSerializeAndRemoveUselessNamesForMwm(SupportingData const & data);
   void SerializeLocalityObject(serial::GeometryCodingParams const & params,
                                SupportingData & data) const;
@@ -197,6 +208,7 @@ public:
   // area's one if there is no relation, and relation id otherwise.
   base::GeoObjectId GetMostGenericOsmId() const;
   bool HasOsmId(base::GeoObjectId const & id) const;
+  bool HasOsmIds() const { return !m_osmIds.empty(); }
   std::vector<base::GeoObjectId> const & GetOsmIds() const { return m_osmIds; }
 
   // To work with coasts.
@@ -233,38 +245,97 @@ protected:
 void Check(FeatureBuilder const fb);
 std::string DebugPrint(FeatureBuilder const & fb);
 
+// Serialization policies: each pairs a FeatureBuilder serializer with its matching deserializer.
+namespace serialization_policy
+{
+enum class SerializationVersion : uint32_t
+{
+  Undef,
+  MinSize,
+  MaxAccuracy
+};
+
+using TypeSerializationVersion = typename std::underlying_type<SerializationVersion>::type;
+
+struct MinSize
+{
+  auto static const kSerializationVersion = static_cast<TypeSerializationVersion>(SerializationVersion::MinSize);
+
+  static void Serialize(FeatureBuilder const & fb, FeatureBuilder::Buffer & data)
+  {
+    fb.SerializeForIntermediate(data);
+  }
+
+  static void Deserialize(FeatureBuilder & fb, FeatureBuilder::Buffer & data)
+  {
+    fb.DeserializeFromIntermediate(data);
+  }
+};
+
+struct MaxAccuracy
+{
+  auto static const kSerializationVersion = static_cast<TypeSerializationVersion>(SerializationVersion::MaxAccuracy);
+
+  static void Serialize(FeatureBuilder const & fb, FeatureBuilder::Buffer & data)
+  {
+    fb.SerializeAccuratelyForIntermediate(data);
+  }
+
+  static void Deserialize(FeatureBuilder & fb, FeatureBuilder::Buffer & data)
+  {
+    fb.DeserializeAccuratelyFromIntermediate(data);
+  }
+};
+} // namespace serialization_policy
+
+// TODO(maksimandrianov): I would like to support the verification of serialization versions,
+// but this requires reworking the FeatureCollector class and its derived classes. It is planned for the future.
+
+//template <class SerializePolicy, class Source>
+//void TryReadAndCheckVersion(Source & src)
+//{
+//  if (src.Size() - src.Pos() >= sizeof(serialization_policy::TypeSerializationVersion))
+//  {
+//    auto const type = ReadVarUint<serialization_policy::TypeSerializationVersion>(src);
+//    CHECK_EQUAL(type, SerializePolicy::kSerializationVersion, ());
+//  }
+//  else
+//  {
+//    LOG(LWARNING, ("Unable to read file version."));
+//  }
+//}
+
 // Read feature from feature source.
-template <class Source>
+template <class SerializePolicy = serialization_policy::MinSize, class Source>
 void ReadFromSourceRawFormat(Source & src, FeatureBuilder & fb)
 {
   uint32_t const sz = ReadVarUint<uint32_t>(src);
   typename FeatureBuilder::Buffer buffer(sz);
   src.Read(&buffer[0], sz);
-  fb.DeserializeFromIntermediate(buffer);
+  SerializePolicy::Deserialize(fb, buffer);
 }
 
 // Process features in .dat file.
-template <class ToDo>
+template <class SerializePolicy = serialization_policy::MinSize, class ToDo>
 void ForEachFromDatRawFormat(std::string const & filename, ToDo && toDo)
 {
   FileReader reader(filename);
   ReaderSource<FileReader> src(reader);
-
-  uint64_t currPos = 0;
-  uint64_t const fileSize = reader.Size();
-
+//  TryReadAndCheckVersion<SerializePolicy>(src);
+  auto const fileSize = reader.Size();
+  uint64_t currPos = src.Pos();
   // read features one by one
   while (currPos < fileSize)
   {
     FeatureBuilder fb;
-    ReadFromSourceRawFormat(src, fb);
+    ReadFromSourceRawFormat<SerializePolicy>(src, fb);
     toDo(fb, currPos);
     currPos = src.Pos();
   }
 }
 
 /// Parallel process features in .dat file.
-template <class ToDo>
+template <class SerializePolicy = serialization_policy::MinSize, class ToDo>
 void ForEachParallelFromDatRawFormat(size_t threadsCount, std::string const & filename,
                                      ToDo && toDo)
 {
@@ -274,10 +345,9 @@ void ForEachParallelFromDatRawFormat(size_t threadsCount, std::string const & fi
 
   FileReader reader(filename);
   ReaderSource<FileReader> src(reader);
-
-  uint64_t currPos = 0;
-  uint64_t const fileSize = reader.Size();
-
+//  TryReadAndCheckVersion<SerializePolicy>(src);
+  auto const fileSize = reader.Size();
+  uint64_t currPos = src.Pos();
   std::mutex readMutex;
   auto concurrentProcessor = [&] {
     for (;;)
@@ -291,7 +361,7 @@ void ForEachParallelFromDatRawFormat(size_t threadsCount, std::string const & fi
         if (fileSize <= currPos)
           break;
 
-        ReadFromSourceRawFormat(src, fb);
+        ReadFromSourceRawFormat<SerializePolicy>(src, fb);
         featurePos = currPos;
         currPos = src.Pos();
       }
@@ -304,4 +374,35 @@ void ForEachParallelFromDatRawFormat(size_t threadsCount, std::string const & fi
   for (size_t i = 0; i < threadsCount; ++i)
     threadPool.Submit(concurrentProcessor);
 }
+template <class SerializePolicy = serialization_policy::MinSize>
+std::vector<FeatureBuilder> ReadAllDatRawFormat(std::string const & fileName)
+{
+  std::vector<FeatureBuilder> fbs;
+  ForEachFromDatRawFormat<SerializePolicy>(fileName, [&](auto && fb, auto const &) {
+    fbs.emplace_back(std::move(fb));
+  });
+  return fbs;
+}
+
+template <class SerializePolicy = serialization_policy::MinSize, class Writer = FileWriter>
+class FeatureBuilderWriter
+{
+public:
+  explicit FeatureBuilderWriter(std::string const & filename, FileWriter::Op op = FileWriter::Op::OP_WRITE_TRUNCATE)
+    : m_writer(filename, op)
+  {
+//    WriteVarUint(m_writer, static_cast<serialization_policy::TypeSerializationVersion>(SerializePolicy::kSerializationVersion));
+  }
+
+  void Write(FeatureBuilder const & fb)
+  {
+    FeatureBuilder::Buffer buffer;
+    SerializePolicy::Serialize(fb, buffer);
+    WriteVarUint(m_writer, static_cast<uint32_t>(buffer.size()));
+    m_writer.Write(buffer.data(), buffer.size() * sizeof(FeatureBuilder::Buffer::value_type));
+  }
+
+private:
+  Writer m_writer;
+};
 } // namespace feature