diff --git a/indexer/CMakeLists.txt b/indexer/CMakeLists.txt index c401eba51c..3fb034ee68 100644 --- a/indexer/CMakeLists.txt +++ b/indexer/CMakeLists.txt @@ -26,6 +26,9 @@ set( classificator.hpp classificator_loader.cpp classificator_loader.hpp + complex/serdes.cpp + complex/serdes.hpp + complex/serdes_utils.hpp complex/tree_node.hpp cuisines.cpp cuisines.hpp diff --git a/indexer/complex/serdes.cpp b/indexer/complex/serdes.cpp new file mode 100644 index 0000000000..42cad0cad0 --- /dev/null +++ b/indexer/complex/serdes.cpp @@ -0,0 +1,7 @@ +#include "indexer/complex/serdes.hpp" + +namespace complex +{ +// static +ComplexSerdes::Version const ComplexSerdes::kLatestVersion = Version::V0; +} // namespace complex diff --git a/indexer/complex/serdes.hpp b/indexer/complex/serdes.hpp new file mode 100644 index 0000000000..9e1a1b1198 --- /dev/null +++ b/indexer/complex/serdes.hpp @@ -0,0 +1,165 @@ +#pragma once + +#include "indexer/complex/serdes_utils.hpp" +#include "indexer/complex/tree_node.hpp" + +#include "coding/reader.hpp" +#include "coding/varint.hpp" +#include "coding/writer.hpp" + +#include "base/logging.hpp" +#include "base/stl_helpers.hpp" + +#include +#include + +#include + +namespace complex +{ +class ComplexSerdes +{ +public: + using Ids = std::vector; + + enum class Version : uint8_t + { + // Serialization and deserialization are not optimized yet. It's an experimental version. + // todo(m.andrianov): Explore better ways for serialization and deserialization. + V0, + }; + + static Version const kLatestVersion; + + template + static void Serialize(Sink & sink, tree_node::Forest const & forest) + { + SerializeLatestVersion(sink); + Serialize(sink, kLatestVersion, forest); + } + + template + static bool Deserialize(Reader & reader, tree_node::Forest & forest) + { + ReaderSource src(reader); + auto const version = DeserializeVersion(src); + return version ? 
Deserialize(src, *version, forest) : false; + } + + template + static void Serialize(Sink & sink, Version version, tree_node::Forest const & forest) + { + switch (version) + { + case Version::V0: V0::Serialize(sink, forest); break; + default: UNREACHABLE(); + } + } + + template + static bool Deserialize(Src & src, Version version, tree_node::Forest & forest) + { + switch (version) + { + case Version::V0: return V0::Deserialize(src, forest); + default: UNREACHABLE(); + } + } + +private: + using ByteVector = std::vector; + + class V0 + { + public: + template + static void Serialize(Sink & sink, tree_node::Forest const & forest) + { + ByteVector forestBuffer; + MemWriter forestWriter(forestBuffer); + WriterSink forestMemSink(forestWriter); + forest.ForEachTree([&](auto const & tree) { Serialize(forestMemSink, tree); }); + coding_utils::WriteCollectionPrimitive(sink, forestBuffer); + } + + template + static bool Deserialize(Src & src, tree_node::Forest & forest) + { + ByteVector forestBuffer; + coding_utils::ReadCollectionPrimitive(src, std::back_inserter(forestBuffer)); + MemReader forestReader(forestBuffer.data(), forestBuffer.size()); + ReaderSource forestSrc(forestReader); + while (forestSrc.Size() > 0) + { + tree_node::types::Ptr tree; + if (!Deserialize(forestSrc, tree)) + return false; + + forest.Append(tree); + } + return true; + } + + private: + template + static void Serialize(Sink & sink, tree_node::types::Ptr const & tree) + { + uint32_t const base = tree_node::Min(tree, [](auto const & data) { + auto const it = std::min_element(std::cbegin(data), std::cend(data)); + CHECK(it != std::cend(data), ()); + return *it; + }); + WriteVarUint(sink, base); + tree_node::PreOrderVisit(tree, [&](auto const & node) { + coding_utils::DeltaEncode(sink, node->GetData(), base); + auto const size = base::checked_cast(node->GetChildren().size()); + WriteVarUint(sink, size); + }); + } + + template + static bool Deserialize(Src & src, tree_node::types::Ptr & tree) + { + auto 
const base = ReadVarUint(src); + std::function &)> deserializeTree; + deserializeTree = [&](auto & tree) { + if (src.Size() == 0) + return false; // Truncated input: a node must start here, otherwise |tree| stays null. + + Ids ids; + coding_utils::DeltaDecode(src, std::back_inserter(ids), base); + tree = tree_node::MakeTreeNode(std::move(ids)); + auto const size = ReadVarUint(src); + tree_node::types::Ptrs children(size); + for (auto & n : children) + { + if (!deserializeTree(n)) + return false; + + n->SetParent(tree); + } + tree->SetChildren(std::move(children)); + return true; + }; + return deserializeTree(tree); + } + }; + + template + static void SerializeLatestVersion(Sink & sink) + { + WriteToSink(sink, base::Underlying(kLatestVersion)); + } + + template + static boost::optional DeserializeVersion(Src & src) + { + if (src.Size() < sizeof(kLatestVersion)) + { + LOG(LERROR, ("Unable to deserialize complexes: wrong header version.")); + return {}; + } + return static_cast(ReadPrimitiveFromSource::type>(src)); + } +}; +} // namespace complex diff --git a/indexer/complex/serdes_utils.hpp b/indexer/complex/serdes_utils.hpp new file mode 100644 index 0000000000..84a341472a --- /dev/null +++ b/indexer/complex/serdes_utils.hpp @@ -0,0 +1,129 @@ +#pragma once + +#include "coding/reader.hpp" +#include "coding/varint.hpp" + +#include "base/checked_cast.hpp" + +#include + +namespace coding_utils +{ +namespace detail +{ +// WriteVarIntegral is an abstraction over WriteVarUint and WriteVarInt functions. +// WriteVarIntegral may be used in generic code. 
+template < + typename T, typename Sink, + typename std::enable_if_t::value && std::is_unsigned::value, int> = 0> +void WriteVarIntegral(Sink & dst, T value) +{ + WriteVarUint(dst, value); +} +template < + typename T, typename Sink, + typename std::enable_if_t::value && std::is_signed::value, int> = 0> +void WriteVarIntegral(Sink & dst, T value) +{ + WriteVarInt(dst, value); +} + +// ReadVarIntegral is an abstraction over ReadVarUint and ReadVarInt functions. 
+// ReadVarIntegral may be used in generic code. +template < + typename T, typename Source, + typename std::enable_if_t::value && std::is_unsigned::value, int> = 0> +T ReadVarIntegral(Source & src) +{ + return ReadVarUint(src); +} +template < + typename T, typename Source, + typename std::enable_if_t::value && std::is_signed::value, int> = 0> +T ReadVarIntegral(Source & src) +{ + return ReadVarInt(src); +} +} // namespace detail + +// Type of collection size. It is used for reading and writing collections. +using CollectionSizeType = uint32_t; + +// DeltaEncodeAs encodes data in the form of differences between sequential data. +// It requires a sorted |container|. |fn| may be used to access structure fields or to modify data. +template +void DeltaEncodeAs(Sink & sink, Cont const & container, Fn && fn) +{ + ASSERT((std::is_sorted(std::cbegin(container), std::cend(container), + [&](auto const & lhs, auto const & rhs) { return fn(lhs) < fn(rhs); })), + ()); + + auto const contSize = base::checked_cast(container.size()); + detail::WriteVarIntegral(sink, contSize); + if (contSize == 0) + return; + + auto first = std::begin(container); + auto const last = std::end(container); + auto acc = fn(*first); + detail::WriteVarIntegral(sink, acc); + while (++first != last) + { + auto const val = fn(*first); + auto const delta = base::checked_cast(val - acc); + detail::WriteVarIntegral(sink, delta); + acc = val; + } +} + +// DeltaDecodeAs decodes data from the form of differences between sequential data. +// |fn| may be used to initialize an object or to modify data. 
+template +void DeltaDecodeAs(Source & src, OutIt it, Fn && fn) +{ + auto contSize = detail::ReadVarIntegral(src); + if (contSize == 0) + return; + + auto sum = detail::ReadVarIntegral(src); + *it++ = fn(sum); + while (--contSize) + { + sum = sum + detail::ReadVarIntegral(src); + *it++ = fn(sum); + } +} + +template +void DeltaEncode(Sink & sink, Cont const & container, ValueType base = {}) +{ + DeltaEncodeAs(sink, container, [&](ValueType val) { return val - base; }); +} + +template +void DeltaDecode(Source & src, OutIt it, ValueType base = {}) +{ + DeltaDecodeAs(src, it, [&](ValueType val) { return val + base; }); +} + +// WriteCollectionPrimitive writes a collection. It uses the WriteToSink function. +template +void WriteCollectionPrimitive(Sink & sink, Cont const & container) +{ + auto const contSize = base::checked_cast(container.size()); + WriteVarUint(sink, contSize); + for (auto value : container) + WriteToSink(sink, value); +} + +// ReadCollectionPrimitive reads a collection. It uses the ReadPrimitiveFromSource function. 
+template +void ReadCollectionPrimitive(Source & src, OutIt it) +{ + using ValueType = typename OutIt::container_type::value_type; + + auto size = ReadVarUint(src); + while (size--) + *it++ = ReadPrimitiveFromSource(src); +} +} // namespace coding_utils diff --git a/indexer/complex/tree_node.hpp b/indexer/complex/tree_node.hpp index d04157a220..c1c6fd616c 100644 --- a/indexer/complex/tree_node.hpp +++ b/indexer/complex/tree_node.hpp @@ -2,7 +2,9 @@ #include #include +#include #include +#include #include #include @@ -196,6 +198,16 @@ size_t CountIf(types::Ptr const & node, Fn && fn) return count; } +template +decltype(auto) Min(types::Ptr const & node, Fn && fn) +{ + auto m = std::numeric_limitsGetData()))>::max(); + PreOrderVisit(node, [&](auto const & node) { + m = std::min(fn(node->GetData()), m); + }); + return m; +} + template void Print(types::Ptr const & node, std::ostream & stream, std::string prefix = "", bool isTail = true) diff --git a/indexer/indexer_tests/CMakeLists.txt b/indexer/indexer_tests/CMakeLists.txt index 8a870f52e9..813957b949 100644 --- a/indexer/indexer_tests/CMakeLists.txt +++ b/indexer/indexer_tests/CMakeLists.txt @@ -11,6 +11,8 @@ set( checker_test.cpp cities_boundaries_serdes_tests.cpp classificator_tests.cpp + complex_serdes_tests.cpp + complex_serdes_utils_tests.cpp data_source_test.cpp drules_selector_parser_test.cpp editable_map_object_test.cpp diff --git a/indexer/indexer_tests/complex_serdes_tests.cpp b/indexer/indexer_tests/complex_serdes_tests.cpp new file mode 100644 index 0000000000..45560cf3d8 --- /dev/null +++ b/indexer/indexer_tests/complex_serdes_tests.cpp @@ -0,0 +1,67 @@ +#include "testing/testing.hpp" + +#include "indexer/complex/serdes.hpp" +#include "indexer/complex/tree_node.hpp" + +#include "coding/reader.hpp" +#include "coding/writer.hpp" + +namespace +{ +using ByteVector = std::vector; + +decltype(auto) GetForest() +{ + auto tree1 = tree_node::MakeTreeNode({1, 2, 3}); + auto node11 = tree_node::MakeTreeNode({11, 
12, 13}); + tree_node::Link(tree_node::MakeTreeNode({111}), node11); + tree_node::Link(tree_node::MakeTreeNode({112, 113}), node11); + tree_node::Link(tree_node::MakeTreeNode({114}), node11); + tree_node::Link(node11, tree1); + tree_node::Link(tree_node::MakeTreeNode({6, 7}), tree1); + + auto tree2 = tree_node::MakeTreeNode({9}); + tree_node::Link(tree_node::MakeTreeNode({21, 22, 23}), tree2); + tree_node::Link(tree_node::MakeTreeNode({24, 25}), tree2); + + tree_node::Forest forest; + forest.Append(tree1); + forest.Append(tree2); + return forest; +} + +UNIT_TEST(Complex_SerdesV0) +{ + auto const expectedForest = GetForest(); + + ByteVector buffer; + MemWriter writer(buffer); + WriterSink sink(writer); + complex::ComplexSerdes::Serialize(sink, complex::ComplexSerdes::Version::V0, expectedForest); + + MemReader reader(buffer.data(), buffer.size()); + ReaderSource src(reader); + tree_node::Forest forest; + TEST(complex::ComplexSerdes::Deserialize(src, complex::ComplexSerdes::Version::V0, forest), ()); + LOG(LINFO, (forest)); + LOG(LINFO, (expectedForest)); + TEST_EQUAL(forest, expectedForest, ()); +} + +UNIT_TEST(Complex_Serdes) +{ + auto const expectedForest = GetForest(); + + ByteVector buffer; + MemWriter writer(buffer); + WriterSink sink(writer); + complex::ComplexSerdes::Serialize(sink, expectedForest); + + MemReader reader(buffer.data(), buffer.size()); + tree_node::Forest forest; + TEST(complex::ComplexSerdes::Deserialize(reader, forest), ()); + LOG(LINFO, (forest)); + LOG(LINFO, (expectedForest)); + TEST_EQUAL(forest, expectedForest, ()); +} +} // namespace diff --git a/indexer/indexer_tests/complex_serdes_utils_tests.cpp b/indexer/indexer_tests/complex_serdes_utils_tests.cpp new file mode 100644 index 0000000000..c3f515c29f --- /dev/null +++ b/indexer/indexer_tests/complex_serdes_utils_tests.cpp @@ -0,0 +1,116 @@ +#include "testing/testing.hpp" + +#include "indexer/complex/serdes_utils.hpp" + +#include "coding/reader.hpp" +#include "coding/writer.hpp" + 
+#include +#include +#include +#include +#include + +namespace +{ +using ByteVector = std::vector; + +template +void DeltaEncodeDecodelTest(T const & cont, ValueType base = {}) +{ + ByteVector buffer; + MemWriter writer(buffer); + WriterSink sink(writer); + coding_utils::DeltaEncode(sink, cont, base); + + MemReader reader(buffer.data(), buffer.size()); + ReaderSource src(reader); + T res; + coding_utils::DeltaDecode(src, std::inserter(res, std::end(res)), base); + TEST_EQUAL(cont, res, ()); +} + +template +void CollectionPrimitiveTest(T const & cont) +{ + ByteVector buffer; + MemWriter writer(buffer); + WriterSink sink(writer); + coding_utils::WriteCollectionPrimitive(sink, cont); + + MemReader reader(buffer.data(), buffer.size()); + ReaderSource src(reader); + T res; + coding_utils::ReadCollectionPrimitive(src, std::inserter(res, std::end(res))); + TEST_EQUAL(cont, res, ()); +} + +UNIT_TEST(Utils_DeltaEncodeDecode) +{ + { + std::vector cont; + DeltaEncodeDecodelTest(cont); + } + { + std::list cont{1, 2, 3, 4}; + cont.sort(); + DeltaEncodeDecodelTest(cont); + } + { + std::list cont{6, 7, 30, 40}; + cont.sort(); + DeltaEncodeDecodelTest(cont, 6); + } + { + std::list cont{-1, -2, -3, -4}; + cont.sort(); + DeltaEncodeDecodelTest(cont); + } + { + std::vector cont{1, 2, 3, 4, 32, 124}; + std::sort(std::begin(cont), std::end(cont)); + DeltaEncodeDecodelTest(cont, 1); + } + { + std::vector cont{-1, -2, -3, -4, 23, 67}; + std::sort(std::begin(cont), std::end(cont)); + DeltaEncodeDecodelTest(cont); + } + { + std::set cont{1, 2, 3, 4, 999, 100124, 243}; + DeltaEncodeDecodelTest(cont); + } + { + std::set cont{-1, -2, -3, -4}; + DeltaEncodeDecodelTest(cont); + } +} + +UNIT_TEST(Utils_CollectionPrimitive) +{ + { + std::list const cont{1, 2, 3, 4}; + CollectionPrimitiveTest(cont); + } + { + std::list const cont{-1, -2, -3, -4}; + CollectionPrimitiveTest(cont); + } + { + std::vector const cont{1, 2, 3, 4}; + CollectionPrimitiveTest(cont); + } + { + std::vector const cont{-1, -2, 
-3, -4}; + CollectionPrimitiveTest(cont); + } + { + std::set const cont{1, 2, 3, 4}; + CollectionPrimitiveTest(cont); + } + { + std::set const cont{-1, -2, -3, -4}; + CollectionPrimitiveTest(cont); + } +} +} // namespace