From f47f4aded0755f169cc7b768804b40ef39eb8c34 Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Fri, 9 Oct 2015 20:52:52 +0300 Subject: [PATCH] [search] Added test for opposite endianness. --- 3party/succinct/mappable_vector.hpp | 8 ++ coding/coding_tests/succinct_mapper_test.cpp | 16 ++++ coding/succinct_mapper.hpp | 98 ++++++++++++++++---- indexer/indexer_tests/rank_table_test.cpp | 57 ++++++++++++ indexer/rank_table.cpp | 11 ++- indexer/rank_table.hpp | 7 +- 6 files changed, 173 insertions(+), 24 deletions(-) diff --git a/3party/succinct/mappable_vector.hpp b/3party/succinct/mappable_vector.hpp index 0f45f427ec..8e911cba47 100644 --- a/3party/succinct/mappable_vector.hpp +++ b/3party/succinct/mappable_vector.hpp @@ -17,6 +17,10 @@ namespace coding { template class FreezeVisitor; + +template +class ReverseFreezeVisitor; + class MapVisitor; class ReverseMapVisitor; } @@ -125,6 +129,10 @@ namespace succinct { namespace mapper { template friend class coding::FreezeVisitor; + + template + friend class coding::ReverseFreezeVisitor; + friend class coding::MapVisitor; friend class coding::ReverseMapVisitor; diff --git a/coding/coding_tests/succinct_mapper_test.cpp b/coding/coding_tests/succinct_mapper_test.cpp index c46008a41d..02e77844d7 100644 --- a/coding/coding_tests/succinct_mapper_test.cpp +++ b/coding/coding_tests/succinct_mapper_test.cpp @@ -36,3 +36,19 @@ UNIT_TEST(Freeze_Smoke) TEST_EQUAL(8, Map(value, reinterpret_cast(data.data()), "uint64_t"), ()); TEST_EQUAL(0x0123456789abcdef, value, ()); } + +UNIT_TEST(ReverseFreeze_Smoke) +{ + vector data; + { + MemWriter writer(data); + uint64_t const data = 0x0123456789abcdef; + ReverseFreeze(data, writer, "uint64_t"); + } + + TEST_EQUAL(8, data.size(), ()); + + uint64_t value = 0x0; + TEST_EQUAL(8, Map(value, reinterpret_cast(data.data()), "uint64_t"), ()); + TEST_EQUAL(0xefcdab8967452301, value, ()); +} diff --git a/coding/succinct_mapper.hpp b/coding/succinct_mapper.hpp index 2c2a774c1c..30d413f732 100644 --- 
a/coding/succinct_mapper.hpp +++ b/coding/succinct_mapper.hpp @@ -21,6 +21,20 @@ static T * Align8Ptr(T * ptr) inline uint32_t ToAlign8(uint64_t written) { return (0x8 - (written & 0x7)) & 0x7; } +inline bool IsAligned(uint64_t offset) { return ToAlign8(offset) == 0; } + +template +void WritePadding(TWriter & writer, uint64_t & bytesWritten) +{ + static uint64_t const zero = 0; + + uint32_t const padding = ToAlign8(bytesWritten); + if (padding == 0) + return; + writer.Write(&zero, padding); + bytesWritten += padding; +} + class MapVisitor { public: @@ -125,7 +139,7 @@ public: typename enable_if::value, FreezeVisitor &>::type operator()(T & val, char const * /* name */) { - ASSERT(IsAligned(), ()); + ASSERT(IsAligned(m_writer.Pos()), ()); val.map(*this); return *this; } @@ -134,48 +148,88 @@ public: typename enable_if::value, FreezeVisitor &>::type operator()(T & val, char const * /* name */) { - ASSERT(IsAligned(), ()); + ASSERT(IsAligned(m_writer.Pos()), ()); m_writer.Write(&val, sizeof(T)); m_bytesWritten += sizeof(T); - WritePadding(); + WritePadding(m_writer, m_bytesWritten); return *this; } template FreezeVisitor & operator()(succinct::mapper::mappable_vector & vec, char const * /* name */) { - ASSERT(IsAligned(), ()); + ASSERT(IsAligned(m_writer.Pos()), ()); (*this)(vec.m_size, "size"); size_t const bytes = static_cast(vec.m_size * sizeof(T)); m_writer.Write(vec.m_data, bytes); m_bytesWritten += bytes; - WritePadding(); + WritePadding(m_writer, m_bytesWritten); return *this; } uint64_t BytesWritten() const { return m_bytesWritten; } private: - bool IsAligned() const { return ToAlign8(m_writer.Pos()) == 0; } - - void WritePadding() - { - static uint64_t const zero = 0; - - uint32_t const padding = ToAlign8(m_bytesWritten); - if (padding == 0) - return; - m_writer.Write(&zero, padding); - m_bytesWritten += padding; - } - TWriter & m_writer; uint64_t m_bytesWritten; DISALLOW_COPY_AND_MOVE(FreezeVisitor); }; +template +class ReverseFreezeVisitor +{ +public: + 
explicit ReverseFreezeVisitor(TWriter & writer) : m_writer(writer), m_bytesWritten(0) {} + + template + typename enable_if::value, ReverseFreezeVisitor &>::type operator()( + T & val, char const * /* name */) + { + ASSERT(IsAligned(m_writer.Pos()), ()); + val.map(*this); + return *this; + } + + template + typename enable_if::value, ReverseFreezeVisitor &>::type operator()( + T & val, char const * /* name */) + { + ASSERT(IsAligned(m_writer.Pos()), ()); + T const reversedVal = ReverseByteOrder(val); + m_writer.Write(&reversedVal, sizeof(reversedVal)); + m_bytesWritten += sizeof(T); + WritePadding(m_writer, m_bytesWritten); + return *this; + } + + template + ReverseFreezeVisitor & operator()(succinct::mapper::mappable_vector & vec, + char const * /* name */) + { + ASSERT(IsAligned(m_writer.Pos()), ()); + (*this)(vec.m_size, "size"); + + for (auto const & val : vec) + { + T const reversedVal = ReverseByteOrder(val); + m_writer.Write(&reversedVal, sizeof(reversedVal)); + } + m_bytesWritten += static_cast(vec.m_size * sizeof(T)); + WritePadding(m_writer, m_bytesWritten); + return *this; + } + + uint64_t BytesWritten() const { return m_bytesWritten; } + +private: + TWriter & m_writer; + uint64_t m_bytesWritten; + + DISALLOW_COPY_AND_MOVE(ReverseFreezeVisitor); +}; + template uint64_t Map(T & value, uint8_t const * base, char const * name) { @@ -199,4 +253,12 @@ uint64_t Freeze(T & val, TWriter & writer, char const * name) visitor(val, name); return visitor.BytesWritten(); } + +template +uint64_t ReverseFreeze(T & val, TWriter & writer, char const * name) +{ + ReverseFreezeVisitor visitor(writer); + visitor(val, name); + return visitor.BytesWritten(); +} } // namespace coding diff --git a/indexer/indexer_tests/rank_table_test.cpp b/indexer/indexer_tests/rank_table_test.cpp index 65ab0e2ff3..59f1cf03a3 100644 --- a/indexer/indexer_tests/rank_table_test.cpp +++ b/indexer/indexer_tests/rank_table_test.cpp @@ -13,9 +13,12 @@ #include "coding/file_name_utils.hpp" #include 
"coding/file_writer.hpp" #include "coding/internal/file_data.hpp" +#include "coding/writer.hpp" #include "base/scope_guard.hpp" +#include "defines.hpp" + #include "std/string.hpp" #include "std/vector.hpp" @@ -100,3 +103,57 @@ UNIT_TEST(RankTableBuilder_EndToEnd) TestTable(ranks, mapPath); } + +UNIT_TEST(RankTableBuilder_WrongEndianness) +{ + char const kTestFile[] = "test.mwm"; + MY_SCOPE_GUARD(cleanup, bind(&FileWriter::DeleteFileX, kTestFile)); + + vector ranks = {0, 1, 2, 3, 4}; + { + FilesContainerW wcont(kTestFile); + search::RankTableBuilder::Create(ranks, wcont); + } + + // Load rank table in host endianness. + unique_ptr table; + { + FilesContainerR rcont(kTestFile); + table = search::RankTable::Load(rcont); + TEST(table.get(), ()); + TestTable(ranks, *table); + } + + // Serialize rank table in opposite endianness. + { + vector data; + { + MemWriter writer(data); + table->Serialize(writer, false /* preserveHostEndianness */); + } + + FilesContainerW wcont(kTestFile); + wcont.Write(data, RANKS_FILE_TAG); + } + + // Try to load rank table from opposite endianness. + { + FilesContainerR rcont(kTestFile); + auto table = search::RankTable::Load(rcont); + TEST(table.get(), ()); + TestTable(ranks, *table); + } + + // It's impossible to map rank table from opposite endianness. + { + FilesMappingContainer mcont(kTestFile); + auto table = search::RankTable::Load(mcont); + TEST(!table.get(), ()); + } + + // Try to re-create rank table in test file. + TEST(search::RankTableBuilder::CreateIfNotExists(kTestFile), ()); + + // Try to load and map rank table - both methods should work now. 
+ TestTable(ranks, kTestFile); +} diff --git a/indexer/rank_table.cpp b/indexer/rank_table.cpp index fa71ca1c2b..4bba203978 100644 --- a/indexer/rank_table.cpp +++ b/indexer/rank_table.cpp @@ -128,16 +128,19 @@ public: uint8_t Get(uint64_t i) const override { return m_coding.Get(i); } uint64_t Size() const override { return m_coding.Size(); } RankTable::Version GetVersion() const override { return V0; } - void Serialize(Writer & writer) override + void Serialize(Writer & writer, bool preserveHostEndianness) override { static uint64_t const padding = 0; uint8_t const version = GetVersion(); - uint8_t const flags = IsBigEndian(); + uint8_t const flags = preserveHostEndianness ? IsBigEndian() : !IsBigEndian(); writer.Write(&version, sizeof(version)); writer.Write(&flags, sizeof(flags)); writer.Write(&padding, 6); - Freeze(m_coding, writer, "SimpleDenseCoding"); + if (preserveHostEndianness) + Freeze(m_coding, writer, "SimpleDenseCoding"); + else + ReverseFreeze(m_coding, writer, "SimpleDenseCoding"); } // Loads RankTableV0 from a raw memory region. @@ -199,7 +202,7 @@ void SerializeRankTable(RankTable & table, FilesContainerW & wcont) vector buffer; { MemWriter writer(buffer); - table.Serialize(writer); + table.Serialize(writer, true /* preserveHostEndianness */); } wcont.Write(buffer, RANKS_FILE_TAG); diff --git a/indexer/rank_table.hpp b/indexer/rank_table.hpp index 2d784c2a91..bea8a8da1d 100644 --- a/indexer/rank_table.hpp +++ b/indexer/rank_table.hpp @@ -56,8 +56,11 @@ public: // Returns underlying data format version. virtual Version GetVersion() const = 0; - // Serializes rank table. - virtual void Serialize(Writer & writer) = 0; + // Serializes rank table. When |preserveHostEndianness| is true, + // the table is serialized in host endianness; otherwise, the opposite + // endianness is used. Please don't set this parameter to false + // unless you know what you are doing. 
+ virtual void Serialize(Writer & writer, bool preserveHostEndianness) = 0; // Copies whole section corresponding to a rank table and // deserializes it. Returns nullptr if there're no ranks section or