diff --git a/coding/simple_dense_coding.cpp b/coding/simple_dense_coding.cpp
index 03d88f1b8a..c75034b5be 100644
--- a/coding/simple_dense_coding.cpp
+++ b/coding/simple_dense_coding.cpp
@@ -4,6 +4,7 @@
 
 #include "std/algorithm.hpp"
 #include "std/limits.hpp"
+#include "std/utility.hpp"
 
 namespace coding
 {
@@ -46,10 +47,7 @@ public:
     }
   }
 
-  inline Code const & GetCode(uint8_t rank) const
-  {
-    return m_table[rank];
-  }
+  inline Code const & GetCode(uint8_t rank) const { return m_table[rank]; }
 
 private:
   Code m_table[kAlphabetSize];
@@ -108,6 +106,8 @@ SimpleDenseCoding::SimpleDenseCoding(vector<uint8_t> const & data)
   m_symbols.assign(symbols);
 }
 
+SimpleDenseCoding::SimpleDenseCoding(SimpleDenseCoding && rhs) { Swap(move(rhs)); }
+
 uint8_t SimpleDenseCoding::Get(uint64_t i) const
 {
   ASSERT_LESS(i, Size(), ());
diff --git a/coding/simple_dense_coding.hpp b/coding/simple_dense_coding.hpp
index f6ee0f438c..cd647991a9 100644
--- a/coding/simple_dense_coding.hpp
+++ b/coding/simple_dense_coding.hpp
@@ -36,6 +36,8 @@ public:
 
   SimpleDenseCoding(vector<uint8_t> const & data);
 
+  SimpleDenseCoding(SimpleDenseCoding && rhs);
+
   uint8_t Get(uint64_t i) const;
 
   inline uint64_t Size() const { return m_index.num_ones(); }
@@ -50,6 +52,14 @@ public:
     visitor(m_symbols, "m_symbols");
   }
 
+  template <class TSDC>
+  void Swap(TSDC && rhs)
+  {
+    m_bits.swap(rhs.m_bits);
+    m_index.swap(rhs.m_index);
+    m_symbols.swap(rhs.m_symbols);
+  }
+
 private:
   succinct::bit_vector m_bits;
   succinct::rs_bit_vector m_index;
diff --git a/coding/succinct_mapper.hpp b/coding/succinct_mapper.hpp
index a5c288dac0..2c2a774c1c 100644
--- a/coding/succinct_mapper.hpp
+++ b/coding/succinct_mapper.hpp
@@ -97,9 +97,10 @@ public:
     vec.clear();
     (*this)(vec.m_size, "size");
 
-    vec.m_data = reinterpret_cast<T const *>(m_cur);
-    for (auto const it = vec.cbegin(); it != vec.cend(); ++it)
-      *it = ReverseByteOrder(*it);
+    T * data = reinterpret_cast<T *>(m_cur);
+    for (uint64_t i = 0; i < vec.m_size; ++i)
+      data[i] = ReverseByteOrder(data[i]);
+    vec.m_data = data;
 
     m_cur = Align8Ptr(m_cur + vec.m_size * sizeof(T));
     return *this;
diff --git a/defines.hpp b/defines.hpp
index 6aeea94b81..430fd98afc 100644
--- a/defines.hpp
+++ b/defines.hpp
@@ -25,6 +25,7 @@
 #define METADATA_INDEX_FILE_TAG "metaidx"
 #define COMPRESSED_SEARCH_INDEX_FILE_TAG "csdx"
 #define FEATURE_OFFSETS_FILE_TAG "offs"
+#define RANKS_FILE_TAG "ranks"
 
 #define ROUTING_MATRIX_FILE_TAG "mercedes"
 #define ROUTING_EDGEDATA_FILE_TAG "daewoo"
diff --git a/indexer/indexer.pro b/indexer/indexer.pro
index f7944555bc..e7fe5e6a5b 100644
--- a/indexer/indexer.pro
+++ b/indexer/indexer.pro
@@ -43,6 +43,7 @@ SOURCES += \
     mwm_set.cpp \
     old/feature_loader_101.cpp \
     point_to_int64.cpp \
+    rank_table.cpp \
     scales.cpp \
     search_delimiters.cpp \
     search_index_builder.cpp \
@@ -92,6 +93,7 @@ HEADERS += \
     old/feature_loader_101.hpp \
     old/interval_index_101.hpp \
     point_to_int64.hpp \
+    rank_table.hpp \
     scale_index.hpp \
     scale_index_builder.hpp \
     scales.hpp \
diff --git a/indexer/indexer_tests/indexer_tests.pro b/indexer/indexer_tests/indexer_tests.pro
index 3b0202ff8f..ae4ee56e2d 100644
--- a/indexer/indexer_tests/indexer_tests.pro
+++ b/indexer/indexer_tests/indexer_tests.pro
@@ -4,8 +4,10 @@
 CONFIG -= app_bundle
 TEMPLATE = app
 ROOT_DIR = ../..
-DEPENDENCIES = indexer platform geometry coding base protobuf tomcrypt
+DEPENDENCIES = indexer platform geometry coding base protobuf tomcrypt succinct
+!linux {
 DEPENDENCIES += opening_hours
+}
 
 include($$ROOT_DIR/common.pri)
 
@@ -31,6 +33,7 @@ SOURCES += \
     interval_index_test.cpp \
     mwm_set_test.cpp \
     point_to_int64_test.cpp \
+    rank_table_test.cpp \
     scales_test.cpp \
     search_string_utils_test.cpp \
     sort_and_merge_intervals_test.cpp \
diff --git a/indexer/indexer_tests/rank_table_test.cpp b/indexer/indexer_tests/rank_table_test.cpp
new file mode 100644
index 0000000000..9430394000
--- /dev/null
+++ b/indexer/indexer_tests/rank_table_test.cpp
@@ -0,0 +1,58 @@
+#include "testing/testing.hpp"
+
+#include "indexer/rank_table.hpp"
+
+#include "platform/country_defines.hpp"
+#include "platform/local_country_file.hpp"
+
+#include "coding/file_container.hpp"
+#include "coding/file_writer.hpp"
+
+#include "base/scope_guard.hpp"
+
+#include "std/vector.hpp"
+
+namespace
+{
+void TestTable(vector<uint8_t> const & ranks, search::RankTable const & table)
+{
+  TEST_EQUAL(ranks.size(), table.Size(), ());
+  TEST_EQUAL(table.GetVersion(), search::RankTable::V1, ());
+  for (size_t i = 0; i < ranks.size(); ++i)
+    TEST_EQUAL(ranks[i], table.Get(i), ());
+}
+}  // namespace
+
+UNIT_TEST(FeatureRankTableBuilder_Smoke)
+{
+  char const kTestCont[] = "test.tmp";
+  size_t const kNumRanks = 256;
+
+  FileWriter::DeleteFileX(kTestCont);
+  MY_SCOPE_GUARD(cleanup, bind(&FileWriter::DeleteFileX, kTestCont));
+
+  vector<uint8_t> ranks;
+  for (size_t i = 0; i < kNumRanks; ++i)
+    ranks.push_back(static_cast<uint8_t>(i));
+
+  {
+    FilesContainerW wcont(kTestCont);
+    search::RankTableBuilder::Create(ranks, wcont);
+  }
+
+  // Tries to load table via file read.
+  {
+    FilesContainerR rcont(kTestCont);
+    auto table = search::RankTable::Load(rcont);
+    TEST(table, ());
+    TestTable(ranks, *table);
+  }
+
+  // Tries to load table via file mapping.
+  {
+    FilesMappingContainer mcont(kTestCont);
+    auto table = search::RankTable::Load(mcont);
+    TEST(table, ());
+    TestTable(ranks, *table);
+  }
+}
diff --git a/indexer/rank_table.cpp b/indexer/rank_table.cpp
new file mode 100644
index 0000000000..b7a500b19e
--- /dev/null
+++ b/indexer/rank_table.cpp
@@ -0,0 +1,299 @@
+#include "indexer/rank_table.hpp"
+
+#include "indexer/data_header.hpp"
+#include "indexer/feature_algo.hpp"
+#include "indexer/feature_utils.hpp"
+#include "indexer/features_offsets_table.hpp"
+#include "indexer/features_vector.hpp"
+#include "indexer/types_skipper.hpp"
+
+#include "platform/local_country_file.hpp"
+
+#include "coding/endianness.hpp"
+#include "coding/file_container.hpp"
+#include "coding/simple_dense_coding.hpp"
+#include "coding/succinct_mapper.hpp"
+#include "coding/writer.hpp"
+
+#include "base/assert.hpp"
+#include "base/logging.hpp"
+#include "base/macros.hpp"
+
+#include "std/utility.hpp"
+
+#include "defines.hpp"
+
+namespace search
+{
+uint64_t const kVersionOffset = 0;
+uint64_t const kFlagsOffset = 1;
+uint64_t const kHeaderSize = 8;
+
+namespace
+{
+// Returns true when flags claim that the serialized data has the same
+// endianness as a host.
+bool SameEndianness(uint8_t flags)
+{
+  bool const isHostBigEndian = IsBigEndian();
+  bool const isDataBigEndian = flags & 1;
+  return isHostBigEndian == isDataBigEndian;
+}
+
+class MemoryRegion
+{
+public:
+  virtual ~MemoryRegion() = default;
+
+  virtual uint64_t Size() const = 0;
+  virtual uint8_t const * ImmutableData() const = 0;
+};
+
+class MappedMemoryRegion : public MemoryRegion
+{
+public:
+  explicit MappedMemoryRegion(FilesMappingContainer::Handle && handle) : m_handle(move(handle)) {}
+
+  // MemoryRegion overrides:
+  uint64_t Size() const override { return m_handle.GetSize(); }
+  uint8_t const * ImmutableData() const override { return m_handle.GetData(); }
+
+private:
+  FilesMappingContainer::Handle m_handle;
+
+  DISALLOW_COPY(MappedMemoryRegion);
+};
+
+class CopiedMemoryRegion : public MemoryRegion
+{
+public:
+  explicit CopiedMemoryRegion(vector<uint8_t> && buffer) : m_buffer(move(buffer)) {}
+
+  // MemoryRegion overrides:
+  uint64_t Size() const override { return m_buffer.size(); }
+  uint8_t const * ImmutableData() const override { return m_buffer.data(); }
+
+  inline uint8_t * MutableData() { return m_buffer.data(); }
+
+private:
+  vector<uint8_t> m_buffer;
+
+  DISALLOW_COPY(CopiedMemoryRegion);
+};
+
+unique_ptr<CopiedMemoryRegion> GetMemoryRegionForTag(FilesContainerR & rcont,
+                                                     FilesContainerBase::Tag const & tag)
+{
+  if (!rcont.IsExist(tag))
+    return unique_ptr<CopiedMemoryRegion>();
+  FilesContainerR::ReaderT reader = rcont.GetReader(tag);
+  vector<uint8_t> buffer(reader.Size());
+  reader.Read(0, buffer.data(), buffer.size());
+  return make_unique<CopiedMemoryRegion>(move(buffer));
+}
+
+unique_ptr<MappedMemoryRegion> GetMemoryRegionForTag(FilesMappingContainer & mcont,
+                                                     FilesContainerBase::Tag const & tag)
+{
+  if (!mcont.IsExist(tag))
+    return unique_ptr<MappedMemoryRegion>();
+  FilesMappingContainer::Handle handle = mcont.Map(tag);
+  return make_unique<MappedMemoryRegion>(move(handle));
+}
+
+class RankTableV1 : public RankTable
+{
+public:
+  RankTableV1() = default;
+
+  explicit RankTableV1(vector<uint8_t> const & ranks) : m_coding(ranks) {}
+
+  // RankTable overrides:
+  uint8_t Get(uint64_t i) const override { return m_coding.Get(i); }
+  uint64_t Size() const override { return m_coding.Size(); }
+  RankTable::Version GetVersion() const override { return V1; }
+  void Serialize(Writer & writer) override
+  {
+    static uint64_t const padding = 0;
+
+    uint8_t const version = GetVersion();
+    uint8_t const flags = IsBigEndian();
+    writer.Write(&version, sizeof(version));
+    writer.Write(&flags, sizeof(flags));
+    writer.Write(&padding, static_cast<size_t>(kHeaderSize - sizeof(version) - sizeof(flags)));
+    Freeze(m_coding, writer, "SimpleDenseCoding");
+  }
+
+  // Loads rank table v1 from a raw memory region.
+  static unique_ptr<RankTableV1> Load(unique_ptr<MappedMemoryRegion> && region)
+  {
+    if (!region.get() || region->Size() < kHeaderSize)
+      return unique_ptr<RankTableV1>();
+
+    uint8_t const flags = region->ImmutableData()[kFlagsOffset];
+    if (!SameEndianness(flags))
+      return unique_ptr<RankTableV1>();
+
+    unique_ptr<RankTableV1> table(new RankTableV1());
+    coding::Map(table->m_coding, region->ImmutableData() + kHeaderSize, "SimpleDenseCoding");
+    table->m_region = move(region);
+    return table;
+  }
+
+  // Loads rank table v1 from a raw memory region. Modifies region in
+  // the case of endianness mismatch.
+  static unique_ptr<RankTableV1> Load(unique_ptr<CopiedMemoryRegion> && region)
+  {
+    if (!region.get() || region->Size() < kHeaderSize)
+      return unique_ptr<RankTableV1>();
+
+    unique_ptr<RankTableV1> table(new RankTableV1());
+    uint8_t const flags = region->ImmutableData()[kFlagsOffset];
+    if (SameEndianness(flags))
+      coding::Map(table->m_coding, region->ImmutableData() + kHeaderSize, "SimpleDenseCoding");
+    else
+      coding::ReverseMap(table->m_coding, region->MutableData() + kHeaderSize, "SimpleDenseCoding");
+    table->m_region = move(region);
+    return table;
+  }
+
+private:
+  unique_ptr<MemoryRegion> m_region;
+  coding::SimpleDenseCoding m_coding;
+};
+
+// Creates a rank section and serializes |table| to it.
+void SerializeRankTable(RankTable & table, FilesContainerW & wcont)
+{
+  if (wcont.IsExist(RANKS_FILE_TAG))
+    wcont.DeleteSection(RANKS_FILE_TAG);
+  ASSERT(!wcont.IsExist(RANKS_FILE_TAG), ());
+
+  vector<char> buffer;
+  {
+    MemWriter<decltype(buffer)> writer(buffer);
+    table.Serialize(writer);
+  }
+
+  wcont.Write(buffer, RANKS_FILE_TAG);
+  wcont.Finish();
+}
+
+// Deserializes rank table from a rank section. Returns null when it's
+// not possible to load a rank table (no rank section, corrupted
+// header, endianness mismatch for a mapped mwm).
+template <typename TRegion>
+unique_ptr<RankTable> LoadRankTable(unique_ptr<TRegion> && region)
+{
+  if (!region || !region->ImmutableData() || region->Size() < kHeaderSize)
+  {
+    LOG(LERROR, ("Invalid RankTable format."));
+    return unique_ptr<RankTable>();
+  }
+
+  RankTable::Version const version =
+      static_cast<RankTable::Version>(region->ImmutableData()[kVersionOffset]);
+  switch (version)
+  {
+  case RankTable::V1:
+    return RankTableV1::Load(move(region));
+  }
+  return unique_ptr<RankTable>();
+}
+
+// Calculates search rank for a feature.
+uint8_t CalcSearchRank(FeatureType const & ft)
+{
+  static search::TypesSkipper skipIndex;
+
+  feature::TypesHolder types(ft);
+  skipIndex.SkipTypes(types);
+  if (types.Empty())
+    return 0;
+
+  m2::PointD const center = feature::GetCenter(ft);
+  return feature::GetSearchRank(types, center, ft.GetPopulation());
+}
+}  // namespace
+
+RankTable::~RankTable() {}
+
+// static
+unique_ptr<RankTable> RankTable::Load(FilesContainerR & rcont)
+{
+  return LoadRankTable(GetMemoryRegionForTag(rcont, RANKS_FILE_TAG));
+}
+
+// static
+unique_ptr<RankTable> RankTable::Load(FilesMappingContainer & mcont)
+{
+  return LoadRankTable(GetMemoryRegionForTag(mcont, RANKS_FILE_TAG));
+}
+
+// static
+void RankTableBuilder::CalcSearchRanks(FilesContainerR & rcont, vector<uint8_t> & ranks)
+{
+  feature::DataHeader header(rcont);
+  unique_ptr<feature::FeaturesOffsetsTable> offsetsTable =
+      feature::FeaturesOffsetsTable::CreateIfNotExistsAndLoad(rcont);
+  ASSERT(offsetsTable.get(), ());
+  FeaturesVector featuresVector(rcont, header, offsetsTable.get());
+
+  featuresVector.ForEach([&ranks](FeatureType const & ft, uint32_t /* index */)
+                         {
+                           ranks.push_back(CalcSearchRank(ft));
+                         });
+}
+
+// static
+void RankTableBuilder::Create(platform::LocalCountryFile const & localFile)
+{
+  string const mapPath = localFile.GetPath(MapOptions::Map);
+
+  unique_ptr<RankTable> table;
+  {
+    FilesContainerR rcont(mapPath);
+    if (rcont.IsExist(RANKS_FILE_TAG))
+    {
+      auto reader = rcont.GetReader(RANKS_FILE_TAG);
+      if (reader.Size() >= kHeaderSize)
+      {
+        uint8_t flags;
+        reader.Read(kFlagsOffset, &flags, sizeof(flags));
+
+        if (SameEndianness(flags))
+        {
+          // Feature rank table already exists and has correct
+          // endianness. Nothing to do here.
+          return;
+        }
+
+        // Copy whole serialized table and try to deserialize it via
+        // reverse mapping.
+        auto region = GetMemoryRegionForTag(rcont, RANKS_FILE_TAG);
+        table = LoadRankTable(move(region));
+      }
+    }
+
+    // Table doesn't exist or has wrong format. It's better to create
+    // it from scratch.
+    if (!table)
+    {
+      vector<uint8_t> ranks;
+      CalcSearchRanks(rcont, ranks);
+      table = make_unique<RankTableV1>(ranks);
+    }
+  }
+
+  ASSERT(table.get(), ());
+  FilesContainerW wcont(mapPath, FileWriter::OP_WRITE_EXISTING);
+  SerializeRankTable(*table, wcont);
+}
+
+// static
+void RankTableBuilder::Create(vector<uint8_t> const & ranks, FilesContainerW & wcont)
+{
+  RankTableV1 table(ranks);
+  SerializeRankTable(table, wcont);
+}
+}  // namespace search
diff --git a/indexer/rank_table.hpp b/indexer/rank_table.hpp
new file mode 100644
index 0000000000..8f9b94b486
--- /dev/null
+++ b/indexer/rank_table.hpp
@@ -0,0 +1,101 @@
+#pragma once
+
+#include "std/cstdint.hpp"
+#include "std/unique_ptr.hpp"
+#include "std/vector.hpp"
+
+class FilesContainerR;
+class FilesContainerW;
+class FilesMappingContainer;
+class Writer;
+
+namespace platform
+{
+class LocalCountryFile;
+}
+
+namespace search
+{
+// A wrapper class around a rank table serialized as an mwm section.
+//
+// *NOTE* This wrapper is abstract enough, so feel free to change it, but
+// note that backward compatibility must always be preserved. Thus, when
+// adding new versions, never change the data format of old versions.
+
+// All rank tables are serialized in the following format:
+//
+// File offset (bytes)  Field name  Field size (bytes)
+// 0                    version     1
+// 1                    flags       1
+// 8                    data        *
+//
+// Flags bits:
+// 0 - endianness of the stored table, 1 if BigEndian, 0 otherwise.
+// [1, 8) - currently not used.
+
+// Data size and contents depend on the version, but note that data
+// should always be 8-byte aligned. Therefore, there is a 6-byte unused
+// area between flags and data. Feel free to use it if you need it.
+class RankTable
+{
+public:
+  enum Version
+  {
+    V1 = 0
+  };
+
+  virtual ~RankTable();
+
+  // Returns rank of the i-th feature.
+  virtual uint8_t Get(uint64_t i) const = 0;
+
+  // Returns total number of ranks (or features, as there is a 1-1 correspondence).
+  virtual uint64_t Size() const = 0;
+
+  // Returns underlying data format version.
+  virtual Version GetVersion() const = 0;
+
+  // Serializes rank table.
+  virtual void Serialize(Writer & writer) = 0;
+
+  // Copies whole section corresponding to a rank table and
+  // deserializes it. Returns nullptr if there is no ranks section or
+  // the rank table's header is damaged.
+  //
+  // *NOTE* Return value can outlive |rcont|. Also note that the
+  // behaviour is undefined if the ranks section exists but is
+  // internally damaged.
+  static unique_ptr<RankTable> Load(FilesContainerR & rcont);
+
+  // Maps whole section corresponding to a rank table and deserializes
+  // it. Returns nullptr if there is no ranks section, the rank table's
+  // header is damaged or the serialized rank table has improper
+  // endianness.
+  //
+  // *NOTE* Return value can't outlive |mcont|, i.e. it must be
+  // destructed before |mcont| is closed. Also note that the behaviour
+  // is undefined if the ranks section exists but is internally
+  // damaged.
+  static unique_ptr<RankTable> Load(FilesMappingContainer & mcont);
+};
+
+// A builder class for rank tables.
+class RankTableBuilder
+{
+public:
+  // Calculates search ranks for all features in an mwm.
+  static void CalcSearchRanks(FilesContainerR & rcont, vector<uint8_t> & ranks);
+
+  // Creates rank table for an mwm.
+  // * When rank table already exists and has proper endianness, does nothing.
+  // * When rank table already exists but has improper endianness, re-creates it by
+  //   reverse mapping.
+  // * When rank table does not exist or exists but is damaged, calculates all
+  //   features' ranks and creates rank table.
+  static void Create(platform::LocalCountryFile const & localFile);
+
+  // Forces creation of a rank table from an array of ranks. Existing rank
+  // table is removed (if any).
+  static void Create(vector<uint8_t> const & ranks, FilesContainerW & wcont);
+};
+}  // namespace search