[indexer] Implemented rank table builder.

This commit is contained in:
Yuri Gorshenin 2015-09-03 17:44:52 +03:00 committed by Sergey Yershov
parent b4bd3a3e80
commit 5e5befb58e
9 changed files with 484 additions and 8 deletions

View file

@ -4,6 +4,7 @@
#include "std/algorithm.hpp"
#include "std/limits.hpp"
#include "std/utility.hpp"
namespace coding
{
@ -46,10 +47,7 @@ public:
}
}
inline Code const & GetCode(uint8_t rank) const
{
return m_table[rank];
}
inline Code const & GetCode(uint8_t rank) const { return m_table[rank]; }
private:
Code m_table[kAlphabetSize];
@ -108,6 +106,8 @@ SimpleDenseCoding::SimpleDenseCoding(vector<uint8_t> const & data)
m_symbols.assign(symbols);
}
// Move constructor: exchanges the members of this object with those
// of |rhs| via Swap().
SimpleDenseCoding::SimpleDenseCoding(SimpleDenseCoding && rhs) { Swap(move(rhs)); }
uint8_t SimpleDenseCoding::Get(uint64_t i) const
{
ASSERT_LESS(i, Size(), ());

View file

@ -36,6 +36,8 @@ public:
SimpleDenseCoding(vector<uint8_t> const & data);
SimpleDenseCoding(SimpleDenseCoding && rhs);
uint8_t Get(uint64_t i) const;
inline uint64_t Size() const { return m_index.num_ones(); }
@ -50,6 +52,14 @@ public:
visitor(m_symbols, "m_symbols");
}
// Exchanges m_bits, m_index and m_symbols with the corresponding
// members of |rhs|. |rhs| is a forwarding reference, so both lvalues
// and rvalues are accepted.
template <typename TSDC>
void Swap(TSDC && rhs)
{
m_bits.swap(rhs.m_bits);
m_index.swap(rhs.m_index);
m_symbols.swap(rhs.m_symbols);
}
private:
succinct::bit_vector m_bits;
succinct::rs_bit_vector m_index;

View file

@ -97,9 +97,10 @@ public:
vec.clear();
(*this)(vec.m_size, "size");
vec.m_data = reinterpret_cast<const T *>(m_cur);
for (auto const it = vec.cbegin(); it != vec.cend(); ++it)
*it = ReverseByteOrder(*it);
T * data = reinterpret_cast<T *>(m_cur);
for (uint64_t i = 0; i < vec.m_size; ++i)
data[i] = ReverseByteOrder(data[i]);
vec.m_data = data;
m_cur = Align8Ptr(m_cur + vec.m_size * sizeof(T));
return *this;

View file

@ -25,6 +25,7 @@
#define METADATA_INDEX_FILE_TAG "metaidx"
#define COMPRESSED_SEARCH_INDEX_FILE_TAG "csdx"
#define FEATURE_OFFSETS_FILE_TAG "offs"
#define RANKS_FILE_TAG "ranks"
#define ROUTING_MATRIX_FILE_TAG "mercedes"
#define ROUTING_EDGEDATA_FILE_TAG "daewoo"

View file

@ -43,6 +43,7 @@ SOURCES += \
mwm_set.cpp \
old/feature_loader_101.cpp \
point_to_int64.cpp \
rank_table.cpp \
scales.cpp \
search_delimiters.cpp \
search_index_builder.cpp \
@ -74,6 +75,7 @@ HEADERS += \
feature_loader_base.hpp \
feature_meta.hpp \
feature_processor.hpp \
feature_rank_table.cpp \
feature_utils.hpp \
feature_visibility.hpp \
features_offsets_table.hpp \
@ -92,6 +94,7 @@ HEADERS += \
old/feature_loader_101.hpp \
old/interval_index_101.hpp \
point_to_int64.hpp \
rank_table.hpp \
scale_index.hpp \
scale_index_builder.hpp \
scales.hpp \

View file

@ -4,8 +4,10 @@ CONFIG -= app_bundle
TEMPLATE = app
ROOT_DIR = ../..
DEPENDENCIES = indexer platform geometry coding base protobuf tomcrypt
DEPENDENCIES = indexer platform geometry coding base protobuf tomcrypt succinct
!linux {
DEPENDENCIES += opening_hours
}
include($$ROOT_DIR/common.pri)
@ -31,6 +33,7 @@ SOURCES += \
interval_index_test.cpp \
mwm_set_test.cpp \
point_to_int64_test.cpp \
rank_table_test.cpp \
scales_test.cpp \
search_string_utils_test.cpp \
sort_and_merge_intervals_test.cpp \

View file

@ -0,0 +1,58 @@
#include "testing/testing.hpp"
#include "indexer/rank_table.hpp"
#include "platform/country_defines.hpp"
#include "platform/local_country_file.hpp"
#include "coding/file_container.hpp"
#include "coding/file_writer.hpp"
#include "base/scope_guard.hpp"
#include "std/vector.hpp"
namespace
{
// Checks that |table| is a V1 rank table holding exactly |ranks|: the
// same number of entries and the same rank at every position.
void TestTable(vector<uint8_t> const & ranks, search::RankTable const & table)
{
  TEST_EQUAL(ranks.size(), table.Size(), ());
  TEST_EQUAL(table.GetVersion(), search::RankTable::V1, ());
  // Compare with the expected rank rather than with the index: the two
  // coincide only when |ranks| happens to be the identity sequence.
  for (size_t i = 0; i < ranks.size(); ++i)
    TEST_EQUAL(ranks[i], table.Get(i), ());
}
} // namespace
UNIT_TEST(FeatureRankTableBuilder_Smoke)
{
  char const kTestCont[] = "test.tmp";
  size_t const kNumRanks = 256;

  // Start from a clean state and remove the container again on exit.
  FileWriter::DeleteFileX(kTestCont);
  MY_SCOPE_GUARD(cleanup, bind(&FileWriter::DeleteFileX, kTestCont));

  // The i-th feature gets rank i.
  vector<uint8_t> ranks(kNumRanks);
  for (size_t pos = 0; pos < ranks.size(); ++pos)
    ranks[pos] = pos;

  // Writes the rank table into a fresh container.
  {
    FilesContainerW writeCont(kTestCont);
    search::RankTableBuilder::Create(ranks, writeCont);
  }

  // Tries to load table via file read.
  {
    FilesContainerR readCont(kTestCont);
    auto loaded = search::RankTable::Load(readCont);
    TEST(loaded, ());
    TestTable(ranks, *loaded);
  }

  // Tries to load table via file mapping.
  {
    FilesMappingContainer mapCont(kTestCont);
    auto loaded = search::RankTable::Load(mapCont);
    TEST(loaded, ());
    TestTable(ranks, *loaded);
  }
}

299
indexer/rank_table.cpp Normal file
View file

@ -0,0 +1,299 @@
#include "indexer/rank_table.hpp"
#include "indexer/data_header.hpp"
#include "indexer/feature_algo.hpp"
#include "indexer/feature_utils.hpp"
#include "indexer/features_offsets_table.hpp"
#include "indexer/features_vector.hpp"
#include "indexer/types_skipper.hpp"
#include "platform/local_country_file.hpp"
#include "coding/endianness.hpp"
#include "coding/file_container.hpp"
#include "coding/simple_dense_coding.hpp"
#include "coding/succinct_mapper.hpp"
#include "coding/writer.hpp"
#include "base/assert.hpp"
#include "base/logging.hpp"
#include "base/macros.hpp"
#include "std/utility.hpp"
#include "defines.hpp"
namespace search
{
// Byte offset of the version field inside a serialized rank table.
uint64_t const kVersionOffset = 0;
// Byte offset of the flags field inside a serialized rank table.
uint64_t const kFlagsOffset = 1;
// Size in bytes of the header (version, flags and padding) that
// precedes the table data.
uint64_t const kHeaderSize = 8;
namespace
{
// Returns true when flags claim that the serialized data has the same
// endianness as a host.
bool SameEndianness(uint8_t flags)
{
bool const isHostBigEndian = IsBigEndian();
bool const isDataBigEndian = flags & 1;
return isHostBigEndian == isDataBigEndian;
}
// Read-only view of a contiguous block of bytes. Implementations
// decide where the bytes come from.
class MemoryRegion
{
public:
virtual ~MemoryRegion() = default;
// Returns the number of bytes in the region.
virtual uint64_t Size() const = 0;
// Returns a pointer to the first byte of the region.
virtual uint8_t const * ImmutableData() const = 0;
};
// A MemoryRegion backed by a mapped section of a
// FilesMappingContainer. Owns the mapping handle and exposes the
// bytes the handle refers to.
class MappedMemoryRegion : public MemoryRegion
{
public:
  // Takes ownership of |handle|. Explicit, so that a handle is never
  // silently converted into a region.
  explicit MappedMemoryRegion(FilesMappingContainer::Handle && handle) : m_handle(move(handle)) {}

  // MemoryRegion overrides:
  uint64_t Size() const override { return m_handle.GetSize(); }
  uint8_t const * ImmutableData() const override { return m_handle.GetData<uint8_t>(); }

private:
  FilesMappingContainer::Handle m_handle;

  DISALLOW_COPY(MappedMemoryRegion);
};
// A MemoryRegion that owns its bytes in a vector. Unlike a mapped
// region, it also gives write access to the bytes.
class CopiedMemoryRegion : public MemoryRegion
{
public:
  // Takes ownership of |buffer|. Explicit, so that a byte vector is
  // never silently converted into a region.
  explicit CopiedMemoryRegion(vector<uint8_t> && buffer) : m_buffer(move(buffer)) {}

  // MemoryRegion overrides:
  uint64_t Size() const override { return m_buffer.size(); }
  uint8_t const * ImmutableData() const override { return m_buffer.data(); }

  // Returns a writable pointer to the first byte of the region.
  inline uint8_t * MutableData() { return m_buffer.data(); }

private:
  vector<uint8_t> m_buffer;

  DISALLOW_COPY(CopiedMemoryRegion);
};
// Reads the whole section |tag| of |rcont| into a byte vector and
// wraps it into a CopiedMemoryRegion. Returns null when |rcont| has no
// such section.
unique_ptr<CopiedMemoryRegion> GetMemoryRegionForTag(FilesContainerR & rcont,
                                                     FilesContainerBase::Tag const & tag)
{
  unique_ptr<CopiedMemoryRegion> region;
  if (rcont.IsExist(tag))
  {
    FilesContainerR::ReaderT sectionReader = rcont.GetReader(tag);
    vector<uint8_t> bytes(sectionReader.Size());
    sectionReader.Read(0, bytes.data(), bytes.size());
    region = make_unique<CopiedMemoryRegion>(move(bytes));
  }
  return region;
}
// Maps the section |tag| of |mcont| and wraps the mapping handle into
// a MappedMemoryRegion. Returns null when |mcont| has no such section.
unique_ptr<MappedMemoryRegion> GetMemoryRegionForTag(FilesMappingContainer & mcont,
                                                     FilesContainerBase::Tag const & tag)
{
  unique_ptr<MappedMemoryRegion> region;
  if (mcont.IsExist(tag))
  {
    FilesMappingContainer::Handle mapping = mcont.Map(tag);
    region = make_unique<MappedMemoryRegion>(move(mapping));
  }
  return region;
}
class RankTableV1 : public RankTable
{
public:
RankTableV1() = default;
RankTableV1(vector<uint8_t> const & ranks) : m_coding(ranks) {}
// RankTable overrides:
uint8_t Get(uint64_t i) const override { return m_coding.Get(i); }
uint64_t Size() const override { return m_coding.Size(); }
RankTable::Version GetVersion() const override { return V1; }
void Serialize(Writer & writer) override
{
static uint64_t const padding = 0;
uint8_t const version = GetVersion();
uint8_t const flags = IsBigEndian();
writer.Write(&version, sizeof(version));
writer.Write(&flags, sizeof(flags));
writer.Write(&padding, 6);
Freeze(m_coding, writer, "SimpleDenseCoding");
}
// Loads rank table v1 from a raw memory region.
static unique_ptr<RankTableV1> Load(unique_ptr<MappedMemoryRegion> && region)
{
if (!region.get() || region->Size() < kHeaderSize)
return unique_ptr<RankTableV1>();
uint8_t const flags = region->ImmutableData()[kFlagsOffset];
if (!SameEndianness(flags))
return unique_ptr<RankTableV1>();
unique_ptr<RankTableV1> table(new RankTableV1());
coding::Map(table->m_coding, region->ImmutableData() + kHeaderSize, "SimpleDenseCoding");
table->m_region = move(region);
return table;
}
// Loads rank table v1 from a raw memory region. Modifies region in
// the case of endianness mismatch.
static unique_ptr<RankTableV1> Load(unique_ptr<CopiedMemoryRegion> && region)
{
if (!region.get() || region->Size() < kHeaderSize)
return unique_ptr<RankTableV1>();
unique_ptr<RankTableV1> table(new RankTableV1());
uint8_t const flags = region->ImmutableData()[kFlagsOffset];
if (SameEndianness(flags))
coding::Map(table->m_coding, region->ImmutableData() + kHeaderSize, "SimpleDenseCoding");
else
coding::ReverseMap(table->m_coding, region->MutableData() + kHeaderSize, "SimpleDenseCoding");
table->m_region = move(region);
return table;
}
private:
unique_ptr<MemoryRegion> m_region;
coding::SimpleDenseCoding m_coding;
};
// Serializes |table| into memory and stores the bytes as the ranks
// section of |wcont|, replacing an already existing ranks section.
void SerializeRankTable(RankTable & table, FilesContainerW & wcont)
{
  // Drop the old section first.
  if (wcont.IsExist(RANKS_FILE_TAG))
    wcont.DeleteSection(RANKS_FILE_TAG);
  ASSERT(!wcont.IsExist(RANKS_FILE_TAG), ());

  vector<char> serialized;
  {
    MemWriter<vector<char>> sink(serialized);
    table.Serialize(sink);
  }

  wcont.Write(serialized, RANKS_FILE_TAG);
  wcont.Finish();
}
// Deserializes a rank table from a memory region holding a rank
// section. Returns null when it's not possible to load a rank table
// (no rank section, corrupted header, unknown version, endianness
// mismatch for a mapped mwm).
template <typename TRegion>
unique_ptr<RankTable> LoadRankTable(unique_ptr<TRegion> && region)
{
  // The region must be large enough to hold the whole header.
  if (!region || !region->ImmutableData() || region->Size() < kHeaderSize)
  {
    LOG(LERROR, ("Invalid RankTable format."));
    return unique_ptr<RankTable>();
  }

  RankTable::Version const version =
      static_cast<RankTable::Version>(region->ImmutableData()[kVersionOffset]);
  switch (version)
  {
  case RankTable::V1:
    return RankTableV1::Load(move(region));
  }
  // The version byte does not match any known format.
  return unique_ptr<RankTable>();
}
// Computes the search rank of |ft|. A feature that has no types left
// after skipping gets rank 0.
uint8_t CalcSearchRank(FeatureType const & ft)
{
  static search::TypesSkipper skipper;

  feature::TypesHolder holder(ft);
  skipper.SkipTypes(holder);
  if (!holder.Empty())
  {
    m2::PointD const featureCenter = feature::GetCenter(ft);
    return feature::GetSearchRank(holder, featureCenter, ft.GetPopulation());
  }
  return 0;
}
} // namespace
// Defined out of line; there is nothing to release here.
RankTable::~RankTable() = default;
// static
// Copies the ranks section of |rcont| into memory and deserializes it.
unique_ptr<RankTable> RankTable::Load(FilesContainerR & rcont)
{
  auto copied = GetMemoryRegionForTag(rcont, RANKS_FILE_TAG);
  return LoadRankTable(move(copied));
}
// static
// Maps the ranks section of |mcont| and deserializes it in place.
unique_ptr<RankTable> RankTable::Load(FilesMappingContainer & mcont)
{
  auto mapped = GetMemoryRegionForTag(mcont, RANKS_FILE_TAG);
  return LoadRankTable(move(mapped));
}
// static
// Appends the search rank of every feature of |rcont| to |ranks|, in
// the order the features are visited.
void RankTableBuilder::CalcSearchRanks(FilesContainerR & rcont, vector<uint8_t> & ranks)
{
  feature::DataHeader header(rcont);
  unique_ptr<feature::FeaturesOffsetsTable> offsets =
      feature::FeaturesOffsetsTable::CreateIfNotExistsAndLoad(rcont);
  ASSERT(offsets.get(), ());

  auto appendRank = [&ranks](FeatureType const & ft, uint32_t /* index */)
  {
    ranks.push_back(CalcSearchRank(ft));
  };

  FeaturesVector featuresVector(rcont, header, offsets.get());
  featuresVector.ForEach(appendRank);
}
// static
// Makes sure that the mwm of |localFile| has a rank table in the byte
// order of the host: keeps a valid table, re-serializes a table with
// the opposite endianness, and otherwise builds the table from the
// features of the mwm.
void RankTableBuilder::Create(platform::LocalCountryFile const & localFile)
{
  string const mapPath = localFile.GetPath(MapOptions::Map);

  unique_ptr<RankTable> table;
  {
    // The reading container lives in its own scope, so it is destroyed
    // before the same file is opened for writing below.
    FilesContainerR rcont(mapPath);
    if (rcont.IsExist(RANKS_FILE_TAG))
    {
      auto reader = rcont.GetReader(RANKS_FILE_TAG);
      if (reader.Size() >= kHeaderSize)
      {
        uint8_t flags;
        reader.Read(kFlagsOffset, &flags, sizeof(flags));
        if (SameEndianness(flags))
        {
          // Feature rank table already exists and has correct
          // endianness. Nothing to do here.
          return;
        }

        // Copy whole serialized table and try to deserialize it via
        // reverse mapping.
        auto region = GetMemoryRegionForTag(rcont, RANKS_FILE_TAG);
        table = LoadRankTable(move(region));
      }
    }

    // Table doesn't exist or has wrong format. It's better to create
    // it from scratch.
    if (!table)
    {
      vector<uint8_t> ranks;
      CalcSearchRanks(rcont, ranks);
      table = make_unique<RankTableV1>(ranks);
    }
  }

  ASSERT(table.get(), ());

  // The mwm already holds other sections, so it must be modified in
  // place.
  // NOTE(review): FilesContainerW is expected to truncate the file in
  // its default mode - confirm against coding/file_container.hpp.
  FilesContainerW wcont(mapPath, FileWriter::OP_WRITE_EXISTING);
  SerializeRankTable(*table, wcont);
}
// static
// Builds a v1 table from |ranks| and stores it as the ranks section
// of |wcont|.
void RankTableBuilder::Create(vector<uint8_t> const & ranks, FilesContainerW & wcont)
{
  RankTableV1 builtTable{ranks};
  SerializeRankTable(builtTable, wcont);
}
} // namespace search

101
indexer/rank_table.hpp Normal file
View file

@ -0,0 +1,101 @@
#pragma once
#include "std/cstdint.hpp"
#include "std/unique_ptr.hpp"
#include "std/vector.hpp"
class FilesContainerR;
class FilesContainerW;
class FilesMappingContainer;
class Writer;
namespace platform
{
class LocalCountryFile;
}
namespace search
{
// A wrapper class around a rank table serialized as an mwm section.
//
// *NOTE* This wrapper is abstract enough, so feel free to change it,
// but note that backward compatibility must always be preserved. Thus,
// when adding new versions, never change the data format of old
// versions.
// All rank tables are serialized in the following format:
//
// File offset (bytes)  Field name  Field size (bytes)
// 0                    version     1
// 1                    flags       1
// 2                    padding     6
// 8                    data        *
//
// Flags bits:
// 0 - endianness of the stored table, 1 if BigEndian, 0 otherwise.
// [1, 8) - currently not used.
// Data size and contents depend on the version, but note that data
// should always be 8-bytes aligned. Therefore, there is a 6-byte empty
// area (padding) between flags and data. Feel free to use it if you
// need it.
class RankTable
{
public:
// Versions of the serialized data format.
enum Version
{
V1 = 0
};
virtual ~RankTable();
// Returns rank of the i-th feature.
virtual uint8_t Get(uint64_t i) const = 0;
// Returns total number of ranks (or features, as there is a 1-1 correspondence).
virtual uint64_t Size() const = 0;
// Returns underlying data format version.
virtual Version GetVersion() const = 0;
// Serializes rank table.
virtual void Serialize(Writer & writer) = 0;
// Copies whole section corresponding to a rank table and
// deserializes it. Returns nullptr if there is no ranks section or
// the rank table's header is damaged.
//
// *NOTE* Return value can outlive |rcont|. Also note that the
// behaviour is undefined if the ranks section exists but is
// internally damaged.
static unique_ptr<RankTable> Load(FilesContainerR & rcont);
// Maps whole section corresponding to a rank table and deserializes
// it. Returns nullptr if there is no ranks section, the rank table's
// header is damaged or the serialized rank table has improper
// endianness.
//
// *NOTE* Return value can't outlive |mcont|, i.e. it must be
// destructed before |mcont| is closed. Also note that the behaviour
// is undefined if the ranks section exists but is internally
// damaged.
static unique_ptr<RankTable> Load(FilesMappingContainer & mcont);
};
// A builder class for rank tables.
class RankTableBuilder
{
public:
// Calculates search ranks for all features in an mwm and appends
// them to |ranks|.
static void CalcSearchRanks(FilesContainerR & rcont, vector<uint8_t> & ranks);
// Creates rank table for an mwm.
// * When rank table already exists and has proper endianness, does nothing.
// * When rank table already exists but has improper endianness, re-creates it by
// reverse mapping.
// * When rank table does not exist or exists but is damaged, calculates all
// features' ranks and creates rank table.
static void Create(platform::LocalCountryFile const & localFile);
// Forces creation of a rank table from an array of ranks. An existing
// rank table is removed (if any).
static void Create(vector<uint8_t> const & ranks, FilesContainerW & wcont);
};
} // namespace search