Comments were addressed.

This commit is contained in:
Yuri Gorshenin 2015-02-12 17:03:42 +03:00 committed by Alex Zolotarev
parent fca244a024
commit 9cb0221908
3 changed files with 75 additions and 76 deletions

View file

@ -1,80 +1,59 @@
#include "features_offsets_table.hpp"
#include "../../std/string.hpp"
#include "../coding/file_writer.hpp"
#include "../base/assert.hpp"
#include "../base/scope_guard.hpp"
#include "../coding/file_writer.hpp"
#include "../../std/string.hpp"
#include "../defines.hpp"
namespace feature
{
// Default constructor of the offsets builder. The body is empty, so no
// explicit setup is performed here.
FeaturesOffsetsTable::Builder::Builder()
{
}
// Destructor of the offsets builder. The body is empty, so no explicit
// cleanup is performed here.
FeaturesOffsetsTable::Builder::~Builder()
{
}
// Appends |offset| to the end of the accumulated sequence of offsets.
// NOTE(review): the two signature lines below look like the old/new
// variants of a rendered diff -- confirm which one is current.
void FeaturesOffsetsTable::Builder::PushOffset(uint64_t offset)
void FeaturesOffsetsTable::Builder::PushOffset(uint64_t const offset)
{
// The sequence must stay strictly increasing: either nothing has been
// pushed yet, or the last stored offset is smaller than the new one.
ASSERT(m_offsets.empty() || m_offsets.back() < offset, ());
m_offsets.push_back(offset);
}
// Constructs a table from an elias_fano_builder by passing the builder's
// address to the constructor of m_table.
// NOTE(review): the signature appears twice (wrapped and single-line); these
// look like the old/new variants of a rendered diff -- confirm.
FeaturesOffsetsTable::FeaturesOffsetsTable(
succinct::elias_fano::elias_fano_builder & builder)
FeaturesOffsetsTable::FeaturesOffsetsTable(succinct::elias_fano::elias_fano_builder & builder)
: m_table(&builder)
{
}
// Constructs a table from a container handle: |handle| is moved into
// m_handle, and m_table is then mapped from the data that m_handle exposes.
// NOTE(review): the signature appears twice (wrapped and single-line); these
// look like the old/new variants of a rendered diff -- confirm.
FeaturesOffsetsTable::FeaturesOffsetsTable(
FilesMappingContainer::Handle && handle)
FeaturesOffsetsTable::FeaturesOffsetsTable(FilesMappingContainer::Handle && handle)
: m_handle(move(handle))
{
// m_handle is used here (not |handle|), because |handle| has already been
// moved from in the initializer list.
succinct::mapper::map(m_table, m_handle.GetData<char>());
}
// Destructor of the offsets table. The body is empty, so no explicit
// cleanup is performed here.
FeaturesOffsetsTable::~FeaturesOffsetsTable()
{
}
// static
// Builds a FeaturesOffsetsTable from the offsets accumulated in |builder|
// and returns it wrapped in a unique_ptr.
// NOTE(review): several statements below appear twice (old/new variants of
// a rendered diff) -- confirm which ones are current.
unique_ptr<FeaturesOffsetsTable> FeaturesOffsetsTable::Build(
Builder & builder)
unique_ptr<FeaturesOffsetsTable> FeaturesOffsetsTable::Build(Builder & builder)
{
vector<uint64_t> const & offsets = builder.m_offsets;
// The last stored offset is taken as the maximum; an empty sequence
// uses 0 instead.
uint64_t numOffsets = offsets.size();
uint64_t maxOffset = offsets.empty() ? 0 : offsets.back();
uint64_t const numOffsets = offsets.size();
uint64_t const maxOffset = offsets.empty() ? 0 : offsets.back();
// The succinct builder is constructed from the maximum offset and the
// number of offsets, then fed every offset in order.
succinct::elias_fano::elias_fano_builder elias_fano_builder(maxOffset,
numOffsets);
succinct::elias_fano::elias_fano_builder elias_fano_builder(maxOffset, numOffsets);
for (uint64_t offset : offsets)
elias_fano_builder.push_back(offset);
return unique_ptr<FeaturesOffsetsTable>(
new FeaturesOffsetsTable(elias_fano_builder));
return unique_ptr<FeaturesOffsetsTable>(new FeaturesOffsetsTable(elias_fano_builder));
}
// static
// Loads a FeaturesOffsetsTable from the section of |container| tagged
// FEATURES_OFFSETS_TABLE_FILE_TAG. Returns an empty unique_ptr when the
// handle obtained for that section is not valid.
// NOTE(review): several statements below appear twice (old/new variants of
// a rendered diff) -- confirm which ones are current.
unique_ptr<FeaturesOffsetsTable> FeaturesOffsetsTable::Load(
FilesMappingContainer const & container)
{
FilesMappingContainer::Handle handle(
container.Map(FEATURES_OFFSETS_TABLE_FILE_TAG));
FilesMappingContainer::Handle handle(container.Map(FEATURES_OFFSETS_TABLE_FILE_TAG));
if (!handle.IsValid())
return unique_ptr<FeaturesOffsetsTable>();
// The handle is moved into the new table, which keeps it in m_handle.
return unique_ptr<FeaturesOffsetsTable>(
new FeaturesOffsetsTable(std::move(handle)));
return unique_ptr<FeaturesOffsetsTable>(new FeaturesOffsetsTable(std::move(handle)));
}
// Serializes m_table into |container| under FEATURES_OFFSETS_TABLE_FILE_TAG.
// The table is first frozen into a separate file whose name is the
// container's file name followed by "." and the tag; that file is then
// handed to container.Write().
// NOTE(review): several statements below appear twice (old/new variants of
// a rendered diff) -- confirm which ones are current.
void FeaturesOffsetsTable::Save(FilesContainerW & container)
{
string const fileName =
container.GetFileName() + "." FEATURES_OFFSETS_TABLE_FILE_TAG;
// The guard is bound to FileWriter::DeleteFileX(fileName); presumably it
// removes the intermediate file when the guard goes out of scope -- confirm
// against MY_SCOPE_GUARD.
MY_SCOPE_GUARD(deleteFileGuard,
bind(&FileWriter::DeleteFileX, cref(fileName)));
string const fileName = container.GetFileName() + "." FEATURES_OFFSETS_TABLE_FILE_TAG;
MY_SCOPE_GUARD(deleteFileGuard, bind(&FileWriter::DeleteFileX, cref(fileName)));
succinct::mapper::freeze(m_table, fileName.c_str());
container.Write(fileName, FEATURES_OFFSETS_TABLE_FILE_TAG);
}

View file

@ -1,26 +1,36 @@
#pragma once
#include "../3party/succinct/elias_fano.hpp"
#include "../3party/succinct/mapper.hpp"
#include "../coding/file_container.hpp"
#include "../std/stdint.hpp"
#include "../std/unique_ptr.hpp"
#include "../std/vector.hpp"
#include "../3party/succinct/elias_fano.hpp"
#include "../3party/succinct/mapper.hpp"
namespace feature
{
/// This class is a wrapper around an Elias-Fano encoder that
/// efficiently encodes a strictly increasing sequence of feature
/// offsets in an MWM file and provides access to them by feature index.
class FeaturesOffsetsTable
{
public:
/// This class is used to accumulate strictly increasing features
/// offsets and then build FeaturesOffsetsTable.
class Builder
{
public:
Builder();
~Builder();
Builder() = default;
~Builder() = default;
/// Adds offset to the end of the sequence of already
/// accumulated offsets. Note that offset must be strictly
/// greater than all previously added offsets.
///
/// \param offset a feature's offset in a MWM file
void PushOffset(uint64_t offset);
/// \return number of already accumulated offsets
inline uint64_t size() const
{
return static_cast<uint64_t>(m_offsets.size());
@ -32,26 +42,46 @@ namespace feature
vector<uint64_t> m_offsets;
};
/// Builds FeaturesOffsetsTable from the strictly increasing
/// sequence of file offsets.
///
/// \param builder Builder containing sequence of offsets.
/// \return a pointer to an instance of FeaturesOffsetsTable
static unique_ptr<FeaturesOffsetsTable> Build(Builder & builder);
static unique_ptr<FeaturesOffsetsTable> Load(
FilesMappingContainer const & container);
~FeaturesOffsetsTable();
/// Loads FeaturesOffsetsTable from FilesMappingContainer. Note
/// that some part of a file referenced by container will be
/// mapped to the memory and used by internal structures of
/// FeaturesOffsetsTable.
///
/// \param container a container with a section devoted to
/// FeaturesOffsetsTable
/// \return a pointer to an instance of FeaturesOffsetsTable or nullptr
/// when it's not possible to load FeaturesOffsetsTable.
static unique_ptr<FeaturesOffsetsTable> Load(FilesMappingContainer const & container);
~FeaturesOffsetsTable() = default;
FeaturesOffsetsTable(FeaturesOffsetsTable const &) = delete;
FeaturesOffsetsTable const & operator=(FeaturesOffsetsTable const &) =
delete;
FeaturesOffsetsTable const & operator=(FeaturesOffsetsTable const &) = delete;
/// Serializes current instance to a section in container.
///
/// \param container a container current instance will be serialized to
void Save(FilesContainerW & container);
/// \param index index of a feature
/// \return offset of the feature
uint64_t GetFeatureOffset(size_t index) const;
/// \return number of features offsets in a table.
inline uint64_t size() const
{
  // The size is reported as the number of ones stored in m_table.
  uint64_t const numOffsets = m_table.num_ones();
  return numOffsets;
}
/// \return byte size of a table, may be slightly different from a
/// real byte size in memory or on disk due to alignment, but
/// can be used in benchmarks, logging, etc.
inline uint64_t byte_size()
{
return succinct::mapper::size_of(m_table);

View file

@ -1,21 +1,20 @@
#include "../../base/scope_guard.hpp"
#include "../features_offsets_table.hpp"
#include "../data_header.hpp"
#include "../features_vector.hpp"
#include "../../coding/file_container.hpp"
#include "../../defines.hpp"
#include "../../platform/platform.hpp"
#include "../../base/scope_guard.hpp"
#include "../../std/bind.hpp"
#include "../../std/string.hpp"
#include "../../defines.hpp"
#include "../../platform/platform.hpp"
#include "../../testing/testing.hpp"
#include "../data_header.hpp"
#include "../features_offsets_table.hpp"
#include "../features_vector.hpp"
namespace feature
{
// Builds a table from a builder that has had no offsets pushed and checks
// that the result is non-null and reports a size of zero.
// NOTE(review): the |table| declaration appears twice (wrapped and
// single-line); these look like old/new variants of a rendered diff.
UNIT_TEST(FeaturesOffsetsTable_Empty)
{
FeaturesOffsetsTable::Builder builder;
unique_ptr<FeaturesOffsetsTable> table(
FeaturesOffsetsTable::Build(builder));
unique_ptr<FeaturesOffsetsTable> table(FeaturesOffsetsTable::Build(builder));
TEST(table.get(), ());
TEST_EQUAL(static_cast<uint64_t>(0), table->size(), ());
}
@ -32,8 +31,7 @@ namespace feature
builder.PushOffset(513);
builder.PushOffset(1024);
unique_ptr<FeaturesOffsetsTable> table(
FeaturesOffsetsTable::Build(builder));
unique_ptr<FeaturesOffsetsTable> table(FeaturesOffsetsTable::Build(builder));
TEST(table.get(), ());
TEST_EQUAL(static_cast<uint64_t>(8), table->size(), ());
@ -50,8 +48,7 @@ namespace feature
UNIT_TEST(FeaturesOffsetsTable_ReadWrite)
{
Platform & p = GetPlatform();
FilesContainerR baseContainer(
p.GetReader("minsk-pass" DATA_FILE_EXTENSION));
FilesContainerR baseContainer(p.GetReader("minsk-pass" DATA_FILE_EXTENSION));
feature::DataHeader header;
header.Load(baseContainer.GetReader(HEADER_FILE_TAG));
@ -64,15 +61,12 @@ namespace feature
builder.PushOffset(offset);
});
unique_ptr<FeaturesOffsetsTable> table(
FeaturesOffsetsTable::Build(builder));
unique_ptr<FeaturesOffsetsTable> table(FeaturesOffsetsTable::Build(builder));
TEST(table.get(), ());
TEST_EQUAL(builder.size(), table->size(), ());
string const testFile =
p.WritablePathForFile("test_file" DATA_FILE_EXTENSION);
MY_SCOPE_GUARD(deleteTestFileGuard,
bind(&FileWriter::DeleteFileX, cref(testFile)));
string const testFile = p.WritablePathForFile("test_file" DATA_FILE_EXTENSION);
MY_SCOPE_GUARD(deleteTestFileGuard, bind(&FileWriter::DeleteFileX, cref(testFile)));
// Store table in a temporary data file.
{
@ -80,12 +74,11 @@ namespace feature
// Just copy all sections except a possibly existing offsets
// table section.
baseContainer.ForEachTag(
[&baseContainer, &testContainer](string const & tag)
{
if (tag != FEATURES_OFFSETS_TABLE_FILE_TAG)
testContainer.Write(baseContainer.GetReader(tag), tag);
});
baseContainer.ForEachTag([&baseContainer, &testContainer](string const & tag)
{
if (tag != FEATURES_OFFSETS_TABLE_FILE_TAG)
testContainer.Write(baseContainer.GetReader(tag), tag);
});
table->Save(testContainer);
testContainer.Finish();
}
@ -93,17 +86,14 @@ namespace feature
// Load table from the temporary data file.
{
FilesMappingContainer testContainer(testFile);
MY_SCOPE_GUARD(testContainerGuard,
bind(&FilesMappingContainer::Close, &testContainer));
MY_SCOPE_GUARD(testContainerGuard, bind(&FilesMappingContainer::Close, &testContainer));
unique_ptr<FeaturesOffsetsTable> loadedTable(
FeaturesOffsetsTable::Load(testContainer));
unique_ptr<FeaturesOffsetsTable> loadedTable(FeaturesOffsetsTable::Load(testContainer));
TEST(loadedTable.get(), ());
TEST_EQUAL(table->size(), loadedTable->size(), ());
for (uint64_t i = 0; i < table->size(); ++i)
TEST_EQUAL(table->GetFeatureOffset(i), loadedTable->GetFeatureOffset(i),
());
TEST_EQUAL(table->GetFeatureOffset(i), loadedTable->GetFeatureOffset(i), ());
}
}
} // namespace feature