[generator] Added classes for features generation optimization.

This commit is contained in:
Maksim Andrianov 2019-08-08 15:19:03 +03:00 committed by LaGrunge
parent bf738ddbb9
commit 249b39b361
8 changed files with 578 additions and 0 deletions

View file

@ -0,0 +1,73 @@
#include "generator/filter_world.hpp"
#include "search/utils.hpp"
#include "indexer/categories_holder.hpp"
#include "indexer/classificator.hpp"
#include "indexer/scales.hpp"
namespace generator
{
// Remembers |popularityFilename| and logs a warning when it is empty.
FilterWorld::FilterWorld(std::string const & popularityFilename)
  : m_popularityFilename(popularityFilename)
{
  bool const isPopularityDataMissing = m_popularityFilename.empty();
  if (isPopularityDataMissing)
  {
    LOG(LWARNING, ("popular_places_data option not set. Popular atractions will not be added to World.mwm"));
  }
}
// Returns a new FilterWorld constructed from the same popularity filename.
std::shared_ptr<FilterInterface> FilterWorld::Clone() const
{
  auto copy = std::make_shared<FilterWorld>(m_popularityFilename);
  return copy;
}
// Accepts |fb| when any of the three predicates holds. The checks run in the
// same order as before and stop at the first one that succeeds.
bool FilterWorld::IsAccepted(feature::FeatureBuilder const & fb)
{
  if (IsGoogScale(fb))
    return true;

  if (IsPopularAttraction(fb, m_popularityFilename))
    return true;

  return IsInternationalAirport(fb);
}
// static
// Returns true when |fb| has the classificator type aeroway-aerodrome-international.
bool FilterWorld::IsInternationalAirport(feature::FeatureBuilder const & fb)
{
  // The type is looked up once, on the first call.
  static auto const kInternationalAirportType =
      classif().GetTypeByPath({"aeroway", "aerodrome", "international"});

  bool const isInternationalAirport = fb.HasType(kInternationalAirportType);
  return isInternationalAirport;
}
// static
// Returns true when the minimal draw scale of |fb| does not exceed the upper world scale.
bool FilterWorld::IsGoogScale(feature::FeatureBuilder const & fb)
{
  auto const upperWorldScale = scales::GetUpperWorldScale();
  // GetMinFeatureDrawScale also checks suitable size for AREA features
  auto const minDrawScale = fb.GetMinFeatureDrawScale();
  return upperWorldScale >= minDrawScale;
}
// static
// Returns true when |fb| is named, has at least one type from the "attractions"
// category and its popularity value reaches the threshold below.
bool FilterWorld::IsPopularAttraction(feature::FeatureBuilder const & fb, std::string const & popularityFilename)
{
  // Unnamed features are rejected immediately.
  if (fb.GetName().empty())
    return false;

  // Computed once, on the first call that gets here. binary_search below requires sorted input.
  static auto const kAttractionTypes = search::GetCategoryTypes("attractions", "en", GetDefaultCategories());
  ASSERT(std::is_sorted(kAttractionTypes.begin(), kAttractionTypes.end()), ());

  auto const isAttractionType = [](uint32_t type) {
    return std::binary_search(kAttractionTypes.begin(), kAttractionTypes.end(), type);
  };
  auto const & types = fb.GetTypes();
  bool const hasAttractionType = std::any_of(types.begin(), types.end(), isAttractionType);
  if (!hasAttractionType)
    return false;

  // NOTE(review): this is a function-local static reference, so it is bound on the first
  // call that gets here; |popularityFilename| of later calls is not consulted - confirm
  // that all callers always pass the same filename.
  static auto const & kPopularPlaces = PopularPlacesLoader::GetOrLoad(popularityFilename);
  auto const placeIt = kPopularPlaces.find(fb.GetMostGenericOsmId());

  // todo(@t.yan): adjust
  uint8_t const kPopularityThreshold = 12;
  if (placeIt == kPopularPlaces.end() || placeIt->second < kPopularityThreshold)
    return false;

  // todo(@t.yan): maybe check place has wikipedia link.
  return true;
}
} // namespace generator

View file

@ -0,0 +1,29 @@
#pragma once
#include "generator/feature_builder.hpp"
#include "generator/filter_interface.hpp"
#include "generator/popular_places_section_builder.hpp"
#include <memory>
#include <string>
namespace generator
{
// Filter whose IsAccepted() passes a feature when at least one of the static
// predicates declared below holds for it.
class FilterWorld : public FilterInterface
{
public:
// |popularityFilename| is stored and later forwarded to IsPopularAttraction().
// A warning is logged when it is empty.
explicit FilterWorld(std::string const & popularityFilename);
// FilterInterface overrides:
// Returns a new FilterWorld created with the same popularity filename.
std::shared_ptr<FilterInterface> Clone() const override;
// True when IsGoogScale(), IsPopularAttraction() or IsInternationalAirport() is true.
bool IsAccepted(feature::FeatureBuilder const & feature) override;
// True when |fb| has the type aeroway-aerodrome-international.
static bool IsInternationalAirport(feature::FeatureBuilder const & fb);
// True when the minimal draw scale of |fb| is not greater than scales::GetUpperWorldScale().
static bool IsGoogScale(feature::FeatureBuilder const & fb);
// True when |fb| is named, has a type from the "attractions" category and its
// popularity, looked up in the data obtained for |popularityFilename|, is at least 12.
static bool IsPopularAttraction(feature::FeatureBuilder const & fb, std::string const & popularityFilename);
private:
// Path passed to the constructor; used by IsAccepted() and Clone().
std::string m_popularityFilename;
};
} // namespace generator

199
generator/raw_generator.cpp Normal file
View file

@ -0,0 +1,199 @@
#include "generator/raw_generator.hpp"
#include "generator/osm_source.hpp"
#include "generator/processor_factory.hpp"
#include "generator/raw_generator_writer.hpp"
#include "generator/translator_factory.hpp"
#include "generator/translators_pool.hpp"
#include "base/thread_pool_computational.hpp"
#include "defines.hpp"
namespace generator
{
// Binds the generator to |genInfo| and creates the intermediate data cache, the
// queue of processed features and an empty translator collection.
// |threadsCount| and |chankSize| equal to zero are replaced with 1:
// - GenerateFilteredFeatures() computes |m_threadsCount - 1|, which would wrap around for 0;
// - GenerateFilteredFeatures() indexes a vector of |m_chankSize| elements, which would be
//   empty for 0.
RawGenerator::RawGenerator(feature::GenerateInfo & genInfo, size_t threadsCount, size_t chankSize)
  : m_genInfo(genInfo)
  , m_threadsCount(threadsCount == 0 ? 1 : threadsCount)
  , m_chankSize(chankSize == 0 ? 1 : chankSize)
  , m_cache(std::make_shared<generator::cache::IntermediateData>(genInfo))
  , m_queue(std::make_shared<FeatureProcessorQueue>())
  , m_translators(std::make_shared<TranslatorCollection>())
{
}
// Replaces |m_cache| with a newly constructed IntermediateData for |m_genInfo|,
// passing true as the forceReload argument.
void RawGenerator::ForceReloadCache()
{
  auto reloadedCache = std::make_shared<cache::IntermediateData>(m_genInfo, true /* forceReload */);
  m_cache = std::move(reloadedCache);
}
// Returns the queue that is handed to every processor created by this generator
// and that RawGeneratorWriter reads from in GenerateFilteredFeatures().
std::shared_ptr<FeatureProcessorQueue> RawGenerator::GetQueue()
{
return m_queue;
}
// Registers the country translator (with or without ads, depending on |disableAds|)
// and schedules the country final processor.
void RawGenerator::GenerateCountries(bool disableAds)
{
  auto countryProcessor = CreateProcessor(ProcessorType::Country, m_queue, m_genInfo.m_targetDir, "",
                                          m_genInfo.m_haveBordersForWholeWorld);

  auto const type = disableAds ? TranslatorType::Country : TranslatorType::CountryWithAds;
  auto countryTranslator = CreateTranslator(type, countryProcessor, m_cache, m_genInfo);
  m_translators->Append(std::move(countryTranslator));

  m_finalProcessors.emplace(CreateCountryFinalProcessor());
}
// Registers the world translator (with or without ads, depending on |disableAds|)
// and schedules the world final processor.
void RawGenerator::GenerateWorld(bool disableAds)
{
  auto worldProcessor = CreateProcessor(ProcessorType::World, m_queue, m_genInfo.m_popularPlacesFilename);

  auto const type = disableAds ? TranslatorType::World : TranslatorType::WorldWithAds;
  auto worldTranslator = CreateTranslator(type, worldProcessor, m_cache, m_genInfo);
  m_translators->Append(std::move(worldTranslator));

  m_finalProcessors.emplace(CreateWorldFinalProcessor());
}
void RawGenerator::GenerateCoasts()
{
auto processor = CreateProcessor(ProcessorType::Coastline, m_queue);
m_translators->Append(CreateTranslator(TranslatorType::Coastline, processor, m_cache));
m_finalProcessors.emplace(CreateCoslineFinalProcessor());
}
void RawGenerator::GenerateRegionFeatures(string const & filename)
{
auto processor = CreateProcessor(ProcessorType::Simple, m_queue, filename);
m_translators->Append(CreateTranslator(TranslatorType::Regions, processor, m_cache, m_genInfo));
}
void RawGenerator::GenerateStreetsFeatures(string const & filename)
{
auto processor = CreateProcessor(ProcessorType::Simple, m_queue, filename);
m_translators->Append(CreateTranslator(TranslatorType::Streets, processor, m_cache));
}
void RawGenerator::GenerateGeoObjectsFeatures(string const & filename)
{
auto processor = CreateProcessor(ProcessorType::Simple, m_queue, filename);
m_translators->Append(CreateTranslator(TranslatorType::GeoObjects, processor, m_cache));
}
// Adds a caller-supplied |translator| to the translator collection.
// No final processor is scheduled by this overload.
void RawGenerator::GenerateCustom(std::shared_ptr<TranslatorInterface> const & translator)
{
m_translators->Append(translator);
}
void RawGenerator::GenerateCustom(std::shared_ptr<TranslatorInterface> const & translator,
std::shared_ptr<FinalProcessorIntermediateMwmInterface> const & finalProcessor)
{
m_translators->Append(translator);
m_finalProcessors.emplace(finalProcessor);
}
// Runs the feature generation and then all scheduled final processors.
// Returns false when GenerateFilteredFeatures() fails, true otherwise.
// Final processors are taken from |m_finalProcessors| in priority order; consecutive
// processors that do not differ (operator!=) are submitted to the same thread pool.
bool RawGenerator::Execute()
{
  if (!GenerateFilteredFeatures())
    return false;

  while (!m_finalProcessors.empty())
  {
    // A new pool is created for every group of processors.
    // NOTE(review): presumably the pool waits for the submitted work when it goes out of
    // scope, so that a group is finished before the next one starts - confirm in ThreadPool.
    base::thread_pool::computational::ThreadPool threadPool(m_threadsCount);
    while (true)
    {
      // Take a copy of the pointer before pop(): a reference obtained from top() refers
      // to the container's front slot, which holds a different element (or nothing)
      // after pop(), so the comparison below would not look at this processor.
      auto const finalProcessor = m_finalProcessors.top();
      m_finalProcessors.pop();

      threadPool.SubmitWork([finalProcessor{finalProcessor}]() {
        finalProcessor->Process();
      });

      if (m_finalProcessors.empty() || *finalProcessor != *m_finalProcessors.top())
        break;
    }
  }

  LOG(LINFO, ("Final processing is finished."));
  return true;
}
// Returns a copy of the names stored by the last successful GenerateFilteredFeatures() run
// (they are obtained there from RawGeneratorWriter::GetNames()).
std::vector<std::string> RawGenerator::GetNames() const
{
return m_names;
}
// Builds the coastline final processor and configures its coastline file names.
RawGenerator::FinalProcessorPtr RawGenerator::CreateCoslineFinalProcessor()
{
  auto coastlineProcessor = std::make_shared<CoastlineFinalProcessor>(
      m_genInfo.GetTmpFileName(WORLD_COASTS_FILE_NAME, DATA_FILE_EXTENSION_TMP));

  auto const geomFilename = m_genInfo.GetIntermediateFileName(WORLD_COASTS_FILE_NAME, ".geom");
  auto const rawGeomFilename =
      m_genInfo.GetIntermediateFileName(WORLD_COASTS_FILE_NAME, RAW_GEOM_FILE_EXTENSION);
  coastlineProcessor->SetCoastlinesFilenames(geomFilename, rawGeomFilename);

  return coastlineProcessor;
}
// Builds the country final processor and configures it from |m_genInfo|.
// Coastline files are passed only when |m_genInfo.m_emitCoasts| is set.
RawGenerator::FinalProcessorPtr RawGenerator::CreateCountryFinalProcessor()
{
  auto countryProcessor = std::make_shared<CountryFinalProcessor>(
      m_genInfo.m_targetDir, m_genInfo.m_tmpDir, m_genInfo.m_haveBordersForWholeWorld, m_threadsCount);

  countryProcessor->SetBooking(m_genInfo.m_bookingDataFilename);
  countryProcessor->SetCitiesAreas(m_genInfo.GetIntermediateFileName(CITIES_AREAS_TMP_FILENAME));
  countryProcessor->SetPromoCatalog(m_genInfo.m_promoCatalogCitiesFilename);

  if (m_genInfo.m_emitCoasts)
  {
    auto const coastsGeomFilename = m_genInfo.GetIntermediateFileName(WORLD_COASTS_FILE_NAME, ".geom");
    auto const coastsTmpFilename = m_genInfo.GetTmpFileName(WORLD_COASTS_FILE_NAME);
    countryProcessor->SetCoastlines(coastsGeomFilename, coastsTmpFilename);
  }

  countryProcessor->DumpCitiesBoundaries(m_genInfo.m_citiesBoundariesFilename);
  return countryProcessor;
}
// Builds the world final processor and configures popular places, cities areas
// and the promo catalog from |m_genInfo|.
RawGenerator::FinalProcessorPtr RawGenerator::CreateWorldFinalProcessor()
{
  auto const coastsRawGeomFilename =
      m_genInfo.GetIntermediateFileName(WORLD_COASTS_FILE_NAME, RAW_GEOM_FILE_EXTENSION);
  auto worldProcessor = std::make_shared<WorldFinalProcessor>(m_genInfo.m_tmpDir, coastsRawGeomFilename);

  worldProcessor->SetPopularPlaces(m_genInfo.m_popularPlacesFilename);
  worldProcessor->SetCitiesAreas(m_genInfo.GetIntermediateFileName(CITIES_AREAS_TMP_FILENAME));
  worldProcessor->SetPromoCatalog(m_genInfo.m_promoCatalogCitiesFilename);

  return worldProcessor;
}
bool RawGenerator::GenerateFilteredFeatures()
{
SourceReader reader = m_genInfo.m_osmFileName.empty() ? SourceReader()
: SourceReader(m_genInfo.m_osmFileName);
unique_ptr<ProcessorOsmElementsInterface> sourseProcessor;
switch (m_genInfo.m_osmFileType) {
case feature::GenerateInfo::OsmSourceType::O5M:
sourseProcessor = make_unique<ProcessorOsmElementsFromO5M>(reader);
break;
case feature::GenerateInfo::OsmSourceType::XML:
sourseProcessor = make_unique<ProcessorXmlElementsFromXml>(reader);
break;
}
TranslatorsPool translators(m_translators, m_cache, m_threadsCount - 1 /* copyCount */);
RawGeneratorWriter rawGeneratorWriter(m_queue, m_genInfo.m_tmpDir);
rawGeneratorWriter.Run();
size_t element_pos = 0;
std::vector<OsmElement> elements(m_chankSize);
while(sourseProcessor->TryRead(elements[element_pos]))
{
if (++element_pos != m_chankSize)
continue;
translators.Emit(std::move(elements));
elements = vector<OsmElement>(m_chankSize);
element_pos = 0;
}
elements.resize(element_pos);
translators.Emit(std::move(elements));
LOG(LINFO, ("Input was processed."));
if (!translators.Finish())
return false;
m_names = rawGeneratorWriter.GetNames();
LOG(LINFO, ("Names:", m_names));
return true;
}
} // namespace generator

View file

@ -0,0 +1,62 @@
#pragma once
#include "generator/features_processing_helpers.hpp"
#include "generator/final_processor_intermediate_mwm.hpp"
#include "generator/generate_info.hpp"
#include "generator/intermediate_data.hpp"
#include "generator/translator_collection.hpp"
#include "generator/translator_interface.hpp"
#include <memory>
#include <queue>
#include <string>
#include <vector>
namespace generator
{
// Collects translators and final processors through the Generate*() methods and
// runs them in Execute().
class RawGenerator
{
public:
  // |genInfo| is stored by reference. |threadsCount| is used for the thread pools,
  // |chankSize| is the number of OSM elements passed to the translators at once.
  explicit RawGenerator(feature::GenerateInfo & genInfo, size_t threadsCount = 1,
                        size_t chankSize = 1024);

  // Each of the following methods registers a translator; the first three also
  // schedule the corresponding final processor.
  void GenerateCountries(bool disableAds = true);
  void GenerateWorld(bool disableAds = true);
  void GenerateCoasts();
  void GenerateRegionFeatures(std::string const & filename);
  void GenerateStreetsFeatures(std::string const & filename);
  void GenerateGeoObjectsFeatures(std::string const & filename);
  void GenerateCustom(std::shared_ptr<TranslatorInterface> const & translator);
  void GenerateCustom(std::shared_ptr<TranslatorInterface> const & translator,
                      std::shared_ptr<FinalProcessorIntermediateMwmInterface> const & finalProcessor);

  // Generates features and then runs the scheduled final processors.
  // Returns false when the feature generation fails.
  bool Execute();
  // Returns the names collected during Execute().
  std::vector<std::string> GetNames() const;
  // Returns the queue shared with the created processors.
  std::shared_ptr<FeatureProcessorQueue> GetQueue();
  // Recreates the intermediate data cache with the forceReload flag set.
  void ForceReloadCache();

private:
  using FinalProcessorPtr = std::shared_ptr<FinalProcessorIntermediateMwmInterface>;

  // Orders final processors in |m_finalProcessors| by comparing the pointed-to objects.
  struct FinalProcessorPtrCmp
  {
    // const: the comparator has no state and must be callable on a const object.
    bool operator()(FinalProcessorPtr const & l, FinalProcessorPtr const & r) const
    {
      return *l < *r;
    }
  };

  FinalProcessorPtr CreateCoslineFinalProcessor();
  FinalProcessorPtr CreateCountryFinalProcessor();
  FinalProcessorPtr CreateWorldFinalProcessor();
  bool GenerateFilteredFeatures();

  feature::GenerateInfo & m_genInfo;
  size_t m_threadsCount;
  size_t m_chankSize;
  std::shared_ptr<cache::IntermediateData> m_cache;
  std::shared_ptr<FeatureProcessorQueue> m_queue;
  std::shared_ptr<TranslatorCollection> m_translators;
  std::priority_queue<FinalProcessorPtr, std::vector<FinalProcessorPtr>, FinalProcessorPtrCmp> m_finalProcessors;
  std::vector<std::string> m_names;
};
} // namespace generator

View file

@ -0,0 +1,78 @@
#include "generator/raw_generator_writer.hpp"
#include "coding/varint.hpp"
#include "base/file_name_utils.hpp"
#include <iterator>
namespace generator
{
// Stores the queue to read from and the directory in which the output files are created.
// The writer thread is not started here; see Run().
RawGeneratorWriter::RawGeneratorWriter(std::shared_ptr<FeatureProcessorQueue> const & queue,
                                       std::string const & path)
  : m_queue(queue)
  , m_path(path)
{
}
// Stops the writer thread (if it is still running) before the members are destroyed.
RawGeneratorWriter::~RawGeneratorWriter()
{
ShutdownAndJoin();
}
void RawGeneratorWriter::Run()
{
m_thread = std::thread([&]() {
while (true)
{
FeatureProcessorChank chank;
m_queue->WaitAndPop(chank);
if (chank.IsEmpty())
return;
Write(chank.Get());
}
});
}
// Stops the writer thread and returns the keys of |m_collectors|, i.e. the
// affiliations for which an output file was created.
std::vector<std::string> RawGeneratorWriter::GetNames()
{
  // The thread must be finished before |m_collectors| is read here.
  ShutdownAndJoin();

  std::vector<std::string> result;
  result.reserve(m_collectors.size());
  for (auto it = m_collectors.cbegin(); it != m_collectors.cend(); ++it)
    result.push_back(it->first);
  return result;
}
void RawGeneratorWriter::Write(std::vector<ProcessedData> const & vecChanks)
{
for (auto const & chank : vecChanks)
{
for (auto const & affiliation : chank.m_affiliations)
{
if (affiliation.empty())
continue;
auto collectorIt = m_collectors.find(affiliation);
if (collectorIt == std::cend(m_collectors))
{
auto path = base::JoinPath(m_path, affiliation + DATA_FILE_EXTENSION_TMP);
auto writer = std::make_unique<FileWriter>(std::move(path));
collectorIt = m_collectors.emplace(affiliation, std::move(writer)).first;
}
auto & collector = collectorIt->second;
auto const & buffer = chank.m_buffer;
WriteVarUint(*collector, static_cast<uint32_t>(buffer.size()));
collector->Write(buffer.data(), buffer.size());
}
}
}
// Asks the writer thread to stop and waits for it. Safe to call repeatedly.
// The stop signal is an empty chunk (see the loop in Run()). It is pushed only while
// the thread is joinable: this method is called from both GetNames() and the
// destructor, and pushing without a running consumer would leave a stale empty chunk
// in the shared queue.
void RawGeneratorWriter::ShutdownAndJoin()
{
  if (!m_thread.joinable())
    return;

  m_queue->Push({});
  m_thread.join();
}
} // namespace generator

View file

@ -0,0 +1,35 @@
#pragma once
#include "generator/feature_builder.hpp"
#include "generator/features_processing_helpers.hpp"
#include <memory>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
namespace generator
{
// Consumes chunks of processed features from a queue on its own thread and appends
// them to per-affiliation files located in the given directory.
class RawGeneratorWriter
{
public:
// |queue| is the source of chunks, |path| is the directory for the output files.
RawGeneratorWriter(std::shared_ptr<FeatureProcessorQueue> const & queue,
std::string const & path);
// Stops and joins the writer thread.
~RawGeneratorWriter();
// Starts the writer thread.
void Run();
// Stops the writer thread and returns the affiliations for which files were created.
std::vector<std::string> GetNames();
private:
// NOTE(review): this alias is not referenced in the code visible here.
using FeatureBuilderWriter = feature::FeatureBuilderWriter<feature::serialization_policy::MaxAccuracy>;
// Writes the buffers of |vecChanks| into the files of their affiliations.
void Write(std::vector<ProcessedData> const & vecChanks);
// Pushes an empty chunk as the stop signal and joins the thread.
void ShutdownAndJoin();
std::thread m_thread;
std::shared_ptr<FeatureProcessorQueue> m_queue;
// Directory in which the output files are created.
std::string m_path;
// Affiliation -> writer of the corresponding file.
std::unordered_map<std::string, std::unique_ptr<FileWriter>> m_collectors;
};
} // namespace generator

View file

@ -0,0 +1,72 @@
#include "generator/translators_pool.hpp"
#include <future>
namespace generator
{
// Creates a pool of |copyCount| + 1 translators: |original| itself (index 0) and
// |copyCount| clones of it, each cloned with its own clone of |cache|.
// All indices are initially put into the queue of free translators.
TranslatorsPool::TranslatorsPool(std::shared_ptr<TranslatorInterface> const & original,
                                 std::shared_ptr<cache::IntermediateData> const & cache,
                                 size_t copyCount)
  : m_translators({original})
  , m_threadPool(copyCount + 1)
{
  m_freeTranslators.Push(0);
  m_translators.reserve(copyCount + 1);

  for (size_t copyIdx = 0; copyIdx != copyCount; ++copyIdx)
  {
    auto clonedCache = cache->Clone();
    m_translators.emplace_back(original->Clone(clonedCache));
    // Index 0 is taken by |original|, so the copies start from 1.
    m_freeTranslators.Push(copyIdx + 1);
  }
}
void TranslatorsPool::Emit(std::vector<OsmElement> && elements)
{
base::threads::DataWrapper<size_t> d;
m_freeTranslators.WaitAndPop(d);
auto const idx = d.Get();
m_threadPool.SubmitWork([&, idx, elements{move(elements)}]() mutable {
for (auto & element : elements)
m_translators[idx]->Emit(element);
m_freeTranslators.Push(idx);
});
}
// Finishes all translators, merges them pairwise into a single one and saves it.
// Returns the result of Save() called on the translator left after merging.
bool TranslatorsPool::Finish()
{
// NOTE(review): presumably blocks until all work submitted by Emit() is done - confirm in ThreadPool.
m_threadPool.WaitAndStop();
using TranslatorPtr = std::shared_ptr<TranslatorInterface>;
// Work list of the merge. Every translator enters it as an already fulfilled future.
base::threads::ThreadSafeQueue<std::future<TranslatorPtr>> queue;
for (auto const & t : m_translators)
{
std::promise<TranslatorPtr> p;
p.set_value(t);
queue.Push(p.get_future());
}
base::thread_pool::computational::ThreadPool pool(m_translators.size() / 2 + 1);
// Each iteration replaces two futures from the queue with one future of their merge,
// so the loop ends when a single future is left.
while (queue.Size() != 1)
{
std::future<TranslatorPtr> left;
std::future<TranslatorPtr> right;
queue.WaitAndPop(left);
queue.WaitAndPop(right);
// The futures are moved into the task; get() inside it waits for earlier merges if needed.
queue.Push(pool.Submit([left{move(left)}, right{move(right)}]() mutable {
auto leftTranslator = left.get();
auto rigthTranslator = right.get();
rigthTranslator->Finish();
leftTranslator->Finish();
// The right translator is merged into the left one, which becomes the task's result.
leftTranslator->Merge(*rigthTranslator);
return leftTranslator;
}));
}
std::future<TranslatorPtr> translatorFuture;
queue.WaitAndPop(translatorFuture);
auto translator = translatorFuture.get();
// Needed for the case of a single translator, when no merge task has run.
// NOTE(review): otherwise Finish() has already been called on this translator inside a merge task.
translator->Finish();
return translator->Save();
}
} // namespace generator

View file

@ -0,0 +1,30 @@
#pragma once
#include "generator/intermediate_data.hpp"
#include "generator/osm_element.hpp"
#include "generator/translator_interface.hpp"
#include "base/thread_pool_computational.hpp"
#include "base/thread_safe_queue.hpp"
#include <memory>
#include <vector>
namespace generator
{
// Distributes chunks of OSM elements between a translator and its clones, which
// are run on a thread pool, and merges the translators at the end.
class TranslatorsPool
{
public:
// Creates |copyCount| clones of |original|; each clone gets its own clone of |cache|.
explicit TranslatorsPool(std::shared_ptr<TranslatorInterface> const & original,
std::shared_ptr<generator::cache::IntermediateData> const & cache,
size_t copyCount);
// Waits for a free translator and submits a task that passes |elements| to it.
void Emit(std::vector<OsmElement> && elements);
// Finishes and merges all translators and saves the result; returns the result of Save().
bool Finish();
private:
// Index 0 is the original translator, the rest are its clones.
std::vector<std::shared_ptr<TranslatorInterface>> m_translators;
// Runs the tasks submitted by Emit().
base::thread_pool::computational::ThreadPool m_threadPool;
// Indices into |m_translators| of the translators that are not processing a chunk now.
base::threads::ThreadSafeQueue<base::threads::DataWrapper<size_t>> m_freeTranslators;
};
} // namespace generator