From 249b39b3619ed6412a593510d055d0b8c1f867c4 Mon Sep 17 00:00:00 2001
From: Maksim Andrianov
Date: Thu, 8 Aug 2019 15:19:03 +0300
Subject: [PATCH] [generator] Added classes for features generation optimization.

---
 generator/filter_world.cpp         |  73 +++++++++++
 generator/filter_world.hpp         |  29 +++++
 generator/raw_generator.cpp        | 199 +++++++++++++++++++++++++++++
 generator/raw_generator.hpp        |  62 +++++++++
 generator/raw_generator_writer.cpp |  78 +++++++++++
 generator/raw_generator_writer.hpp |  35 +++++
 generator/translators_pool.cpp     |  72 +++++++++++
 generator/translators_pool.hpp     |  30 +++++
 8 files changed, 578 insertions(+)
 create mode 100644 generator/filter_world.cpp
 create mode 100644 generator/filter_world.hpp
 create mode 100644 generator/raw_generator.cpp
 create mode 100644 generator/raw_generator.hpp
 create mode 100644 generator/raw_generator_writer.cpp
 create mode 100644 generator/raw_generator_writer.hpp
 create mode 100644 generator/translators_pool.cpp
 create mode 100644 generator/translators_pool.hpp

diff --git a/generator/filter_world.cpp b/generator/filter_world.cpp
new file mode 100644
index 0000000000..6351062df4
--- /dev/null
+++ b/generator/filter_world.cpp
@@ -0,0 +1,73 @@
+#include "generator/filter_world.hpp"
+
+#include "search/utils.hpp"
+
+#include "indexer/categories_holder.hpp"
+#include "indexer/classificator.hpp"
+#include "indexer/scales.hpp"
+
+namespace generator
+{
+// Remembers the popularity data file name. An empty name is accepted, but a
+// warning is logged because no popular attraction can be selected then.
+FilterWorld::FilterWorld(std::string const & popularityFilename)
+  : m_popularityFilename(popularityFilename)
+{
+  if (popularityFilename.empty())
+    LOG(LWARNING, ("popular_places_data option not set. Popular atractions will not be added to World.mwm"));
+}
+
+// Returns a new filter object constructed from the same popularity file name.
+std::shared_ptr FilterWorld::Clone() const
+{
+  return std::make_shared(m_popularityFilename);
+}
+
+// Accepts a feature when at least one of the three static predicates below
+// holds. They are evaluated left to right and evaluation stops at the first
+// one that returns true.
+bool FilterWorld::IsAccepted(feature::FeatureBuilder const & fb)
+{
+  return IsGoogScale(fb) ||
+         IsPopularAttraction(fb, m_popularityFilename) ||
+         IsInternationalAirport(fb);
+}
+
+// static
+// True when the feature carries the aeroway-aerodrome-international type.
+bool FilterWorld::IsInternationalAirport(feature::FeatureBuilder const & fb)
+{
+  // Function-local static: the type is resolved through classif() on the first
+  // call only and reused afterwards.
+  auto static const kAirport = classif().GetTypeByPath({"aeroway", "aerodrome", "international"});
+  return fb.HasType(kAirport);
+}
+
+// static
+// True when the feature's minimal draw scale does not exceed the upper world scale.
+bool FilterWorld::IsGoogScale(feature::FeatureBuilder const & fb)
+{
+  // GetMinFeatureDrawScale also checks suitable size for AREA features
+  return scales::GetUpperWorldScale() >= fb.GetMinFeatureDrawScale();
+}
+
+// static
+// True when the feature is named, has at least one type from the "attractions"
+// category and its popularity value reaches kPopularityThreshold.
+bool FilterWorld::IsPopularAttraction(feature::FeatureBuilder const & fb, std::string const & popularityFilename)
+{
+  // Unnamed features are never treated as popular attractions.
+  if (fb.GetName().empty())
+    return false;
+
+  // Computed once (function-local static). The ASSERT states the precondition
+  // of the std::binary_search in the lambda below: the list must be sorted.
+  auto static const attractionTypes = search::GetCategoryTypes("attractions", "en", GetDefaultCategories());
+  ASSERT(is_sorted(attractionTypes.begin(), attractionTypes.end()), ());
+  auto const & featureTypes = fb.GetTypes();
+  if (!std::any_of(featureTypes.begin(), featureTypes.end(), [](uint32_t t) {
+        return std::binary_search(attractionTypes.begin(), attractionTypes.end(), t);
+      }))
+  {
+    return false;
+  }
+
+  // NOTE(review): despite the m_ prefix this is a function-local static
+  // reference. It is bound during the first call only, so the
+  // popularityFilename passed to later calls is not consulted - confirm that a
+  // single file name per process is the intended contract.
+  auto static const & m_popularPlaces = PopularPlacesLoader::GetOrLoad(popularityFilename);
+  auto const it = m_popularPlaces.find(fb.GetMostGenericOsmId());
+  if (it == m_popularPlaces.end())
+    return false;
+
+  // todo(@t.yan): adjust
+  uint8_t const kPopularityThreshold = 12;
+  if (it->second < kPopularityThreshold)
+    return false;
+
+  // todo(@t.yan): maybe check place has wikipedia link.
+  return true;
+}
+}  // namespace generator
diff --git a/generator/filter_world.hpp b/generator/filter_world.hpp
new file mode 100644
index 0000000000..62833cfdc7
--- /dev/null
+++ b/generator/filter_world.hpp
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "generator/feature_builder.hpp"
+#include "generator/filter_interface.hpp"
+#include "generator/popular_places_section_builder.hpp"
+
+#include
+#include
+
+namespace generator
+{
+// Filter that accepts a feature if it passes the world draw-scale check, is a
+// popular attraction, or is an international airport (see IsAccepted()).
+class FilterWorld : public FilterInterface
+{
+public:
+  // popularityFilename is handed to IsPopularAttraction(); it may be empty.
+  explicit FilterWorld(std::string const & popularityFilename);
+
+  // FilterInterface overrides:
+  std::shared_ptr Clone() const override;
+
+  bool IsAccepted(feature::FeatureBuilder const & feature) override;
+
+  // The individual predicates combined by IsAccepted(). They are static, so
+  // they can be called without a FilterWorld instance.
+  static bool IsInternationalAirport(feature::FeatureBuilder const & fb);
+  static bool IsGoogScale(feature::FeatureBuilder const & fb);
+  static bool IsPopularAttraction(feature::FeatureBuilder const & fb, std::string const & popularityFilename);
+
+private:
+  std::string m_popularityFilename;
+};
+}  // namespace generator
diff --git a/generator/raw_generator.cpp b/generator/raw_generator.cpp
new file mode 100644
index 0000000000..1b89245ffa
--- /dev/null
+++ b/generator/raw_generator.cpp
@@ -0,0 +1,199 @@
+#include "generator/raw_generator.hpp"
+
+#include "generator/osm_source.hpp"
+#include "generator/processor_factory.hpp"
+#include "generator/raw_generator_writer.hpp"
+#include "generator/translator_factory.hpp"
+#include "generator/translators_pool.hpp"
+
+#include "base/thread_pool_computational.hpp"
+
+#include "defines.hpp"
+
+namespace generator
+{
+// Stores the generation settings and creates the objects held in m_cache,
+// m_queue and m_translators. genInfo is kept by reference, so it has to
+// outlive the generator.
+RawGenerator::RawGenerator(feature::GenerateInfo & genInfo, size_t threadsCount, size_t chankSize)
+  : m_genInfo(genInfo)
+  , m_threadsCount(threadsCount)
+  , m_chankSize(chankSize)
+  , m_cache(std::make_shared(genInfo))
+  , m_queue(std::make_shared())
+  , m_translators(std::make_shared())
+{
+}
+
+// Replaces m_cache with a newly created object constructed with forceReload = true.
+// NOTE(review): translators appended earlier were created with the previous
+// m_cache - presumably this has to be called before any Generate*() - confirm.
+void RawGenerator::ForceReloadCache()
+{
+  m_cache = std::make_shared(m_genInfo, true /* forceReload */);
+}
+
+// Returns the queue shared by the processors created below and the writer.
+std::shared_ptr RawGenerator::GetQueue()
+{
+  return m_queue;
+}
+
+// Registers the country translator (the ad-enabled variant unless disableAds
+// is set) together with the country final processor.
+void RawGenerator::GenerateCountries(bool disableAds)
+{
+  auto processor = CreateProcessor(ProcessorType::Country, m_queue, m_genInfo.m_targetDir, "",
+                                   m_genInfo.m_haveBordersForWholeWorld);
+  auto const translatorType = disableAds ? TranslatorType::Country : TranslatorType::CountryWithAds;
+  m_translators->Append(CreateTranslator(translatorType, processor, m_cache, m_genInfo));
+  m_finalProcessors.emplace(CreateCountryFinalProcessor());
+}
+
+// Registers the world translator (the ad-enabled variant unless disableAds is
+// set) together with the world final processor.
+void RawGenerator::GenerateWorld(bool disableAds)
+{
+  auto processor = CreateProcessor(ProcessorType::World, m_queue, m_genInfo.m_popularPlacesFilename);
+  auto const translatorType = disableAds ? TranslatorType::World : TranslatorType::WorldWithAds;
+  m_translators->Append(CreateTranslator(translatorType, processor, m_cache, m_genInfo));
+  m_finalProcessors.emplace(CreateWorldFinalProcessor());
+}
+
+// Registers the coastline translator together with the coastline final processor.
+void RawGenerator::GenerateCoasts()
+{
+  auto processor = CreateProcessor(ProcessorType::Coastline, m_queue);
+  m_translators->Append(CreateTranslator(TranslatorType::Coastline, processor, m_cache));
+  m_finalProcessors.emplace(CreateCoslineFinalProcessor());
+}
+
+// The three methods below register a translator backed by a Simple processor
+// that is configured with |filename|; no final processor is added.
+void RawGenerator::GenerateRegionFeatures(std::string const & filename)
+{
+  auto processor = CreateProcessor(ProcessorType::Simple, m_queue, filename);
+  m_translators->Append(CreateTranslator(TranslatorType::Regions, processor, m_cache, m_genInfo));
+}
+
+void RawGenerator::GenerateStreetsFeatures(std::string const & filename)
+{
+  auto processor = CreateProcessor(ProcessorType::Simple, m_queue, filename);
+  m_translators->Append(CreateTranslator(TranslatorType::Streets, processor, m_cache));
+}
+
+void RawGenerator::GenerateGeoObjectsFeatures(std::string const & filename)
+{
+  auto processor = CreateProcessor(ProcessorType::Simple, m_queue, filename);
+  m_translators->Append(CreateTranslator(TranslatorType::GeoObjects, processor, m_cache));
+}
+
+// Registers a caller-supplied translator without a final processor.
+void RawGenerator::GenerateCustom(std::shared_ptr const & translator)
+{
+  m_translators->Append(translator);
+}
+
+// Registers a caller-supplied translator and its final processor.
+void RawGenerator::GenerateCustom(std::shared_ptr const & translator,
+                                  std::shared_ptr const & finalProcessor)
+{
+  m_translators->Append(translator);
+  m_finalProcessors.emplace(finalProcessor);
+}
+
+// Runs the whole pipeline: first the translation stage, then the registered
+// final processors. Returns false if the translation stage fails.
+bool RawGenerator::Execute()
+{
+  if (!GenerateFilteredFeatures())
+    return false;
+
+  // Each pass of the outer loop drains one group of final processors that
+  // compare equal and runs that group on its own pool.
+  // NOTE(review): this presumably relies on ThreadPool's destructor waiting
+  // for the submitted work before the next group starts - confirm.
+  while (!m_finalProcessors.empty())
+  {
+    base::thread_pool::computational::ThreadPool threadPool(m_threadsCount);
+    while (true)
+    {
+      // Hold a copy of the pointer: a reference obtained from top() does not
+      // survive pop(), and the comparison below must look at the processor
+      // that has just been submitted, not at whatever replaced it.
+      auto const finalProcessor = m_finalProcessors.top();
+      threadPool.SubmitWork([finalProcessor]() { finalProcessor->Process(); });
+      m_finalProcessors.pop();
+      if (m_finalProcessors.empty() || *finalProcessor != *m_finalProcessors.top())
+        break;
+    }
+  }
+
+  LOG(LINFO, ("Final processing is finished."));
+  return true;
+}
+
+// Returns a copy of the names collected by the last successful translation stage.
+std::vector RawGenerator::GetNames() const
+{
+  return m_names;
+}
+
+// Builds the final processor for coastlines from the temporary and
+// intermediate file names of the world coasts file.
+RawGenerator::FinalProcessorPtr RawGenerator::CreateCoslineFinalProcessor()
+{
+  auto finalProcessor = std::make_shared(
+      m_genInfo.GetTmpFileName(WORLD_COASTS_FILE_NAME, DATA_FILE_EXTENSION_TMP));
+  finalProcessor->SetCoastlinesFilenames(
+      m_genInfo.GetIntermediateFileName(WORLD_COASTS_FILE_NAME, ".geom"),
+      m_genInfo.GetIntermediateFileName(WORLD_COASTS_FILE_NAME, RAW_GEOM_FILE_EXTENSION));
+  return finalProcessor;
+}
+
+// Builds the final processor for countries and configures it from m_genInfo.
+// Coastline inputs are passed only when m_emitCoasts is set.
+RawGenerator::FinalProcessorPtr RawGenerator::CreateCountryFinalProcessor()
+{
+  auto finalProcessor = std::make_shared(m_genInfo.m_targetDir, m_genInfo.m_tmpDir,
+                                         m_genInfo.m_haveBordersForWholeWorld,
+                                         m_threadsCount);
+  finalProcessor->SetBooking(m_genInfo.m_bookingDataFilename);
+  finalProcessor->SetCitiesAreas(m_genInfo.GetIntermediateFileName(CITIES_AREAS_TMP_FILENAME));
+  finalProcessor->SetPromoCatalog(m_genInfo.m_promoCatalogCitiesFilename);
+  if (m_genInfo.m_emitCoasts)
+  {
+    finalProcessor->SetCoastlines(m_genInfo.GetIntermediateFileName(WORLD_COASTS_FILE_NAME, ".geom"),
+                                  m_genInfo.GetTmpFileName(WORLD_COASTS_FILE_NAME));
+  }
+
+  finalProcessor->DumpCitiesBoundaries(m_genInfo.m_citiesBoundariesFilename);
+  return finalProcessor;
+}
+
+// Builds the final processor for the world and configures it from m_genInfo.
+RawGenerator::FinalProcessorPtr RawGenerator::CreateWorldFinalProcessor()
+{
+  auto finalProcessor = std::make_shared(
+      m_genInfo.m_tmpDir,
+      m_genInfo.GetIntermediateFileName(WORLD_COASTS_FILE_NAME, RAW_GEOM_FILE_EXTENSION));
+  finalProcessor->SetPopularPlaces(m_genInfo.m_popularPlacesFilename);
+  finalProcessor->SetCitiesAreas(m_genInfo.GetIntermediateFileName(CITIES_AREAS_TMP_FILENAME));
+  finalProcessor->SetPromoCatalog(m_genInfo.m_promoCatalogCitiesFilename);
+  return finalProcessor;
+}
+
+// Translation stage: reads the OSM input in chunks, hands every chunk to the
+// translators pool and lets RawGeneratorWriter drain m_queue in the background.
+// On success m_names receives the names reported by the writer.
+// Returns false when the source type is not handled or the translators fail to finish.
+bool RawGenerator::GenerateFilteredFeatures()
+{
+  SourceReader reader = m_genInfo.m_osmFileName.empty() ? SourceReader()
+                                                        : SourceReader(m_genInfo.m_osmFileName);
+
+  std::unique_ptr sourseProcessor;
+  switch (m_genInfo.m_osmFileType)
+  {
+  case feature::GenerateInfo::OsmSourceType::O5M:
+    sourseProcessor = std::make_unique(reader);
+    break;
+  case feature::GenerateInfo::OsmSourceType::XML:
+    sourseProcessor = std::make_unique(reader);
+    break;
+  }
+
+  // The switch has no default branch; do not dereference a null pointer if a
+  // value outside the two handled cases ever reaches this point.
+  if (!sourseProcessor)
+  {
+    LOG(LWARNING, ("Unsupported OSM source type."));
+    return false;
+  }
+
+  // Both values are size_t: zero threads must not wrap around to a huge number
+  // of translator copies, and a zero chunk size must not lead to indexing an
+  // empty vector below.
+  size_t const copyCount = m_threadsCount == 0 ? 0 : m_threadsCount - 1;
+  size_t const chunkSize = m_chankSize == 0 ? 1 : m_chankSize;
+
+  TranslatorsPool translators(m_translators, m_cache, copyCount);
+  RawGeneratorWriter rawGeneratorWriter(m_queue, m_genInfo.m_tmpDir);
+  rawGeneratorWriter.Run();
+
+  size_t element_pos = 0;
+  std::vector elements(chunkSize);
+  while (sourseProcessor->TryRead(elements[element_pos]))
+  {
+    if (++element_pos != chunkSize)
+      continue;
+
+    // The chunk is full: hand it over and start a fresh one.
+    translators.Emit(std::move(elements));
+    elements = std::vector(chunkSize);
+    element_pos = 0;
+  }
+  // Hand over the last, partially filled chunk.
+  elements.resize(element_pos);
+  translators.Emit(std::move(elements));
+
+  LOG(LINFO, ("Input was processed."));
+  if (!translators.Finish())
+    return false;
+
+  m_names = rawGeneratorWriter.GetNames();
+  LOG(LINFO, ("Names:", m_names));
+  return true;
+}
+}  // namespace generator
diff --git a/generator/raw_generator.hpp b/generator/raw_generator.hpp
new file mode 100644
index 0000000000..d98f210ac7
--- /dev/null
+++ b/generator/raw_generator.hpp
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "generator/features_processing_helpers.hpp"
+#include "generator/final_processor_intermediate_mwm.hpp"
+#include "generator/generate_info.hpp"
+#include "generator/intermediate_data.hpp"
+#include "generator/translator_collection.hpp"
+#include "generator/translator_interface.hpp"
+
+#include
+#include
+#include
+#include
+
+namespace generator
+{
+// Front end of the feature generation pipeline. The Generate*() methods
+// register translators (and, for some of them, final processors); Execute()
+// then runs the translation stage followed by the final processors.
+class RawGenerator
+{
+public:
+  // genInfo is stored by reference and must outlive the generator.
+  explicit RawGenerator(feature::GenerateInfo & genInfo, size_t threadsCount = 1,
+                        size_t chankSize = 1024);
+
+  void GenerateCountries(bool disableAds = true);
+  void GenerateWorld(bool disableAds = true);
+  void GenerateCoasts();
+  void GenerateRegionFeatures(std::string const & filename);
+  void GenerateStreetsFeatures(std::string const & filename);
+  void GenerateGeoObjectsFeatures(std::string const & filename);
+  void GenerateCustom(std::shared_ptr const & translator);
+  void GenerateCustom(std::shared_ptr const & translator,
+                      std::shared_ptr const & finalProcessor);
+  // Returns false if the translation stage fails.
+  bool Execute();
+  std::vector GetNames() const;
+  std::shared_ptr GetQueue();
+  void ForceReloadCache();
+
+private:
+  using FinalProcessorPtr = std::shared_ptr;
+
+  // Orders final processors by operator< of the pointed-to objects.
+  struct FinalProcessorPtrCmp
+  {
+    // const-qualified so the comparator can also be invoked through a const
+    // priority queue or a const copy of the functor.
+    bool operator()(FinalProcessorPtr const & l, FinalProcessorPtr const & r) const
+    {
+      return *l < *r;
+    }
+  };
+
+  FinalProcessorPtr CreateCoslineFinalProcessor();
+  FinalProcessorPtr CreateCountryFinalProcessor();
+  FinalProcessorPtr CreateWorldFinalProcessor();
+  bool GenerateFilteredFeatures();
+
+  feature::GenerateInfo & m_genInfo;
+  size_t m_threadsCount;
+  size_t m_chankSize;
+  std::shared_ptr m_cache;
+  std::shared_ptr m_queue;
+  std::shared_ptr m_translators;
+  std::priority_queue, FinalProcessorPtrCmp> m_finalProcessors;
+  std::vector m_names;
+};
+}  // namespace generator
diff --git a/generator/raw_generator_writer.cpp b/generator/raw_generator_writer.cpp
new file mode 100644
index 0000000000..aac73644a5
--- /dev/null
+++ b/generator/raw_generator_writer.cpp
@@ -0,0 +1,78 @@
+#include "generator/raw_generator_writer.hpp"
+
+#include "coding/varint.hpp"
+
+#include "base/file_name_utils.hpp"
+
+#include
+
+namespace generator
+{
+// Only stores its arguments; the background thread is started by Run().
+RawGeneratorWriter::RawGeneratorWriter(std::shared_ptr const & queue,
+                                       std::string const & path)
+  : m_queue(queue), m_path(path) {}
+
+
+// Makes sure the background thread is stopped before the members it uses go away.
+RawGeneratorWriter::~RawGeneratorWriter()
+{
+  ShutdownAndJoin();
+}
+
+// Starts the background thread. It pops chunks from the queue and writes them
+// until it receives an empty chunk, which serves as the stop signal.
+void RawGeneratorWriter::Run()
+{
+  m_thread = std::thread([this]() {
+    while (true)
+    {
+      FeatureProcessorChank chank;
+      m_queue->WaitAndPop(chank);
+      if (chank.IsEmpty())
+        return;
+
+      Write(chank.Get());
+    }
+  });
+}
+
+// Stops the background thread first, so m_collectors is no longer modified,
+// and then returns the affiliation names for which a file was created.
+std::vector RawGeneratorWriter::GetNames()
+{
+  ShutdownAndJoin();
+  std::vector names;
+  names.reserve(m_collectors.size());
+  for (auto const & p : m_collectors)
+    names.emplace_back(p.first);
+
+  return names;
+}
+
+// Appends every chunk buffer, prefixed with its size as a varint, to the file
+// of each non-empty affiliation of that chunk. Files are created on first use
+// as m_path/<affiliation> + DATA_FILE_EXTENSION_TMP.
+void RawGeneratorWriter::Write(std::vector const & vecChanks)
+{
+  for (auto const & chank : vecChanks)
+  {
+    for (auto const & affiliation : chank.m_affiliations)
+    {
+      if (affiliation.empty())
+        continue;
+
+      auto collectorIt = m_collectors.find(affiliation);
+      if (collectorIt == std::cend(m_collectors))
+      {
+        auto path = base::JoinPath(m_path, affiliation + DATA_FILE_EXTENSION_TMP);
+        auto writer = std::make_unique(std::move(path));
+        collectorIt = m_collectors.emplace(affiliation, std::move(writer)).first;
+      }
+
+      auto & collector = collectorIt->second;
+      auto const & buffer = chank.m_buffer;
+      WriteVarUint(*collector, static_cast(buffer.size()));
+      collector->Write(buffer.data(), buffer.size());
+    }
+  }
+}
+
+// Asks the background thread to stop and waits for it. Safe to call repeatedly.
+void RawGeneratorWriter::ShutdownAndJoin()
+{
+  // Nothing to stop if the thread was never started or has already been
+  // joined. Pushing the stop signal anyway (as GetNames() followed by the
+  // destructor used to do) would leave a stale empty chunk in the shared
+  // queue, and the next writer attached to that queue would exit immediately.
+  if (!m_thread.joinable())
+    return;
+
+  m_queue->Push({});
+  m_thread.join();
+}
+}  // namespace generator
diff --git a/generator/raw_generator_writer.hpp b/generator/raw_generator_writer.hpp
new file mode 100644
index 0000000000..d286eb8ba4
--- /dev/null
+++ b/generator/raw_generator_writer.hpp
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "generator/feature_builder.hpp"
+#include "generator/features_processing_helpers.hpp"
+
+#include
+#include
+#include
+#include
+#include
+
+namespace generator
+{
+// Drains a queue of feature chunks on a background thread and writes every
+// chunk into one file per affiliation under the given path.
+class RawGeneratorWriter
+{
+public:
+  // queue is the source of chunks; path is the directory the files go to.
+  RawGeneratorWriter(std::shared_ptr const & queue,
+                     std::string const & path);
+  ~RawGeneratorWriter();
+
+  // Starts the background thread.
+  void Run();
+  // Stops the background thread and returns the names (affiliations) of the
+  // files that were created.
+  std::vector GetNames();
+
+private:
+  using FeatureBuilderWriter = feature::FeatureBuilderWriter;
+
+  void Write(std::vector const & vecChanks);
+  void ShutdownAndJoin();
+
+  std::thread m_thread;
+  std::shared_ptr m_queue;
+  std::string m_path;
+  // One writer per affiliation, created on first use by Write().
+  std::unordered_map> m_collectors;
+};
+}  // namespace generator
diff --git a/generator/translators_pool.cpp b/generator/translators_pool.cpp
new file mode 100644
index 0000000000..08e1d96773
--- /dev/null
+++ b/generator/translators_pool.cpp
@@ -0,0 +1,72 @@
+#include "generator/translators_pool.hpp"
+
+#include
+
+namespace generator
+{
+// Slot 0 holds |original|; slots 1..copyCount hold clones of it, each created
+// with its own clone of |cache|. Every slot index is put into
+// m_freeTranslators, and the thread pool gets copyCount + 1 threads.
+TranslatorsPool::TranslatorsPool(std::shared_ptr const & original,
+                                 std::shared_ptr const & cache,
+                                 size_t copyCount)
+  : m_translators({original})
+  , m_threadPool(copyCount + 1)
+{
+  m_freeTranslators.Push(0);
+  m_translators.reserve(copyCount + 1);
+  for (size_t i = 0; i < copyCount; ++i)
+  {
+    auto cache_ = cache->Clone();
+    auto translator = original->Clone(cache_);
+    m_translators.emplace_back(translator);
+    m_freeTranslators.Push(i + 1);
+  }
+}
+
+// Takes the index of a free translator (WaitAndPop on m_freeTranslators) and
+// schedules a task that feeds every element of the chunk to that translator.
+// The task gives the index back once it is done.
+void TranslatorsPool::Emit(std::vector && elements)
+{
+  base::threads::DataWrapper d;
+  m_freeTranslators.WaitAndPop(d);
+  auto const idx = d.Get();
+  // The chunk is moved into the task; the pool object itself is captured by
+  // reference, so it has to stay alive until the task has run.
+  m_threadPool.SubmitWork([&, idx, elements{move(elements)}]() mutable {
+    for (auto & element : elements)
+      m_translators[idx]->Emit(element);
+
+    m_freeTranslators.Push(idx);
+  });
+}
+
+// Stops the worker pool, merges all translators pairwise into a single one and
+// returns the result of Save() on it.
+bool TranslatorsPool::Finish()
+{
+  m_threadPool.WaitAndStop();
+  using TranslatorPtr = std::shared_ptr;
+  base::threads::ThreadSafeQueue> queue;
+  // Seed the queue with already fulfilled futures, one per translator.
+  for (auto const & t : m_translators)
+  {
+    std::promise p;
+    p.set_value(t);
+    queue.Push(p.get_future());
+  }
+
+  base::thread_pool::computational::ThreadPool pool(m_translators.size() / 2 + 1);
+  // Every iteration replaces two queued futures with one future for their
+  // merge, so the queue shrinks by one entry per pass until one is left.
+  while (queue.Size() != 1)
+  {
+    std::future left;
+    std::future right;
+    queue.WaitAndPop(left);
+    queue.WaitAndPop(right);
+    queue.Push(pool.Submit([left{move(left)}, right{move(right)}]() mutable {
+      auto leftTranslator = left.get();
+      auto rigthTranslator = right.get();
+      // NOTE(review): a translator that is the result of an earlier merge gets
+      // Finish() called again here, and the final one once more below, so
+      // Finish() appears to be expected to tolerate repeated calls - confirm.
+      rigthTranslator->Finish();
+      leftTranslator->Finish();
+      leftTranslator->Merge(*rigthTranslator);
+      return leftTranslator;
+    }));
+  }
+
+  std::future translatorFuture;
+  queue.WaitAndPop(translatorFuture);
+  auto translator = translatorFuture.get();
+  translator->Finish();
+  return translator->Save();
+}
+}  // namespace generator
diff --git a/generator/translators_pool.hpp b/generator/translators_pool.hpp
new file mode 100644
index 0000000000..8dcc3e9d47
--- /dev/null
+++ b/generator/translators_pool.hpp
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "generator/intermediate_data.hpp"
+#include "generator/osm_element.hpp"
+#include "generator/translator_interface.hpp"
+
+#include "base/thread_pool_computational.hpp"
+#include "base/thread_safe_queue.hpp"
+
+#include
+#include
+
+namespace generator
+{
+// Holds one translator plus copyCount clones of it and distributes chunks of
+// elements between them on a thread pool. Finish() merges the clones and saves
+// the result.
+class TranslatorsPool
+{
+public:
+  explicit TranslatorsPool(std::shared_ptr const & original,
+                           std::shared_ptr const & cache,
+                           size_t copyCount);
+
+  // Schedules processing of one chunk on a currently free translator.
+  void Emit(std::vector && elements);
+  // Returns the result of Save() on the merged translator.
+  bool Finish();
+
+private:
+  std::vector> m_translators;
+  base::thread_pool::computational::ThreadPool m_threadPool;
+  // Indices into m_translators that are not used by a running task.
+  base::threads::ThreadSafeQueue> m_freeTranslators;
+};
+}  // namespace generator