[generator:regions] Parallel build regions tree

This commit is contained in:
Anatoly Serdtcev 2020-01-09 11:30:52 +03:00 committed by LaGrunge
parent de9a1c60e1
commit 143106cb1d
5 changed files with 101 additions and 25 deletions

View file

@ -125,6 +125,8 @@ public:
m_joiner.Join();
}
size_t Size() const noexcept { return m_threads.size(); }
private:
void Worker()
{

View file

@ -123,7 +123,8 @@ std::vector<std::string> GenerateTestRegions(std::vector<OsmElementData> const &
RegionInfo collector(filename);
BuildTestData(testData, regions, placePointsMap, collector);
RegionsBuilder builder(std::move(regions), std::move(placePointsMap));
base::thread_pool::computational::ThreadPool threadsPool{1};
RegionsBuilder builder(std::move(regions), std::move(placePointsMap), threadsPool);
std::vector<std::string> kvRegions;
builder.ForEachCountry([&](std::string const & /*name*/, Node::PtrList const & outers) {
for (auto const & tree : outers)
@ -292,7 +293,8 @@ UNIT_TEST(RegionsBuilderTest_GetCountryNames)
auto const filename = MakeCollectorData();
SCOPE_GUARD(removeCollectorFile, std::bind(Platform::RemoveFileIfExists, std::cref(filename)));
RegionInfo collector(filename);
RegionsBuilder builder(MakeTestDataSet1(collector), {} /* placePointsMap */);
base::thread_pool::computational::ThreadPool threadsPool{1};
RegionsBuilder builder(MakeTestDataSet1(collector), {} /* placePointsMap */, threadsPool);
auto const & countryNames = builder.GetCountryInternationalNames();
TEST_EQUAL(countryNames.size(), 2, ());
TEST(std::count(std::begin(countryNames), std::end(countryNames), "Country_1"), ());
@ -304,7 +306,8 @@ UNIT_TEST(RegionsBuilderTest_GetCountries)
auto const filename = MakeCollectorData();
SCOPE_GUARD(removeCollectorFile, std::bind(Platform::RemoveFileIfExists, std::cref(filename)));
RegionInfo collector(filename);
RegionsBuilder builder(MakeTestDataSet1(collector), {} /* placePointsMap */);
base::thread_pool::computational::ThreadPool threadsPool{1};
RegionsBuilder builder(MakeTestDataSet1(collector), {} /* placePointsMap */, threadsPool);
auto const & countries = builder.GetCountriesOuters();
TEST_EQUAL(countries.size(), 3, ());
size_t countries1 = std::count_if(std::begin(countries), std::end(countries),
@ -322,7 +325,8 @@ UNIT_TEST(RegionsBuilderTest_GetCountryTrees)
SCOPE_GUARD(removeCollectorFile, std::bind(Platform::RemoveFileIfExists, std::cref(filename)));
RegionInfo collector(filename);
std::vector<std::string> bankOfNames;
RegionsBuilder builder(MakeTestDataSet1(collector), {} /* placePointsMap */);
base::thread_pool::computational::ThreadPool threadsPool{1};
RegionsBuilder builder(MakeTestDataSet1(collector), {} /* placePointsMap */, threadsPool);
builder.ForEachCountry([&](std::string const & /*name*/, Node::PtrList const & outers) {
for (auto const & tree : outers)
{

View file

@ -43,6 +43,8 @@ public:
bool verbose, unsigned int threadsCount)
: m_pathRegionsTmpMwm{pathRegionsTmpMwm}
, m_pathOutRegionsKv{pathOutRegionsKv}
, m_threadsCount{threadsCount}
, m_taskProcessingThreadPool{threadsCount}
, m_verbose{verbose}
, m_regionsInfoCollector{pathInRegionsCollector}
, m_regionsKv{pathOutRegionsKv, std::ofstream::out}
@ -54,7 +56,8 @@ public:
PlacePointsMap placePointsMap;
std::tie(regions, placePointsMap) =
ReadDatasetFromTmpMwm(m_pathRegionsTmpMwm, m_regionsInfoCollector);
RegionsBuilder builder{std::move(regions), std::move(placePointsMap), threadsCount};
RegionsBuilder builder{
std::move(regions), std::move(placePointsMap), m_taskProcessingThreadPool};
GenerateRegions(builder);
LOG(LINFO, ("Finish generating regions.", timer.ElapsedSeconds(), "seconds."));
@ -339,6 +342,9 @@ private:
std::string m_pathRegionsTmpMwm;
std::string m_pathOutRegionsKv;
unsigned int m_threadsCount{1};
base::thread_pool::computational::ThreadPool mutable m_taskProcessingThreadPool;
bool m_verbose{false};
RegionInfo m_regionsInfoCollector;

View file

@ -10,6 +10,7 @@
#include "base/thread_pool_computational.hpp"
#include <algorithm>
#include <atomic>
#include <chrono>
#include <functional>
#include <numeric>
@ -21,12 +22,12 @@ namespace generator
{
namespace regions
{
RegionsBuilder::RegionsBuilder(Regions && regions, PlacePointsMap && placePointsMap,
unsigned int threadsCount)
: m_threadsCount(threadsCount)
RegionsBuilder::RegionsBuilder(
Regions && regions, PlacePointsMap && placePointsMap,
base::thread_pool::computational::ThreadPool & taskProcessingThreadPool)
: m_threadsCount{static_cast<unsigned int>(taskProcessingThreadPool.Size())}
, m_taskProcessingThreadPool{taskProcessingThreadPool}
{
ASSERT(m_threadsCount != 0, ());
std::erase_if(placePointsMap, [](auto const & item) {
return strings::IsASCIINumeric(item.second.GetName());
});
@ -119,14 +120,26 @@ Node::Ptr RegionsBuilder::BuildCountryRegionTree(
auto nodes = MakeCountryNodesInAreaOrder(outer, m_regionsInAreaOrder, countryCode,
countrySpecifier);
for (auto i = std::crbegin(nodes), end = std::crend(nodes); i != end; ++i)
{
if (auto parent = ChooseParent(nodes, i, countrySpecifier))
auto && parentChildPairs = FindParentChildPairs(nodes, countrySpecifier);
auto bindingTask = m_taskProcessingThreadPool.Submit([&] {
for (auto const & partion : parentChildPairs)
{
(*i)->SetParent(parent);
parent->AddChild(*i);
for (auto const & parentChildPair : partion)
{
auto & parent = parentChildPair.first;
auto & child = parentChildPair.second;
child->SetParent(parent);
parent->AddChild(child);
auto & children = parent->GetChildren();
std::sort(children.begin(), children.end(), [](auto && a, auto && b) {
return a->GetData().GetArea() > b->GetData().GetArea();
});
}
}
}
});
bindingTask.wait();
return nodes.front();
}
@ -156,9 +169,50 @@ std::vector<Node::Ptr> RegionsBuilder::MakeCountryNodesInAreaOrder(
return nodes;
}
// Finds (parent, child) pairs for |nodes| using the task processing thread pool.
//
// Every index of |nodes| starting from 1 is handed to exactly one worker task through a shared
// atomic counter; nodes[0] is never given a parent here (presumably it is the tree root - the
// caller returns nodes.front()). A pair is recorded only when ChooseParent() yields a non-null
// parent for the node.
//
// Returns one ParentChildPairs chunk per worker task. Which pairs land in which chunk, and their
// order inside a chunk, depends on thread scheduling, so callers must not rely on it.
//
// |nodes| must not be empty.
std::list<RegionsBuilder::ParentChildPairs> RegionsBuilder::FindParentChildPairs(
    std::vector<Node::Ptr> const & nodes, CountrySpecifier const & countrySpecifier) const
{
  // Validate the precondition before |nodes| is used for anything else.
  CHECK(!nodes.empty(), ());

  constexpr size_t kNodesCountPerTask = 1000;
  // At least one task, at most one task per pool thread.
  auto const tasksCount = std::min(std::max(size_t{1}, nodes.size() / kNodesCountPerTask),
                                   m_taskProcessingThreadPool.Size());

  // Next index of |nodes| that has not been claimed by any worker yet.
  std::atomic_size_t unprocessedIndex{1};

  auto findPairs = [&] {
    ParentChildPairs foundPairs;
    foundPairs.reserve(nodes.size() / tasksCount);
    while (true)
    {
      // The atomic post-increment hands every index to a single worker.
      auto const i = unprocessedIndex++;
      if (i >= nodes.size())
        break;

      auto const itemIterator = nodes.begin() + i;
      // ChooseParent() takes a reverse iterator; a reverse iterator built from the position
      // right after the item dereferences to the item itself.
      auto const itemReverseIterator = std::make_reverse_iterator(std::next(itemIterator));
      if (auto parent = ChooseParent(nodes, itemReverseIterator, countrySpecifier))
        foundPairs.emplace_back(std::move(parent), *itemIterator);
    }
    return foundPairs;
  };

  auto buildingTasks = std::vector<std::future<ParentChildPairs>>{};
  buildingTasks.reserve(tasksCount);
  for (size_t taskIndex = 0; taskIndex < tasksCount; ++taskIndex)
    buildingTasks.push_back(m_taskProcessingThreadPool.Submit(findPairs));

  // The worker lambda captures locals of this function by reference. Wait for all of the tasks
  // before collecting any result: get() rethrows a task's exception, and unwinding this frame
  // while other tasks are still running would leave them with dangling references.
  for (auto const & buildingTask : buildingTasks)
    buildingTask.wait();

  auto parentChildPairs = std::list<ParentChildPairs>{};
  for (auto & buildingTask : buildingTasks)
    parentChildPairs.emplace_back(buildingTask.get());

  return parentChildPairs;
}
// static
Node::Ptr RegionsBuilder::ChooseParent(std::vector<Node::Ptr> const & nodesInAreaOrder,
std::vector<Node::Ptr>::const_reverse_iterator forItem,
CountrySpecifier const & countrySpecifier) const
CountrySpecifier const & countrySpecifier)
{
auto const & node = *forItem;
auto const & region = node->GetData();
@ -200,9 +254,10 @@ Node::Ptr RegionsBuilder::ChooseParent(std::vector<Node::Ptr> const & nodesInAre
return parent;
}
// static
std::vector<Node::Ptr>::const_reverse_iterator RegionsBuilder::FindAreaLowerBoundRely(
std::vector<Node::Ptr> const & nodesInAreaOrder,
std::vector<Node::Ptr>::const_reverse_iterator forItem) const
std::vector<Node::Ptr>::const_reverse_iterator forItem)
{
auto const & region = (*forItem)->GetData();

View file

@ -5,7 +5,10 @@
#include "generator/regions/node.hpp"
#include "generator/regions/region.hpp"
#include "base/thread_pool_computational.hpp"
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <string>
@ -26,8 +29,9 @@ public:
using StringsList = std::vector<std::string>;
using CountryFn = std::function<void(std::string const &, Node::PtrList const &)>;
explicit RegionsBuilder(Regions && regions, PlacePointsMap && placePointsMap,
unsigned int threadsCount = 1);
explicit RegionsBuilder(
Regions && regions, PlacePointsMap && placePointsMap,
base::thread_pool::computational::ThreadPool & taskProcessingThreadPool);
Regions const & GetCountriesOuters() const;
StringsList GetCountryInternationalNames() const;
@ -43,6 +47,8 @@ public:
private:
static constexpr double kAreaRelativeErrorPercent = 0.1;
using ParentChildPairs = std::vector<std::pair<Node::Ptr, Node::Ptr>>;
void MoveLabelPlacePoints(PlacePointsMap & placePointsMap, Regions & regions);
Regions FormRegionsInAreaOrder(Regions && regions);
Regions ExtractCountriesOuters(Regions & regions);
@ -59,12 +65,14 @@ private:
Region const & countryOuter, Regions const & regionsInAreaOrder,
boost::optional<std::string> const & countryCode,
CountrySpecifier const & countrySpecifier) const;
Node::Ptr ChooseParent(std::vector<Node::Ptr> const & nodesInAreaOrder,
std::vector<Node::Ptr>::const_reverse_iterator forItem,
CountrySpecifier const & countrySpecifier) const;
std::vector<Node::Ptr>::const_reverse_iterator FindAreaLowerBoundRely(
std::list<ParentChildPairs> FindParentChildPairs(
std::vector<Node::Ptr> const & nodes, CountrySpecifier const & countrySpecifier) const;
static Node::Ptr ChooseParent(std::vector<Node::Ptr> const & nodesInAreaOrder,
std::vector<Node::Ptr>::const_reverse_iterator forItem,
CountrySpecifier const & countrySpecifier);
static std::vector<Node::Ptr>::const_reverse_iterator FindAreaLowerBoundRely(
std::vector<Node::Ptr> const & nodesInAreaOrder,
std::vector<Node::Ptr>::const_reverse_iterator forItem) const;
std::vector<Node::Ptr>::const_reverse_iterator forItem);
static void InsertIntoSubtree(Node::Ptr & subtree, Node::Ptr && newNode,
CountrySpecifier const & countrySpecifier);
@ -72,6 +80,7 @@ private:
Regions m_regionsInAreaOrder;
PlacePointsMap m_placePointsMap;
unsigned int m_threadsCount;
base::thread_pool::computational::ThreadPool & m_taskProcessingThreadPool;
};
} // namespace regions
} // namespace generator