[generator:region] Speed up locality index building: parallel region object covering

This commit is contained in:
Anatoly Serdtcev 2019-12-26 19:14:10 +03:00 committed by LaGrunge
parent 895b474dd1
commit 346fceb020
8 changed files with 210 additions and 20 deletions

View file

@ -107,7 +107,8 @@ public:
template <typename F, typename... Args>
void PerformParallelWorks(F && func, size_t workersCountHint)
{
size_t const workersCount = std::min(std::max(size_t{1}, workersCountHint), Size());
size_t const workersCount = std::min(std::max(1u, static_cast<unsigned int>(workersCountHint)),
Size());
std::vector<std::future<void>> workers{};
workers.reserve(workersCount);
@ -143,7 +144,7 @@ public:
m_joiner.Join();
}
size_t Size() const noexcept { return m_threads.size(); }
unsigned int Size() const noexcept { return static_cast<unsigned int>(m_threads.size()); }
private:
void Worker()

View file

@ -233,9 +233,9 @@ bool GenerateRegionsIndex(std::string const & outPath, std::string const & featu
unsigned int threadsCount)
{
base::thread_pool::computational::ThreadPool threadPool{threadsCount};
auto const featuresFilter = [](FeatureBuilder & fb) { return fb.IsArea(); };
indexer::RegionsIndexBuilder indexBuilder;
indexer::RegionsIndexBuilder indexBuilder{threadPool};
auto const featuresFilter = [](FeatureBuilder & fb) { return fb.IsArea(); };
covering::ObjectsCovering objectsCovering;
CoverFeatures(featuresFile, featuresFilter, indexBuilder, threadsCount,
1 /* chunkFeaturesCount */, threadPool, objectsCovering);
@ -255,7 +255,7 @@ bool GenerateGeoObjectsIndex(
{
base::thread_pool::computational::ThreadPool threadPool{threadsCount};
covering::ObjectsCovering objectsCovering;
indexer::GeoObjectsIndexBuilder indexBuilder;
indexer::GeoObjectsIndexBuilder indexBuilder{threadPool};
set<uint64_t> nodeIds;
if (nodesFile && !ParseNodes(*nodesFile, nodeIds))

View file

@ -174,7 +174,7 @@ std::list<RegionsBuilder::ParentChildPairs> RegionsBuilder::FindParentChildPairs
{
constexpr auto nodesCountPerTask = 1000;
auto const tasksCount = std::min(std::max(size_t{1}, nodes.size() / nodesCountPerTask),
m_taskProcessingThreadPool.Size());
size_t{m_taskProcessingThreadPool.Size()});
CHECK(!nodes.empty(), ());
std::atomic_size_t unprocessedIndex{1};

View file

@ -9,6 +9,7 @@
#include "base/buffer_vector.hpp"
#include "base/logging.hpp"
#include "base/math.hpp"
#include "base/thread_pool_computational.hpp"
#include <cmath>
#include <cstddef>
@ -116,4 +117,146 @@ void CoverObject(IntersectF const & intersect, uint64_t cellPenaltyArea, CellIdC
out.push_back(subdiv[i]);
}
}
// ObjectCoverer -----------------------------------------------------------------------------------
// Builds a covering of an object by cells of a quad-tree, descending breadth-first from
// CellId::Root() down to level |cellDepth| - 1. One level of the descent may be processed
// in parallel on |threadPool|.
//
// |IntersectionInspector| is a callable that takes a CellId and returns a CellObjectIntersection.
// The coverer keeps references to the inspector and to the thread pool, so both must outlive it.
template <typename CellId, typename IntersectionInspector>
class ObjectCoverer
{
public:
  ObjectCoverer(IntersectionInspector const & intersectionInspector, int cellDepth,
                base::thread_pool::computational::ThreadPool & threadPool)
    : m_intersectionInspector{intersectionInspector}
    , m_cellDepth{cellDepth}
    , m_threadPool{threadPool}
  { }

  // Returns the cells covering the object, starting the descent from the root cell.
  std::vector<CellId> Cover() const
  {
    std::vector<CellId> result;
    auto covering = std::vector<ObjectCovering>{{result, CellId::Root(), {}}};
    Cover(0, covering);
    return result;
  }

private:
  // Covering state of a single cell.
  struct ObjectCovering
  {
    // Where the final covering of |m_cell| is forwarded to: the parent's |m_subCells|,
    // or the overall result for the root cell.
    std::vector<CellId> & m_out;
    CellId m_cell;
    // Covering of |m_cell| collected from its descendants.
    std::vector<CellId> m_subCells;
  };

  // Covers every cell of |levelCovering| (all of them are at |level|) and forwards
  // the per-cell results to their |m_out|.
  void Cover(int level, std::vector<ObjectCovering> & levelCovering) const
  {
    auto const uptoLevel = m_cellDepth - 1;
    if (level < uptoLevel)
      CoverBySubCells(level, levelCovering);

    ForwardLevelCoveringToOut(level, levelCovering);
  }

  // Pushes the covering of each cell into its |m_out|: the cell itself when it has no
  // sub-cells or when all four direct children are present, the collected sub-cells otherwise.
  void ForwardLevelCoveringToOut(int level, std::vector<ObjectCovering> & levelCovering) const
  {
    for (auto & cellCovering : levelCovering)
    {
      auto & out = cellCovering.m_out;
      auto const & subCells = cellCovering.m_subCells;

      // Cells of |levelCovering| are at |level|, so their direct children are at |level| + 1.
      bool const allSubcellsAreChildren =
          std::all_of(subCells.begin(), subCells.end(),
                      [level](auto const & subCell) { return subCell.Level() == level + 1; });

      if (subCells.empty())
        out.push_back(cellCovering.m_cell);
      else if (allSubcellsAreChildren && subCells.size() == 4)
        out.push_back(cellCovering.m_cell);  // Four direct children collapse into the parent.
      else
        out.insert(out.end(), subCells.begin(), subCells.end());
    }
  }

  // Descends one level. Work is parallelized only at |m_parallelingLevel| and only when
  // there are enough cells on that level.
  void CoverBySubCells(int level, std::vector<ObjectCovering> & levelCovering) const
  {
    if (level == m_parallelingLevel && levelCovering.size() / m_tasksPerThread > 1)
      CoverParallelBySubCells(level, levelCovering);
    else
      CoverSequencedBySubCells(level, levelCovering.begin(), levelCovering.end());
  }

  // Each worker repeatedly takes the next unprocessed cell of |levelCovering| and covers
  // its whole subtree; a worker writes only to the |m_subCells| of the cell it took.
  void CoverParallelBySubCells(int level, std::vector<ObjectCovering> & levelCovering) const
  {
    std::atomic_size_t unprocessedIndex{0};
    // NOTE(review): the lambda captures locals by reference, so this relies on
    // PerformParallelWorks() returning only after all workers have finished - confirm.
    auto processor = [&]() {
      while (true)
      {
        auto const i = unprocessedIndex++;
        if (i >= levelCovering.size())
          return;

        CoverSequencedBySubCells(level, levelCovering.begin() + i, levelCovering.begin() + i + 1);
      }
    };

    // Passed to the pool as a hint for the number of workers.
    auto const tasksCount = levelCovering.size() / m_tasksPerThread;
    m_threadPool.PerformParallelWorks(processor, tasksCount);
  }

  // Inspects the four children of every cell in [levelCoveringBegin, levelCoveringEnd):
  // children inside the object and children at the deepest level go directly to the
  // parent's |m_subCells|, the other intersecting children are covered recursively.
  template <typename Iter>
  void CoverSequencedBySubCells(int level, Iter levelCoveringBegin, Iter levelCoveringEnd) const
  {
    auto const childrenLevel = level + 1;

    auto childrenLevelCovering = std::vector<ObjectCovering>{};
    childrenLevelCovering.reserve(std::distance(levelCoveringBegin, levelCoveringEnd));
    for (auto cellCovering = levelCoveringBegin; cellCovering != levelCoveringEnd; ++cellCovering)
    {
      auto & cell = cellCovering->m_cell;
      auto & subCells = cellCovering->m_subCells;

      for (uint8_t i = 0; i < 4; ++i)
      {
        auto childCell = cell.Child(i);

        CellObjectIntersection const intersection = m_intersectionInspector(childCell);

        if (intersection == CELL_OBJECT_NO_INTERSECTION)
          continue;

        if (intersection == CELL_INSIDE_OBJECT)
        {
          subCells.push_back(childCell);
          continue;
        }

        if (childrenLevel == m_cellDepth - 1)
          subCells.push_back(childCell);
        else
          childrenLevelCovering.push_back({subCells, childCell, {}});
      }
    }

    if (!childrenLevelCovering.empty())
      Cover(childrenLevel, childrenLevelCovering);
  }

  IntersectionInspector const & m_intersectionInspector;
  int m_cellDepth;
  base::thread_pool::computational::ThreadPool & m_threadPool;
  // |m_parallelingLevel| is the level at which parallelization is considered.
  // A cell at this level has up to 87380 (~100'000) descendant cells
  // (4 + 4^2 + ... + 4^8 when |m_cellDepth| >= 9); this number is taken as the
  // complexity of one task unit.
  int const m_parallelingLevel{m_cellDepth - std::min(m_cellDepth, 9)};
  unsigned const m_tasksPerThread = 10;  // ~1'000'000 == 10 * ~100'000 (see |m_parallelingLevel|)
};
template <class CellId, typename IntersectF>
std::vector<CellId> CoverObject(
IntersectF const & intersect, int cellDepth,
base::thread_pool::computational::ThreadPool & threadPool)
{
ObjectCoverer<CellId, IntersectF> coverer{intersect, cellDepth, threadPool};
return coverer.Cover();
}
} // namespace covering

View file

@ -12,6 +12,7 @@
#include "base/logging.hpp"
#include "base/macros.hpp"
#include "base/scope_guard.hpp"
#include "base/thread_pool_computational.hpp"
#include "defines.hpp"
@ -34,13 +35,17 @@ template <typename BuilderSpec>
class CoveringIndexBuilder
{
public:
CoveringIndexBuilder(base::thread_pool::computational::ThreadPool & threadPool)
: m_threadPool{threadPool}
{ }
void Cover(CoveredObject const & coveredObject, covering::ObjectsCovering & covering) const
{
static auto const cellDepth =
covering::GetCodingDepth<BuilderSpec::kDepthLevels>(scales::GetUpperScale());
auto const id = coveredObject.GetStoredId();
auto && cells = m_builderSpec.Cover(coveredObject, cellDepth);
auto && cells = m_builderSpec.Cover(coveredObject, cellDepth, m_threadPool);
for (auto const & cell : cells)
covering.emplace_back(cell, id);
}
@ -83,6 +88,7 @@ public:
private:
BuilderSpec m_builderSpec;
base::thread_pool::computational::ThreadPool & m_threadPool;
};
struct RegionsIndexBuilderSpec
@ -90,9 +96,10 @@ struct RegionsIndexBuilderSpec
static constexpr int kDepthLevels = kRegionsDepthLevels;
static constexpr auto const & kIndexFileTag = REGIONS_INDEX_FILE_TAG;
std::vector<int64_t> Cover(indexer::CoveredObject const & o, int cellDepth) const
std::vector<int64_t> Cover(indexer::CoveredObject const & o, int cellDepth,
base::thread_pool::computational::ThreadPool & threadPool) const
{
return covering::CoverRegion(o, cellDepth);
return covering::CoverRegion(o, cellDepth, threadPool);
}
};
@ -101,7 +108,8 @@ struct GeoObjectsIndexBuilderSpec
static constexpr int kDepthLevels = kGeoObjectsDepthLevels;
static constexpr auto const & kIndexFileTag = GEO_OBJECTS_INDEX_FILE_TAG;
std::vector<int64_t> Cover(indexer::CoveredObject const & o, int cellDepth) const
std::vector<int64_t> Cover(indexer::CoveredObject const & o, int cellDepth,
base::thread_pool::computational::ThreadPool & /* threadPool */) const
{
return covering::CoverGeoObject(o, cellDepth);
}

View file

@ -125,9 +125,9 @@ void GetIntersection(FeatureType & f, FeatureIntersector<DEPTH_LEVELS> & fIsect)
f.GetLimitRect(scale).IsValid(), (f.DebugString(scale)));
}
template <int DEPTH_LEVELS>
vector<int64_t> CoverIntersection(FeatureIntersector<DEPTH_LEVELS> const & fIsect, int cellDepth,
uint64_t cellPenaltyArea)
template <int DEPTH_LEVELS, typename Cover>
vector<int64_t> CoverIntersection(
Cover && cover, FeatureIntersector<DEPTH_LEVELS> const & fIsect, int cellDepth)
{
if (fIsect.m_trg.empty() && fIsect.m_polyline.size() == 1)
{
@ -138,9 +138,7 @@ vector<int64_t> CoverIntersection(FeatureIntersector<DEPTH_LEVELS> const & fIsec
.ToInt64(cellDepth));
}
vector<m2::CellId<DEPTH_LEVELS>> cells;
covering::CoverObject(fIsect, cellPenaltyArea, cells, cellDepth,
m2::CellId<DEPTH_LEVELS>::Root());
auto && cells = cover(fIsect, cellDepth);
vector<int64_t> res(cells.size());
for (size_t i = 0; i < cells.size(); ++i)
@ -149,6 +147,32 @@ vector<int64_t> CoverIntersection(FeatureIntersector<DEPTH_LEVELS> const & fIsec
return res;
}
// Covers |fIsect| with the penalty-based covering::CoverObject(), starting from the root
// cell, and delegates the rest of the work to the generic CoverIntersection().
template <int DEPTH_LEVELS>
vector<int64_t> CoverIntersection(
    FeatureIntersector<DEPTH_LEVELS> const & fIsect, int cellDepth, uint64_t cellPenaltyArea)
{
  using CellId = m2::CellId<DEPTH_LEVELS>;

  auto penaltyCoverer = [cellPenaltyArea](auto const & intersector, int depth) {
    vector<CellId> covered;
    covering::CoverObject(intersector, cellPenaltyArea, covered, depth, CellId::Root());
    return covered;
  };

  return CoverIntersection(penaltyCoverer, fIsect, cellDepth);
}
// Covers |fIsect| with the thread-pool based covering::CoverObject() and delegates the
// rest of the work to the generic CoverIntersection().
template <int DEPTH_LEVELS>
vector<int64_t> CoverIntersection(
    FeatureIntersector<DEPTH_LEVELS> const & fIsect, int cellDepth,
    base::thread_pool::computational::ThreadPool & threadPool)
{
  using CellId = m2::CellId<DEPTH_LEVELS>;

  // Only |threadPool| is needed inside the callback; it is held by reference.
  auto parallelCoverer = [&threadPool](auto const & intersector, int depth) {
    return covering::CoverObject<CellId>(intersector, depth, threadPool);
  };

  return CoverIntersection(parallelCoverer, fIsect, cellDepth);
}
template <int DEPTH_LEVELS>
vector<int64_t> Cover(indexer::CoveredObject const & o, int cellDepth)
{
@ -157,6 +181,16 @@ vector<int64_t> Cover(indexer::CoveredObject const & o, int cellDepth)
o.ForEachTriangle(fIsect);
return CoverIntersection(fIsect, cellDepth, 0 /* cellPenaltyArea */);
}
// Feeds the points and then the triangles of |o| into a FeatureIntersector and covers
// the collected geometry using |threadPool|.
template <int DEPTH_LEVELS>
vector<int64_t> Cover(indexer::CoveredObject const & o, int cellDepth,
                      base::thread_pool::computational::ThreadPool & threadPool)
{
  FeatureIntersector<DEPTH_LEVELS> intersector;

  // Collect the geometry: points first, triangles second.
  o.ForEachPoint(intersector);
  o.ForEachTriangle(intersector);

  return CoverIntersection(intersector, cellDepth, threadPool);
}
} // namespace
namespace covering
@ -173,9 +207,10 @@ vector<int64_t> CoverGeoObject(indexer::CoveredObject const & o, int cellDepth)
return Cover<kGeoObjectsDepthLevels>(o, cellDepth);
}
vector<int64_t> CoverRegion(indexer::CoveredObject const & o, int cellDepth)
vector<int64_t> CoverRegion(indexer::CoveredObject const & o, int cellDepth,
base::thread_pool::computational::ThreadPool & threadPool)
{
return Cover<kRegionsDepthLevels>(o, cellDepth);
return Cover<kRegionsDepthLevels>(o, cellDepth, threadPool);
}
void SortAndMergeIntervals(Intervals v, Intervals & res)

View file

@ -11,6 +11,7 @@
#include "geometry/rect2d.hpp"
#include "base/logging.hpp"
#include "base/thread_pool_computational.hpp"
#include <cstdint>
#include <set>
@ -32,7 +33,8 @@ typedef std::vector<Interval> Intervals;
// Cover feature with RectIds and return their integer representations.
std::vector<int64_t> CoverFeature(FeatureType & feature, int cellDepth, uint64_t cellPenaltyArea);
std::vector<int64_t> CoverRegion(indexer::CoveredObject const & o, int cellDepth);
std::vector<int64_t> CoverRegion(indexer::CoveredObject const & o, int cellDepth,
base::thread_pool::computational::ThreadPool & threadPool);
std::vector<int64_t> CoverGeoObject(indexer::CoveredObject const & o, int cellDepth);
// Given a vector of intervals [a, b), sort them and merge overlapping intervals.

View file

@ -26,7 +26,8 @@ namespace
template <class ObjectsVector, class Writer>
void BuildGeoObjectsIndex(ObjectsVector const & objects, Writer && writer)
{
indexer::GeoObjectsIndexBuilder indexBuilder;
base::thread_pool::computational::ThreadPool threadPool{1};
indexer::GeoObjectsIndexBuilder indexBuilder{threadPool};
covering::ObjectsCovering objectsCovering;
for (auto const & object : objects)