[geocoder] Fix for review

This commit is contained in:
Anatoly Serdtcev 2019-02-01 21:41:02 +03:00
parent 0e8477ad8b
commit 0236ae6c27
6 changed files with 39 additions and 26 deletions

View file

@ -15,6 +15,7 @@
#include <algorithm>
#include <set>
#include <thread>
#include <utility>
using namespace std;
@ -195,13 +196,18 @@ vector<Geocoder::Layer> & Geocoder::Context::GetLayers() { return m_layers; }
vector<Geocoder::Layer> const & Geocoder::Context::GetLayers() const { return m_layers; }
// Geocoder ----------------------------------------------------------------------------------------
Geocoder::Geocoder(string const & pathToJsonHierarchy)
: Geocoder(HierarchyReader(pathToJsonHierarchy).Read())
Geocoder::Geocoder(string const & pathToJsonHierarchy, unsigned int loadThreadsCount)
: Geocoder{HierarchyReader{pathToJsonHierarchy}.Read(loadThreadsCount), loadThreadsCount}
{
}
Geocoder::Geocoder(Hierarchy && hierarchy)
: m_hierarchy(move(hierarchy)), m_index(m_hierarchy)
Geocoder::Geocoder(istream & jsonHierarchy, unsigned int loadThreadsCount)
: Geocoder{HierarchyReader{jsonHierarchy}.Read(loadThreadsCount), loadThreadsCount}
{
}
Geocoder::Geocoder(Hierarchy && hierarchy, unsigned int loadThreadsCount)
: m_hierarchy(move(hierarchy)), m_index(m_hierarchy, loadThreadsCount)
{
}

View file

@ -12,6 +12,7 @@
#include <cstddef>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>
@ -118,8 +119,10 @@ public:
std::vector<Layer> m_layers;
};
explicit Geocoder(std::string const & pathToJsonHierarchy);
explicit Geocoder(Hierarchy && hierarchy);
explicit Geocoder(std::string const & pathToJsonHierarchy,
unsigned int loadThreadsCount = std::thread::hardware_concurrency());
explicit Geocoder(std::istream & jsonHierarchy,
unsigned int loadThreadsCount = std::thread::hardware_concurrency());
void ProcessQuery(std::string const & query, std::vector<Result> & results) const;
@ -128,6 +131,8 @@ public:
Index const & GetIndex() const;
private:
explicit Geocoder(Hierarchy && hierarchy, unsigned int loadThreadsCount);
void Go(Context & ctx, Type type) const;
void FillBuildingsLayer(Context & ctx, Tokens const & subquery, Layer & curLayer) const;

View file

@ -2,6 +2,7 @@
#include "base/logging.hpp"
#include <algorithm>
#include <queue>
#include <thread>
@ -41,7 +42,7 @@ HierarchyReader::HierarchyReader(string const & pathToJsonHierarchy)
MYTHROW(OpenException, ("Failed to open file", pathToJsonHierarchy));
}
HierarchyReader::HierarchyReader(std::istream & in)
HierarchyReader::HierarchyReader(istream & in)
: m_in{in}
{
}
@ -86,7 +87,7 @@ Hierarchy HierarchyReader::Read(unsigned int readersCount)
("Entries whose names do not match their most specific addresses:", stats.m_mismatchedNames));
LOG(LINFO, ("(End of stats.)"));
return Hierarchy{std::move(entries), true};
return Hierarchy{move(entries), true};
}
vector<Hierarchy::Entry> HierarchyReader::MergeEntries(vector<multimap<base::GeoObjectId, Entry>> & entryParts)
@ -104,18 +105,18 @@ vector<Hierarchy::Entry> HierarchyReader::MergeEntries(vector<multimap<base::Geo
using PartReference = reference_wrapper<multimap<base::GeoObjectId, Entry>>;
struct ReferenceGreater
{
bool operator () (PartReference const & l, PartReference const & r) const noexcept
bool operator()(PartReference const & l, PartReference const & r) const noexcept
{ return l.get() > r.get(); }
};
auto partsQueue = priority_queue<PartReference, std::vector<PartReference>, ReferenceGreater>
(entryParts.begin(), entryParts.end());
auto partsQueue = priority_queue<PartReference, vector<PartReference>, ReferenceGreater>(
entryParts.begin(), entryParts.end());
while (!partsQueue.empty())
{
auto & minPart = partsQueue.top().get();
partsQueue.pop();
while (minPart.size() && (partsQueue.empty() || minPart <= partsQueue.top().get()))
while (!minPart.empty() && (partsQueue.empty() || minPart <= partsQueue.top().get()))
{
entries.emplace_back(move(minPart.begin()->second));
minPart.erase(minPart.begin());

View file

@ -10,6 +10,7 @@
#include <map>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
namespace geocoder
@ -23,10 +24,10 @@ public:
DECLARE_EXCEPTION(OpenException, RootException);
explicit HierarchyReader(std::string const & pathToJsonHierarchy);
explicit HierarchyReader(std::istream & in);
explicit HierarchyReader(std::istream & jsonHierarchy);
// Read hierarchy file/stream concurrently in |readersCount| threads.
Hierarchy Read(unsigned int readersCount = 4);
Hierarchy Read(unsigned int readersCount = std::thread::hardware_concurrency());
private:
void ReadEntryMap(std::multimap<base::GeoObjectId, Entry> & entries, ParsingStats & stats);

View file

@ -23,17 +23,17 @@ size_t const kLogBatch = 100000;
namespace geocoder
{
Index::Index(Hierarchy const & hierarchy, unsigned int processingThreadsCount)
: m_docs(hierarchy.GetEntries()), m_processingThreadsCount{processingThreadsCount}
Index::Index(Hierarchy const & hierarchy, unsigned int loadThreadsCount)
: m_docs(hierarchy.GetEntries())
{
if (auto hardwareConcurrency = thread::hardware_concurrency())
m_processingThreadsCount = min(hardwareConcurrency, m_processingThreadsCount);
m_processingThreadsCount = max(1U, m_processingThreadsCount);
loadThreadsCount = min(hardwareConcurrency, loadThreadsCount);
loadThreadsCount = max(1U, loadThreadsCount);
LOG(LINFO, ("Indexing hierarchy entries..."));
AddEntries();
LOG(LINFO, ("Indexing houses..."));
AddHouses();
AddHouses(loadThreadsCount);
}
Index::Doc const & Index::GetDoc(DocId const id) const
@ -94,12 +94,12 @@ void Index::AddStreet(DocId const & docId, Index::Doc const & doc)
}
}
void Index::AddHouses()
void Index::AddHouses(unsigned int loadThreadsCount)
{
atomic<size_t> numIndexed{0};
std::mutex mutex;
mutex mutex;
vector<thread> threads(m_processingThreadsCount);
vector<thread> threads(loadThreadsCount);
CHECK_GREATER(threads.size(), 0, ());
for (size_t t = 0; t < threads.size(); ++t)

View file

@ -6,6 +6,7 @@
#include <cstdint>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
@ -20,7 +21,8 @@ public:
// that the index was constructed from.
using DocId = std::vector<Doc>::size_type;
explicit Index(Hierarchy const & hierarchy, unsigned int processingThreadsCount = 4);
explicit Index(Hierarchy const & hierarchy,
unsigned int loadThreadsCount = std::thread::hardware_concurrency());
Doc const & GetDoc(DocId const id) const;
@ -66,7 +68,7 @@ private:
void AddStreet(DocId const & docId, Doc const & e);
// Fills the |m_buildingsOnStreet| field.
void AddHouses();
void AddHouses(unsigned int loadThreadsCount);
std::vector<Doc> const & m_docs;
@ -74,7 +76,5 @@ private:
// Lists of houses grouped by the streets they belong to.
std::unordered_map<DocId, std::vector<DocId>> m_buildingsOnStreet;
unsigned int m_processingThreadsCount;
};
} // namespace geocoder