[generator:geo_objects] Optimize geo objects generator: eliminate geo objects loading

This commit is contained in:
Anatoly Serdtcev 2019-06-17 15:25:43 +03:00 committed by Maksim Andrianov
parent 06a69067f3
commit 5c9c012602
5 changed files with 47 additions and 43 deletions

View file

@ -7,8 +7,8 @@ namespace generator
namespace geo_objects
{
// Builds the getter from a geo objects index (moved into m_index) and a key-value storage.
// This hunk lists two variants of the second parameter and of the initializer list, one after
// the other: first `KeyValueStorage &&` with m_storage{std::move(kvStorage)}, then
// `KeyValueStorage const &` with m_storage{kvStorage}.
// NOTE(review): in the const-reference variant the class declares m_storage as
// `KeyValueStorage const &`, so the caller presumably has to keep |kvStorage| alive for as long
// as this object is used — confirm at the call site.
GeoObjectInfoGetter::GeoObjectInfoGetter(indexer::GeoObjectsIndex<IndexReader> && index,
KeyValueStorage && kvStorage)
: m_index{std::move(index)}, m_storage{std::move(kvStorage)}
KeyValueStorage const & kvStorage)
: m_index{std::move(index)}, m_storage{kvStorage}
{ }
std::vector<base::GeoObjectId> GeoObjectInfoGetter::SearchObjectsInIndex(m2::PointD const & point) const

View file

@ -28,7 +28,7 @@ class GeoObjectInfoGetter
public:
using IndexReader = ReaderPtr<Reader>;
GeoObjectInfoGetter(indexer::GeoObjectsIndex<IndexReader> && index, KeyValueStorage && kvStorage);
GeoObjectInfoGetter(indexer::GeoObjectsIndex<IndexReader> && index, KeyValueStorage const & kvStorage);
template <typename Predicate>
std::shared_ptr<JsonValue> Find(m2::PointD const & point, Predicate && pred) const;
@ -37,7 +37,7 @@ private:
std::vector<base::GeoObjectId> SearchObjectsInIndex(m2::PointD const & point) const;
indexer::GeoObjectsIndex<IndexReader> m_index;
KeyValueStorage m_storage;
KeyValueStorage const & m_storage;
};
template <typename Predicate>

View file

@ -32,6 +32,7 @@
#include "platform/platform.hpp"
#include <boost/optional.hpp>
#include "3party/jansson/myjansson.hpp"
using namespace feature;
@ -90,31 +91,19 @@ base::JSONPtr AddAddress(FeatureBuilder const & fb, KeyValue const & regionKeyVa
return result;
}
// Serializes the JSON produced by AddAddress(fb, keyValue) with json_dumps() (compact output,
// default real precision of the regions JSON policy) and hands the resulting C string to a
// unique_ptr that releases it through JSONFreeDeleter.
std::unique_ptr<char, JSONFreeDeleter>
MakeGeoObjectValueWithAddress(FeatureBuilder const & fb, KeyValue const & keyValue)
{
  auto const dumpFlags = JSON_REAL_PRECISION(regions::JsonPolicy::kDefaultPrecision) | JSON_COMPACT;
  auto const addressedJson = AddAddress(fb, keyValue);
  return std::unique_ptr<char, JSONFreeDeleter>{json_dumps(addressedJson.get(), dumpFlags)};
}
// Delegates to geoObjectInfoGetter.Find(), passing the feature's key point as the search point
// and HouseHasAddress as the predicate; returns whatever Find() returns.
std::shared_ptr<JsonValue>
FindHousePoi(FeatureBuilder const & fb, GeoObjectInfoGetter const & geoObjectInfoGetter)
{
  auto const & keyPoint = fb.GetKeyPoint();
  return geoObjectInfoGetter.Find(keyPoint, HouseHasAddress);
}
// Makes a deep copy of |json|, sets the "name" entry of its "properties" object from
// fb.GetName() and calls UpdateCoordinates() with the feature's key point.
// This hunk lists two variants of the function one after the other: one declared as returning
// std::unique_ptr<char, JSONFreeDeleter> (the json_dumps() string of the copy) and one declared
// as returning base::JSONPtr (the copy itself).
// NOTE(review): json_object_get() returns null when the key is missing and |properties| is
// dereferenced without a check — presumably every stored object has "properties"; confirm.
std::unique_ptr<char, JSONFreeDeleter>
MakeGeoObjectValueWithoutAddress(FeatureBuilder const & fb, JsonValue const & json)
base::JSONPtr MakeGeoObjectValueWithoutAddress(FeatureBuilder const & fb, JsonValue const & json)
{
// The name and coordinates are changed on the copy; |json| itself is taken by const reference.
auto jsonWithAddress = json.MakeDeepCopyJson();
auto properties = json_object_get(jsonWithAddress.get(), "properties");
ToJSONObject(*properties, "name", fb.GetName());
UpdateCoordinates(fb.GetKeyPoint(), jsonWithAddress);
// Variant 1: serialize compactly with the regions policy's default real precision.
auto const cstr = json_dumps(jsonWithAddress.get(),
JSON_REAL_PRECISION(generator::regions::JsonPolicy::kDefaultPrecision) | JSON_COMPACT);
return std::unique_ptr<char, JSONFreeDeleter>(cstr);
// Variant 2: return the JSON object and leave serialization to the caller.
return jsonWithAddress;
}
boost::optional<indexer::GeoObjectsIndex<IndexReader>>
@ -183,14 +172,14 @@ void FilterAddresslessByCountryAndRepackMwm(std::string const & pathInGeoObjects
LOG(LERROR, ("Error: Cannot rename", path, "to", pathInGeoObjectsTmpMwm));
}
void BuildGeoObjectsWithAddresses(regions::RegionInfoGetter const & regionInfoGetter,
void BuildGeoObjectsWithAddresses(KeyValueStorage & geoObjectsKv,
regions::RegionInfoGetter const & regionInfoGetter,
std::string const & pathInGeoObjectsTmpMwm,
std::ostream & streamGeoObjectsKv,
bool verbose, size_t threadsCount)
{
size_t countGeoObjects = 0;
std::mutex kvStreamMutex;
std::mutex updateMutex;
auto const concurrentTransformer = [&](FeatureBuilder & fb, uint64_t /* currPos */) {
if (!GeoObjectsFilter::IsBuilding(fb) && !GeoObjectsFilter::HasHouse(fb))
return;
@ -200,10 +189,12 @@ void BuildGeoObjectsWithAddresses(regions::RegionInfoGetter const & regionInfoGe
return;
auto const id = fb.GetMostGenericOsmId().GetEncodedId();
auto const value = MakeGeoObjectValueWithAddress(fb, *regionKeyValue);
auto jsonValue = AddAddress(fb, *regionKeyValue);
auto json = KeyValueStorage::JsonString{json_dumps(jsonValue.get(),
JSON_REAL_PRECISION(regions::JsonPolicy::kDefaultPrecision) | JSON_COMPACT)};
std::lock_guard<std::mutex> lock(kvStreamMutex);
streamGeoObjectsKv << static_cast<int64_t>(id) << " " << value.get() << "\n";
std::lock_guard<std::mutex> lock(updateMutex);
geoObjectsKv.Insert(id, std::move(json), std::move(jsonValue));
++countGeoObjects;
};
@ -212,15 +203,15 @@ void BuildGeoObjectsWithAddresses(regions::RegionInfoGetter const & regionInfoGe
LOG(LINFO, ("Added ", countGeoObjects, "geo objects with addresses."));
}
void BuildGeoObjectsWithoutAddresses(GeoObjectInfoGetter const & geoObjectInfoGetter,
void BuildGeoObjectsWithoutAddresses(KeyValueStorage & geoObjectsKv,
GeoObjectInfoGetter const & geoObjectInfoGetter,
std::string const & pathInGeoObjectsTmpMwm,
std::ostream & streamGeoObjectsKv,
std::ostream & streamIdsWithoutAddress,
bool verbose, size_t threadsCount)
{
size_t countGeoObjects = 0;
std::mutex kvStreamMutex;
std::mutex updateMutex;
auto const concurrentTransformer = [&](FeatureBuilder & fb, uint64_t /* currPos */) {
if (!GeoObjectsFilter::IsPoi(fb))
return;
@ -232,10 +223,12 @@ void BuildGeoObjectsWithoutAddresses(GeoObjectInfoGetter const & geoObjectInfoGe
return;
auto const id = static_cast<int64_t>(fb.GetMostGenericOsmId().GetEncodedId());
auto const value = MakeGeoObjectValueWithoutAddress(fb, *house);
auto jsonValue = MakeGeoObjectValueWithoutAddress(fb, *house);
auto json = KeyValueStorage::JsonString{json_dumps(jsonValue.get(),
JSON_REAL_PRECISION(regions::JsonPolicy::kDefaultPrecision) | JSON_COMPACT)};
std::lock_guard<std::mutex> lock(kvStreamMutex);
streamGeoObjectsKv << id << " " << value.get() << "\n";
std::lock_guard<std::mutex> lock(updateMutex);
geoObjectsKv.Insert(id, std::move(json), std::move(jsonValue));
streamIdsWithoutAddress << id << "\n";
++countGeoObjects;
};
@ -274,25 +267,21 @@ bool GenerateGeoObjects(std::string const & pathInRegionsIndex,
auto geoObjectIndexFuture = std::async(std::launch::async, MakeTempGeoObjectsIndex,
pathInGeoObjectsTmpMwm);
std::ofstream streamGeoObjectsKv(pathOutGeoObjectsKv);
BuildGeoObjectsWithAddresses(regionInfoGetter, pathInGeoObjectsTmpMwm, streamGeoObjectsKv,
Platform().RemoveFileIfExists(pathOutGeoObjectsKv);
KeyValueStorage geoObjectsKv(pathOutGeoObjectsKv);
BuildGeoObjectsWithAddresses(geoObjectsKv, regionInfoGetter, pathInGeoObjectsTmpMwm,
verbose, threadsCount);
LOG(LINFO, ("Geo objects with addresses were built."));
auto const pred = [](KeyValue const & kv) { return HouseHasAddress(*kv.second); };
KeyValueStorage geoObjectsKv(pathOutGeoObjectsKv, pred);
LOG(LINFO, ("Size of geo objects key-value storage:", geoObjectsKv.Size()));
auto geoObjectIndex = geoObjectIndexFuture.get();
LOG(LINFO, ("Index was built."));
if (!geoObjectIndex)
return false;
GeoObjectInfoGetter geoObjectInfoGetter{std::move(*geoObjectIndex), std::move(geoObjectsKv)};
GeoObjectInfoGetter geoObjectInfoGetter{std::move(*geoObjectIndex), geoObjectsKv};
std::ofstream streamIdsWithoutAddress(pathOutIdsWithoutAddress);
BuildGeoObjectsWithoutAddresses(geoObjectInfoGetter, pathInGeoObjectsTmpMwm,
streamGeoObjectsKv, streamIdsWithoutAddress,
BuildGeoObjectsWithoutAddresses(geoObjectsKv, geoObjectInfoGetter, pathInGeoObjectsTmpMwm,
streamIdsWithoutAddress,
verbose, threadsCount);
LOG(LINFO, ("Geo objects without addresses were built."));
LOG(LINFO, ("Geo objects key-value storage saved to", pathOutGeoObjectsKv));

View file

@ -9,14 +9,14 @@ namespace generator
{
KeyValueStorage::KeyValueStorage(std::string const & path,
std::function<bool(KeyValue const &)> const & pred)
: m_storage{path, std::ios_base::in | std::ios_base::out | std::ios_base::app}
{
std::fstream stream{path};
if (!stream)
if (!m_storage)
MYTHROW(Reader::OpenException, ("Failed to open file", path));
std::string line;
std::streamoff lineNumber = 0;
while (std::getline(stream, line))
while (std::getline(m_storage, line))
{
++lineNumber;
@ -26,6 +26,8 @@ KeyValueStorage::KeyValueStorage(std::string const & path,
m_values.insert(kv);
}
m_storage.clear();
}
// static
@ -58,6 +60,15 @@ bool KeyValueStorage::ParseKeyValueLine(std::string const & line, KeyValue & res
return true;
}
// Registers |value| under |key| in the in-memory map and appends the line
// "<key> <valueJson>\n" to the backing stream.
//
// |valueJson| is the serialized form of |value| (a C string owned by the JsonString pointer).
// If |key| is already present, nothing is changed: the existing entry is kept and no line is
// written. If |valueJson| is null, the call is a no-op as well, so the map and the stream never
// diverge.
void KeyValueStorage::Insert(uint64_t key, JsonString && valueJson, base::JSONPtr && value)
{
  // json_dumps() returns null on failure. Passing a null char pointer to operator<< is undefined
  // behaviour and in practice can leave the stream in a failed state, which would silently drop
  // every record written afterwards. Reject such input before touching any state.
  if (!valueJson)
    return;

  auto const emplace = m_values.emplace(key, std::make_shared<JsonValue>(std::move(value)));
  if (!emplace.second) // it is ok for OSM relation with several outer borders
    return;

  // The key is written as a signed 64-bit integer.
  m_storage << static_cast<int64_t>(key) << " " << valueJson.get() << "\n";
}
std::shared_ptr<JsonValue> KeyValueStorage::Find(uint64_t key) const
{
auto const it = m_values.find(key);

View file

@ -39,6 +39,8 @@ using KeyValue = std::pair<uint64_t, std::shared_ptr<JsonValue>>;
class KeyValueStorage
{
public:
using JsonString = std::unique_ptr<char, JSONFreeDeleter>;
explicit KeyValueStorage(std::string const & kvPath,
std::function<bool(KeyValue const &)> const & pred = DefaultPred);
@ -48,6 +50,7 @@ public:
KeyValueStorage(KeyValueStorage const &) = delete;
KeyValueStorage & operator=(KeyValueStorage const &) = delete;
void Insert(uint64_t key, JsonString && valueJson, base::JSONPtr && value);
std::shared_ptr<JsonValue> Find(uint64_t key) const;
size_t Size() const;
@ -55,6 +58,7 @@ private:
static bool DefaultPred(KeyValue const &) { return true; }
static bool ParseKeyValueLine(std::string const & line, KeyValue & res, std::streamoff lineNumber);
std::fstream m_storage;
std::unordered_map<uint64_t, std::shared_ptr<JsonValue>> m_values;
};
} // namespace generator