[generator:geo_objects] Refactor: KeyValueMem -> KeyValueStorage, remove KeyValueMap

This commit is contained in:
Anatoly Serdtcev 2019-04-08 10:59:18 +03:00
parent a685936cae
commit 798c8302a6
10 changed files with 251 additions and 104 deletions

View file

@ -91,8 +91,12 @@ set(SRC
geometry_holder.hpp
geo_objects/geo_objects.cpp
geo_objects/geo_objects.hpp
geo_objects/geo_object_info_getter.cpp
geo_objects/geo_object_info_getter.hpp
geo_objects/key_value_storage.cpp
geo_objects/key_value_storage.hpp
geo_objects/region_info_getter.cpp
geo_objects/region_info_getter.hpp
holes.cpp
holes.hpp
intermediate_data.cpp

View file

@ -0,0 +1,22 @@
#include "generator/geo_objects/geo_object_info_getter.hpp"
#include <utility>
namespace generator
{
namespace geo_objects
{
// Builds the getter from an already loaded geo objects index and key-value storage.
// Both arguments are rvalue references and are moved into the corresponding members,
// so the caller's objects are left in a moved-from state.
GeoObjectInfoGetter::GeoObjectInfoGetter(indexer::GeoObjectsIndex<IndexReader> && index,
KeyValueStorage && kvStorage)
: m_index{std::move(index)}, m_storage{std::move(kvStorage)}
{ }
// Returns the ids of every object that the index reports for |point|, in the order
// in which the index visits them.
std::vector<base::GeoObjectId> GeoObjectInfoGetter::SearchObjectsInIndex(m2::PointD const & point) const
{
  std::vector<base::GeoObjectId> foundIds;
  m_index.ForEachAtPoint(
      [&foundIds](base::GeoObjectId const & id) { foundIds.emplace_back(id); }, point);
  return foundIds;
}
} // namespace geo_objects
} // namespace generator

View file

@ -0,0 +1,58 @@
#pragma once
#include "generator/geo_objects/key_value_storage.hpp"
#include "indexer/locality_index.hpp"
#include "coding/reader.hpp"
#include "geometry/point2d.hpp"
#include "base/geo_object_id.hpp"
#include <utility>
#include <vector>
#include <boost/optional.hpp>
#include "3party/jansson/myjansson.hpp"
namespace generator
{
namespace geo_objects
{
// Couples a geo objects index with the key-value storage that holds the json values of
// those objects, and answers point queries against the pair.
class GeoObjectInfoGetter
{
public:
using IndexReader = ReaderPtr<Reader>;
// Takes both |index| and |kvStorage| by rvalue reference; they are moved into the getter.
GeoObjectInfoGetter(indexer::GeoObjectsIndex<IndexReader> && index, KeyValueStorage && kvStorage);
// Returns the stored json of the first object found at |point| for which |pred| yields
// a true value, or an empty optional if there is no such object.
// |pred| is invoked with a base::Json const &.
template <typename Predicate>
boost::optional<base::Json> Find(m2::PointD const & point, Predicate && pred) const;
private:
// Collects the ids of the objects that the index reports for |point|.
std::vector<base::GeoObjectId> SearchObjectsInIndex(m2::PointD const & point) const;
indexer::GeoObjectsIndex<IndexReader> m_index;
KeyValueStorage m_storage;
};
// Walks over the objects that the index reports for |point| and returns the json value
// of the first one that is present in the storage and accepted by |pred|.
// Ids that have no value in the storage are silently skipped.
// Returns an empty optional when no object is accepted.
template <typename Predicate>
boost::optional<base::Json> GeoObjectInfoGetter::Find(m2::PointD const & point, Predicate && pred) const
{
  for (auto const & candidateId : SearchObjectsInIndex(point))
  {
    auto const found = m_storage.Find(candidateId.GetEncodedId());
    // |pred| is evaluated only for ids that actually have a stored value.
    if (found && pred(*found))
      return found;
  }

  return {};
}
} // namespace geo_objects
} // namespace generator

View file

@ -1,5 +1,9 @@
#include "generator/geo_objects/geo_objects.hpp"
#include "generator/geo_objects/geo_object_info_getter.hpp"
#include "generator/geo_objects/key_value_storage.hpp"
#include "generator/geo_objects/region_info_getter.hpp"
#include "generator/geo_objects/key_value_storage.hpp"
#include "generator/feature_builder.hpp"
@ -64,65 +68,6 @@ bool HouseHasAddress(base::Json json)
return false;
}
// Returns the ids of every object that |index| reports for the degenerate rectangle
// made of the key point of |fb|.
template <typename Index>
std::vector<base::GeoObjectId> SearchObjectsInIndex(FeatureBuilder1 const & fb, Index const & index)
{
  auto const keyPoint = fb.GetKeyPoint();
  std::vector<base::GeoObjectId> found;
  index.ForEachInRect([&found](base::GeoObjectId const & id) { found.emplace_back(id); },
                      m2::RectD(keyPoint, keyPoint));
  return found;
}
// Extracts the integer "rank" field from the "properties" object of |json|.
int GetRankFromValue(base::Json json)
{
  auto properties = json_object_get(json.get(), "properties");

  // Start from a defined value: |rank| is handed to FromJSONObject by reference and
  // returned afterwards, so it must never be read while indeterminate.
  // NOTE(review): FromJSONObject presumably throws when the field is absent - confirm.
  int rank = 0;
  FromJSONObject(properties, "rank", rank);
  return rank;
}
// Among |ids| picks the region whose value in |regionKv| has the greatest rank and
// returns it together with its encoded id. On equal ranks the earliest id wins.
// Ids that are missing from the storage, or whose value is not a json object, are
// reported with a warning and skipped. Returns an empty optional if nothing is left.
boost::optional<KeyValue> GetDeepestRegion(std::vector<base::GeoObjectId> const & ids,
                                           KeyValueStorage const & regionKv)
{
  boost::optional<KeyValue> deepest;
  int deepestRank = 0;
  for (auto const & id : ids)
  {
    auto const res = regionKv.Find(id.GetEncodedId());
    if (!res)
    {
      LOG(LWARNING, ("Id not found in region key-value storage:", id));
      continue;
    }

    base::Json const & value = *res;
    if (!json_is_object(value.get()))
    {
      LOG(LWARNING, ("Value is not a json object in region key-value storage:", id));
      continue;
    }

    int const rank = GetRankFromValue(value);
    // The first valid region is always taken; later ones only if strictly deeper.
    if (!deepest || deepestRank < rank)
    {
      deepestRank = rank;
      // KeyValue's key is uint64_t, the same type GetEncodedId() is passed as to Find(),
      // so the id is stored as is, without a detour through a signed type.
      deepest = KeyValue(id.GetEncodedId(), value);
    }
  }

  return deepest;
}
void UpdateCoordinates(m2::PointD const & point, base::Json json)
{
auto geometry = json_object_get(json.get(), "geometry");
@ -161,14 +106,6 @@ base::Json AddAddress(FeatureBuilder1 const & fb, KeyValue const & regionKeyValu
return result;
}
// Finds the deepest region that contains the key point of |fb|: the candidate ids come
// from |regionIndex| and their values from |regionKv|.
boost::optional<KeyValue>
FindRegion(FeatureBuilder1 const & fb, indexer::RegionsIndex<IndexReader> const & regionIndex,
           KeyValueStorage const & regionKv)
{
  return GetDeepestRegion(SearchObjectsInIndex(fb, regionIndex), regionKv);
}
std::unique_ptr<char, JSONFreeDeleter>
MakeGeoObjectValueWithAddress(FeatureBuilder1 const & fb, KeyValue const & keyValue)
{
@ -178,24 +115,15 @@ MakeGeoObjectValueWithAddress(FeatureBuilder1 const & fb, KeyValue const & keyVa
}
boost::optional<base::Json>
FindHousePoi(FeatureBuilder1 const & fb,
indexer::GeoObjectsIndex<IndexReader> const & geoObjectsIndex,
KeyValueStorage const & geoObjectsKv)
FindHousePoi(FeatureBuilder1 const & fb, GeoObjectInfoGetter const & geoObjectInfoGetter)
{
auto const ids = SearchObjectsInIndex(fb, geoObjectsIndex);
for (auto const & id : ids)
{
auto const house = geoObjectsKv.Find(id.GetEncodedId());
if (!house)
continue;
auto properties = json_object_get(house->get(), "properties");
auto const isBuilding = [](base::Json const & object) {
auto properties = json_object_get(object.get(), "properties");
auto address = json_object_get(properties, "address");
if (json_object_get(address, "building"))
return house;
}
return json_object_get(address, "building");
};
return {};
return geoObjectInfoGetter.Find(fb.GetKeyPoint(), isBuilding);
}
std::unique_ptr<char, JSONFreeDeleter>
@ -231,8 +159,7 @@ MakeTempGeoObjectsIndex(std::string const & pathToGeoObjectsTmpMwm)
return indexer::ReadIndex<indexer::GeoObjectsIndexBox<IndexReader>, MmapReader>(indexFile);
}
void BuildGeoObjectsWithAddresses(indexer::RegionsIndex<IndexReader> const & regionIndex,
KeyValueStorage const & regionKv,
void BuildGeoObjectsWithAddresses(RegionInfoGetter const & regionInfoGetter,
std::string const & pathInGeoObjectsTmpMwm,
std::ostream & streamGeoObjectsKv, bool)
{
@ -241,7 +168,7 @@ void BuildGeoObjectsWithAddresses(indexer::RegionsIndex<IndexReader> const & reg
if (!(IsBuilding(fb) || HasHouse(fb)))
return;
auto regionKeyValue = FindRegion(fb, regionIndex, regionKv);
auto regionKeyValue = regionInfoGetter.FindDeepest(fb.GetKeyPoint());
if (!regionKeyValue)
return;
@ -255,9 +182,8 @@ void BuildGeoObjectsWithAddresses(indexer::RegionsIndex<IndexReader> const & reg
LOG(LINFO, ("Added ", countGeoObjects, "geo objects with addresses."));
}
void BuildGeoObjectsWithoutAddresses(indexer::GeoObjectsIndex<IndexReader> const & geoObjectsIndex,
void BuildGeoObjectsWithoutAddresses(GeoObjectInfoGetter const & geoObjectInfoGetter,
std::string const & pathInGeoObjectsTmpMwm,
KeyValueStorage const & geoObjectsKv,
std::ostream & streamGeoObjectsKv,
std::ostream & streamIdsWithoutAddress, bool)
{
@ -266,7 +192,7 @@ void BuildGeoObjectsWithoutAddresses(indexer::GeoObjectsIndex<IndexReader> const
if (IsBuilding(fb) || HasHouse(fb))
return;
auto const house = FindHousePoi(fb, geoObjectsIndex, geoObjectsKv);
auto const house = FindHousePoi(fb, geoObjectInfoGetter);
if (!house)
return;
@ -285,7 +211,7 @@ void BuildGeoObjectsWithoutAddresses(indexer::GeoObjectsIndex<IndexReader> const
}
} // namespace
bool GenerateGeoObjects(std::string const & pathInRegionsIndx,
bool GenerateGeoObjects(std::string const & pathInRegionsIndex,
std::string const & pathInRegionsKv,
std::string const & pathInGeoObjectsTmpMwm,
std::string const & pathOutIdsWithoutAddress,
@ -299,27 +225,26 @@ bool GenerateGeoObjects(std::string const & pathInRegionsIndx,
auto geoObjectIndexFuture = std::async(std::launch::async, MakeTempGeoObjectsIndex,
pathInGeoObjectsTmpMwm);
auto const regionIndex =
indexer::ReadIndex<indexer::RegionsIndexBox<IndexReader>, MmapReader>(pathInRegionsIndx);
// Regions key-value storage is small (~150 Mb). We will load everything into memory.
std::fstream streamRegionKv(pathInRegionsKv);
KeyValueStorage const regionsKv(streamRegionKv);
LOG(LINFO, ("Size of regions key-value storage:", regionsKv.Size()));
RegionInfoGetter regionInfoGetter{pathInRegionsIndex, pathInRegionsKv};
LOG(LINFO, ("Size of regions key-value storage:", regionInfoGetter.GetStorage().Size()));
std::ofstream streamIdsWithoutAddress(pathOutIdsWithoutAddress);
std::ofstream streamGeoObjectsKv(pathOutGeoObjectsKv);
BuildGeoObjectsWithAddresses(regionIndex, regionsKv, pathInGeoObjectsTmpMwm,
streamGeoObjectsKv, verbose);
BuildGeoObjectsWithAddresses(regionInfoGetter, pathInGeoObjectsTmpMwm, streamGeoObjectsKv, verbose);
LOG(LINFO, ("Geo objects with addresses were built."));
std::ifstream tempStream(pathOutGeoObjectsKv);
auto const pred = [](KeyValue const & kv) { return HouseHasAddress(kv.second); };
KeyValueStorage const geoObjectsKv(tempStream, pred);
KeyValueStorage geoObjectsKv(pathOutGeoObjectsKv, pred);
LOG(LINFO, ("Size of geo objects key-value storage:", geoObjectsKv.Size()));
auto const geoObjectIndex = geoObjectIndexFuture.get();
auto geoObjectIndex = geoObjectIndexFuture.get();
LOG(LINFO, ("Index was built."));
if (!geoObjectIndex)
return false;
BuildGeoObjectsWithoutAddresses(*geoObjectIndex, pathInGeoObjectsTmpMwm, geoObjectsKv,
GeoObjectInfoGetter geoObjectInfoGetter{std::move(*geoObjectIndex), std::move(geoObjectsKv)};
BuildGeoObjectsWithoutAddresses(geoObjectInfoGetter, pathInGeoObjectsTmpMwm,
streamGeoObjectsKv, streamIdsWithoutAddress, verbose);
LOG(LINFO, ("Geo objects without addresses were built."));
LOG(LINFO, ("Geo objects key-value storage saved to", pathOutGeoObjectsKv));

View file

@ -11,7 +11,7 @@ namespace geo_objects
// In this step, we need key-value pairs for the regions and the index for the regions.
// Then we build an index for houses and finish building the key-value pairs for POIs using
// this index for houses.
bool GenerateGeoObjects(std::string const & pathInRegionsIndx,
bool GenerateGeoObjects(std::string const & pathInRegionsIndex,
std::string const & pathInRegionsKv,
std::string const & pathInGeoObjectsTmpMwm,
std::string const & pathOutIdsWithoutAddress,

View file

@ -1,13 +1,21 @@
#include "generator/geo_objects/key_value_storage.hpp"
#include "coding/reader.hpp"
#include "base/exception.hpp"
#include "base/logging.hpp"
namespace generator
{
namespace geo_objects
{
KeyValueStorage::KeyValueStorage(std::istream & stream, std::function<bool(KeyValue const &)> const & pred)
KeyValueStorage::KeyValueStorage(std::string const & path,
std::function<bool(KeyValue const &)> const & pred)
{
std::fstream stream{path};
if (!stream)
MYTHROW(Reader::OpenException, ("Failed to open file", path));
std::string line;
std::streamoff lineNumber = 0;
while (std::getline(stream, line))

View file

@ -20,9 +20,15 @@ using KeyValue = std::pair<uint64_t, base::Json>;
class KeyValueStorage
{
public:
explicit KeyValueStorage(std::istream & stream,
explicit KeyValueStorage(std::string const & kvPath,
std::function<bool(KeyValue const &)> const & pred = DefaultPred);
KeyValueStorage(KeyValueStorage &&) = default;
KeyValueStorage & operator=(KeyValueStorage &&) = default;
KeyValueStorage(KeyValueStorage const &) = delete;
KeyValueStorage & operator=(KeyValueStorage const &) = delete;
boost::optional<base::Json> Find(uint64_t key) const;
size_t Size() const;

View file

@ -0,0 +1,76 @@
#include "generator/geo_objects/region_info_getter.hpp"
#include "coding/mmap_reader.hpp"
#include "base/logging.hpp"
namespace generator
{
namespace geo_objects
{
// Reads the regions index from the file at |indexPath| through MmapReader and constructs
// the regions key-value storage from |kvPath|.
RegionInfoGetter::RegionInfoGetter(std::string const & indexPath, std::string const & kvPath)
: m_index{indexer::ReadIndex<indexer::RegionsIndexBox<IndexReader>, MmapReader>(indexPath)}
, m_storage(kvPath)
{ }
// Returns the region with the greatest rank among the regions that the index reports
// for |point|, or an empty optional if none of them can be used.
boost::optional<KeyValue> RegionInfoGetter::FindDeepest(m2::PointD const & point) const
{
  return GetDeepest(SearchObjectsInIndex(point));
}
// Returns the ids of every region that the index reports for |point|, in the order
// in which the index visits them.
std::vector<base::GeoObjectId> RegionInfoGetter::SearchObjectsInIndex(m2::PointD const & point) const
{
  std::vector<base::GeoObjectId> foundIds;
  m_index.ForEachAtPoint(
      [&foundIds](base::GeoObjectId const & id) { foundIds.emplace_back(id); }, point);
  return foundIds;
}
// Among |ids| picks the region whose stored value has the greatest rank and returns it
// together with its encoded id. On equal ranks the earliest id wins.
// Ids that are missing from the storage, or whose value is not a json object, are
// reported with a warning and skipped. Returns an empty optional if nothing is left.
boost::optional<KeyValue> RegionInfoGetter::GetDeepest(std::vector<base::GeoObjectId> const & ids) const
{
  boost::optional<KeyValue> deepest;
  int deepestRank = 0;
  for (auto const & id : ids)
  {
    auto const res = m_storage.Find(id.GetEncodedId());
    if (!res)
    {
      LOG(LWARNING, ("Id not found in region key-value storage:", id));
      continue;
    }

    base::Json const & value = *res;
    if (!json_is_object(value.get()))
    {
      LOG(LWARNING, ("Value is not a json object in region key-value storage:", id));
      continue;
    }

    int const rank = GetRank(value);
    // The first valid region is always taken; later ones only if strictly deeper.
    if (!deepest || deepestRank < rank)
    {
      deepestRank = rank;
      // KeyValue's key is uint64_t, the same type GetEncodedId() is passed as to Find(),
      // so the id is stored as is, without a detour through a signed type.
      deepest = KeyValue(id.GetEncodedId(), value);
    }
  }

  return deepest;
}
// Extracts the integer "rank" field from the "properties" object of |json|.
int RegionInfoGetter::GetRank(base::Json const & json) const
{
  json_t * properties = nullptr;
  FromJSONObject(json.get(), "properties", properties);

  // Start from a defined value: |rank| is handed to FromJSONObject by reference and
  // returned afterwards, so it must never be read while indeterminate.
  // NOTE(review): FromJSONObject presumably throws when the field is absent - confirm.
  int rank = 0;
  FromJSONObject(properties, "rank", rank);
  return rank;
}
// Gives read-only access to the regions key-value storage owned by this getter.
// The returned reference refers to a member of this object.
KeyValueStorage const & RegionInfoGetter::GetStorage() const noexcept
{
return m_storage;
}
} // namespace geo_objects
} // namespace generator

View file

@ -0,0 +1,43 @@
#pragma once
#include "generator/geo_objects/key_value_storage.hpp"
#include "indexer/locality_index.hpp"
#include "coding/reader.hpp"
#include "geometry/point2d.hpp"
#include "base/geo_object_id.hpp"
#include <string>
#include <vector>
#include <boost/optional.hpp>
#include "3party/jansson/myjansson.hpp"
namespace generator
{
namespace geo_objects
{
// Owns the regions index together with the regions key-value storage and answers
// "which region is the deepest one at this point" queries against the pair.
class RegionInfoGetter
{
public:
// Loads the index from |indexPath| and the key-value storage from |kvPath|.
RegionInfoGetter(std::string const & indexPath, std::string const & kvPath);
// Returns the id and the json value of the region with the greatest rank among the
// regions found at |point|, or an empty optional if there is no usable region.
boost::optional<KeyValue> FindDeepest(m2::PointD const & point) const;
// Read-only access to the underlying key-value storage.
KeyValueStorage const & GetStorage() const noexcept;
private:
using IndexReader = ReaderPtr<Reader>;
// Collects the ids of the regions that the index reports for |point|.
std::vector<base::GeoObjectId> SearchObjectsInIndex(m2::PointD const & point) const;
// Picks the entry with the greatest rank among |ids|; ids without a usable value are skipped.
boost::optional<KeyValue> GetDeepest(std::vector<base::GeoObjectId> const & ids) const;
// Reads the "rank" field of the "properties" object of |json|.
int GetRank(base::Json const & json) const;
indexer::RegionsIndex<IndexReader> m_index;
KeyValueStorage m_storage;
};
} // namespace geo_objects
} // namespace generator

View file

@ -40,6 +40,11 @@ public:
m_intervalIndex = std::make_unique<IntervalIndex<Reader, uint64_t>>(reader);
}
// Calls |processObject| for every object that ForEachInRect reports for the degenerate
// rectangle whose two corners are both |point|.
void ForEachAtPoint(ProcessObject const & processObject, m2::PointD const & point) const
{
  m2::RectD const pointRect(point, point);
  ForEachInRect(processObject, pointRect);
}
void ForEachInRect(ProcessObject const & processObject, m2::RectD const & rect) const
{
covering::CoveringGetter cov(rect, covering::CoveringMode::ViewportWithLowLevels);