[generator] Process locality objects in memory when building the interval index.

This commit is contained in:
Anatoly Serdtcev 2019-12-18 11:35:31 +03:00 committed by LaGrunge
parent be7a96fd58
commit b924f170d0
15 changed files with 539 additions and 593 deletions

View file

@ -58,8 +58,8 @@ set(
key_value_concurrent_writer.hpp
key_value_storage.cpp
key_value_storage.hpp
locality_sorter.cpp
locality_sorter.hpp
locality_index_generator.cpp
locality_index_generator.hpp
osm2meta.cpp
osm2meta.hpp
osm2type.cpp

View file

@ -3,10 +3,9 @@
#include "generator/data_version.hpp"
#include "generator/geo_objects/geo_objects.hpp"
#include "generator/locality_sorter.hpp"
#include "generator/locality_index_generator.hpp"
#include "indexer/locality_index.hpp"
#include "indexer/locality_index_builder.hpp"
#include "base/assert.hpp"
@ -26,17 +25,12 @@ GeoObjectsIndex<IndexReader> GenerateStreetsIndex(std::vector<OsmElementData> co
ScopedFile const streetsFeatures{"streets"s + DATA_FILE_EXTENSION, ScopedFile::Mode::DoNotCreate};
WriteFeatures(osmElements, streetsFeatures);
auto const locDataFile = GetFileName("streets"s + LOC_DATA_FILE_EXTENSION);
bool locDataGeneration =
feature::GenerateGeoObjectsData(locDataFile, geoObjectsFeatures.GetFullPath(),
boost::none /* nodesFile */, streetsFeatures.GetFullPath());
CHECK(locDataGeneration, ());
ScopedFile const streetsIndex{"streets"s + LOC_IDX_FILE_EXTENSION, ScopedFile::Mode::DoNotCreate};
auto streetsIndexBuilding =
BuildGeoObjectsIndexFromDataFile(locDataFile, streetsIndex.GetFullPath(), {},
DataVersion::kFileTag);
CHECK(streetsIndexBuilding, ());
bool streetsIndexGeneration =
GenerateGeoObjectsIndex(streetsIndex.GetFullPath(), geoObjectsFeatures.GetFullPath(),
1 /* threadsCount */, {} /* nodesFile */,
streetsFeatures.GetFullPath());
CHECK(streetsIndexGeneration, ());
return ReadIndex<GeoObjectsIndexBox<IndexReader>, MmapReader>(streetsIndex.GetFullPath());
}

View file

@ -1,7 +1,7 @@
#include "generator/data_version.hpp"
#include "generator/generate_info.hpp"
#include "generator/geo_objects/geo_objects_generator.hpp"
#include "generator/locality_sorter.hpp"
#include "generator/locality_index_generator.hpp"
#include "generator/osm_source.hpp"
#include "generator/processor_factory.hpp"
#include "generator/raw_generator.hpp"
@ -294,29 +294,21 @@ int GeneratorToolMain(int argc, char ** argv)
return EXIT_FAILURE;
}
auto const locDataFile =
base::FilenameWithoutExt(options.m_geo_objects_index) + LOC_DATA_FILE_EXTENSION;
auto const nodesListPath =
boost::make_optional(!options.m_nodes_list_path.empty(), options.m_nodes_list_path);
auto const streetsFeaturesPath =
boost::make_optional(!options.m_streets_features.empty(), options.m_streets_features);
if (!feature::GenerateGeoObjectsData(locDataFile, options.m_geo_objects_features,
nodesListPath, streetsFeaturesPath))
{
LOG(LCRITICAL, ("Error generating geo objects data."));
return EXIT_FAILURE;
}
LOG(LINFO, ("Saving geo objects index to", options.m_geo_objects_index));
if (!indexer::BuildGeoObjectsIndexFromDataFile(
locDataFile, options.m_geo_objects_index,
DataVersion::LoadFromPath(path).GetVersionJson(),
DataVersion::kFileTag))
if (!GenerateGeoObjectsIndex(options.m_geo_objects_index, options.m_geo_objects_features,
genInfo.m_threadsCount, nodesListPath, streetsFeaturesPath))
{
LOG(LCRITICAL, ("Error generating geo objects index."));
return EXIT_FAILURE;
}
WriteDataVersionSection(options.m_geo_objects_index,
DataVersion::LoadFromPath(genInfo.m_dataPath).GetVersionJson());
}
if (options.m_generate_regions)
@ -327,29 +319,23 @@ int GeneratorToolMain(int argc, char ** argv)
return EXIT_FAILURE;
}
auto const locDataFile =
base::FilenameWithoutExt(options.m_regions_index) + LOC_DATA_FILE_EXTENSION;
if (!feature::GenerateRegionsData(locDataFile, options.m_regions_features))
{
LOG(LCRITICAL, ("Error generating regions data."));
return EXIT_FAILURE;
}
LOG(LINFO, ("Saving regions index to", options.m_regions_index));
if (!indexer::BuildRegionsIndexFromDataFile(locDataFile, options.m_regions_index,
DataVersion::LoadFromPath(path).GetVersionJson(),
DataVersion::kFileTag))
if (!GenerateRegionsIndex(options.m_regions_index, options.m_regions_features,
genInfo.m_threadsCount))
{
LOG(LCRITICAL, ("Error generating regions index."));
return EXIT_FAILURE;
}
if (!feature::GenerateBorders(options.m_regions_index, options.m_regions_features))
LOG(LINFO, ("Saving regions borders to", options.m_regions_index));
if (!GenerateBorders(options.m_regions_index, options.m_regions_features))
{
LOG(LCRITICAL, ("Error generating regions borders."));
return EXIT_FAILURE;
}
WriteDataVersionSection(options.m_regions_index,
DataVersion::LoadFromPath(genInfo.m_dataPath).GetVersionJson());
}
if (options.m_generate_regions_kv)

View file

@ -3,7 +3,7 @@
#include "generator/feature_generator.hpp"
#include "generator/key_value_concurrent_writer.hpp"
#include "generator/key_value_storage.hpp"
#include "generator/locality_sorter.hpp"
#include "generator/locality_index_generator.hpp"
#include "generator/geo_objects/geo_objects.hpp"
#include "generator/geo_objects/geo_objects_filter.hpp"
@ -13,7 +13,6 @@
#include "indexer/classificator.hpp"
#include "indexer/locality_index.hpp"
#include "indexer/locality_index_builder.hpp"
#include "coding/mmap_reader.hpp"
@ -372,17 +371,9 @@ bool JsonHasBuilding(JsonValue const & json)
boost::optional<indexer::GeoObjectsIndex<IndexReader>> MakeTempGeoObjectsIndex(
std::string const & pathToGeoObjectsTmpMwm)
{
auto const dataFile = GetPlatform().TmpPathForFile();
SCOPE_GUARD(removeDataFile, std::bind(Platform::RemoveFileIfExists, std::cref(dataFile)));
if (!GenerateGeoObjectsData(dataFile, pathToGeoObjectsTmpMwm))
{
LOG(LCRITICAL, ("Error generating geo objects data."));
return {};
}
auto const indexFile = GetPlatform().TmpPathForFile();
SCOPE_GUARD(removeIndexFile, std::bind(Platform::RemoveFileIfExists, std::cref(indexFile)));
if (!indexer::BuildGeoObjectsIndexFromDataFile(dataFile, indexFile, std::string(), DataVersion::kFileTag))
if (!GenerateGeoObjectsIndex(indexFile, pathToGeoObjectsTmpMwm, 1))
{
LOG(LCRITICAL, ("Error generating geo objects index."));
return {};

View file

@ -0,0 +1,347 @@
#include "generator/locality_index_generator.hpp"
#include "generator/data_version.hpp"
#include "generator/geo_objects/geo_objects_filter.hpp"
#include "generator/geometry_holder.hpp"
#include "generator/streets/streets_filter.hpp"
#include "generator/utils.hpp"
#include "indexer/data_header.hpp"
#include "indexer/locality_index_builder.hpp"
#include "indexer/locality_object.hpp"
#include "indexer/scales.hpp"
#include "coding/file_container.hpp"
#include "coding/internal/file_data.hpp"
#include "coding/geometry_coding.hpp"
#include "geometry/convex_hull.hpp"
#include "platform/platform.hpp"
#include "base/file_name_utils.hpp"
#include "base/logging.hpp"
#include "base/scope_guard.hpp"
#include "base/string_utils.hpp"
#include "base/thread_utils.hpp"
#include "base/timer.hpp"
#include "defines.hpp"
#include <cstdint>
#include <functional>
#include <limits>
#include <set>
#include <stdexcept>
#include <vector>
using namespace feature;
using namespace std;
namespace generator
{
// Converts a FeatureBuilder into an indexer::LocalityObject entirely in memory.
//
// The builder owns a single LocalityObject instance that is overwritten on every call, so the
// reference returned by operator() refers to that member and its contents change on the next
// call.
class LocalityObjectBuilder
{
public:
// Prepares the header used for geometry processing: default geometry coding params and a
// single scale (the upper scale).
LocalityObjectBuilder()
{
m_header.SetGeometryCodingParams(serial::GeometryCodingParams());
m_header.SetScales({scales::GetUpperScale()});
}
// Fills the internal LocalityObject from |fb| and returns a reference to it.
// Returns boost::none when the geometry holder can not be built for |fb|
// (see MakeGeometryHolder).
boost::optional<indexer::LocalityObject const &> operator()(FeatureBuilder & fb)
{
auto && geometryHolder = MakeGeometryHolder(fb);
if (!geometryHolder)
return boost::none;
auto & data = geometryHolder->GetBuffer();
// The object is identified by the encoded form of the most generic OSM id of the feature.
auto const encodedId = fb.GetMostGenericOsmId().GetEncodedId();
m_localityObject.SetId(encodedId);
switch (fb.GetGeomType())
{
case GeomType::Point:
{
// A point feature is represented by its single key point.
buffer_vector<m2::PointD, 32> points{fb.GetKeyPoint()};
m_localityObject.SetPoints(std::move(points));
break;
}
case GeomType::Line:
{
// A line uses the inner points collected by the geometry holder.
buffer_vector<m2::PointD, 32> points{data.m_innerPts.begin(), data.m_innerPts.end()};
m_localityObject.SetPoints(std::move(points));
break;
}
case GeomType::Area:
{
// An area is stored as triangles obtained from the holder's inner triangle strip;
// at least three strip points are required.
CHECK_GREATER_OR_EQUAL(data.m_innerTrg.size(), 3, ());
// |m_pointsBuffer| is a member so that it can be reused between calls.
m_pointsBuffer.clear();
m_pointsBuffer.append(data.m_innerTrg.begin(), data.m_innerTrg.end());
buffer_vector<m2::PointD, 32> triangles;
serial::StripToTriangles(m_pointsBuffer.size(), m_pointsBuffer, triangles);
m_localityObject.SetTriangles(std::move(triangles));
break;
}
default:
UNREACHABLE();
};
return {m_localityObject};
}
private:
// Builds a GeometryHolder with simplified geometry for |fb|.
// Returns boost::none when the simplified point list is empty or when an area can not be
// turned into a triangle strip.
boost::optional<GeometryHolder> MakeGeometryHolder(FeatureBuilder & fb)
{
// Do not limit inner triangles number to save all geometry without additional sections.
GeometryHolder holder{
fb, m_header, std::numeric_limits<uint32_t>::max() /* maxTrianglesNumber */};
// Simplify and serialize geometry.
vector<m2::PointD> points;
m2::SquaredDistanceFromSegmentToPoint<m2::PointD> distFn;
SimplifyPoints(distFn, scales::GetUpperScale(), holder.GetSourcePoints(), points);
if (points.empty())
return boost::none;
if (fb.IsLine())
holder.AddPoints(points, 0);
// For areas we save outer geometry only.
if (fb.IsArea() && holder.NeedProcessTriangles())
{
// At this point we don't need last point equal to first.
points.pop_back();
auto const & polys = fb.GetGeometry();
// When the feature does not consist of exactly one polygon, the points of all its
// polygons are concatenated instead of using the simplified points.
if (polys.size() != 1)
{
points.clear();
for (auto const & poly : polys)
points.insert(points.end(), poly.begin(), poly.end());
}
if (points.size() > 2)
{
if (!holder.TryToMakeStrip(points))
{
// Fallback: try to build the strip from the convex hull of the points.
m2::ConvexHull hull(points, 1e-16);
vector<m2::PointD> hullPoints = hull.Points();
holder.SetInner();
auto const id = fb.GetMostGenericOsmId();
if (!holder.TryToMakeStrip(hullPoints))
{
LOG(LWARNING, ("Error while building tringles for object with OSM Id:",
id.GetSerialId(), "Type:", id.GetType(), "points:", points,
"hull:", hull.Points()));
return boost::none;
}
}
}
// If the holder still reports that triangles need processing, no strip was built.
if (holder.NeedProcessTriangles())
return boost::none;
}
return {std::move(holder)};
}
// Header passed to every GeometryHolder created by this builder.
DataHeader m_header;
// The object that is refilled and returned by reference from operator().
indexer::LocalityObject m_localityObject;
// Scratch buffer for the triangle strip points of an area.
buffer_vector<m2::PointD, 32> m_pointsBuffer;
};
template <typename FeatureFilter, typename IndexBuilder>
bool GenerateLocalityIndex(
std::string const & outPath, std::string const & featuresFile, FeatureFilter && featureFilter,
IndexBuilder && indexBuilder, unsigned int threadsCount, uint64_t chunkFeaturesCount)
{
std::list<covering::LocalitiesCovering> coveringsParts{};
auto makeProcessor = [&] {
coveringsParts.emplace_back();
auto & covering = coveringsParts.back();
LocalityObjectBuilder localityObjectBuilder;
auto processor = [featureFilter, &indexBuilder, &covering, localityObjectBuilder]
(FeatureBuilder & fb, uint64_t /* currPos */) mutable
{
if (!featureFilter(fb))
return;
if (auto && localityObject = localityObjectBuilder(fb))
indexBuilder.Cover(*localityObject, covering);
};
return processor;
};
LOG(LINFO, ("Geometry cover features..."));
feature::ProcessParallelFromDatRawFormat(
threadsCount, chunkFeaturesCount, featuresFile, makeProcessor);
LOG(LINFO, ("Finish features geometry covering"));
LOG(LINFO, ("Merge geometry coverings..."));
covering::LocalitiesCovering localitiesCovering;
while (!coveringsParts.empty())
{
auto const & part = coveringsParts.back();
localitiesCovering.insert(localitiesCovering.end(), part.begin(), part.end());
coveringsParts.pop_back();
}
LOG(LINFO, ("Finish merging of geometry coverings"));
LOG(LINFO, ("Build locality index..."));
if (!indexBuilder.BuildCoveringIndex(std::move(localitiesCovering), outPath))
return false;
LOG(LINFO, ("Finish locality index building ", outPath));
return true;
}
namespace
{
// Reads node ids from the text file |nodesFile| into |nodeIds|.
//
// The first space-delimited token of every line is parsed as an unsigned 64-bit id; the rest
// of the line is ignored. An empty |nodesFile| path is not an error: nothing is read and true
// is returned. Returns false (after logging) when the file can not be opened or when a line
// does not start with a valid id; ids parsed before the failure stay in |nodeIds|.
bool ParseNodes(string const & nodesFile, set<uint64_t> & nodeIds)
{
  if (nodesFile.empty())
    return true;

  ifstream stream(nodesFile);
  if (!stream)
  {
    LOG(LERROR, ("Could not open", nodesFile));
    return false;
  }

  string line;
  // |lineNumber| is 1-based and is used for diagnostics only.
  for (size_t lineNumber = 1; getline(stream, line); ++lineNumber)
  {
    strings::SimpleTokenizer iter(line, " ");
    uint64_t nodeId;
    if (!iter || !strings::to_uint64(*iter, nodeId))
    {
      LOG(LERROR, ("Error while parsing node id at line", lineNumber, "Line contents:", line));
      return false;
    }

    nodeIds.insert(nodeId);
  }
  return true;
}
} // namespace
bool GenerateRegionsIndex(std::string const & outPath, std::string const & featuresFile,
unsigned int threadsCount)
{
auto const featuresFilter = [](FeatureBuilder & fb) { return fb.IsArea(); };
indexer::RegionsLocalityIndexBuilder indexBuilder;
return GenerateLocalityIndex(outPath, featuresFile, featuresFilter, indexBuilder,
threadsCount, 1 /* chunkFeaturesCount */);
}
// Builds the geo objects locality index at |outPath|.
//
// Indexed features:
//  - buildings and features with a house (always);
//  - streets, only when |streetsFeaturesFile| is set;
//  - POIs, only when |nodesFile| yields a non-empty id set and the POI id is in that set.
//
// When |streetsFeaturesFile| is set, the geo objects and streets files are concatenated into a
// temporary file next to |geoObjectsFeaturesFile|; the temporary file is removed on return.
// Returns false when |nodesFile| can not be parsed or when the index can not be built.
bool GenerateGeoObjectsIndex(
    std::string const & outPath, std::string const & geoObjectsFeaturesFile,
    unsigned int threadsCount,
    boost::optional<std::string> const & nodesFile,
    boost::optional<std::string> const & streetsFeaturesFile)
{
  set<uint64_t> nodeIds;
  if (nodesFile && !ParseNodes(*nodesFile, nodeIds))
    return false;

  bool const allowStreet = bool{streetsFeaturesFile};
  bool const allowPoi = !nodeIds.empty();

  auto const featuresFilter = [&nodeIds, allowStreet, allowPoi](FeatureBuilder & fb) {
    using generator::geo_objects::GeoObjectsFilter;
    using generator::streets::StreetsFilter;

    if (GeoObjectsFilter::IsBuilding(fb) || GeoObjectsFilter::HasHouse(fb))
      return true;

    if (allowStreet && StreetsFilter::IsStreet(fb))
      return true;

    if (allowPoi && GeoObjectsFilter::IsPoi(fb))
      return 0 != nodeIds.count(fb.GetMostGenericOsmId().GetEncodedId());

    return false;
  };

  indexer::GeoObjectsLocalityIndexBuilder indexBuilder;

  if (!streetsFeaturesFile)
  {
    return GenerateLocalityIndex(outPath, geoObjectsFeaturesFile, featuresFilter, indexBuilder,
                                 threadsCount, 10 /* chunkFeaturesCount */);
  }

  auto const featuresDirectory = base::GetDirectory(geoObjectsFeaturesFile);
  auto const featuresFile = base::JoinPath(
      featuresDirectory, std::string{"geo_objects_and_streets"} + DATA_FILE_EXTENSION_TMP);
  SCOPE_GUARD(featuresFileGuard, std::bind(Platform::RemoveFileIfExists, featuresFile));

  // The temporary file has a fixed name and is only ever appended to. Drop a possible leftover
  // of a previous interrupted run first, otherwise its features would be indexed again.
  Platform::RemoveFileIfExists(featuresFile);

  base::AppendFileToFile(geoObjectsFeaturesFile, featuresFile);
  base::AppendFileToFile(*streetsFeaturesFile, featuresFile);

  return GenerateLocalityIndex(outPath, featuresFile, featuresFilter, indexBuilder,
                               threadsCount, 100 /* chunkFeaturesCount */);
}
// BordersCollector --------------------------------------------------------------------------------
// Appends serialized borders of area features to the BORDERS_FILE_TAG section of an already
// existing container file.
class BordersCollector
{
public:
  // Opens the existing container |filename| and a writer for its borders section.
  explicit BordersCollector(string const & filename)
    : m_writer(filename, FileWriter::OP_WRITE_EXISTING)
    , m_bordersWriter{m_writer.GetWriter(BORDERS_FILE_TAG)}
  {
  }

  // Serializes the border of |fb| and writes it to the section. Non-area features are skipped.
  void Collect(FeatureBuilder & fb)
  {
    if (!fb.IsArea())
      return;

    // The buffer is a member, so it is reused between calls.
    m_buffer.clear();
    fb.SerializeBorderForIntermediate(serial::GeometryCodingParams(), m_buffer);
    WriteFeatureData(m_buffer);
  }

private:
  // Writes one record: the varuint-encoded size followed by the raw bytes.
  void WriteFeatureData(std::vector<char> const & bytes)
  {
    size_t const sz = bytes.size();
    CHECK(sz != 0, ("Empty feature not allowed here!"));

    WriteVarUint(*m_bordersWriter, sz);
    m_bordersWriter->Write(bytes.data(), sz);
  }

  FilesContainerW m_writer;
  std::unique_ptr<FileContainerWriter> m_bordersWriter;
  FeatureBuilder::Buffer m_buffer;

  DISALLOW_COPY_AND_MOVE(BordersCollector);
};
bool GenerateBorders(std::string const & outPath, string const & featuresFile)
{
BordersCollector bordersCollector(outPath);
ForEachFromDatRawFormat(featuresFile, [&](FeatureBuilder & fb, uint64_t /* currPos */) {
bordersCollector.Collect(fb);
});
return true;
}
//--------------------------------------------------------------------------------------------------
void WriteDataVersionSection(std::string const & outPath, std::string const & dataVersionJson)
{
FilesContainerW writer{outPath, FileWriter::OP_WRITE_EXISTING};
writer.Write(dataVersionJson.c_str(), dataVersionJson.size(), DataVersion::kFileTag);
}
} // namespace generator

View file

@ -0,0 +1,22 @@
#pragma once
#include <string>
#include <boost/optional.hpp>
namespace generator
{
// Builds the regions locality index at |outPath| from the area features of |featuresFile|,
// processing features with |threadsCount| threads.
bool GenerateRegionsIndex(
std::string const & outPath, std::string const & featuresFile, unsigned int threadsCount);
// Builds the geo objects locality index at |outPath| from |geoObjectsFeaturesFile|.
// |nodesFile| is an optional list of node ids of POIs to index; |streetsFeaturesFile| is an
// optional features file whose streets are indexed too.
bool GenerateGeoObjectsIndex(
std::string const & outPath, std::string const & geoObjectsFeaturesFile,
unsigned int threadsCount,
boost::optional<std::string> const & nodesFile = {},
boost::optional<std::string> const & streetsFeaturesFile = {});
// Generates borders section for server-side reverse geocoder from input feature-dat-files.
// NOTE(review): the second parameter is named |featuresDir| here, but the definition takes
// it as a features file path (|featuresFile|) - consider renaming.
bool GenerateBorders(std::string const & outPath, std::string const & featuresDir);
// Writes |dataVersionJson| as the data version section of the existing file |outPath|.
void WriteDataVersionSection(std::string const & outPath, std::string const & dataVersionJson);
} // namespace generator

View file

@ -1,298 +0,0 @@
#include "generator/locality_sorter.hpp"
#include "generator/geo_objects/geo_objects_filter.hpp"
#include "generator/geometry_holder.hpp"
#include "generator/streets/streets_filter.hpp"
#include "generator/utils.hpp"
#include "indexer/data_header.hpp"
#include "indexer/scales.hpp"
#include "coding/file_container.hpp"
#include "coding/internal/file_data.hpp"
#include "geometry/convex_hull.hpp"
#include "platform/platform.hpp"
#include "base/file_name_utils.hpp"
#include "base/logging.hpp"
#include "base/scope_guard.hpp"
#include "base/string_utils.hpp"
#include "base/timer.hpp"
#include "defines.hpp"
#include <cstdint>
#include <functional>
#include <limits>
#include <set>
#include <vector>
using namespace feature;
using namespace std;
namespace
{
class BordersCollector : public FeaturesCollector
{
public:
explicit BordersCollector(string const & filename)
: FeaturesCollector(filename + EXTENSION_TMP), m_writer(filename, FileWriter::OP_WRITE_EXISTING)
{
}
// FeaturesCollector overrides:
uint32_t Collect(FeatureBuilder & fb) override
{
if (fb.IsArea())
{
FeatureBuilder::Buffer buffer;
fb.SerializeBorderForIntermediate(serial::GeometryCodingParams(), buffer);
WriteFeatureBase(buffer, fb);
}
return 0;
}
void Finish() override
{
Flush();
m_writer.Write(m_datFile.GetName(), BORDERS_FILE_TAG);
m_writer.Finish();
}
private:
FilesContainerW m_writer;
DataHeader m_header;
DISALLOW_COPY_AND_MOVE(BordersCollector);
};
// Collects features serialized as locality objects into a temporary data file and, on
// Finish(), assembles a container with version, header and locality data sections.
class LocalityCollector : public FeaturesCollector
{
public:
// |filename| is the resulting container; collected data goes to |filename| + EXTENSION_TMP
// first. |header| and |versionDate| are stored and written by Finish().
LocalityCollector(string const & filename, DataHeader const & header, uint32_t versionDate)
: FeaturesCollector(filename + EXTENSION_TMP)
, m_writer(filename)
, m_header(header)
, m_versionDate(versionDate)
{
}
// FeaturesCollector overrides:
// Writes the version and header sections, then stores the collected data file as the
// LOCALITY_DATA_FILE_TAG section.
void Finish() override
{
{
auto w = m_writer.GetWriter(VERSION_FILE_TAG);
version::WriteVersion(*w, m_versionDate);
}
m_header.SetBounds(m_bounds);
{
auto w = m_writer.GetWriter(HEADER_FILE_TAG);
m_header.Save(*w);
}
Flush();
m_writer.Write(m_datFile.GetName(), LOCALITY_DATA_FILE_TAG);
m_writer.Finish();
}
// Simplifies the geometry of |fb|, serializes it as a locality object and writes it out.
// The feature is skipped when an area can not be converted to a triangle strip or when
// pre-serialization fails. Always returns 0.
uint32_t Collect(FeatureBuilder & fb) override
{
// Do not limit inner triangles number to save all geometry without additional sections.
GeometryHolder holder(fb, m_header, numeric_limits<uint32_t>::max() /* maxTrianglesNumber */);
// Simplify and serialize geometry.
vector<m2::PointD> points;
m2::SquaredDistanceFromSegmentToPoint<m2::PointD> distFn;
SimplifyPoints(distFn, scales::GetUpperScale(), holder.GetSourcePoints(), points);
if (fb.IsLine())
holder.AddPoints(points, 0);
// For areas we save outer geometry only.
if (fb.IsArea() && holder.NeedProcessTriangles())
{
// At this point we don't need last point equal to first.
points.pop_back();
auto const & polys = fb.GetGeometry();
// When the feature does not consist of exactly one polygon, the points of all its
// polygons are concatenated instead of using the simplified points.
if (polys.size() != 1)
{
points.clear();
for (auto const & poly : polys)
points.insert(points.end(), poly.begin(), poly.end());
}
if (points.size() > 2)
{
if (!holder.TryToMakeStrip(points))
{
// Fallback: try to build the strip from the convex hull of the points.
m2::ConvexHull hull(points, 1e-16);
vector<m2::PointD> hullPoints = hull.Points();
holder.SetInner();
auto const id = fb.GetMostGenericOsmId();
if (!holder.TryToMakeStrip(hullPoints))
{
LOG(LWARNING, ("Error while building tringles for object with OSM Id:", id.GetSerialId(),
"Type:", id.GetType(), "points:", points, "hull:", hull.Points()));
return 0;
}
}
}
}
auto & buffer = holder.GetBuffer();
if (fb.PreSerializeAndRemoveUselessNamesForMwm(buffer))
{
fb.SerializeLocalityObject(serial::GeometryCodingParams(), buffer);
WriteFeatureBase(buffer.m_buffer, fb);
}
return 0;
}
private:
// Writer of the resulting container file.
FilesContainerW m_writer;
// Header saved into the container; its bounds are set in Finish().
DataHeader m_header;
// Value written into the version section.
uint32_t m_versionDate;
DISALLOW_COPY_AND_MOVE(LocalityCollector);
};
// Reads node ids from the text file |nodesFile| into |nodeIds|.
//
// Only the first space-delimited token of each line is parsed (as an unsigned 64-bit id).
// An empty |nodesFile| path means "no file": nothing is read and true is returned.
// Returns false (after logging) when the file can not be opened or a line does not start
// with a valid id; ids parsed before the failure stay in |nodeIds|.
bool ParseNodes(string const & nodesFile, set<uint64_t> & nodeIds)
{
  if (nodesFile.empty())
    return true;

  ifstream stream(nodesFile);
  if (!stream)
  {
    LOG(LERROR, ("Could not open", nodesFile));
    return false;
  }

  string line;
  // |lineNumber| is 1-based and is used for diagnostics only.
  for (size_t lineNumber = 1; getline(stream, line); ++lineNumber)
  {
    strings::SimpleTokenizer iter(line, " ");
    uint64_t nodeId;
    if (!iter || !strings::to_uint64(*iter, nodeId))
    {
      LOG(LERROR, ("Error while parsing node id at line", lineNumber, "Line contents:", line));
      return false;
    }

    nodeIds.insert(nodeId);
  }
  return true;
}
using NeedSerialize = function<bool(FeatureBuilder & fb1)>;
// Feeds every feature of |featuresFile| accepted by |needSerialize| into |collector| and
// finishes the collector afterwards.
// Returns false (after logging) when a RootException is thrown on the way.
bool GenerateLocalityDataImpl(FeaturesCollector & collector,
                              NeedSerialize const & needSerialize,
                              string const & featuresFile)
{
  auto collectIfNeeded = [&collector, &needSerialize](FeatureBuilder & fb,
                                                      uint64_t /* currPos */) {
    if (!needSerialize(fb))
      return;
    collector.Collect(fb);
  };

  // Transform features from raw format to LocalityObject format.
  try
  {
    LOG(LINFO, ("Processing", featuresFile));
    ForEachFromDatRawFormat(featuresFile, collectIfNeeded);
    collector.Finish();
    return true;
  }
  catch (RootException const & ex)
  {
    LOG(LCRITICAL, ("Locality data writing error:", ex.Msg()));
    return false;
  }
}
} // namespace
namespace feature
{
// Writes the features of |featuresFile| accepted by |needSerialize| into the locality data
// file |toDataFile|. The version date of the file is the current time.
bool GenerateGeoObjectsData(string const & toDataFile, string const & featuresFile,
                            NeedSerialize const & needSerialize)
{
  DataHeader header;
  header.SetGeometryCodingParams(serial::GeometryCodingParams());
  header.SetScales({scales::GetUpperScale()});

  auto const versionDate = static_cast<uint32_t>(base::SecondsSinceEpoch());
  LocalityCollector geoObjectsCollector(toDataFile, header, versionDate);

  return GenerateLocalityDataImpl(geoObjectsCollector, needSerialize, featuresFile);
}
// Generates locality data for geo objects into |toDataFile|.
//
// Serialized features:
//  - buildings and features with a house (always);
//  - streets, only when |streetsFeaturesFile| is set;
//  - POIs, only when |nodesFile| yields a non-empty id set and the POI id is in that set.
//
// When |streetsFeaturesFile| is set, the geo objects and streets files are concatenated into a
// temporary file next to |geoObjectsFeaturesFile|; the temporary file is removed on return.
// Returns false when |nodesFile| can not be parsed or when data generation fails.
bool GenerateGeoObjectsData(string const & toDataFile,
                            string const & geoObjectsFeaturesFile,
                            boost::optional<string> const & nodesFile,
                            boost::optional<string> const & streetsFeaturesFile)
{
  set<uint64_t> nodeIds;
  if (nodesFile && !ParseNodes(*nodesFile, nodeIds))
    return false;

  bool const allowStreet = bool{streetsFeaturesFile};
  bool const allowPoi = !nodeIds.empty();

  auto const needSerialize = [&nodeIds, allowStreet, allowPoi](FeatureBuilder & fb) {
    using generator::geo_objects::GeoObjectsFilter;
    using generator::streets::StreetsFilter;

    if (GeoObjectsFilter::IsBuilding(fb) || GeoObjectsFilter::HasHouse(fb))
      return true;

    if (allowStreet && StreetsFilter::IsStreet(fb))
      return true;

    if (allowPoi && GeoObjectsFilter::IsPoi(fb))
      return 0 != nodeIds.count(fb.GetMostGenericOsmId().GetEncodedId());

    return false;
  };

  if (!streetsFeaturesFile)
    return GenerateGeoObjectsData(toDataFile, geoObjectsFeaturesFile, needSerialize);

  auto const featuresDirectory = base::GetDirectory(geoObjectsFeaturesFile);
  auto const featuresFile = base::JoinPath(
      featuresDirectory, std::string{"geo_objects_and_streets"} + DATA_FILE_EXTENSION_TMP);
  SCOPE_GUARD(featuresFileGuard, std::bind(Platform::RemoveFileIfExists, featuresFile));

  // The temporary file has a fixed name and is only ever appended to. Drop a possible leftover
  // of a previous interrupted run first, otherwise its features would be processed again.
  Platform::RemoveFileIfExists(featuresFile);

  base::AppendFileToFile(geoObjectsFeaturesFile, featuresFile);
  base::AppendFileToFile(*streetsFeaturesFile, featuresFile);

  return GenerateGeoObjectsData(toDataFile, featuresFile, needSerialize);
}
bool GenerateRegionsData(std::string const & toDataFile, string const & featuresFile)
{
DataHeader header;
header.SetGeometryCodingParams(serial::GeometryCodingParams());
header.SetScales({scales::GetUpperScale()});
LocalityCollector regionsCollector(toDataFile, header,
static_cast<uint32_t>(base::SecondsSinceEpoch()));
auto const needSerialize = [](FeatureBuilder const & fb) { return fb.IsArea(); };
return GenerateLocalityDataImpl(regionsCollector, needSerialize, featuresFile);
}
bool GenerateBorders(std::string const & toDataFile, string const & featuresFile)
{
BordersCollector bordersCollector(toDataFile);
auto const needSerialize = [](FeatureBuilder const & fb) { return fb.IsArea(); };
return GenerateLocalityDataImpl(bordersCollector, needSerialize, featuresFile);
}
} // namespace feature

View file

@ -1,20 +0,0 @@
#pragma once
#include <string>
#include <boost/optional.hpp>
namespace feature
{
// Generates data for GeoObjectsIndexBuilder from input feature-dat-files.
// |nodesFile| is an optional list of node ids of POIs to include; |streetsFeaturesFile| is an
// optional features file whose streets are included too.
bool GenerateGeoObjectsData(std::string const & toDataFile,
std::string const & geoObjectsFeaturesFile,
boost::optional<std::string> const & nodesFile = {},
boost::optional<std::string> const & streetsFeaturesFile = {});
// Generates data for RegionsIndexBuilder from input feature-dat-files.
// Only area features are written.
bool GenerateRegionsData(std::string const & toDataFile, std::string const & featuresFile);
// Generates borders section for server-side reverse geocoder from input feature-dat-files.
// NOTE(review): the second parameter is named |featuresDir| here, but the definition takes
// it as a features file path (|featuresFile|) - consider renaming.
bool GenerateBorders(std::string const & toDataFile, std::string const & featuresDir);
} // namespace feature

View file

@ -86,7 +86,6 @@ set(
interval_index_builder.hpp
locality_index.cpp
locality_index.hpp
locality_index_builder.cpp
locality_index_builder.hpp
locality_object.cpp
locality_object.hpp

View file

@ -32,9 +32,8 @@ typedef std::vector<Interval> Intervals;
// Cover feature with RectIds and return their integer representations.
std::vector<int64_t> CoverFeature(FeatureType & feature, int cellDepth, uint64_t cellPenaltyArea);
std::vector<int64_t> CoverGeoObject(indexer::LocalityObject const & o, int cellDepth);
std::vector<int64_t> CoverRegion(indexer::LocalityObject const & o, int cellDepth);
std::vector<int64_t> CoverGeoObject(indexer::LocalityObject const & o, int cellDepth);
// Given a vector of intervals [a, b), sort them and merge overlapping intervals.
Intervals SortAndMergeIntervals(Intervals const & intervals);

View file

@ -23,26 +23,17 @@ using namespace std;
namespace
{
struct LocalityObjectVector
{
template <typename ToDo>
void ForEach(ToDo && toDo) const
{
for_each(m_objects.cbegin(), m_objects.cend(), forward<ToDo>(toDo));
}
vector<LocalityObject> m_objects;
};
template <class ObjectsVector, class Writer>
void BuildGeoObjectsIndex(ObjectsVector const & objects, Writer & writer,
string const & tmpFilePrefix)
void BuildGeoObjectsIndex(ObjectsVector const & objects, Writer && writer)
{
auto coverLocality = [](indexer::LocalityObject const & o, int cellDepth) {
return covering::CoverGeoObject(o, cellDepth);
};
return covering::BuildLocalityIndex<ObjectsVector, Writer, kGeoObjectsDepthLevels>(
objects, writer, coverLocality, tmpFilePrefix);
indexer::GeoObjectsLocalityIndexBuilder indexBuilder;
covering::LocalitiesCovering objectsCovering;
for (auto const & object : objects)
indexBuilder.Cover(object, objectsCovering);
indexBuilder.BuildCoveringIndex(std::move(objectsCovering), std::forward<Writer>(writer),
kGeoObjectsDepthLevels);
}
using Ids = set<uint64_t>;
@ -70,16 +61,16 @@ RankedIds GetRankedIds(LocalityIndex const & index, m2::PointD const & center,
UNIT_TEST(BuildLocalityIndexTest)
{
LocalityObjectVector objects;
objects.m_objects.resize(4);
objects.m_objects[0].SetForTesting(1, m2::PointD{0, 0});
objects.m_objects[1].SetForTesting(2, m2::PointD{1, 0});
objects.m_objects[2].SetForTesting(3, m2::PointD{1, 1});
objects.m_objects[3].SetForTesting(4, m2::PointD{0, 1});
vector<LocalityObject> objects;
objects.resize(4);
objects[0].SetForTesting(1, m2::PointD{0, 0});
objects[1].SetForTesting(2, m2::PointD{1, 0});
objects[2].SetForTesting(3, m2::PointD{1, 1});
objects[3].SetForTesting(4, m2::PointD{0, 1});
vector<uint8_t> localityIndex;
MemWriter<vector<uint8_t>> writer(localityIndex);
BuildGeoObjectsIndex(objects, writer, "tmp");
BuildGeoObjectsIndex(objects, writer);
MemReader reader(localityIndex.data(), localityIndex.size());
indexer::GeoObjectsIndex<MemReader> index(reader);
@ -91,16 +82,16 @@ UNIT_TEST(BuildLocalityIndexTest)
UNIT_TEST(LocalityIndexRankTest)
{
LocalityObjectVector objects;
objects.m_objects.resize(4);
objects.m_objects[0].SetForTesting(1, m2::PointD{1, 0});
objects.m_objects[1].SetForTesting(2, m2::PointD{2, 0});
objects.m_objects[2].SetForTesting(3, m2::PointD{3, 0});
objects.m_objects[3].SetForTesting(4, m2::PointD{4, 0});
vector<LocalityObject> objects;
objects.resize(4);
objects[0].SetForTesting(1, m2::PointD{1, 0});
objects[1].SetForTesting(2, m2::PointD{2, 0});
objects[2].SetForTesting(3, m2::PointD{3, 0});
objects[3].SetForTesting(4, m2::PointD{4, 0});
vector<uint8_t> localityIndex;
MemWriter<vector<uint8_t>> writer(localityIndex);
BuildGeoObjectsIndex(objects, writer, "tmp");
BuildGeoObjectsIndex(objects, writer);
MemReader reader(localityIndex.data(), localityIndex.size());
indexer::GeoObjectsIndex<MemReader> index(reader);
@ -123,24 +114,24 @@ UNIT_TEST(LocalityIndexRankTest)
UNIT_TEST(LocalityIndexTopSizeTest)
{
LocalityObjectVector objects;
objects.m_objects.resize(8);
vector<LocalityObject> objects;
objects.resize(8);
// Same cell.
objects.m_objects[0].SetForTesting(1, m2::PointD{1.0, 0.0});
objects.m_objects[1].SetForTesting(2, m2::PointD{1.0, 0.0});
objects.m_objects[2].SetForTesting(3, m2::PointD{1.0, 0.0});
objects.m_objects[3].SetForTesting(4, m2::PointD{1.0, 0.0});
objects[0].SetForTesting(1, m2::PointD{1.0, 0.0});
objects[1].SetForTesting(2, m2::PointD{1.0, 0.0});
objects[2].SetForTesting(3, m2::PointD{1.0, 0.0});
objects[3].SetForTesting(4, m2::PointD{1.0, 0.0});
// Another close cell.
objects.m_objects[4].SetForTesting(5, m2::PointD{1.0, 1.0});
objects.m_objects[5].SetForTesting(6, m2::PointD{1.0, 1.0});
objects[4].SetForTesting(5, m2::PointD{1.0, 1.0});
objects[5].SetForTesting(6, m2::PointD{1.0, 1.0});
// Far cell.
objects.m_objects[6].SetForTesting(7, m2::PointD{10.0, 10.0});
objects[6].SetForTesting(7, m2::PointD{10.0, 10.0});
// The big object contains all points and must be returned on any query.
objects.m_objects[7].SetForTesting(8, m2::RectD{0.0, 0.0, 10.0, 10.0});
objects[7].SetForTesting(8, m2::RectD{0.0, 0.0, 10.0, 10.0});
vector<uint8_t> localityIndex;
MemWriter<vector<uint8_t>> writer(localityIndex);
BuildGeoObjectsIndex(objects, writer, "tmp");
BuildGeoObjectsIndex(objects, writer);
MemReader reader(localityIndex.data(), localityIndex.size());
indexer::GeoObjectsIndex<MemReader> index(reader);
@ -189,21 +180,21 @@ UNIT_TEST(LocalityIndexWeightRankTest)
m2::PointD queryPoint{0, 0};
m2::PointD queryBorder{0, 2};
LocalityObjectVector objects;
objects.m_objects.resize(7);
vector<LocalityObject> objects;
objects.resize(7);
// Enclose query point.
objects.m_objects[0].SetForTesting(1, m2::PointD{0, 0});
objects.m_objects[1].SetForTesting(2, m2::PointD{0.000001, 0.000001}); // in the same lowermost cell
objects.m_objects[2].SetForTesting(3, m2::RectD{-1, -1, 1, 1});
objects[0].SetForTesting(1, m2::PointD{0, 0});
objects[1].SetForTesting(2, m2::PointD{0.000001, 0.000001}); // in the same lowermost cell
objects[2].SetForTesting(3, m2::RectD{-1, -1, 1, 1});
// Closest objects.
objects.m_objects[3].SetForTesting(4, m2::RectD{0.5, 0.5, 1.0, 1.0});
objects.m_objects[4].SetForTesting(5, m2::PointD{1, 0});
objects.m_objects[5].SetForTesting(6, m2::PointD{1, 1});
objects.m_objects[6].SetForTesting(7, m2::RectD{1, 0, 1.1, 0.1});
objects[3].SetForTesting(4, m2::RectD{0.5, 0.5, 1.0, 1.0});
objects[4].SetForTesting(5, m2::PointD{1, 0});
objects[5].SetForTesting(6, m2::PointD{1, 1});
objects[6].SetForTesting(7, m2::RectD{1, 0, 1.1, 0.1});
vector<uint8_t> localityIndex;
MemWriter<vector<uint8_t>> writer(localityIndex);
BuildGeoObjectsIndex(objects, writer, "tmp");
BuildGeoObjectsIndex(objects, writer);
MemReader reader(localityIndex.data(), localityIndex.size());
indexer::GeoObjectsIndex<MemReader> index(reader);

View file

@ -1,121 +0,0 @@
#include "indexer/locality_index_builder.hpp"
#include "indexer/locality_object.hpp"
#include "defines.hpp"
#include "coding/file_container.hpp"
#include "coding/var_record_reader.hpp"
#include "base/logging.hpp"
using namespace std;
namespace indexer
{
namespace
{
template <class Reader>
class LocalityVector
{
DISALLOW_COPY(LocalityVector);
public:
explicit LocalityVector(Reader const & reader) : m_recordReader(reader, 256 /* expectedRecordSize */) {}
template <class ToDo>
void ForEach(ToDo && toDo) const
{
m_recordReader.ForEachRecord([&](uint32_t /*pos*/, char const * data, uint32_t /*size*/) {
LocalityObject o;
o.Deserialize(data);
toDo(o);
});
}
private:
friend class LocalityVectorReader;
VarRecordReader<FilesContainerR::TReader, &VarRecordSizeReaderVarint> m_recordReader;
};
// Self-contained locality reader: owns the file container opened at |filePath| together with
// the LocalityVector built over its LOCALITY_DATA_FILE_TAG section.
// Used in generator_tool and unit tests.
class LocalityVectorReader
{
  DISALLOW_COPY(LocalityVectorReader);

public:
  explicit LocalityVectorReader(string const & filePath)
    : m_cont(filePath)
    , m_vector(m_cont.GetReader(LOCALITY_DATA_FILE_TAG))
  {
  }

  LocalityVector<ModelReaderPtr> const & GetVector() const
  {
    return m_vector;
  }

private:
  // Must stay declared before |m_vector|: the vector is constructed from a reader taken from it.
  FilesContainerR m_cont;
  LocalityVector<ModelReaderPtr> m_vector;
};
// Builds a locality index with |DEPTH_LEVELS| depth levels from the objects stored in |dataFile|.
//
// The index is first written to a temporary file (|outFileName| + LOCALITY_INDEX_TMP_EXT) and
// then copied into the container |outFileName| under |localityIndexFileTag|; |dataVersionJson|
// is stored in the same container under |dataVersionTag|. |coverLocality| produces the covering
// of each object.
//
// Returns true on success. On a Reader::Exception or Writer::Exception the error is logged,
// the temporary index file is removed and false is returned.
template <int DEPTH_LEVELS>
bool BuildLocalityIndexFromDataFile(string const & dataFile,
                                    covering::CoverLocality const & coverLocality,
                                    string const & outFileName,
                                    string const & localityIndexFileTag,
                                    string const & dataVersionJson,
                                    string const & dataVersionTag)
{
  // Declared outside the try block so that the failure handlers can clean the file up as well;
  // previously it was leaked whenever reading or writing failed.
  string const idxFileName(outFileName + LOCALITY_INDEX_TMP_EXT);
  try
  {
    {
      // Inner scope: the temporary index writer must be closed before the file is read back.
      LocalityVectorReader localities(dataFile);
      FileWriter writer(idxFileName);
      covering::BuildLocalityIndex<LocalityVector<ModelReaderPtr>, FileWriter, DEPTH_LEVELS>(
          localities.GetVector(), writer, coverLocality, outFileName, IntervalIndexVersion::V2);
    }

    FilesContainerW writer(outFileName, FileWriter::OP_WRITE_TRUNCATE);
    writer.Write(idxFileName, localityIndexFileTag);
    writer.Write(dataVersionJson.c_str(), dataVersionJson.size(), dataVersionTag);
    FileWriter::DeleteFileX(idxFileName);
  }
  catch (Reader::Exception const & e)
  {
    LOG(LERROR, ("Error while reading file:", e.Msg()));
    FileWriter::DeleteFileX(idxFileName);
    return false;
  }
  catch (Writer::Exception const & e)
  {
    LOG(LERROR, ("Error writing index file:", e.Msg()));
    FileWriter::DeleteFileX(idxFileName);
    return false;
  }
  return true;
}
} // namespace
// Builds the geo objects index (|kGeoObjectsDepthLevels| depth levels) from |dataFile| and stores
// it in |outFileName| under GEO_OBJECTS_INDEX_FILE_TAG, together with the data version record.
// Returns the result of BuildLocalityIndexFromDataFile.
bool BuildGeoObjectsIndexFromDataFile(string const & dataFile, string const & outFileName,
                                      string const & dataVersionJson,
                                      string const & dataVersionTag)
{
  covering::CoverLocality const geoObjectCoverer =
      [](indexer::LocalityObject const & object, int depth) {
        return covering::CoverGeoObject(object, depth);
      };

  return BuildLocalityIndexFromDataFile<kGeoObjectsDepthLevels>(
      dataFile, geoObjectCoverer, outFileName, GEO_OBJECTS_INDEX_FILE_TAG, dataVersionJson,
      dataVersionTag);
}
// Builds the regions index (|kRegionsDepthLevels| depth levels) from |dataFile| and stores it in
// |outFileName| under REGIONS_INDEX_FILE_TAG, together with the data version record.
// Returns the result of BuildLocalityIndexFromDataFile.
bool BuildRegionsIndexFromDataFile(string const & dataFile, string const & outFileName,
                                   string const & dataVersionJson,
                                   string const & dataVersionTag)
{
  covering::CoverLocality const regionCoverer =
      [](indexer::LocalityObject const & object, int depth) {
        return covering::CoverRegion(object, depth);
      };

  return BuildLocalityIndexFromDataFile<kRegionsDepthLevels>(
      dataFile, regionCoverer, outFileName, REGIONS_INDEX_FILE_TAG, dataVersionJson,
      dataVersionTag);
}
} // namespace indexer

View file

@ -6,8 +6,7 @@
#include "indexer/locality_object.hpp"
#include "indexer/scales.hpp"
#include "coding/dd_vector.hpp"
#include "coding/file_sort.hpp"
#include "coding/file_container.hpp"
#include "coding/writer.hpp"
#include "base/logging.hpp"
@ -17,57 +16,97 @@
#include "defines.hpp"
#include <cstdint>
#include <deque>
#include <functional>
#include <string>
#include <vector>
#include <boost/sort/sort.hpp>
namespace covering
{
using CoverLocality =
std::function<std::vector<int64_t>(indexer::LocalityObject const & o, int cellDepth)>;
template <class ObjectsVector, class Writer, int DEPTH_LEVELS>
void BuildLocalityIndex(ObjectsVector const & objects, Writer & writer,
CoverLocality const & coverLocality, std::string const & tmpFilePrefix,
IntervalIndexVersion version = IntervalIndexVersion::V1)
{
std::string const cellsToValueFile = tmpFilePrefix + CELL2LOCALITY_SORTED_EXT + ".all";
SCOPE_GUARD(cellsToValueFileGuard, std::bind(&FileWriter::DeleteFileX, cellsToValueFile));
{
FileWriter cellsToValueWriter(cellsToValueFile);
WriterFunctor<FileWriter> out(cellsToValueWriter);
FileSorter<CellValuePair<uint64_t>, WriterFunctor<FileWriter>> sorter(
1024 * 1024 /* bufferBytes */, tmpFilePrefix + CELL2LOCALITY_TMP_EXT, out);
objects.ForEach([&sorter, &coverLocality](indexer::LocalityObject const & o) {
std::vector<int64_t> const cells =
coverLocality(o, GetCodingDepth<DEPTH_LEVELS>(scales::GetUpperScale()));
for (auto const & cell : cells)
sorter.Add(CellValuePair<uint64_t>(cell, o.GetStoredId()));
});
sorter.SortAndFinish();
}
FileReader reader(cellsToValueFile);
DDVector<CellValuePair<uint64_t>, FileReader, uint64_t> cellsToValue(reader);
{
BuildIntervalIndex(cellsToValue.begin(), cellsToValue.end(), writer, DEPTH_LEVELS * 2 + 1, version);
}
}
using LocalitiesCovering = std::deque<CellValuePair<uint64_t>>;
} // namespace covering
namespace indexer
{
// Builds indexer::GeoObjectsIndex for reverse geocoder with |kGeoObjectsDepthLevels| depth levels
// and saves it to |GEO_OBJECTS_INDEX_FILE_TAG| of |out|.
bool BuildGeoObjectsIndexFromDataFile(std::string const & dataFile, std::string const & out,
std::string const & dataVersionJson,
std::string const & dataVersionTag);
// Builds a locality interval index from object coverings that are kept in memory.
//
// |BuilderSpec| supplies everything index-specific that this class reads:
//   - BuilderSpec::kDepthLevels  - depth levels of the index;
//   - BuilderSpec::kIndexFileTag - tag under which the index is written to the output container;
//   - BuilderSpec::Cover(object, cellDepth) - the covering of a single object.
template <typename BuilderSpec>
class LocalityIndexBuilder
{
public:
// Appends one (cell, stored id) pair to |covering| for every cell that the spec's Cover()
// returns for |localityObject|.
void Cover(LocalityObject const & localityObject, covering::LocalitiesCovering & covering) const
{
// Function-local static: initialized once, on the first call, per BuilderSpec instantiation.
static auto const cellDepth =
covering::GetCodingDepth<BuilderSpec::kDepthLevels>(scales::GetUpperScale());
// NOTE(review): the comment and declaration below sit inside Cover()'s body and are not used by
// it; they look like leftover text from the previous version of this header - confirm and remove.
// Builds indexer::RegionsIndex for reverse geocoder with |kRegionsDepthLevels| depth levels and
// saves it to |REGIONS_INDEX_FILE_TAG| of |out|.
bool BuildRegionsIndexFromDataFile(std::string const & dataFile, std::string const & out,
std::string const & dataVersionJson,
std::string const & dataVersionTag);
auto const id = localityObject.GetStoredId();
auto && cells = m_builderSpec.Cover(localityObject, cellDepth);
for (auto const & cell : cells)
covering.emplace_back(cell, id);
}
// Builds the index for |covering| in a memory buffer and writes that buffer to the container
// at |localityIndexPath| (opened with OP_WRITE_TRUNCATE) under BuilderSpec::kIndexFileTag.
// Returns false, after logging, if writing the container throws Writer::Exception;
// returns true otherwise.
bool BuildCoveringIndex(covering::LocalitiesCovering && covering,
std::string const & localityIndexPath) const
{
std::vector<char> buffer;
buffer.reserve(covering.size() * 10 /* ~ ratio file-size / cell-pair */);
MemWriter<std::vector<char>> indexWriter{buffer};
// Three-argument call: resolves to the Writer-templated overload below.
BuildCoveringIndex(std::move(covering), indexWriter, BuilderSpec::kDepthLevels);
try
{
FilesContainerW writer(localityIndexPath, FileWriter::OP_WRITE_TRUNCATE);
writer.Write(buffer, BuilderSpec::kIndexFileTag);
}
catch (Writer::Exception const & e)
{
LOG(LERROR, ("Error writing index file:", e.Msg()));
return false;
}
return true;
}
// Sorts |covering| in place and writes an IntervalIndexVersion::V2 interval index built from it
// to |writer|; BuildIntervalIndex receives |depthLevel| * 2 + 1 as its size argument.
// NOTE(review): std::min, std::thread and std::forward are used here, but <algorithm>, <thread>
// and <utility> are not among the includes visible for this header - confirm they are available.
template <typename Writer>
void BuildCoveringIndex(covering::LocalitiesCovering && covering, Writer && writer,
int depthLevel) const
{
// The sort is capped at 32 threads: according to the original author, block_indirect_sort is
// fastest with 32 threads for its internal |block_size| parameter and
// sizeof(CellValuePair<uint64_t>).
auto sortThreadsCount = std::min(32u, std::thread::hardware_concurrency());
boost::sort::block_indirect_sort(covering.begin(), covering.end(), sortThreadsCount);
BuildIntervalIndex(covering.begin(), covering.end(), std::forward<Writer>(writer),
depthLevel * 2 + 1, IntervalIndexVersion::V2);
}
private:
// Spec instance used by Cover() to compute object coverings.
BuilderSpec m_builderSpec;
};
// LocalityIndexBuilder parameters for the regions index: depth levels, container tag and the
// covering function.
struct RegionsIndexBuilderSpec
{
  static constexpr int kDepthLevels = kRegionsDepthLevels;
  static constexpr auto const & kIndexFileTag = REGIONS_INDEX_FILE_TAG;

  // Covers |o| at |cellDepth| by delegating to covering::CoverRegion.
  auto Cover(indexer::LocalityObject const & o, int cellDepth) const -> std::vector<int64_t>
  {
    return covering::CoverRegion(o, cellDepth);
  }
};
// LocalityIndexBuilder parameters for the geo objects index: depth levels, container tag and the
// covering function.
struct GeoObjectsIndexBuilderSpec
{
  static constexpr int kDepthLevels = kGeoObjectsDepthLevels;
  static constexpr auto const & kIndexFileTag = GEO_OBJECTS_INDEX_FILE_TAG;

  // Covers |o| at |cellDepth| by delegating to covering::CoverGeoObject.
  auto Cover(indexer::LocalityObject const & o, int cellDepth) const -> std::vector<int64_t>
  {
    return covering::CoverGeoObject(o, cellDepth);
  }
};
using RegionsLocalityIndexBuilder = LocalityIndexBuilder<RegionsIndexBuilderSpec>;
using GeoObjectsLocalityIndexBuilder = LocalityIndexBuilder<GeoObjectsIndexBuilderSpec>;
} // namespace indexer

View file

@ -27,7 +27,7 @@ void LocalityObject::Deserialize(char const * data)
ReadPrimitiveFromSource(src, ptsCount);
CHECK_GREATER(ptsCount, 1, ());
char const * start = src.PtrC();
src = ArrayByteSource(serial::LoadInnerPath(start, ptsCount, cp, m_points));
serial::LoadInnerPath(start, ptsCount, cp, m_points);
return;
}

View file

@ -18,6 +18,8 @@ namespace indexer
class LocalityObject
{
public:
LocalityObject() = default;
// Decodes id stored in LocalityIndex. See GetStoredId().
static base::GeoObjectId FromStoredId(uint64_t storedId)
{
@ -45,6 +47,21 @@ public:
toDo(m_triangles[i - 2], m_triangles[i - 1], m_triangles[i]);
}
// Replaces the object's id with |id|.
void SetId(uint64_t id) { m_id = id; }
// Takes ownership of |points| and makes them the object's points.
void SetPoints(buffer_vector<m2::PointD, 32> && points) { m_points = std::move(points); }
// Takes ownership of |triangles| and makes them the object's triangles.
void SetTriangles(buffer_vector<m2::PointD, 32> && triangles)
{
  m_triangles = std::move(triangles);
}
void SetForTesting(uint64_t id, m2::PointD point)
{
m_id = id;