[geocoder] Generate regions index and borders from raw features file.

This commit is contained in:
tatiana-kondakova 2018-04-23 16:49:12 +03:00 committed by Ilya Zverev
parent 4e26f89813
commit 6025d7aa48
6 changed files with 223 additions and 84 deletions

View file

@ -408,6 +408,25 @@ void FeatureBuilder1::Serialize(TBuffer & data) const
#endif
}
// Serializes only the border of this feature into |data|.
// Layout written, in order:
//   1. encoded id of the most generic OSM id of the feature;
//   2. number of polygons minus one;
//   3. for every polygon: its point count followed by each point saved with |params|.
// @param params - geometry coding params forwarded to serial::SavePoint;
// @param data - output buffer; its previous contents are discarded.
// NOTE(review): both counts are written with the type returned by size(), so their width
// presumably follows the platform's size_t - confirm the reader agrees.
void FeatureBuilder1::SerializeBorder(serial::GeometryCodingParams const & params,
                                      TBuffer & data) const
{
  // Start from an empty buffer; everything below is appended through |sink|.
  data.clear();
  PushBackByteSink<TBuffer> sink(data);

  auto const encodedId = GetMostGenericOsmId().EncodedId();
  WriteToSink(sink, encodedId);

  // A border needs at least one polygon; the check also keeps the subtraction below from wrapping.
  CHECK_GREATER(m_polygons.size(), 0, ());
  auto const polygonsCountMinusOne = m_polygons.size() - 1;
  WriteToSink(sink, polygonsCountMinusOne);

  for (auto const & ring : m_polygons)
  {
    auto const pointsCount = ring.size();
    WriteToSink(sink, pointsCount);
    for (auto const & point : ring)
    {
      serial::SavePoint(sink, point, params);
    }
  }
}
void FeatureBuilder1::Deserialize(TBuffer & data)
{
serial::GeometryCodingParams cp;
@ -595,14 +614,14 @@ bool FeatureBuilder2::PreSerialize(SupportingData const & data)
return TBase::PreSerialize();
}
bool FeatureBuilder2::IsLocalityObject()
bool FeatureBuilder2::IsLocalityObject() const
{
return (m_params.GetGeomType() == GEOM_POINT || m_params.GetGeomType() == GEOM_AREA) &&
!m_params.house.IsEmpty();
}
void FeatureBuilder2::SerializeLocalityObject(serial::GeometryCodingParams const & params,
SupportingData & data)
SupportingData & data) const
{
data.m_buffer.clear();
@ -628,7 +647,8 @@ void FeatureBuilder2::SerializeLocalityObject(serial::GeometryCodingParams const
serial::SaveInnerTriangles(data.m_innerTrg, params, sink);
}
void FeatureBuilder2::Serialize(SupportingData & data, serial::GeometryCodingParams const & params)
void FeatureBuilder2::Serialize(SupportingData & data,
serial::GeometryCodingParams const & params) const
{
data.m_buffer.clear();

View file

@ -96,6 +96,7 @@ public:
void Serialize(TBuffer & data) const;
void SerializeBase(TBuffer & data, serial::GeometryCodingParams const & params,
bool saveAddInfo) const;
void SerializeBorder(serial::GeometryCodingParams const & params, TBuffer & data) const;
void Deserialize(TBuffer & data);
//@}
@ -258,9 +259,10 @@ public:
/// @name Overwrite from base_type.
//@{
bool PreSerialize(SupportingData const & data);
bool IsLocalityObject();
void SerializeLocalityObject(serial::GeometryCodingParams const & params, SupportingData & data);
void Serialize(SupportingData & data, serial::GeometryCodingParams const & params);
bool IsLocalityObject() const;
void SerializeLocalityObject(serial::GeometryCodingParams const & params,
SupportingData & data) const;
void Serialize(SupportingData & data, serial::GeometryCodingParams const & params) const;
//@}
feature::AddressData const & GetAddressData() const { return m_params.GetAddressData(); }

View file

@ -48,6 +48,12 @@ public:
/// \note See implementation operator() in derived class for cases when |f| cannot be
/// serialized.
virtual uint32_t operator()(FeatureBuilder1 const & f);
// Non-const overload of the call operator. The default implementation binds |f| to a const
// reference and forwards to operator()(FeatureBuilder1 const &), returning its result.
// Collectors that have to modify the feature can override this overload instead.
virtual uint32_t operator()(FeatureBuilder1 & f)
{
  // Binding to a const reference selects the const overload and avoids recursing into this one.
  auto const & f1 = f;
  return (*this)(f1);
}
virtual void Finish() {}
};
class FeaturesAndRawGeometryCollector : public FeaturesCollector

View file

@ -91,7 +91,10 @@ DEFINE_bool(generate_geometry, false,
"3rd pass - split and simplify geometry and triangles for features.");
DEFINE_bool(generate_index, false, "4th pass - generate index.");
DEFINE_bool(generate_search_index, false, "5th pass - generate search index.");
DEFINE_bool(generate_locality_index, false, "3rd pass - generate locality objects and locality index.");
DEFINE_bool(generate_geo_objects_index, false,
"Generate objects and index for server-side reverse geocoder.");
// When set, main() generates regions data, builds the regions index from it and then adds
// the borders to the index file.
DEFINE_bool(generate_regions, false,
            "Generate regions index and borders for server-side reverse geocoder.");
DEFINE_bool(dump_cities_boundaries, false, "Dump cities boundaries to a file");
DEFINE_bool(generate_cities_boundaries, false, "Generate cities boundaries section");
@ -212,9 +215,10 @@ int main(int argc, char ** argv)
GetStyleReader().SetCurrentStyle(MapStyleMerged);
// Load classificator only when necessary.
if (FLAGS_make_coasts || FLAGS_generate_features || FLAGS_generate_geometry || FLAGS_generate_locality_index ||
FLAGS_generate_index || FLAGS_generate_search_index || FLAGS_generate_cities_boundaries ||
FLAGS_calc_statistics || FLAGS_type_statistics || FLAGS_dump_types || FLAGS_dump_prefixes ||
if (FLAGS_make_coasts || FLAGS_generate_features || FLAGS_generate_geometry ||
FLAGS_generate_geo_objects_index || FLAGS_generate_regions || FLAGS_generate_index ||
FLAGS_generate_search_index || FLAGS_generate_cities_boundaries || FLAGS_calc_statistics ||
FLAGS_type_statistics || FLAGS_dump_types || FLAGS_dump_prefixes ||
FLAGS_dump_feature_names != "" || FLAGS_check_mwm || FLAGS_srtm_path != "" ||
FLAGS_make_routing_index || FLAGS_make_cross_mwm || FLAGS_make_transit_cross_mwm ||
FLAGS_generate_traffic_keys || FLAGS_transit_path != "" || FLAGS_ugc_data != "")
@ -266,7 +270,7 @@ int main(int argc, char ** argv)
genInfo.m_bucketNames.push_back(FLAGS_output);
}
if (FLAGS_generate_locality_index)
if (FLAGS_generate_geo_objects_index || FLAGS_generate_regions)
{
if (FLAGS_output.empty() || FLAGS_intermediate_data_path.empty())
{
@ -277,18 +281,43 @@ int main(int argc, char ** argv)
auto const locDataFile = my::JoinPath(path, FLAGS_output + LOC_DATA_FILE_EXTENSION);
auto const outFile = my::JoinPath(path, FLAGS_output + LOC_IDX_FILE_EXTENSION);
if (!feature::GenerateLocalityData(genInfo.m_tmpDir, FLAGS_nodes_list_path, locDataFile))
if (FLAGS_generate_geo_objects_index)
{
LOG(LCRITICAL, ("Error generating locality data."));
return -1;
if (!feature::GenerateGeoObjectsData(genInfo.m_tmpDir, FLAGS_nodes_list_path, locDataFile))
{
LOG(LCRITICAL, ("Error generating geo objects data."));
return -1;
}
LOG(LINFO, ("Saving geo objects index to", outFile));
if (!indexer::BuildGeoObjectsIndexFromDataFile(locDataFile, outFile))
{
LOG(LCRITICAL, ("Error generating geo objects index."));
return -1;
}
}
LOG(LINFO, ("Saving locality index to", outFile));
if (!indexer::BuildGeoObjectsIndexFromDataFile(locDataFile, outFile))
if (FLAGS_generate_regions)
{
LOG(LCRITICAL, ("Error generating locality index."));
return -1;
if (!feature::GenerateRegionsData(genInfo.m_tmpDir, locDataFile))
{
LOG(LCRITICAL, ("Error generating regions data."));
return -1;
}
LOG(LINFO, ("Saving regions index to", outFile));
if (!indexer::BuildRegionsIndexFromDataFile(locDataFile, outFile))
{
LOG(LCRITICAL, ("Error generating regions index."));
return -1;
}
if (!feature::GenerateBorders(genInfo.m_tmpDir, outFile))
{
LOG(LCRITICAL, ("Error generating regions borders."));
return -1;
}
}
}

View file

@ -24,6 +24,7 @@
#include <cstdint>
#include <fstream>
#include <functional>
#include <limits>
#include <set>
#include <vector>
@ -31,24 +32,56 @@
using namespace feature;
using namespace std;
class LocalityCollector : public FeaturesCollector
namespace
{
DISALLOW_COPY_AND_MOVE(LocalityCollector);
// Collector that stores serialized borders of area features.
// Features are accumulated through the FeaturesCollector base, which is constructed with
// |filename| + EXTENSION_TMP; Finish() then hands that file to |m_writer| under
// BORDERS_FILE_TAG. |m_writer| is opened on |filename| with FileWriter::OP_WRITE_EXISTING.
class BordersCollector : public FeaturesCollector
{
public:
  // @param filename - container file the borders are written to under BORDERS_FILE_TAG.
  explicit BordersCollector(string const & filename)
    : FeaturesCollector(filename + EXTENSION_TMP)
    , m_writer(filename, FileWriter::OP_WRITE_EXISTING)
  {
  }

  // FeaturesCollector overrides:
  // Serializes the border of |fb| and writes it out if |fb| is an area; other features are
  // ignored. Always returns 0.
  uint32_t operator()(FeatureBuilder1 & fb) override
  {
    if (fb.IsArea())
    {
      FeatureBuilder1::TBuffer buffer;
      fb.SerializeBorder(serial::GeometryCodingParams(), buffer);
      WriteFeatureBase(buffer, fb);
    }
    return 0;
  }

  // Flushes collected data, passes the collected data file to |m_writer| under
  // BORDERS_FILE_TAG and finishes the container.
  void Finish() override
  {
    // Flush first so that the data file is complete before it is handed to |m_writer|.
    Flush();
    m_writer.Write(m_datFile.GetName(), BORDERS_FILE_TAG);
    m_writer.Finish();
  }

private:
  FilesContainerW m_writer;

  DISALLOW_COPY_AND_MOVE(BordersCollector);
};
class LocalityCollector : public FeaturesCollector
{
public:
LocalityCollector(string const & fName, DataHeader const & header, uint32_t versionDate)
: FeaturesCollector(fName + EXTENSION_TMP)
, m_writer(fName)
LocalityCollector(string const & filename, DataHeader const & header, uint32_t versionDate)
: FeaturesCollector(filename + EXTENSION_TMP)
, m_writer(filename)
, m_header(header)
, m_versionDate(versionDate)
{
}
void Finish()
// FeaturesCollector overrides:
void Finish() override
{
{
FileWriter w = m_writer.GetWriter(VERSION_FILE_TAG);
@ -67,10 +100,12 @@ public:
m_writer.Finish();
}
void operator()(FeatureBuilder2 & fb)
uint32_t operator()(FeatureBuilder1 & fb1) override
{
auto & fb2 = static_cast<FeatureBuilder2 &>(fb1);
// Do not limit inner triangles number to save all geometry without additional sections.
GeometryHolder holder(fb, m_header, numeric_limits<uint32_t>::max() /* maxTrianglesNumber */);
GeometryHolder holder(fb2, m_header, numeric_limits<uint32_t>::max() /* maxTrianglesNumber */);
// Simplify and serialize geometry.
vector<m2::PointD> points;
@ -79,11 +114,11 @@ public:
SimplifyPoints(dist, scales::GetUpperScale(), holder.GetSourcePoints(), points);
// For areas we save outer geometry only.
if (fb.IsArea() && holder.NeedProcessTriangles())
if (fb2.IsArea() && holder.NeedProcessTriangles())
{
// At this point we don't need last point equal to first.
points.pop_back();
auto const & polys = fb.GetGeometry();
auto const & polys = fb2.GetGeometry();
if (polys.size() != 1)
{
points.clear();
@ -98,7 +133,7 @@ public:
m2::ConvexHull hull(points, 1e-16);
vector<m2::PointD> hullPoints = hull.Points();
holder.SetInner();
auto const id = fb.GetMostGenericOsmId();
auto const id = fb2.GetMostGenericOsmId();
CHECK(holder.TryToMakeStrip(hullPoints),
("Error while building tringles for object with OSM Id:", id.OsmId(),
"Type:", id.IsRelation() ? "Relation" : "Way", "points:", points,
@ -108,68 +143,65 @@ public:
}
auto & buffer = holder.GetBuffer();
if (fb.PreSerialize(buffer))
if (fb2.PreSerialize(buffer))
{
fb.SerializeLocalityObject(serial::GeometryCodingParams(), buffer);
WriteFeatureBase(buffer.m_buffer, fb);
fb2.SerializeLocalityObject(serial::GeometryCodingParams(), buffer);
WriteFeatureBase(buffer.m_buffer, fb2);
}
return 0;
}
private:
FilesContainerW m_writer;
DataHeader m_header;
uint32_t m_versionDate;
DISALLOW_COPY_AND_MOVE(LocalityCollector);
};
// Simplify geometry for the upper scale.
FeatureBuilder2 & GetFeatureBuilder2(FeatureBuilder1 & fb)
bool ParseNodes(string nodesFile, set<uint64_t> & nodeIds)
{
return static_cast<FeatureBuilder2 &>(fb);
}
if (nodesFile.empty())
return true;
namespace feature
{
bool GenerateLocalityData(string const & featuresDir, string const & nodesFile,
string const & dataFile)
{
DataHeader header;
header.SetGeometryCodingParams(serial::GeometryCodingParams());
header.SetScales({scales::GetUpperScale()});
set<uint64_t> nodeIds;
if (!nodesFile.empty())
ifstream stream(nodesFile);
if (!stream)
{
ifstream stream(nodesFile);
if (!stream)
LOG(LERROR, ("Could not open", nodesFile));
return false;
}
string line;
size_t lineNumber = 1;
while (getline(stream, line))
{
strings::SimpleTokenizer iter(line, " ");
uint64_t nodeId;
if (!iter || !strings::to_uint64(*iter, nodeId))
{
LOG(LERROR, ("Could not open", nodesFile));
LOG(LERROR, ("Error while parsing node id at line", lineNumber, "Line contents:", line));
return false;
}
string line;
size_t lineNumber = 1;
while (getline(stream, line))
{
strings::SimpleTokenizer iter(line, " ");
uint64_t nodeId;
if (!iter || !strings::to_uint64(*iter, nodeId))
{
LOG(LERROR, ("Error while parsing node id at line", lineNumber, "Line contents:", line));
return false;
}
nodeIds.insert(nodeId);
++lineNumber;
}
nodeIds.insert(nodeId);
++lineNumber;
}
return true;
}
using NeedSerialize = function<bool(FeatureBuilder1 & fb1)>;
bool GenerateLocalityDataImpl(FeaturesCollector & collector, NeedSerialize const & needSerialize,
string const & featuresDir, string const & dataFile)
{
// Transform features from raw format to LocalityObject format.
try
{
LocalityCollector collector(dataFile, header,
static_cast<uint32_t>(my::SecondsSinceEpoch()));
Platform::FilesList files;
Platform::GetFilesByExt(featuresDir, DATA_FILE_EXTENSION_TMP, files);
for (auto const & fileName : files)
for (auto const & filename : files)
{
auto const file = my::JoinFoldersToPath(featuresDir, fileName);
auto const file = my::JoinFoldersToPath(featuresDir, filename);
LOG(LINFO, ("Processing", file));
CalculateMidPoints midPoints;
@ -187,12 +219,8 @@ bool GenerateLocalityData(string const & featuresDir, string const & nodesFile,
FeatureBuilder1 f;
ReadFromSourceRowFormat(src, f);
// Emit object.
auto & fb2 = GetFeatureBuilder2(f);
if (fb2.IsLocalityObject() ||
(!fb2.GetOsmIds().empty() && nodeIds.count(fb2.GetMostGenericOsmId().EncodedId()) != 0))
{
collector(fb2);
}
if (needSerialize(f))
collector(f);
}
}
@ -206,4 +234,48 @@ bool GenerateLocalityData(string const & featuresDir, string const & nodesFile,
return true;
}
} // namespace
namespace feature
{
// Generates geo objects data from the raw feature files found in |featuresDir|.
// A feature is serialized when it is a locality object or when the encoded id of its most
// generic OSM id is listed in |nodesFile|.
// Returns false if |nodesFile| cannot be parsed; otherwise returns the result of
// GenerateLocalityDataImpl.
bool GenerateGeoObjectsData(string const & featuresDir, string const & nodesFile,
                            string const & dataFile)
{
  // Parse the requested ids first, so that a bad nodes file fails before the collector is created.
  set<uint64_t> requestedNodeIds;
  if (!ParseNodes(nodesFile, requestedNodeIds))
    return false;

  DataHeader header;
  header.SetGeometryCodingParams(serial::GeometryCodingParams());
  header.SetScales({scales::GetUpperScale()});

  auto const versionDate = static_cast<uint32_t>(my::SecondsSinceEpoch());
  LocalityCollector localityCollector(dataFile, header, versionDate);

  auto const needSerialize = [&requestedNodeIds](FeatureBuilder1 & fb) {
    if (static_cast<FeatureBuilder2 &>(fb).IsLocalityObject())
      return true;

    // Features without OSM ids cannot match the requested node list.
    if (fb.GetOsmIds().empty())
      return false;

    return requestedNodeIds.count(fb.GetMostGenericOsmId().EncodedId()) != 0;
  };

  return GenerateLocalityDataImpl(localityCollector, needSerialize, featuresDir, dataFile);
}
// Generates regions data from the raw feature files found in |featuresDir|.
// Every area feature is passed to a LocalityCollector writing to |dataFile|.
// Returns the result of GenerateLocalityDataImpl.
bool GenerateRegionsData(string const & featuresDir, string const & dataFile)
{
  // Only area features are treated as regions.
  auto const isArea = [](FeatureBuilder1 const & fb) { return fb.IsArea(); };

  DataHeader header;
  header.SetGeometryCodingParams(serial::GeometryCodingParams());
  header.SetScales({scales::GetUpperScale()});

  auto const versionDate = static_cast<uint32_t>(my::SecondsSinceEpoch());
  LocalityCollector regionsCollector(dataFile, header, versionDate);

  return GenerateLocalityDataImpl(regionsCollector, isArea, featuresDir, dataFile);
}
// Generates the borders section from the raw feature files found in |featuresDir|.
// Every area feature is passed to a BordersCollector created for |dataFile|.
// Returns the result of GenerateLocalityDataImpl.
bool GenerateBorders(string const & featuresDir, string const & dataFile)
{
  // Borders are stored for area features only.
  auto const isArea = [](FeatureBuilder1 const & fb) { return fb.IsArea(); };

  BordersCollector bordersCollector(dataFile);
  return GenerateLocalityDataImpl(bordersCollector, isArea, featuresDir, dataFile);
}
} // namespace feature

View file

@ -4,10 +4,20 @@
namespace feature
{
/// Generates data for LocalityIndexBuilder from input feature-dat-files.
/// @param featuresDir - path to folder with pregenerated features data;
/// @param nodesFile - path to file with list of node ids we need to add to output;
/// @param out - output file name;
bool GenerateLocalityData(std::string const & featuresDir, std::string const & nodesFile,
std::string const & out);
// Generates data for GeoObjectsIndexBuilder from input feature-dat-files.
// A feature is written when it is a locality object or when its id is listed in |nodesFile|.
// @param featuresDir - path to folder with pregenerated features data;
// @param nodesFile - path to file with list of node ids we need to add to output, one id as
//                    the first space-separated token of each line; may be empty, in which
//                    case no additional ids are requested;
// @param out - output file name;
// @return false if |nodesFile| cannot be opened or contains a line that does not start with
//         an unsigned integer id; otherwise the result of processing the feature files.
bool GenerateGeoObjectsData(std::string const & featuresDir, std::string const & nodesFile,
std::string const & out);
// Generates data for RegionsIndexBuilder from input feature-dat-files.
// Only area features are written to the output.
// @param featuresDir - path to folder with pregenerated features data;
// @param out - output file name;
// @return the result of processing the feature files (presumably false on failure - confirm
//         against the implementation).
bool GenerateRegionsData(std::string const & featuresDir, std::string const & out);
// Generates borders section for server-side reverse geocoder from input feature-dat-files.
// Only area features contribute borders.
// @param featuresDir - path to folder with pregenerated features data;
// @param out - output file to add borders section; it is opened with
//              FileWriter::OP_WRITE_EXISTING, so it is presumably expected to exist already
//              (the generator tool passes the previously built regions index file);
// @return the result of processing the feature files (presumably false on failure - confirm
//         against the implementation).
bool GenerateBorders(std::string const & featuresDir, std::string const & out);
} // namespace feature