From ab919c89216198f804ce9e27be148e2b9dc90f26 Mon Sep 17 00:00:00 2001 From: Alex Zolotarev Date: Sun, 16 Jan 2011 18:53:05 +0200 Subject: [PATCH] [indexer_tool] Added --split-by-polygons for countries --- data/polygons.lst | 2 + indexer/feature.hpp | 11 ++ indexer/indexer_tool/feature_generator.cpp | 28 +++-- indexer/indexer_tool/feature_generator.hpp | 6 +- indexer/indexer_tool/indexer_tool.cpp | 14 +-- indexer/indexer_tool/indexer_tool.pro | 1 + indexer/indexer_tool/kml_parser.cpp | 131 +++++++++++---------- indexer/indexer_tool/kml_parser.hpp | 32 ++--- indexer/indexer_tool/polygonizer.hpp | 118 +++++++++++++++++++ 9 files changed, 236 insertions(+), 107 deletions(-) create mode 100644 data/polygons.lst create mode 100644 indexer/indexer_tool/polygonizer.hpp diff --git a/data/polygons.lst b/data/polygons.lst new file mode 100644 index 0000000000..31bf0bc2f7 --- /dev/null +++ b/data/polygons.lst @@ -0,0 +1,2 @@ +Belarus +Lithuania \ No newline at end of file diff --git a/indexer/feature.hpp b/indexer/feature.hpp index 9a5d2d9a64..7202b2e986 100644 --- a/indexer/feature.hpp +++ b/indexer/feature.hpp @@ -83,6 +83,17 @@ public: { for_each(m_Geometry.begin(), m_Geometry.end(), bind(ref(toDo), _1)); } + + // stops processing when functor returns false + template + void ForEachTruePointRef(ToDo & toDo) const + { + for (points_t::const_iterator it = m_Geometry.begin(); it != m_Geometry.end(); ++it) + if (!toDo(*it)) + return; + } + + m2::PointD CenterPoint() const { return m_Center; } //@} bool PreSerialize() { return m_bPoint || m_bLinear || m_bArea; } diff --git a/indexer/indexer_tool/feature_generator.cpp b/indexer/indexer_tool/feature_generator.cpp index 74cad6c2aa..31c0d0a39c 100644 --- a/indexer/indexer_tool/feature_generator.cpp +++ b/indexer/indexer_tool/feature_generator.cpp @@ -2,6 +2,7 @@ #include "feature_bucketer.hpp" #include "data_cache_file.hpp" #include "osm_element.hpp" +#include "polygonizer.hpp" #include "../../storage/defines.hpp" @@ -285,9 +286,6 @@ public: template class TParser> bool GenerateImpl(GenerateInfo & info) { - CHECK_GREATER_OR_EQUAL(info.cellBucketingLevel, 0, ()); - CHECK_LESS(info.cellBucketingLevel, 10, ()); - try { TNodesHolder nodes(info.dir + NODES_FILE); @@ -299,15 +297,29 @@ bool GenerateImpl(GenerateInfo & info) FeaturesCollector::InitDataType collectorInitData(info.datFilePrefix, info.datFileSuffix); - typedef CellFeatureBucketer - FeatureBucketerType; - FeatureBucketerType bucketer(info.cellBucketingLevel, collectorInitData, - info.m_maxScaleForWorldFeatures, info.m_worldOnly); + if (info.m_splitByPolygons) { + typedef Polygonizer FeaturePolygonizerType; + // prefix is data dir + FeaturePolygonizerType bucketer(info.datFilePrefix, collectorInitData); + TParser parser(bucketer, holder); + ParseXMLFromStdIn(parser); + bucketer.GetBucketNames(MakeBackInsertFunctor(info.bucketNames)); + } + else + { + CHECK_GREATER_OR_EQUAL(info.cellBucketingLevel, 0, ()); + CHECK_LESS(info.cellBucketingLevel, 10, ()); + + typedef CellFeatureBucketer + FeatureBucketerType; + FeatureBucketerType bucketer(info.cellBucketingLevel, collectorInitData, + info.m_maxScaleForWorldFeatures, info.m_worldOnly); TParser parser(bucketer, holder); ParseXMLFromStdIn(parser); + bucketer.GetBucketNames(MakeBackInsertFunctor(info.bucketNames)); } - bucketer.GetBucketNames(MakeBackInsertFunctor(info.bucketNames)); + } catch (Reader::Exception const & e) { diff --git a/indexer/indexer_tool/feature_generator.hpp b/indexer/indexer_tool/feature_generator.hpp index 90844d0826..23e95f0b90 100644 --- a/indexer/indexer_tool/feature_generator.hpp +++ b/indexer/indexer_tool/feature_generator.hpp @@ -1,5 +1,5 @@ - #pragma once + #include "../../indexer/osm_decl.hpp" #include "../../geometry/rect2d.hpp" @@ -15,7 +15,8 @@ namespace feature { struct GenerateInfo { - GenerateInfo() : m_maxScaleForWorldFeatures(-1), m_worldOnly(false) {} + GenerateInfo() + : m_maxScaleForWorldFeatures(-1), m_worldOnly(false), m_splitByPolygons(false) {} string dir, datFilePrefix, datFileSuffix; int cellBucketingLevel; vector bucketNames; @@ -24,6 +25,7 @@ namespace feature /// @note if -1, world file will not be created int m_maxScaleForWorldFeatures; bool m_worldOnly; + bool m_splitByPolygons; }; bool GenerateFeatures(GenerateInfo & info, bool lightNodes); diff --git a/indexer/indexer_tool/indexer_tool.cpp b/indexer/indexer_tool/indexer_tool.cpp index 85b6ba38e8..55a546b9ad 100644 --- a/indexer/indexer_tool/indexer_tool.cpp +++ b/indexer/indexer_tool/indexer_tool.cpp @@ -5,7 +5,6 @@ #include "feature_bucketer.hpp" #include "grid_generator.hpp" #include "statistics.hpp" -#include "kml_parser.hpp" #include "../classif_routine.hpp" #include "../features_vector.hpp" @@ -124,14 +123,16 @@ int main(int argc, char ** argv) else genInfo.datFilePrefix = path + FLAGS_output + (FLAGS_bucketing_level > 0 ? "-" : ""); genInfo.datFileSuffix = DATA_FILE_EXTENSION; + + // split data by countries polygons + genInfo.m_splitByPolygons = FLAGS_split_by_polygons; + genInfo.cellBucketingLevel = FLAGS_bucketing_level; genInfo.m_maxScaleForWorldFeatures = FLAGS_worldmap_max_zoom; genInfo.m_worldOnly = FLAGS_world_only; if (!feature::GenerateFeatures(genInfo, FLAGS_use_light_nodes)) - { return -1; - } for (size_t i = 0; i < genInfo.bucketNames.size(); ++i) genInfo.bucketNames[i] = genInfo.datFilePrefix + genInfo.bucketNames[i] + genInfo.datFileSuffix; @@ -186,12 +187,5 @@ int main(int argc, char ** argv) update::GenerateFilesList(path); } - if (FLAGS_split_by_polygons) - { - kml::CountryPolygons country; - kml::LoadPolygonsFromKml(FLAGS_output, country); - LOG(LINFO, (country.m_name, country.m_rect)); - } - return 0; } diff --git a/indexer/indexer_tool/indexer_tool.pro b/indexer/indexer_tool/indexer_tool.pro index 4e88736d21..84cb89caa6 100644 --- a/indexer/indexer_tool/indexer_tool.pro +++ b/indexer/indexer_tool/indexer_tool.pro @@ -39,3 +39,4 @@ HEADERS += \ grid_generator.hpp \ statistics.hpp \ kml_parser.hpp \ + polygonizer.hpp \ diff --git a/indexer/indexer_tool/kml_parser.cpp b/indexer/indexer_tool/kml_parser.cpp index db3f27741a..60a1393c50 100644 --- a/indexer/indexer_tool/kml_parser.cpp +++ b/indexer/indexer_tool/kml_parser.cpp @@ -5,15 +5,17 @@ #include "../../coding/parse_xml.hpp" #include "../../coding/file_reader.hpp" +#include "../../geometry/rect2d.hpp" + #include "../../indexer/cell_id.hpp" #include "../../indexer/mercator.hpp" -#include "../../storage/simple_tree.hpp" - -#include "../../std/iostream.hpp" -#include "../../std/vector.hpp" #include "../../std/fstream.hpp" +#define POLYGONS_FILE "polygons.lst" +#define BORDERS_DIR "borders/" +#define BORDERS_EXTENSION ".kml" + namespace kml { @@ -45,33 +47,31 @@ namespace kml return true; } - typedef boost::polygon::point_data Point; - typedef vector PointsContainer; - + template class PointsCollector { - PointsContainer & m_points; + RegionT & m_region; m2::RectD & m_rect; public: - PointsCollector(PointsContainer & container, m2::RectD & rect) - : m_points(container), m_rect(rect) {} + PointsCollector(RegionT & region, m2::RectD & rect) + : m_region(region), m_rect(rect) {} void operator()(string const & latLon) { size_t const firstCommaPos = latLon.find(','); CHECK(firstCommaPos != string::npos, ("invalid latlon", latLon)); - string const latStr = latLon.substr(0, firstCommaPos); - double lat; - CHECK(utils::to_double(latStr, lat), ("invalid lat", latStr)); - size_t const secondCommaPos = latLon.find(',', firstCommaPos + 1); - string lonStr; - if (secondCommaPos == string::npos) - lonStr = latLon.substr(firstCommaPos + 1); - else - lonStr = latLon.substr(firstCommaPos + 1, secondCommaPos - firstCommaPos - 1); + string const lonStr = latLon.substr(0, firstCommaPos); double lon; CHECK(utils::to_double(lonStr, lon), ("invalid lon", lonStr)); + size_t const secondCommaPos = latLon.find(',', firstCommaPos + 1); + string latStr; + if (secondCommaPos == string::npos) + latStr = latLon.substr(firstCommaPos + 1); + else + latStr = latLon.substr(firstCommaPos + 1, secondCommaPos - firstCommaPos - 1); + double lat; + CHECK(utils::to_double(latStr, lat), ("invalid lon", latStr)); // to mercator double const x = MercatorBounds::LonToX(lon); double const y = MercatorBounds::LatToY(lat); @@ -80,7 +80,7 @@ namespace kml typedef CellIdConverter CellIdConverterType; uint32_t const ix = static_cast(CellIdConverterType::XToCellIdX(x)); uint32_t const iy = static_cast(CellIdConverterType::YToCellIdY(y)); - m_points.push_back(Point(ix, iy)); + m_region.AddPoint(Region::value_type(ix, iy)); } }; @@ -91,22 +91,21 @@ namespace kml } else if (element == "coordinates") { - PointsContainer points; - PointsCollector collector(points, m_country.m_rect); - utils::TokenizeString(m_data, " \n\r\a", collector); - CHECK(!points.empty(), ()); - size_t const size = m_tags.size(); CHECK(m_tags.size() > 3, ()); CHECK(m_tags[size - 2] == "LinearRing", ()); - using namespace boost::polygon::operators; - if (m_tags[size - 3] == "outerBoundaryIs") { - Polygon polygon; - set_points(polygon, points.begin(), points.end()); - m_country.m_polygons.push_back(polygon); + Region reg; + m2::RectD rect; + PointsCollector collector(reg, rect); + utils::TokenizeString(m_data, " \n\r\a", collector); + if (reg.IsValid()) + { + m_country.m_regions.push_back(reg); + m_country.m_rect.Add(rect); + } } else if (m_tags[size - 3] == "innerBoundaryIs") { // currently we're ignoring holes @@ -139,53 +138,61 @@ namespace kml } } - void LoadPolygonsFromKml(string const & kmlFile, CountryPolygons & country) + bool LoadPolygonsFromKml(string const & kmlFile, CountryPolygons & country) { KmlParser parser(country); + try { FileReader file(kmlFile); ReaderSource source(file); - CHECK(ParseXML(source, parser, true), ("Error while parsing", kmlFile)); + return ParseXML(source, parser, true); } + catch (std::exception const &) + { + } + return false; } - typedef SimpleTree TCountriesTree; - bool LoadCountriesList(string const & countriesListFile, TCountriesTree & countries) + class PolygonLoader { - countries.Clear(); - ifstream stream(countriesListFile.c_str()); + string m_baseDir; + CountryPolygons & m_out; + + public: + PolygonLoader(string const & basePolygonsDir, CountryPolygons & polygons) + : m_baseDir(basePolygonsDir), m_out(polygons) {} + void operator()(string const & name) + { + if (m_out.m_name.empty()) + m_out.m_name = name; + CountryPolygons current; + if (LoadPolygonsFromKml(m_baseDir + BORDERS_DIR + name + BORDERS_EXTENSION, current) + && current.m_regions.size()) + { + m_out.m_regions.insert(m_out.m_regions.end(), + current.m_regions.begin(), current.m_regions.end()); + m_out.m_rect.Add(current.m_rect); + } + } + }; + + bool LoadCountriesList(string const & baseDir, CountriesContainerT & countries) + { + countries.clear(); + ifstream stream((baseDir + POLYGONS_FILE).c_str()); std::string line; - CountryPolygons * currentCountry = &countries.Value(); + while (stream.good()) { std::getline(stream, line); if (line.empty()) continue; - - // calculate spaces - depth inside the tree - int spaces = 0; - for (size_t i = 0; i < line.size(); ++i) - { - if (line[i] == ' ') - ++spaces; - else - break; - } - switch (spaces) - { - case 0: // this is value for current tree node - CHECK(false, ()); - break; - case 1: // country group - case 2: // country name - case 3: // region - currentCountry = &countries.AddAtDepth(spaces - 1, CountryPolygons(line.substr(spaces))); - break; - default: - return false; - } + CountryPolygons country; + PolygonLoader loader(baseDir, country); + utils::TokenizeString(line, ",", loader); + if (!country.m_regions.empty()) + countries.push_back(country); } - return countries.SiblingsCount() > 0; + return !countries.empty(); } - } diff --git a/indexer/indexer_tool/kml_parser.hpp b/indexer/indexer_tool/kml_parser.hpp index 19f078deee..d2b1a8a8e3 100644 --- a/indexer/indexer_tool/kml_parser.hpp +++ b/indexer/indexer_tool/kml_parser.hpp @@ -1,43 +1,25 @@ #pragma once -#include "../../geometry/rect2d.hpp" +#include "../../geometry/region2d.hpp" #include "../../std/string.hpp" #include "../../std/vector.hpp" -#include - -namespace boost -{ - namespace polygon - { - template <> - struct coordinate_traits { - typedef uint32_t coordinate_type; - typedef long double area_type; - typedef int64_t manhattan_area_type; - typedef uint64_t unsigned_area_type; - typedef int64_t coordinate_difference; - typedef long double coordinate_distance; - }; - } -} - namespace kml { - - typedef uint32_t TCoordType; - typedef boost::polygon::polygon_data Polygon; - typedef std::vector PolygonSet; + typedef m2::RegionU Region; + typedef std::vector RegionsContainerT; struct CountryPolygons { CountryPolygons(string const & name = "") : m_name(name) {} - PolygonSet m_polygons; + RegionsContainerT m_regions; string m_name; + /// limit rect for all country polygons m2::RectD m_rect; }; - void LoadPolygonsFromKml(string const & kmlFile, CountryPolygons & country); + typedef vector CountriesContainerT; + bool LoadCountriesList(string const & baseDir, CountriesContainerT & countries); } diff --git a/indexer/indexer_tool/polygonizer.hpp b/indexer/indexer_tool/polygonizer.hpp new file mode 100644 index 0000000000..e358d0c8ab --- /dev/null +++ b/indexer/indexer_tool/polygonizer.hpp @@ -0,0 +1,118 @@ +#pragma once + +#include "../../base/base.hpp" + +#include "../../coding/file_writer.hpp" + +#include "../../geometry/rect2d.hpp" + +#include "../../indexer/feature.hpp" +#include "../../indexer/feature_visibility.hpp" +#include "../../indexer/cell_id.hpp" + +#include "../../std/string.hpp" + +#include + +#include "kml_parser.hpp" + +namespace feature +{ + // Groups features according to country polygons + template + class Polygonizer + { + public: + Polygonizer(string const & dir, typename FeatureOutT::InitDataType const & featureOutInitData) + : m_FeatureOutInitData(featureOutInitData) + { + CHECK(kml::LoadCountriesList(dir, m_countries), ("Error loading polygons")); + LOG(LINFO, ("Loaded polygons count for regions:")); + for (size_t i = 0; i < m_countries.size(); ++i) + { + LOG(LINFO, (m_countries[i].m_name, m_countries[i].m_regions.size())); + } + + m_Buckets.resize(m_countries.size()); + } + + struct PointChecker + { + typedef CellIdConverter CellIdConverterType; + + kml::RegionsContainerT const & m_regions; + bool m_belongs; + + PointChecker(kml::RegionsContainerT const & regions) + : m_regions(regions), m_belongs(false) {} + + bool operator()(m2::PointD const & pt) + { + for (size_t i = 0; i < m_regions.size(); ++i) + { + m2::PointU uPoint(static_cast(CellIdConverterType::XToCellIdX(pt.x)), + static_cast(CellIdConverterType::YToCellIdY(pt.y))); + if (m_regions[i].Contains(uPoint)) + { + m_belongs = true; + // stop points processing + return false; + } + } + // continue with next point + return true; + } + }; + + void operator () (FeatureBuilder1 const & fb) + { + m2::RectD const limitRect = fb.GetLimitRect(); + for (uint32_t i = 0; i < m_Buckets.size(); ++i) + { + // First quick and dirty limit rect intersection. + if (m_countries[i].m_rect.IsIntersect(limitRect)) + { + PointChecker isPointContained(m_countries[i].m_regions); + // feature can be without geometry but with only center point + if (fb.GetPointsCount()) + fb.ForEachTruePointRef(isPointContained); + else + isPointContained.operator ()(fb.CenterPoint()); + + if (isPointContained.m_belongs) + { + if (!m_Buckets[i].m_pOut) + m_Buckets[i].m_pOut = new FeatureOutT(BucketName(i), m_FeatureOutInitData); + + (*(m_Buckets[i].m_pOut))(fb); + } + } + } + } + + template void GetBucketNames(F f) const + { + for (uint32_t i = 0; i < m_Buckets.size(); ++i) + if (m_Buckets[i].m_pOut) + f(BucketName(i)); + } + + private: + inline string BucketName(uint32_t i) const + { + return m_countries[i].m_name; + } + + struct Bucket + { + Bucket() : m_pOut(NULL) {} + ~Bucket() { delete m_pOut; } + + FeatureOutT * m_pOut; + }; + + typename FeatureOutT::InitDataType m_FeatureOutInitData; + vector m_Buckets; + kml::CountriesContainerT m_countries; + }; +}