From 5610308496b75b637ca533abd1a486ab9b8ca663 Mon Sep 17 00:00:00 2001
From: Alex Zolotarev
Date: Sat, 15 Jan 2011 13:39:03 +0200
Subject: [PATCH] Added base kml parser code

---
 indexer/indexer_tool/indexer_tool.cpp |   9 ++
 indexer/indexer_tool/indexer_tool.pro |   2 +
 indexer/indexer_tool/kml_parser.cpp   | 207 ++++++++++++++++++++++++++
 indexer/indexer_tool/kml_parser.hpp   |  47 ++++++
 4 files changed, 265 insertions(+)
 create mode 100644 indexer/indexer_tool/kml_parser.cpp
 create mode 100644 indexer/indexer_tool/kml_parser.hpp

diff --git a/indexer/indexer_tool/indexer_tool.cpp b/indexer/indexer_tool/indexer_tool.cpp
index d0646f7789..85b6ba38e8 100644
--- a/indexer/indexer_tool/indexer_tool.cpp
+++ b/indexer/indexer_tool/indexer_tool.cpp
@@ -5,6 +5,7 @@
 #include "feature_bucketer.hpp"
 #include "grid_generator.hpp"
 #include "statistics.hpp"
+#include "kml_parser.hpp"
 
 #include "../classif_routine.hpp"
 #include "../features_vector.hpp"
@@ -48,6 +49,7 @@ DEFINE_int32(bucketing_level, 7, "Level of cell ids for bucketing.");
 DEFINE_int32(worldmap_max_zoom, -1, "If specified, features for zoomlevels [0..this_value] "
              " which are enabled in classificator will be added to the separate world.map");
 DEFINE_bool(world_only, false, "Generate only world features for given worldmap_max_zoom");
+DEFINE_bool(split_by_polygons, false, "Use kml shape files to split planet by regions and countries");
 
 string AddSlashIfNeeded(string const & str)
 {
@@ -184,5 +186,12 @@ int main(int argc, char ** argv)
     update::GenerateFilesList(path);
   }
 
+  if (FLAGS_split_by_polygons)
+  {
+    kml::CountryPolygons country;
+    kml::LoadPolygonsFromKml(FLAGS_output, country);
+    LOG(LINFO, (country.m_name, country.m_rect));
+  }
+
   return 0;
 }
diff --git a/indexer/indexer_tool/indexer_tool.pro b/indexer/indexer_tool/indexer_tool.pro
index 82ddbc5ff1..4e88736d21 100644
--- a/indexer/indexer_tool/indexer_tool.pro
+++ b/indexer/indexer_tool/indexer_tool.pro
@@ -25,6 +25,7 @@ SOURCES += \
   update_generator.cpp \
   grid_generator.cpp \
   statistics.cpp \
+  kml_parser.cpp \
 
 HEADERS += \
   osm_element.hpp \
@@ -37,3 +38,4 @@ HEADERS += \
   feature_bucketer.hpp \
   grid_generator.hpp \
   statistics.hpp \
+  kml_parser.hpp \
diff --git a/indexer/indexer_tool/kml_parser.cpp b/indexer/indexer_tool/kml_parser.cpp
new file mode 100644
index 0000000000..db3f27741a
--- /dev/null
+++ b/indexer/indexer_tool/kml_parser.cpp
@@ -0,0 +1,207 @@
+#include "kml_parser.hpp"
+
+#include "../../base/string_utils.hpp"
+
+#include "../../coding/parse_xml.hpp"
+#include "../../coding/file_reader.hpp"
+
+#include "../../indexer/cell_id.hpp"
+#include "../../indexer/mercator.hpp"
+
+#include "../../storage/simple_tree.hpp"
+
+#include "../../std/iostream.hpp"
+#include "../../std/vector.hpp"
+#include "../../std/fstream.hpp"
+
+namespace kml
+{
+
+  /// Handler passed to ParseXML: keeps the stack of open tags and fills
+  /// CountryPolygons with the placemark name and outer boundary polygons.
+  class KmlParser
+  {
+    /// names of the currently open elements, innermost last
+    vector<string> m_tags;
+    /// buffer for text with points
+    string m_data;
+
+    CountryPolygons & m_country;
+
+  public:
+    KmlParser(CountryPolygons & country);
+
+    bool Push(string const & element);
+    void Pop(string const & element);
+    void AddAttr(string const &, string const &) {}
+    void CharData(string const & data);
+  };
+
+  KmlParser::KmlParser(CountryPolygons & country) : m_country(country)
+  {
+  }
+
+  /// Remembers the opened element; always returns true.
+  bool KmlParser::Push(string const & element)
+  {
+    m_tags.push_back(element);
+
+    return true;
+  }
+
+  typedef boost::polygon::point_data<TCoordType> Point;
+  typedef vector<Point> PointsContainer;
+
+  /// Tokenizer callback: parses one "lon,lat[,alt]" tuple, extends the
+  /// bounding rect and stores the point in cell id coordinates.
+  class PointsCollector
+  {
+    PointsContainer & m_points;
+    m2::RectD & m_rect;
+
+  public:
+    PointsCollector(PointsContainer & container, m2::RectD & rect)
+      : m_points(container), m_rect(rect) {}
+
+    void operator()(string const & lonLat)
+    {
+      // KML stores every tuple as longitude,latitude[,altitude]
+      size_t const firstCommaPos = lonLat.find(',');
+      CHECK(firstCommaPos != string::npos, ("invalid lonlat", lonLat));
+      string const lonStr = lonLat.substr(0, firstCommaPos);
+      double lon;
+      CHECK(utils::to_double(lonStr, lon), ("invalid lon", lonStr));
+      // optional altitude after the second comma is ignored
+      size_t const secondCommaPos = lonLat.find(',', firstCommaPos + 1);
+      string latStr;
+      if (secondCommaPos == string::npos)
+        latStr = lonLat.substr(firstCommaPos + 1);
+      else
+        latStr = lonLat.substr(firstCommaPos + 1, secondCommaPos - firstCommaPos - 1);
+      double lat;
+      CHECK(utils::to_double(latStr, lat), ("invalid lat", latStr));
+      // to mercator
+      double const x = MercatorBounds::LonToX(lon);
+      double const y = MercatorBounds::LatToY(lat);
+      m_rect.Add(m2::PointD(x, y));
+      // convert points to uint32_t
+      // NOTE(review): template arguments assumed to be these - confirm
+      typedef CellIdConverter<MercatorBounds, RectId> CellIdConverterType;
+      uint32_t const ix = static_cast<uint32_t>(CellIdConverterType::XToCellIdX(x));
+      uint32_t const iy = static_cast<uint32_t>(CellIdConverterType::YToCellIdY(y));
+      m_points.push_back(Point(ix, iy));
+    }
+  };
+
+  /// Closes @p element. On "coordinates" the buffered text is turned into
+  /// points; an outer boundary ring is stored as a polygon.
+  void KmlParser::Pop(string const & element)
+  {
+    if (element == "Placemark")
+    {
+    }
+    else if (element == "coordinates")
+    {
+      PointsContainer points;
+      PointsCollector collector(points, m_country.m_rect);
+      // tuples are whitespace separated: tab added, \a kept as it was
+      utils::TokenizeString(m_data, " \n\r\t\a", collector);
+      CHECK(!points.empty(), ());
+
+      size_t const size = m_tags.size();
+      CHECK(size > 3, ());
+      CHECK(m_tags[size - 2] == "LinearRing", ());
+
+      using namespace boost::polygon::operators;
+
+      if (m_tags[size - 3] == "outerBoundaryIs")
+      {
+        Polygon polygon;
+        set_points(polygon, points.begin(), points.end());
+        m_country.m_polygons.push_back(polygon);
+      }
+      else if (m_tags[size - 3] == "innerBoundaryIs")
+      { // currently we're ignoring holes
+      }
+      else
+      {
+        CHECK(false, ("Unsupported tag", m_tags[size - 3]));
+      }
+
+      m_data.clear();
+    }
+    else if (element == "Polygon")
+    {
+    }
+
+    m_tags.pop_back();
+  }
+
+  /// Stores the Placemark name and buffers the text of polygon rings.
+  void KmlParser::CharData(string const & data)
+  {
+    size_t const size = m_tags.size();
+
+    if (size > 1 && m_tags[size - 1] == "name" && m_tags[size - 2] == "Placemark")
+      m_country.m_name = data;
+    else if (size > 4 && m_tags[size - 1] == "coordinates"
+        && m_tags[size - 2] == "LinearRing" && m_tags[size - 4] == "Polygon")
+    {
+      // text block can be really huge
+      m_data.append(data);
+    }
+  }
+
+  /// Parses kmlFile and fills country; CHECK fails if the xml is not parsed.
+  void LoadPolygonsFromKml(string const & kmlFile, CountryPolygons & country)
+  {
+    KmlParser parser(country);
+    {
+      FileReader file(kmlFile);
+      ReaderSource<FileReader> source(file);
+      CHECK(ParseXML(source, parser, true), ("Error while parsing", kmlFile));
+    }
+  }
+
+  typedef SimpleTree<CountryPolygons> TCountriesTree;
+  /// Reads a text file where 1-3 leading spaces define the tree depth of a line.
+  /// @return false on deeper indentation, else countries.SiblingsCount() > 0
+  bool LoadCountriesList(string const & countriesListFile, TCountriesTree & countries)
+  {
+    countries.Clear();
+    ifstream stream(countriesListFile.c_str());
+    std::string line;
+    CountryPolygons * currentCountry = &countries.Value();
+    // getline in the condition stops right after the last line is read
+    while (std::getline(stream, line))
+    {
+      if (line.empty())
+        continue;
+
+      // calculate spaces - depth inside the tree
+      int spaces = 0;
+      for (size_t i = 0; i < line.size(); ++i)
+      {
+        if (line[i] == ' ')
+          ++spaces;
+        else
+          break;
+      }
+      switch (spaces)
+      {
+      case 0: // this is value for current tree node
+        CHECK(false, ());
+        break;
+      case 1: // country group
+      case 2: // country name
+      case 3: // region
+        currentCountry = &countries.AddAtDepth(spaces - 1, CountryPolygons(line.substr(spaces)));
+        break;
+      default:
+        return false;
+      }
+    }
+    return countries.SiblingsCount() > 0;
+  }
+
+}
diff --git a/indexer/indexer_tool/kml_parser.hpp b/indexer/indexer_tool/kml_parser.hpp
new file mode 100644
index 0000000000..19f078deee
--- /dev/null
+++ b/indexer/indexer_tool/kml_parser.hpp
@@ -0,0 +1,47 @@
+#pragma once
+
+#include "../../geometry/rect2d.hpp"
+
+#include "../../std/string.hpp"
+#include "../../std/vector.hpp"
+
+// NOTE(review): header name assumed, it was lost in extraction - confirm
+#include <boost/polygon/polygon.hpp>
+
+namespace boost
+{
+  namespace polygon
+  {
+    /// Lets boost::polygon work with unsigned 32-bit coordinates.
+    template <>
+    struct coordinate_traits<uint32_t> {
+      typedef uint32_t coordinate_type;
+      typedef long double area_type;
+      typedef int64_t manhattan_area_type;
+      typedef uint64_t unsigned_area_type;
+      typedef int64_t coordinate_difference;
+      typedef long double coordinate_distance;
+    };
+  }
+}
+
+namespace kml
+{
+
+  typedef uint32_t TCoordType;
+  typedef boost::polygon::polygon_data<TCoordType> Polygon;
+  typedef std::vector<Polygon> PolygonSet;
+
+  /// Name, polygons and bounding rect of one country.
+  struct CountryPolygons
+  {
+    CountryPolygons(string const & name = "") : m_name(name) {}
+    PolygonSet m_polygons;
+    string m_name;
+    m2::RectD m_rect;
+  };
+
+  /// Parses kmlFile and fills country with its name, polygons and rect.
+  void LoadPolygonsFromKml(string const & kmlFile, CountryPolygons & country);
+
+}