Merge branch 'master' into remarks

This commit is contained in:
Nicola Jordan 2018-02-12 16:47:04 +01:00 committed by GitHub
commit f8376661e9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 2848 additions and 44 deletions

3
.gitignore vendored
View file

@ -6,6 +6,9 @@
*.gz
*.csv
*.pyc
*.pbf
*.lst
*.user
private/
data/
__pycache__/

View file

@ -2,6 +2,16 @@
## master branch
* Support for categories: `category_tag` and `categories` parameters in a profile.
* LibOsmium-based C++ filtering script for categories.
* More than one tag value works as "one of": `[('amenity', 'cafe', 'restaurant')]`.
* Query can be a list of queries, providing for "OR" clause. An example:
`[[('amenity', 'swimming_pool')], [('leisure', 'swimming_pool')]]`
* Parameters for profiles, using `-p` argument.
* No more default imports solely for profiles: import `re` and `zipfile` yourself now.
## 1.2.3
_Released 2017-12-29_

View file

@ -29,7 +29,7 @@ For a simplest case, run:
conflate <profile.py> -o result.osm
You might want to add ``-v`` to get status messages, and other arguments
You might want to add other arguments
to pass a dataset file or write the resulting osmChange somewhere. Run
``conflate -h`` to see a list of arguments.

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import argparse
import codecs
import json
import kdtree
import logging
import math
@ -8,10 +9,6 @@ import requests
import os
import sys
from io import BytesIO
import json # for profiles
import re # for profiles
import zipfile # for profiles
from collections import defaultdict # for profiles
try:
from .version import __version__
except ImportError:
@ -81,6 +78,7 @@ class OSMPoint(SourcePoint):
self.version = version
self.members = None
self.action = None
self.categories = categories or set()
self.remarks = None
def copy(self):
@ -210,27 +208,38 @@ class OsmConflator:
(k, v) turns into [k=v], (k,) into [k], (k, None) into [!k], (k, "~v") into [k~v]."""
tags = self.profile.get(
'query', required="a list of tuples. E.g. [('amenity', 'cafe'), ('name', '~Mc.*lds')]")
tag_strs = []
if isinstance(tags, str):
tag_str = tags
tag_strs = [tags]
else:
tag_str = ''
for t in tags:
if len(t) == 1:
q = '"{}"'.format(t[0])
elif t[1] is None or len(t[1]) == 0:
q = '"!{}"'.format(t[0])
elif t[1][0] == '~':
q = '"{}"~"{}"'.format(t[0], t[1][1:])
else:
q = '"{}"="{}"'.format(t[0], t[1])
tag_str += '[' + q + ']'
if not isinstance(tags[0], str) and isinstance(tags[0][0], str):
tags = [tags]
for tags_q in tags:
if isinstance(tags_q, str):
tag_strs.append(tags_q)
continue
tag_str = ''
for t in tags_q:
if len(t) == 1:
q = '"{}"'.format(t[0])
elif t[1] is None or len(t[1]) == 0:
q = '"!{}"'.format(t[0])
elif t[1][0] == '~':
q = '"{}"~"{}",i'.format(t[0], t[1][1:])
elif len(t) > 2:
q = '"{}"~"^({})$"'.format(t[0], '|'.join(t[1:]))
else:
q = '"{}"="{}"'.format(t[0], t[1])
tag_str += '[' + q + ']'
tag_strs.append(tag_str)
timeout = self.profile.get('overpass_timeout', 120)
query = '[out:xml]{};('.format('' if timeout is None else '[timeout:{}]'.format(timeout))
for bbox in bboxes:
bbox_str = '' if bbox is None else '(' + ','.join([str(x) for x in bbox]) + ')'
for t in ('node', 'way', 'relation["type"="multipolygon"]'):
query += t + tag_str + bbox_str + ';'
for tag_str in tag_strs:
for t in ('node', 'way', 'relation["type"="multipolygon"]'):
query += t + tag_str + bbox_str + ';'
if self.ref is not None:
for t in ('node', 'way', 'relation'):
query += t + '["' + self.ref + '"];'
@ -349,21 +358,68 @@ class OsmConflator:
padding = self.profile.get('bbox_padding', BBOX_PADDING)
return [get_bbox(b, padding) for b in boxes]
def check_against_profile_tags(self, tags):
def get_categories(self, tags):
    """Returns a set of categories the object with these tags belongs to.

    The set contains None when the object matches the profile's default
    "query" (or when the profile's "qualifies" function accepts it), and
    the name of every entry of the profile's "categories" dict whose
    "query" (or, failing that, "tags") matches. An empty set means
    the object should be skipped.

    Query syntax, for each tuple in a query:
    (k,) requires the key to be present, (k, None) or (k, '') requires it
    to be absent, (k, v1, v2, ...) requires the value to match any of the
    listed values. A value starting with "~" is a regular expression,
    a value starting with "!" is a case-insensitive substring.
    A query can also be a list of such queries, working as an "OR" clause.

    Raises ValueError for a category that has neither "tags" nor "query"."""
    # The module does not import "re" at the top level anymore.
    import re

    def value_matches(pattern, value):
        if pattern.startswith('~'):
            return re.search(pattern[1:], value) is not None
        if pattern.startswith('!'):
            return pattern[1:].lower() in value.lower()
        return pattern == value

    def match_query(tags, query):
        """Checks that every tuple of a single query holds for the tags."""
        for tag in query:
            key = tag[0]
            if len(tag) == 1:
                # (k,) means [k]: the key must be present.
                if key not in tags:
                    return False
            elif tag[1] is None or tag[1] == '':
                # (k, None) means [!k]: the key must be absent.
                if key in tags:
                    return False
            else:
                value = tags.get(key, None)
                if value is None:
                    return False
                if not any(value_matches(p, value) for p in tag[1:]):
                    return False
        return True

    def match_any(tags, query):
        """Matches tags against a query or a list of alternative queries."""
        if isinstance(query, str):
            # A raw Overpass query cannot be evaluated locally: trust it.
            return True
        if not query:
            return match_query(tags, query)
        first = query[0]
        is_single = (not isinstance(first, str) and len(first) > 0 and
                     isinstance(first[0], str))
        alternatives = [query] if is_single else query
        for q in alternatives:
            if isinstance(q, str) or match_query(tags, q):
                return True
        return False

    def tags_to_query(tags):
        return [(k, v) for k, v in tags.items()]

    result = set()
    qualifies = self.profile.get('qualifies', args=tags)
    if qualifies is not None:
        if qualifies:
            result.add(None)
        return result

    # First check default query
    query = self.profile.get('query', None)
    if query is not None and match_any(tags, query):
        result.add(None)

    # Then check each category if we got these
    categories = self.profile.get('categories', {})
    for name, params in categories.items():
        if 'tags' not in params and 'query' not in params:
            raise ValueError('No tags and query attributes for category "{}"'.format(name))
        # Build the fallback lazily: "tags" may be missing when "query" is set.
        if 'query' in params:
            cat_query = params['query']
        else:
            cat_query = tags_to_query(params['tags'])
        if match_any(tags, cat_query):
            result.add(name)
    return result
def download_osm(self):
"""Constructs an Overpass API query and requests objects
@ -424,7 +480,8 @@ class OsmConflator:
tags = {}
for tag in el.findall('tag'):
tags[tag.get('k')] = tag.get('v')
if not self.check_against_profile_tags(tags):
categories = self.get_categories(tags)
if categories is False or categories is None or len(categories) == 0:
continue
if el.tag == 'node':
@ -458,7 +515,7 @@ class OsmConflator:
continue
pt = OSMPoint(
el.tag, int(el.get('id')), int(el.get('version')),
coord[0], coord[1], tags)
coord[0], coord[1], tags, categories)
pt.members = members
if pt.is_poi():
if callable(weight_fn):
@ -649,7 +706,8 @@ class OsmConflator:
nearest = [p for p in nearest if match_func(p[0].data.tags, point.tags)]
if not nearest:
return None, None
nearest = [(n[0], n[0].data.distance(point)) for n in nearest]
nearest = [(n[0], n[0].data.distance(point))
for n in nearest if point.category in n[0].data.categories]
return sorted(nearest, key=lambda kv: kv[1])[0]
if not self.osmdata:
@ -849,6 +907,22 @@ def read_dataset(profile, fileobj):
required='returns a list of SourcePoints with the dataset')
def add_categories_to_dataset(profile, dataset):
    """Assigns categories to dataset points and applies the category tags.

    Does nothing when the profile has no "categories". When the profile
    names a "category_tag" and a point carries that tag, the tag value
    becomes the point's category and the tag itself is removed. A point
    with a category then receives the "tags" of that category, falling
    back to the "other" category for unknown names."""
    categories = profile.get('categories')
    if not categories:
        return
    category_key = profile.get('category_tag')
    fallback = categories.get('other', {})
    for point in dataset:
        if category_key and category_key in point.tags:
            # Move the value out of the tags into the category attribute.
            point.category = point.tags.pop(category_key)
        if not point.category:
            continue
        extra_tags = categories.get(point.category, fallback).get('tags', None)
        if extra_tags:
            point.tags.update(extra_tags)
def transform_dataset(profile, dataset):
"""Transforms tags in the dataset using the "transform" method in the profile
or the instructions in that field in string or dict form."""
@ -918,6 +992,56 @@ def transform_dataset(profile, dataset):
d.tags[key] = value
def write_for_filter(profile, dataset, f):
    """Writes categories and dataset points for the C++ filtering script.

    The file has two sections separated by an empty line. The first one
    has a "cat_id,name,query" line for every query of every category
    (all alternative queries of a category share its cat_id, and the
    profile's default "query" is written as a category with an empty name).
    A query is serialized as "k1=v1=v2|k2|k3=": tuples are joined with "|",
    a key and its values with "=", and "k=" means the key must be absent.
    The second section has a "lon,lat,cat_id" line for every dataset point
    with a known category.

    Returns True on success. When a query cannot be expressed for the
    filter (raw Overpass strings, regular expressions, reserved symbols),
    logs the error, writes nothing and returns False."""
    def query_to_tag_strings(query):
        if isinstance(query, str):
            raise ValueError('Query string for filter should not be a string')
        if not query:
            raise ValueError('Query for filter should not be empty')
        result = []
        if not isinstance(query[0], str) and isinstance(query[0][0], str):
            query = [query]
        for q in query:
            if isinstance(q, str):
                raise ValueError('Query string for filter should not be a string')
            parts = []
            for part in q:
                if len(part) == 1:
                    parts.append(part[0])
                elif part[1] is None or len(part[1]) == 0:
                    parts.append('{}='.format(part[0]))
                else:
                    # Validate every listed value, not only the first one.
                    for value in part[1:]:
                        if value.startswith('~'):
                            raise ValueError('Cannot use regular expressions in filter')
                        if '|' in value or ';' in value:
                            raise ValueError('"|" and ";" symbols is not allowed in query values')
                        if '=' in value:
                            raise ValueError('"=" symbol is not allowed in query values')
                    parts.append('='.join(part))
            result.append('|'.join(parts))
        return result

    def tags_to_query(tags):
        return [(k, v) for k, v in tags.items()]

    # Work on a copy, so the default query does not leak into the profile.
    categories = dict(profile.get('categories', {}) or {})
    p_query = profile.get('query', None)
    if p_query is not None:
        categories[None] = {'query': p_query}

    cat_map = {}
    lines = []
    try:
        for cat_id, (name, params) in enumerate(categories.items()):
            # Build the fallback lazily: "tags" may be missing when "query" is set.
            if 'query' in params:
                query = params['query']
            elif 'tags' in params:
                query = tags_to_query(params['tags'])
            else:
                raise ValueError(
                    'No tags and query attributes for category "{}"'.format(name))
            for tag_str in query_to_tag_strings(query):
                lines.append('{},{},{}\n'.format(cat_id, name or '', tag_str))
            cat_map[name] = cat_id
    except ValueError as e:
        logging.error(e)
        return False

    f.writelines(lines)
    f.write('\n')
    for d in dataset:
        if d.category in cat_map:
            f.write('{},{},{}\n'.format(d.lon, d.lat, cat_map[d.category]))
    return True
def run(profile=None):
parser = argparse.ArgumentParser(
description='''{}.
@ -928,15 +1052,17 @@ def run(profile=None):
parser.add_argument('-i', '--source', type=argparse.FileType('rb'), help='Source file to pass to the profile dataset() function')
parser.add_argument('-a', '--audit', type=argparse.FileType('r'), help='Conflation validation result as a JSON file')
parser.add_argument('-o', '--output', type=argparse.FileType('w'), help='Output OSM XML file name')
parser.add_argument('-p', '--param', help='Optional parameter for the profile')
parser.add_argument('--osc', action='store_true', help='Produce an osmChange file instead of JOSM XML')
parser.add_argument('--osm', help='Instead of querying Overpass API, use this unpacked osm file. Create one from Overpass data if not found')
parser.add_argument('-c', '--changes', type=argparse.FileType('w'), help='Write changes as GeoJSON for visualization')
parser.add_argument('-m', '--check-move', action='store_true', help='Check for moveability of modified modes')
parser.add_argument('-f', '--for-filter', type=argparse.FileType('w'), help='Prepare a file for the filtering script')
parser.add_argument('--verbose', '-v', action='store_true', help='Display debug messages')
parser.add_argument('--quiet', '-q', action='store_true', help='Do not display informational messages')
options = parser.parse_args()
if not options.output and not options.changes:
if not options.output and not options.changes and not options.for_filter:
parser.print_help()
return
@ -952,6 +1078,8 @@ def run(profile=None):
if not profile:
logging.debug('Loading profile %s', options.profile)
global param
param = options.param
profile = Profile(profile or options.profile)
dataset = read_dataset(profile, options.source)
@ -959,8 +1087,14 @@ def run(profile=None):
logging.error('Empty source dataset')
sys.exit(2)
transform_dataset(profile, dataset)
add_categories_to_dataset(profile, dataset)
logging.info('Read %s items from the dataset', len(dataset))
if options.for_filter:
if write_for_filter(profile, dataset, options.for_filter):
logging.info('Prepared data for filtering, exitting')
return
audit = None
if options.audit:
audit = json.load(options.audit)

View file

@ -1 +1 @@
__version__ = '1.2.3'
__version__ = '1.3.0'

15
filter/CMakeLists.txt Normal file
View file

@ -0,0 +1,15 @@
# Build script for the filter_planet_by_cats tool, which filters a planet
# file by the categories and points prepared with "conflate.py -f".
#
# CMAKE_CXX_STANDARD is honoured only by CMake 3.1 and newer: older versions
# silently ignore it and the C++11 sources may fail to compile.
cmake_minimum_required(VERSION 3.1)
set(NAME filter_planet_by_cats)
project(${NAME} C CXX)
set(CMAKE_CXX_STANDARD 11)
# Fail at configure time instead of falling back to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
message(STATUS "Configuring ${NAME}")
# FindOsmium.cmake and FindProtozero.cmake live next to this file.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}")
find_package(Osmium REQUIRED COMPONENTS io)
include_directories(SYSTEM ${OSMIUM_INCLUDE_DIRS})
add_executable(
${NAME}
${NAME}.cpp
RTree.h
xml_centers_output.hpp
)
target_link_libraries(${NAME} ${OSMIUM_IO_LIBRARIES})

354
filter/FindOsmium.cmake Normal file
View file

@ -0,0 +1,354 @@
#----------------------------------------------------------------------
#
# FindOsmium.cmake
#
# Find the Libosmium headers and, optionally, several components needed
# for different Libosmium functions.
#
#----------------------------------------------------------------------
#
# Usage:
#
# Copy this file somewhere into your project directory, where cmake can
# find it. Usually this will be a directory called "cmake" which you can
# add to the CMake module search path with the following line in your
# CMakeLists.txt:
#
# list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
#
# Then add the following in your CMakeLists.txt:
#
# find_package(Osmium [version] REQUIRED COMPONENTS <XXX>)
# include_directories(SYSTEM ${OSMIUM_INCLUDE_DIRS})
#
# The version number is optional. If it is not set, any version of
# libosmium will do.
#
# For the <XXX> substitute a space separated list of one or more of the
# following components:
#
# pbf - include libraries needed for PBF input and output
# xml - include libraries needed for XML input and output
# io - include libraries needed for any type of input/output
# geos - include if you want to use any of the GEOS functions
# gdal - include if you want to use any of the OGR functions
# proj - include if you want to use any of the Proj.4 functions
# sparsehash - include if you use the sparsehash index
#
# You can check for success with something like this:
#
# if(NOT OSMIUM_FOUND)
# message(WARNING "Libosmium not found!\n")
# endif()
#
#----------------------------------------------------------------------
#
# Variables:
#
# OSMIUM_FOUND - True if Osmium found.
# OSMIUM_INCLUDE_DIRS - Where to find include files.
# OSMIUM_XML_LIBRARIES - Libraries needed for XML I/O.
# OSMIUM_PBF_LIBRARIES - Libraries needed for PBF I/O.
# OSMIUM_IO_LIBRARIES - Libraries needed for XML or PBF I/O.
# OSMIUM_LIBRARIES - All libraries Osmium uses somewhere.
#
#----------------------------------------------------------------------
# This is the list of directories where we look for osmium includes.
set(_osmium_include_path
../libosmium
~/Library/Frameworks
/Library/Frameworks
/opt/local # DarwinPorts
/opt
)
# Look for the header file.
find_path(OSMIUM_INCLUDE_DIR osmium/version.hpp
PATH_SUFFIXES include
PATHS ${_osmium_include_path}
)
# Check libosmium version number
if(Osmium_FIND_VERSION)
file(STRINGS "${OSMIUM_INCLUDE_DIR}/osmium/version.hpp" _libosmium_version_define REGEX "#define LIBOSMIUM_VERSION_STRING")
if("${_libosmium_version_define}" MATCHES "#define LIBOSMIUM_VERSION_STRING \"([0-9.]+)\"")
set(_libosmium_version "${CMAKE_MATCH_1}")
else()
set(_libosmium_version "unknown")
endif()
endif()
set(OSMIUM_INCLUDE_DIRS "${OSMIUM_INCLUDE_DIR}")
#----------------------------------------------------------------------
#
# Check for optional components
#
#----------------------------------------------------------------------
if(Osmium_FIND_COMPONENTS)
foreach(_component ${Osmium_FIND_COMPONENTS})
string(TOUPPER ${_component} _component_uppercase)
set(Osmium_USE_${_component_uppercase} TRUE)
endforeach()
endif()
#----------------------------------------------------------------------
# Component 'io' is an alias for 'pbf' and 'xml'
if(Osmium_USE_IO)
set(Osmium_USE_PBF TRUE)
set(Osmium_USE_XML TRUE)
endif()
#----------------------------------------------------------------------
# Component 'ogr' is an alias for 'gdal'
if(Osmium_USE_OGR)
set(Osmium_USE_GDAL TRUE)
endif()
#----------------------------------------------------------------------
# Component 'pbf'
if(Osmium_USE_PBF)
find_package(ZLIB)
find_package(Threads)
find_package(Protozero 1.5.1)
list(APPEND OSMIUM_EXTRA_FIND_VARS ZLIB_FOUND Threads_FOUND PROTOZERO_INCLUDE_DIR)
if(ZLIB_FOUND AND Threads_FOUND AND PROTOZERO_FOUND)
list(APPEND OSMIUM_PBF_LIBRARIES
${ZLIB_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT}
)
list(APPEND OSMIUM_INCLUDE_DIRS
${ZLIB_INCLUDE_DIR}
${PROTOZERO_INCLUDE_DIR}
)
else()
message(WARNING "Osmium: Can not find some libraries for PBF input/output, please install them or configure the paths.")
endif()
endif()
#----------------------------------------------------------------------
# Component 'xml'
if(Osmium_USE_XML)
find_package(EXPAT)
find_package(BZip2)
find_package(ZLIB)
find_package(Threads)
list(APPEND OSMIUM_EXTRA_FIND_VARS EXPAT_FOUND BZIP2_FOUND ZLIB_FOUND Threads_FOUND)
if(EXPAT_FOUND AND BZIP2_FOUND AND ZLIB_FOUND AND Threads_FOUND)
list(APPEND OSMIUM_XML_LIBRARIES
${EXPAT_LIBRARIES}
${BZIP2_LIBRARIES}
${ZLIB_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT}
)
list(APPEND OSMIUM_INCLUDE_DIRS
${EXPAT_INCLUDE_DIR}
${BZIP2_INCLUDE_DIR}
${ZLIB_INCLUDE_DIR}
)
else()
message(WARNING "Osmium: Can not find some libraries for XML input/output, please install them or configure the paths.")
endif()
endif()
#----------------------------------------------------------------------
list(APPEND OSMIUM_IO_LIBRARIES
${OSMIUM_PBF_LIBRARIES}
${OSMIUM_XML_LIBRARIES}
)
list(APPEND OSMIUM_LIBRARIES
${OSMIUM_IO_LIBRARIES}
)
#----------------------------------------------------------------------
# Component 'geos'
if(Osmium_USE_GEOS)
find_path(GEOS_INCLUDE_DIR geos/geom.h)
find_library(GEOS_LIBRARY NAMES geos)
list(APPEND OSMIUM_EXTRA_FIND_VARS GEOS_INCLUDE_DIR GEOS_LIBRARY)
if(GEOS_INCLUDE_DIR AND GEOS_LIBRARY)
SET(GEOS_FOUND 1)
list(APPEND OSMIUM_LIBRARIES ${GEOS_LIBRARY})
list(APPEND OSMIUM_INCLUDE_DIRS ${GEOS_INCLUDE_DIR})
else()
message(WARNING "Osmium: GEOS library is required but not found, please install it or configure the paths.")
endif()
endif()
#----------------------------------------------------------------------
# Component 'gdal' (alias 'ogr')
if(Osmium_USE_GDAL)
find_package(GDAL)
list(APPEND OSMIUM_EXTRA_FIND_VARS GDAL_FOUND)
if(GDAL_FOUND)
list(APPEND OSMIUM_LIBRARIES ${GDAL_LIBRARIES})
list(APPEND OSMIUM_INCLUDE_DIRS ${GDAL_INCLUDE_DIRS})
else()
message(WARNING "Osmium: GDAL library is required but not found, please install it or configure the paths.")
endif()
endif()
#----------------------------------------------------------------------
# Component 'proj'
if(Osmium_USE_PROJ)
find_path(PROJ_INCLUDE_DIR proj_api.h)
find_library(PROJ_LIBRARY NAMES proj)
list(APPEND OSMIUM_EXTRA_FIND_VARS PROJ_INCLUDE_DIR PROJ_LIBRARY)
if(PROJ_INCLUDE_DIR AND PROJ_LIBRARY)
set(PROJ_FOUND 1)
list(APPEND OSMIUM_LIBRARIES ${PROJ_LIBRARY})
list(APPEND OSMIUM_INCLUDE_DIRS ${PROJ_INCLUDE_DIR})
else()
message(WARNING "Osmium: PROJ.4 library is required but not found, please install it or configure the paths.")
endif()
endif()
#----------------------------------------------------------------------
# Component 'sparsehash'
if(Osmium_USE_SPARSEHASH)
find_path(SPARSEHASH_INCLUDE_DIR google/sparsetable)
list(APPEND OSMIUM_EXTRA_FIND_VARS SPARSEHASH_INCLUDE_DIR)
if(SPARSEHASH_INCLUDE_DIR)
# Find size of sparsetable::size_type. This does not work on older
# CMake versions because they can do this check only in C, not in C++.
if(NOT CMAKE_VERSION VERSION_LESS 3.0)
include(CheckTypeSize)
set(CMAKE_REQUIRED_INCLUDES ${SPARSEHASH_INCLUDE_DIR})
set(CMAKE_EXTRA_INCLUDE_FILES "google/sparsetable")
check_type_size("google::sparsetable<int>::size_type" SPARSETABLE_SIZE_TYPE LANGUAGE CXX)
set(CMAKE_EXTRA_INCLUDE_FILES)
set(CMAKE_REQUIRED_INCLUDES)
else()
set(SPARSETABLE_SIZE_TYPE ${CMAKE_SIZEOF_VOID_P})
endif()
# Sparsetable::size_type must be at least 8 bytes (64bit), otherwise
# OSM object IDs will not fit.
if(SPARSETABLE_SIZE_TYPE GREATER 7)
set(SPARSEHASH_FOUND 1)
add_definitions(-DOSMIUM_WITH_SPARSEHASH=${SPARSEHASH_FOUND})
list(APPEND OSMIUM_INCLUDE_DIRS ${SPARSEHASH_INCLUDE_DIR})
else()
message(WARNING "Osmium: Disabled Google SparseHash library on 32bit system (size_type=${SPARSETABLE_SIZE_TYPE}).")
endif()
else()
message(WARNING "Osmium: Google SparseHash library is required but not found, please install it or configure the paths.")
endif()
endif()
#----------------------------------------------------------------------
list(REMOVE_DUPLICATES OSMIUM_INCLUDE_DIRS)
if(OSMIUM_XML_LIBRARIES)
list(REMOVE_DUPLICATES OSMIUM_XML_LIBRARIES)
endif()
if(OSMIUM_PBF_LIBRARIES)
list(REMOVE_DUPLICATES OSMIUM_PBF_LIBRARIES)
endif()
if(OSMIUM_IO_LIBRARIES)
list(REMOVE_DUPLICATES OSMIUM_IO_LIBRARIES)
endif()
if(OSMIUM_LIBRARIES)
list(REMOVE_DUPLICATES OSMIUM_LIBRARIES)
endif()
#----------------------------------------------------------------------
#
# Check that all required libraries are available
#
#----------------------------------------------------------------------
if(OSMIUM_EXTRA_FIND_VARS)
list(REMOVE_DUPLICATES OSMIUM_EXTRA_FIND_VARS)
endif()
# Handle the QUIETLY and REQUIRED arguments and the optional version check
# and set OSMIUM_FOUND to TRUE if all listed variables are TRUE.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Osmium
REQUIRED_VARS OSMIUM_INCLUDE_DIR ${OSMIUM_EXTRA_FIND_VARS}
VERSION_VAR _libosmium_version)
unset(OSMIUM_EXTRA_FIND_VARS)
#----------------------------------------------------------------------
#
# A function for setting the -pthread option in compilers/linkers
#
#----------------------------------------------------------------------
# Usage: set_pthread_on_target(<target>)
# Sets the COMPILE_FLAGS property of the target to "-pthread" for every
# compiler except MSVC, and the LINK_FLAGS property as well everywhere
# except on Apple platforms.
# NOTE(review): set_target_properties assigns the property value, so any
# COMPILE_FLAGS / LINK_FLAGS set on the target earlier are presumably
# replaced, not appended to -- confirm before combining with other flags.
function(set_pthread_on_target _target)
if(NOT MSVC)
set_target_properties(${_target} PROPERTIES COMPILE_FLAGS "-pthread")
if(NOT APPLE)
set_target_properties(${_target} PROPERTIES LINK_FLAGS "-pthread")
endif()
endif()
endfunction()
#----------------------------------------------------------------------
#
# Add compiler flags
#
#----------------------------------------------------------------------
add_definitions(-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64)
if(MSVC)
add_definitions(-wd4996)
# Disable warning C4068: "unknown pragma" because we want it to ignore
# pragmas for other compilers.
add_definitions(-wd4068)
# Disable warning C4715: "not all control paths return a value" because
# it generates too many false positives.
add_definitions(-wd4715)
# Disable warning C4351: new behavior: elements of array '...' will be
# default initialized. The new behaviour is correct and we don't support
# old compilers anyway.
add_definitions(-wd4351)
# Disable warning C4503: "decorated name length exceeded, name was truncated"
# there are more than 150 of generated names in libosmium longer than 4096 symbols supported in MSVC
add_definitions(-wd4503)
add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN -D_CRT_SECURE_NO_WARNINGS)
endif()
if(APPLE)
# following only available from cmake 2.8.12:
# add_compile_options(-stdlib=libc++)
# so using this instead:
add_definitions(-stdlib=libc++)
set(LDFLAGS ${LDFLAGS} -stdlib=libc++)
endif()
#----------------------------------------------------------------------
# This is a set of recommended warning options that can be added when compiling
# libosmium code.
if(MSVC)
set(OSMIUM_WARNING_OPTIONS "/W3 /wd4514" CACHE STRING "Recommended warning options for libosmium")
else()
set(OSMIUM_WARNING_OPTIONS "-Wall -Wextra -pedantic -Wredundant-decls -Wdisabled-optimization -Wctor-dtor-privacy -Wnon-virtual-dtor -Woverloaded-virtual -Wsign-promo -Wold-style-cast" CACHE STRING "Recommended warning options for libosmium")
endif()
set(OSMIUM_DRACONIC_CLANG_OPTIONS "-Wdocumentation -Wunused-exception-parameter -Wmissing-declarations -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-unused-macros -Wno-exit-time-destructors -Wno-global-constructors -Wno-padded -Wno-switch-enum -Wno-missing-prototypes -Wno-weak-vtables -Wno-cast-align -Wno-float-equal")
if(Osmium_DEBUG)
message(STATUS "OSMIUM_XML_LIBRARIES=" ${OSMIUM_XML_LIBRARIES})
message(STATUS "OSMIUM_PBF_LIBRARIES=" ${OSMIUM_PBF_LIBRARIES})
message(STATUS "OSMIUM_IO_LIBRARIES=" ${OSMIUM_IO_LIBRARIES})
message(STATUS "OSMIUM_LIBRARIES=" ${OSMIUM_LIBRARIES})
message(STATUS "OSMIUM_INCLUDE_DIRS=" ${OSMIUM_INCLUDE_DIRS})
endif()

View file

@ -0,0 +1,63 @@
#----------------------------------------------------------------------
#
# FindProtozero.cmake
#
# Find the protozero headers.
#
#----------------------------------------------------------------------
#
# Usage:
#
# Copy this file somewhere into your project directory, where cmake can
# find it. Usually this will be a directory called "cmake" which you can
# add to the CMake module search path with the following line in your
# CMakeLists.txt:
#
# list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
#
# Then add the following in your CMakeLists.txt:
#
# find_package(Protozero [version] [REQUIRED])
# include_directories(SYSTEM ${PROTOZERO_INCLUDE_DIR})
#
# The version number is optional. If it is not set, any version of
# protozero will do.
#
# if(NOT PROTOZERO_FOUND)
# message(WARNING "Protozero not found!\n")
# endif()
#
#----------------------------------------------------------------------
#
# Variables:
#
# PROTOZERO_FOUND - True if Protozero was found.
# PROTOZERO_INCLUDE_DIR - Where to find include files.
#
#----------------------------------------------------------------------
# find include path
find_path(PROTOZERO_INCLUDE_DIR protozero/version.hpp
PATH_SUFFIXES include
PATHS ${CMAKE_SOURCE_DIR}/../protozero
)
# Check version number
if(Protozero_FIND_VERSION)
file(STRINGS "${PROTOZERO_INCLUDE_DIR}/protozero/version.hpp" _version_define REGEX "#define PROTOZERO_VERSION_STRING")
if("${_version_define}" MATCHES "#define PROTOZERO_VERSION_STRING \"([0-9.]+)\"")
set(_version "${CMAKE_MATCH_1}")
else()
set(_version "unknown")
endif()
endif()
#set(PROTOZERO_INCLUDE_DIRS "${PROTOZERO_INCLUDE_DIR}")
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Protozero
REQUIRED_VARS PROTOZERO_INCLUDE_DIR
VERSION_VAR _version)
#----------------------------------------------------------------------

35
filter/README.md Normal file
View file

@ -0,0 +1,35 @@
# Filtering OSM by external dataset
When you have points in multiple categories, an Overpass API request may fail
because of the number of query clauses. In that case you need to filter the planet
file yourself. First, prepare a list of categories and dataset points:
conflate.py profile.py -f points.lst
Then compile the filtering tool:
mkdir build
cd build
cmake ..
make
Download a planet file or an extract for the country of import, update it to the minute,
and feed it to the filtering tool:
./filter_planet_by_cats points.lst planet-latest.osm.pbf > filtered.osm
This will take an hour or two. The resulting OSM file should be used as an input to
the conflation tool:
conflate.py profile.py --osm filtered.osm -c changes.json
## Authors and License
The `filter_planet_by_cats` script was written by Ilya Zverev for MAPS.ME and
published under Apache License 2.0.
The `xml_centers_output.hpp` and `*.cmake` files are based on
[libosmium](https://github.com/osmcode/libosmium) code and hence published
under the Boost License terms.
`RTree.h` is under public domain, downloaded from
[this repository](https://github.com/nushoin/RTree).

1602
filter/RTree.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,282 @@
/*
Filters a planet file by categories and location.
Serves as a replacement for Overpass API for the OSM Conflator.
Takes two parameters: a list of coordinates and categories prepared by
conflate.py and an OSM PBF/XML file. Prints an OSM XML file with
objects that will then be conflated with the external dataset.
The resulting XML is printed to the standard output: redirect it
to a file.
Based on the osmium_amenity_list.cpp from libosmium.
Published under Apache Public License 2.0.
Written by Ilya Zverev for MAPS.ME.
*/
#include <cctype>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <fstream>
#include <stdexcept>
#include <string>
#include <map>
#include <vector>
#include <osmium/geom/coordinates.hpp>
#include <osmium/handler/node_locations_for_ways.hpp>
#include <osmium/index/map/flex_mem.hpp>
#include <osmium/io/any_input.hpp>
#include <osmium/io/xml_output.hpp>
#include <osmium/relations/relations_manager.hpp>
#include <osmium/visitor.hpp>
#include "RTree.h"
#include "xml_centers_output.hpp"
using index_type = osmium::index::map::FlexMem<osmium::unsigned_object_id_type,
osmium::Location>;
using location_handler_type = osmium::handler::NodeLocationsForWays<index_type>;
// Search callback for the R-tree: collects every found category id into
// the std::vector<uint16_t> passed as the opaque context pointer.
// Always returns true.
bool AppendToVector(uint16_t cat_id, void *vec) {
  auto *ids = static_cast<std::vector<uint16_t> *>(vec);
  ids->push_back(cat_id);
  return true;
}
// Osmium handler that prints every OSM object which is located near a dataset
// point and matches one of the tag queries of that point's category.
//
// The categories and points are read from the file prepared by conflate.py:
// "cat_id,name,query" lines, an empty line, then "lon,lat,cat_id" lines.
class AmenityHandler : public osmium::handler::Handler {

  // Half of the side of the search square around an object, in degrees.
  constexpr static double kSearchRadius = 0.01;

  typedef RTree<uint16_t, int32_t, 2, double> DatasetTree;
  // A query is a list of [key, value1, value2, ...] lists, all of which must hold.
  typedef std::vector<std::vector<std::string>> TQuery;
  // A category is a list of alternative queries.
  typedef std::vector<TQuery> TCategory;

  DatasetTree m_tree;
  osmium::io::xmlcenters::XMLCentersOutput m_centers;
  std::map<uint16_t, TCategory> m_categories;
  std::map<uint16_t, std::string> m_category_names;

  void print_object(const osmium::OSMObject &obj,
                    const osmium::Location &center) {
    std::cout << m_centers.apply(obj, center);
  }

  // Calculate the center point of a NodeRefList as an average of its nodes
  // with known locations. Returns an undefined location when there are none.
  osmium::Location calc_center(const osmium::NodeRefList &nr_list) {
    int64_t x = 0;
    int64_t y = 0;
    // Signed on purpose: dividing by an unsigned size would break negative sums.
    int64_t cnt = 0;
    for (const auto &nr : nr_list) {
      if (nr.location()) {
        x += nr.x();
        y += nr.y();
        cnt++;
      }
    }
    if (cnt == 0)
      return osmium::Location{};
    return osmium::Location{x / cnt, y / cnt};
  }

  // Checks that the tags satisfy every part of the query:
  // [key] requires the key, [key, ""] forbids it, and [key, v1, v2, ...]
  // requires the value to be equal to one of the listed values.
  bool TestTags(osmium::TagList const & tags, TQuery const & query) {
    for (std::vector<std::string> const & pair : query) {
      if (pair.empty())
        continue;
      const char *value = tags[pair[0].c_str()];
      if (pair.size() == 2 && pair[1].empty()) {
        if (value != nullptr)
          return false;
      } else {
        if (value == nullptr)
          return false;
        if (pair.size() > 1) {
          // TODO: substrings?
          bool found = false;
          for (size_t i = 1; i < pair.size(); i++) {
            if (!std::strcmp(value, pair[i].c_str())) {
              found = true;
              break;
            }
          }
          if (!found)
            return false;
        }
      }
    }
    return true;
  }

  // Returns true when there is a dataset point near the location, and the tags
  // match any query of that point's category.
  bool IsEligible(const osmium::Location & loc, osmium::TagList const & tags) {
    // An undefined location would overflow the bounding box arithmetic below.
    if (tags.empty() || !loc)
      return false;

    int32_t radius = osmium::Location::double_to_fix(kSearchRadius);
    int32_t min[] = {loc.x() - radius, loc.y() - radius};
    int32_t max[] = {loc.x() + radius, loc.y() + radius};
    std::vector<uint16_t> found;
    if (!m_tree.Search(min, max, &AppendToVector, &found))
      return false;
    for (uint16_t cat_id : found) {
      // find() instead of operator[], which would insert empty categories.
      auto const it = m_categories.find(cat_id);
      if (it == m_categories.end())
        continue;
      for (TQuery const & query : it->second)
        if (TestTags(tags, query))
          return true;
    }
    return false;
  }

  // Splits the string by the delimiter into at most `limit` fields (the last
  // one takes the rest of the string) and trims whitespace around each field.
  // Empty fields are kept, including a trailing one: "key=" gives {"key", ""}.
  // An empty string gives no fields.
  void SplitTrim(std::string const & s, char delimiter, std::size_t limit, std::vector<std::string> & target) {
    target.clear();
    if (s.empty())
      return;
    std::size_t start = 0;
    while (true) {
      std::size_t end = std::string::npos;
      if (target.size() + 1 < limit)
        end = s.find(delimiter, start);
      if (end == std::string::npos)
        end = s.length();
      std::size_t first = start;
      while (first < end && std::isspace(static_cast<unsigned char>(s[first])))
        ++first;
      // `last` is one past the final character, so it cannot underflow.
      std::size_t last = end;
      while (last > first && std::isspace(static_cast<unsigned char>(s[last - 1])))
        --last;
      target.push_back(s.substr(first, last - first));
      if (end == s.length())
        break;
      start = end + 1;
    }
  }

  // Parses a "k1=v1=v2|k2|k3=" string into a query.
  TQuery ParseQuery(std::string const & query) {
    TQuery q;
    std::vector<std::string> parts;
    SplitTrim(query, '|', 100, parts);
    for (std::string const & part : parts) {
      std::vector<std::string> keys;
      SplitTrim(part, '=', 100, keys);
      if (keys.size() > 0)
        q.push_back(keys);
    }
    return q;
  }

  // Reads categories and dataset points from the file.
  // Throws std::runtime_error when the file cannot be opened; malformed
  // lines are reported to stderr and skipped. Number parsing errors from
  // std::stoi / std::stod propagate to the caller.
  void LoadCategories(const char *filename) {
    std::ifstream infile(filename);
    if (!infile)
      throw std::runtime_error(std::string("Cannot open dataset file: ") + filename);
    std::string line;
    std::vector<std::string> parts;
    bool parsingPoints = false;
    while (std::getline(infile, line)) {
      // Tolerate CRLF line endings.
      if (!line.empty() && line.back() == '\r')
        line.pop_back();
      if (line.empty()) {
        // The first empty line separates categories from points.
        parsingPoints = true;
        continue;
      }
      SplitTrim(line, ',', 3, parts);
      if (parts.size() < 3) {
        std::cerr << "Skipping malformed line: " << line << '\n';
        continue;
      }
      if (!parsingPoints) {
        // cat_id, name, query
        uint16_t cat_id = std::stoi(parts[0]);
        m_category_names[cat_id] = parts[1];
        m_categories[cat_id].push_back(ParseQuery(parts[2]));
      } else {
        // lon, lat, cat_id
        const osmium::Location loc(std::stod(parts[0]), std::stod(parts[1]));
        int32_t coords[] = {loc.x(), loc.y()};
        uint16_t cat_id = std::stoi(parts[2]);
        m_tree.Insert(coords, coords, cat_id);
      }
    }
  }

public:

  AmenityHandler(const char *categories) {
    LoadCategories(categories);
  }

  void node(osmium::Node const & node) {
    if (IsEligible(node.location(), node.tags())) {
      print_object(node, node.location());
    }
  }

  // Only closed ways are considered; the center is an average of way nodes.
  void way(osmium::Way const & way) {
    if (!way.is_closed())
      return;

    const osmium::Location center = calc_center(way.nodes());
    if (!center)
      return;

    if (IsEligible(center, way.tags())) {
      print_object(way, center);
    }
  }

  // Called for completed multipolygon relations with a precalculated center.
  void multi(osmium::Relation const & rel, osmium::Location const & center) {
    if (IsEligible(center, rel.tags())) {
      print_object(rel, center);
    }
  }

}; // class AmenityHandler
// Collects the member ways of multipolygon relations and, when a relation is
// complete, passes it to the AmenityHandler along with the average location
// of all nodes of its member ways.
class AmenityRelationsManager : public osmium::relations::RelationsManager<AmenityRelationsManager, false, true, false> {

  AmenityHandler *m_handler;

public:

  AmenityRelationsManager(AmenityHandler & handler) :
    RelationsManager(),
    m_handler(&handler) {
  }

  // Only relations tagged type=multipolygon are tracked.
  bool new_relation(osmium::Relation const & rel) noexcept {
    const char *rel_type = rel.tags().get_value_by_key("type");
    return rel_type && !std::strcmp(rel_type, "multipolygon");
  }

  void complete_relation(osmium::Relation const & rel) {
    int64_t x = 0, y = 0, cnt = 0;
    for (auto const & member : rel.members()) {
      // Only way members are stored by this manager.
      if (member.ref() == 0 || member.type() != osmium::item_type::way)
        continue;
      const osmium::Way* way = this->get_member_way(member.ref());
      // Do not dereference a way that is not available.
      if (way == nullptr)
        continue;
      for (const auto& node_ref : way->nodes()) {
        if (node_ref.location()) {
          x += node_ref.x();
          y += node_ref.y();
          cnt++;
        }
      }
    }
    if (cnt > 0)
      m_handler->multi(rel, osmium::Location{x / cnt, y / cnt});
  }
}; // class AmenityRelationsManager
int main(int argc, char *argv[]) {
if (argc < 3) {
std::cerr << "Usage: " << argv[0]
<< " <dataset.lst> <osmfile>\n";
std::exit(1);
}
const osmium::io::File input_file{argv[2]};
const osmium::io::File output_file{"", "osm"};
AmenityHandler data_handler(argv[1]);
AmenityRelationsManager manager(data_handler);
osmium::relations::read_relations(input_file, manager);
osmium::io::Header header;
header.set("generator", argv[0]);
osmium::io::Writer writer{output_file, header, osmium::io::overwrite::allow};
index_type index;
location_handler_type location_handler{index};
location_handler.ignore_errors();
osmium::io::Reader reader{input_file};
osmium::apply(reader, location_handler, data_handler, manager.handler());
std::cout.flush();
reader.close();
writer.close();
}

View file

@ -0,0 +1,279 @@
/*
This file is based on xml_output_format.hpp from the Osmium library
(http://osmcode.org/libosmium).
Copyright 2013-2017 Jochen Topf <jochen@topf.org> and others (see README).
Copyright 2017 Ilya Zverev <ilya@zverev.info>, MAPS.ME
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <osmium/io/detail/string_util.hpp>
#include <osmium/osm/box.hpp>
#include <osmium/osm/item_type.hpp>
#include <osmium/osm/location.hpp>
#include <osmium/osm/node.hpp>
#include <osmium/osm/node_ref.hpp>
#include <osmium/osm/object.hpp>
#include <osmium/osm/relation.hpp>
#include <osmium/osm/tag.hpp>
#include <osmium/osm/timestamp.hpp>
#include <osmium/osm/types.hpp>
#include <osmium/osm/way.hpp>
#include <cstdint>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
namespace osmium {
namespace io {
namespace xmlcenters {
namespace detail {
// Appends ` <lat>="<y>" <lon>="<x>"` to `out`, where <lat> and <lon> are the
// attribute names given by the caller and the values are the y and x
// coordinates of `location` formatted by osmium.
inline void append_lat_lon_attributes(std::string& out, const char* lat, const char* lon, const osmium::Location& location) {
    // Writes `name="coordinate` followed by the given closing text.
    const auto append_attribute = [&out](const char* name, int32_t coordinate, const char* closing) {
        out += name;
        out += "=\"";
        osmium::detail::append_location_coordinate_to_string(std::back_inserter(out), coordinate);
        out += closing;
    };

    out += ' ';
    append_attribute(lat, location.y(), "\" ");
    append_attribute(lon, location.x(), "\"");
}
} // namespace detail
// Serializes single OSM objects into OSM XML fragments. Nodes are written with
// their own lat/lon attributes; ways and relations get an extra
// <center lat=".." lon=".."/> child element holding the location supplied by
// the caller. The text is accumulated in m_out and handed over by apply().
class XMLCentersOutput {

    // Buffer the fragment is built in; emptied by apply().
    std::shared_ptr<std::string> m_out;

    // Appends `data` to `out` with XML special characters escaped by osmium.
    inline void append_xml_encoded_string(std::string & out, const char *data) {
        osmium::io::detail::append_xml_encoded_string(out, data);
    }

    // Appends the decimal representation of `value` to the buffer.
    void output_int(int64_t value) {
        // Work on the unsigned magnitude: negating INT64_MIN as a signed
        // value would overflow.
        uint64_t magnitude = static_cast<uint64_t>(value);
        if (value < 0) {
            *m_out += '-';
            magnitude = 0 - magnitude;
        }

        // The magnitude is at most 2^63, i.e. 19 decimal digits.
        char temp[20];
        char *t = temp;
        do {
            *t++ = static_cast<char>('0' + magnitude % 10);
            magnitude /= 10;
        } while (magnitude > 0);

        // Digits were produced least-significant first; append them reversed.
        m_out->append(std::reverse_iterator<char*>(t), std::reverse_iterator<char*>(temp));
    }

    // Appends `num` space characters.
    void write_spaces(int num) {
        for (; num != 0; --num) {
            *m_out += ' ';
        }
    }

    // Appends the indentation used in front of every top-level element.
    void write_prefix() {
        write_spaces(2);
    }

    // Appends ` name="value"` for an integer value.
    template <typename T>
    void write_attribute(const char* name, T value) {
        *m_out += ' ';
        *m_out += name;
        *m_out += "=\"";
        output_int(value);
        *m_out += '"';
    }

    // Appends the id attribute and, when present, version, timestamp,
    // uid/user and changeset attributes of the object.
    void write_meta(const osmium::OSMObject& object) {
        write_attribute("id", object.id());

        if (object.version()) {
            write_attribute("version", object.version());
        }

        if (object.timestamp()) {
            *m_out += " timestamp=\"";
            *m_out += object.timestamp().to_iso();
            *m_out += "\"";
        }

        if (!object.user_is_anonymous()) {
            write_attribute("uid", object.uid());
            *m_out += " user=\"";
            append_xml_encoded_string(*m_out, object.user());
            *m_out += "\"";
        }

        if (object.changeset()) {
            write_attribute("changeset", object.changeset());
        }
    }

    // Appends one <tag k=".." v=".."/> line per tag.
    void write_tags(const osmium::TagList& tags) {
        for (const auto& tag : tags) {
            write_spaces(2);
            *m_out += " <tag k=\"";
            append_xml_encoded_string(*m_out, tag.key());
            *m_out += "\" v=\"";
            append_xml_encoded_string(*m_out, tag.value());
            *m_out += "\"/>\n";
        }
    }

public:

    XMLCentersOutput() : m_out(std::make_shared<std::string>()) {
    }

    // Serializes `item` according to its type and returns the XML text,
    // leaving the internal buffer empty. `center` is used for ways and
    // relations only. Throws osmium::unknown_type for any other item type.
    std::string apply(osmium::OSMObject const & item, osmium::Location const & center) {
        switch(item.type()) {
            case osmium::item_type::node:
                node(static_cast<const osmium::Node&>(item));
                break;
            case osmium::item_type::way:
                way(static_cast<const osmium::Way&>(item), center);
                break;
            case osmium::item_type::relation:
                relation(static_cast<const osmium::Relation&>(item), center);
                break;
            default:
                throw osmium::unknown_type{};
        }

        // Move the accumulated text out so that the buffer starts empty next time.
        std::string out;
        using std::swap;
        swap(out, *m_out);

        return out;
    }

    // Appends a <node> element; self-closing when the node has no tags.
    void node(const osmium::Node& node) {
        write_prefix();
        *m_out += "<node";

        write_meta(node);

        if (node.location()) {
            detail::append_lat_lon_attributes(*m_out, "lat", "lon", node.location());
        }

        if (node.tags().empty()) {
            *m_out += "/>\n";
            return;
        }

        *m_out += ">\n";

        write_tags(node.tags());

        write_prefix();
        *m_out += "</node>\n";
    }

    // Appends a <way> element with a <center> child, its <nd> references and
    // its tags; self-closing (without center) when it has neither tags nor nodes.
    void way(const osmium::Way& way, osmium::Location const & center) {
        write_prefix();
        *m_out += "<way";
        write_meta(way);

        if (way.tags().empty() && way.nodes().empty()) {
            *m_out += "/>\n";
            return;
        }

        *m_out += ">\n";

        write_prefix();
        *m_out += " <center";
        detail::append_lat_lon_attributes(*m_out, "lat", "lon", center);
        *m_out += "/>\n";

        for (const auto& node_ref : way.nodes()) {
            write_prefix();
            *m_out += " <nd";
            write_attribute("ref", node_ref.ref());
            *m_out += "/>\n";
        }

        write_tags(way.tags());

        write_prefix();
        *m_out += "</way>\n";
    }

    // Appends a <relation> element with a <center> child, its <member> list
    // and its tags; self-closing (without center) when it has neither tags
    // nor members.
    void relation(const osmium::Relation& relation, osmium::Location const & center) {
        write_prefix();
        *m_out += "<relation";
        write_meta(relation);

        if (relation.tags().empty() && relation.members().empty()) {
            *m_out += "/>\n";
            return;
        }

        *m_out += ">\n";

        write_prefix();
        *m_out += " <center";
        detail::append_lat_lon_attributes(*m_out, "lat", "lon", center);
        *m_out += "/>\n";

        for (const auto& member : relation.members()) {
            write_prefix();
            *m_out += " <member type=\"";
            *m_out += item_type_to_name(member.type());
            *m_out += '"';
            write_attribute("ref", member.ref());
            *m_out += " role=\"";
            append_xml_encoded_string(*m_out, member.role());
            *m_out += "\"/>\n";
        }

        write_tags(relation.tags());

        write_prefix();
        *m_out += "</relation>\n";
    }

}; // class XMLCentersOutput
} // namespace xmlcenters
} // namespace io
} // namespace osmium

View file

@ -8,7 +8,7 @@ source = 'auchan.ru'
# Not adding a ref:auchan tag, since we don't have good identifiers
no_dataset_id = True
# Using a name query with regular expressions
query = [('shop', '~supermarket|mall'), ('name', '~Ашан|АШАН')]
query = [('shop', 'supermarket', 'mall'), ('name', '~Ашан|АШАН')]
master_tags = ('name', 'opening_hours', 'phone', 'website')
# Empty dict so we don't add a fixme tag to unmatched objects
tag_unmatched = {}
@ -44,7 +44,7 @@ def dataset(fileobj):
# We are parsing HTML, and for that we need an lxml package
from lxml import html
global download_url_copy
global download_url_copy, re
h = html.fromstring(fileobj.read().decode('utf-8'))
shops = h.find_class('shops-in-the-city-holder')[0]
shops.make_links_absolute(download_url_copy)

View file

@ -1,3 +1,7 @@
# Note: the json file at the burgerking website was restructured
# and does not contain any useful data now.
# So this profile is here solely for demonstration purposes.
import json
import codecs
import re
@ -20,6 +24,7 @@ tag_unmatched = {
def dataset(fileobj):
def parse_hours(s):
global re
s = re.sub('^зал:? *', '', s.lower())
s = s.replace('<br />', ';').replace('<br>', ';').replace('\n', ';').replace(' ', '').replace(',', ';').replace('', '-')
s = s.replace('-00:', '-24:')
@ -66,7 +71,11 @@ def dataset(fileobj):
346: 'Передвинуть к кафе',
}
source = json.load(codecs.getreader('utf-8')(fileobj))
json_src = codecs.getreader('utf-8')(fileobj).read()
p = json_src.find('<div')
if p > 0:
json_src = json_src[:p]
source = json.loads(json_src)
data = []
for el in source:
gid = int(el['origID'])

View file

@ -11,7 +11,8 @@ master_tags = ('official_name', 'phone', 'opening_hours', 'website')
# Reading the dataset passport to determine a URL of the latest dataset version
def download_url(dataset_id='7705851331-theaters'):
def download_url():
dataset_id = '7705851331-' + (param or 'museums')
r = requests.get('http://opendata.mkrf.ru/opendata/{}/meta.json'.format(dataset_id))
if r.status_code != 200 or len(r.content) == 0:
logging.error('Could not get URL for dataset: %s %s', r.status_code, r.text)
@ -22,6 +23,22 @@ def download_url(dataset_id='7705851331-theaters'):
logging.info('Downloading %s from %s', result['title'], latest['created'])
return latest['source']
source = 'opendata.mkrf.ru'
dataset_id = 'mkrf_'+(param or 'museums')
if not param or param == 'museums':
query = [('tourism', 'museum')]
elif param == 'theaters':
query = [('amenity', 'theatre')]
elif param == 'circuses':
query = [('amenity', 'circus')]
elif param == 'philharmonic':
query = [('amenity', 'theatre')]
else:
raise ValueError('Unknown param value: {}'.format(param))
max_distance = 300
master_tags = ('official_name', 'phone', 'opening_hours', 'website')
def dataset(fileobj):
def make_wd_ranges(r):

View file

@ -1,12 +1,10 @@
# Available modules: codecs, logging, requests, json, re, etree. But importing these helps catch other errors
# Available modules: codecs, logging, requests, json, etree. But importing these helps catch other errors
import json
import re
import logging
import requests
import zipfile
def download_url(mos_dataset_id=1421):
import requests
r = requests.get('https://data.mos.ru/api/datasets/expformats/?datasetId={}'.format(mos_dataset_id))
if r.status_code != 200 or len(r.content) == 0:
logging.error('Could not get URL for dataset: %s %s', r.status_code, r.text)
@ -15,7 +13,7 @@ def download_url(mos_dataset_id=1421):
url = [x for x in r.json() if x['Format'] == 'json'][0]
version = '?'
title = 'dataset'
r = requests.get('https://data.mos.ru/apiproxy/opendata/1421/meta.json'.format(mos_dataset_id))
r = requests.get('https://data.mos.ru/apiproxy/opendata/{}/meta.json'.format(mos_dataset_id))
if r.status_code == 200:
title = r.json()['Title']
version = r.json()['VersionNumber']
@ -50,6 +48,8 @@ master_tags = ('zone:parking', 'ref', 'contact:phone', 'contact:website', 'opera
# A list of SourcePoint objects. Initialize with (id, lat, lon, {tags}).
def dataset(fileobj):
import zipfile
import re
zf = zipfile.ZipFile(fileobj)
source = json.loads(zf.read(zf.namelist()[0]).decode('cp1251'))
RE_NUM4 = re.compile(r'\d{4,6}')

View file

@ -56,6 +56,7 @@ def dataset(fileobj):
return '24/7'
return '; '.join(res).replace('23:59', '24:00')
global re, defaultdict
source = json.load(codecs.getreader('utf-8-sig')(fileobj))
data = []
for el in source['Locations']: