Merge branch 'master' into remarks

2018-02-12 16:47:04 +01:00 · 2018-02-12 16:47:04 +01:00 · f8376661e9
commit f8376661e9
parent a0b429b45f af20b5e506
17 changed files with 2848 additions and 44 deletions
--- a/.gitignore
+++ b/.gitignore
@ -6,6 +6,9 @@
 *.gz
 *.csv
 *.pyc
+*.pbf
+*.lst
+*.user
 private/
 data/
 __pycache__/
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -2,6 +2,16 @@

 ## master branch

+* Support for categories: `category_tag` and `categories` parameters in a profile.
+* LibOsmium-based C++ filtering script for categories.
+* More than one tag value works as "one of": `[('amenity', 'cafe', 'restaurant')]`.
+* Query can be a list of queries, which are combined with an "OR" clause. An example:
+
+    `[[('amenity', 'swimming_pool')], [('leisure', 'swimming_pool')]]`
+
+* Parameters for profiles, using `-p` argument.
+* No more default imports solely for profiles, import `re` and `zipfile` yourself now.
+
 ## 1.2.3

 _Released 2017-12-29_
--- a/README.rst
+++ b/README.rst
@ -29,7 +29,7 @@ For a simplest case, run:

    conflate <profile.py> -o result.osm

-You might want to add ``-v`` to get status messages, and other arguments
+You might want to add other arguments
 to pass a dataset file or write the resulting osmChange somewhere. Run
 ``conflate -h`` to see a list of arguments.

--- a/conflate/conflate.py
+++ b/conflate/conflate.py
@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import argparse
 import codecs
+import json
 import kdtree
 import logging
 import math
@ -8,10 +9,6 @@ import requests
 import os
 import sys
 from io import BytesIO
-import json    # for profiles
-import re      # for profiles
-import zipfile # for profiles
-from collections import defaultdict # for profiles
 try:
    from .version import __version__
 except ImportError:
@ -81,6 +78,7 @@ class OSMPoint(SourcePoint):
        self.version = version
        self.members = None
        self.action = None
+        self.categories = categories or set()
        self.remarks = None

    def copy(self):
@ -210,27 +208,38 @@ class OsmConflator:
        (k, v) turns into [k=v], (k,) into [k], (k, None) into [!k], (k, "~v") into [k~v]."""
        tags = self.profile.get(
            'query', required="a list of tuples. E.g. [('amenity', 'cafe'), ('name', '~Mc.*lds')]")
+        tag_strs = []
        if isinstance(tags, str):
-            tag_str = tags
+            tag_strs = [tags]
        else:
-            tag_str = ''
-            for t in tags:
-                if len(t) == 1:
-                    q = '"{}"'.format(t[0])
-                elif t[1] is None or len(t[1]) == 0:
-                    q = '"!{}"'.format(t[0])
-                elif t[1][0] == '~':
-                    q = '"{}"~"{}"'.format(t[0], t[1][1:])
-                else:
-                    q = '"{}"="{}"'.format(t[0], t[1])
-                tag_str += '[' + q + ']'
+            if not isinstance(tags[0], str) and isinstance(tags[0][0], str):
+                tags = [tags]
+            for tags_q in tags:
+                if isinstance(tags_q, str):
+                    tag_strs.append(tags_q)
+                    continue
+                tag_str = ''
+                for t in tags_q:
+                    if len(t) == 1:
+                        q = '"{}"'.format(t[0])
+                    elif t[1] is None or len(t[1]) == 0:
+                        q = '"!{}"'.format(t[0])
+                    elif t[1][0] == '~':
+                        q = '"{}"~"{}",i'.format(t[0], t[1][1:])
+                    elif len(t) > 2:
+                        q = '"{}"~"^({})$"'.format(t[0], '|'.join(t[1:]))
+                    else:
+                        q = '"{}"="{}"'.format(t[0], t[1])
+                    tag_str += '[' + q + ']'
+                tag_strs.append(tag_str)

        timeout = self.profile.get('overpass_timeout', 120)
        query = '[out:xml]{};('.format('' if timeout is None else '[timeout:{}]'.format(timeout))
        for bbox in bboxes:
            bbox_str = '' if bbox is None else '(' + ','.join([str(x) for x in bbox]) + ')'
-            for t in ('node', 'way', 'relation["type"="multipolygon"]'):
-                query += t + tag_str + bbox_str + ';'
+            for tag_str in tag_strs:
+                for t in ('node', 'way', 'relation["type"="multipolygon"]'):
+                    query += t + tag_str + bbox_str + ';'
        if self.ref is not None:
            for t in ('node', 'way', 'relation'):
                query += t + '["' + self.ref + '"];'
@ -349,21 +358,68 @@ class OsmConflator:
        padding = self.profile.get('bbox_padding', BBOX_PADDING)
        return [get_bbox(b, padding) for b in boxes]

-    def check_against_profile_tags(self, tags):
+    def get_categories(self, tags):
+        def match_query(tags, query):
+            for tag in query:
+                if len(tag) == 1:
+                    if tag[0] in tags:
+                        return False
+                    elif tag[1] is None or tag[1] == '':
+                        if tag[0] not in tags:
+                            return False
+                    else:
+                        value = tags.get(tag[0], None)
+                        if value is None:
+                            return False
+                        found = False
+                        for t2 in tag[1:]:
+                            if t2[0] == '~':
+                                m = re.search(t2[1:], value)
+                                if not m:
+                                    return False
+                            elif t2[0] == '!':
+                                if t2[1:].lower() in value.lower():
+                                    found = True
+                            elif t2 == value:
+                                found = True
+                            if found:
+                                break
+                        if not found:
+                            return False
+            return True
+
+        def tags_to_query(tags):
+            return [(k, v) for k, v in tags.items()]
+
+        result = set()
        qualifies = self.profile.get('qualifies', args=tags)
        if qualifies is not None:
-            return qualifies
+            if qualifies:
+                result.add(None)
+            return result

+        # First check default query
        query = self.profile.get('query', None)
-        if query is not None and not isinstance(query, str):
-            for tag in query:
-                if len(tag) >= 1:
-                    if tag[0] not in tags:
-                        return False
-                    if len(tag) >= 2 and tag[1][0] != '~':
-                        if tag[1] != tags[tag[0]]:
-                            return False
-        return True
+        if query is not None:
+            if isinstance(query, str):
+                result.add(None)
+            else:
+                if isinstance(query[0][0], str):
+                    query = [query]
+                for q in query:
+                    if match_query(tags, q):
+                        result.add(None)
+                        break
+
+        # Then check each category if we got these
+        categories = self.profile.get('categories', {})
+        for name, params in categories.items():
+            if 'tags' not in params and 'query' not in params:
+                raise ValueError('No tags and query attributes for category "{}"'.format(name))
+            if match_query(tags, params.get('query', tags_to_query(params.get('tags')))):
+                result.add(name)
+
+        return result

    def download_osm(self):
        """Constructs an Overpass API query and requests objects
@ -424,7 +480,8 @@ class OsmConflator:
            tags = {}
            for tag in el.findall('tag'):
                tags[tag.get('k')] = tag.get('v')
-            if not self.check_against_profile_tags(tags):
+            categories = self.get_categories(tags)
+            if categories is False or categories is None or len(categories) == 0:
                continue

            if el.tag == 'node':
@ -458,7 +515,7 @@ class OsmConflator:
                continue
            pt = OSMPoint(
                el.tag, int(el.get('id')), int(el.get('version')),
-                coord[0], coord[1], tags)
+                coord[0], coord[1], tags, categories)
            pt.members = members
            if pt.is_poi():
                if callable(weight_fn):
@ -649,7 +706,8 @@ class OsmConflator:
                nearest = [p for p in nearest if match_func(p[0].data.tags, point.tags)]
                if not nearest:
                    return None, None
-            nearest = [(n[0], n[0].data.distance(point)) for n in nearest]
+            nearest = [(n[0], n[0].data.distance(point))
+                       for n in nearest if point.category in n[0].data.categories]
            return sorted(nearest, key=lambda kv: kv[1])[0]

        if not self.osmdata:
@ -849,6 +907,22 @@ def read_dataset(profile, fileobj):
        required='returns a list of SourcePoints with the dataset')


+def add_categories_to_dataset(profile, dataset):
+    categories = profile.get('categories')
+    if not categories:
+        return
+    tag = profile.get('category_tag')
+    other = categories.get('other', {})
+    for d in dataset:
+        if tag and tag in d.tags:
+            d.category = d.tags[tag]
+            del d.tags[tag]
+        if d.category:
+            cat_tags = categories.get(d.category, other).get('tags', None)
+            if cat_tags:
+                d.tags.update(cat_tags)
+
+
 def transform_dataset(profile, dataset):
    """Transforms tags in the dataset using the "transform" method in the profile
    or the instructions in that field in string or dict form."""
@ -918,6 +992,56 @@ def transform_dataset(profile, dataset):
            d.tags[key] = value


+def write_for_filter(profile, dataset, f):
+    def query_to_tag_strings(query):
+        if isinstance(query, str):
+            raise ValueError('Query string for filter should not be a string')
+        result = []
+        if not isinstance(query[0], str) and isinstance(query[0][0], str):
+            query = [query]
+        for q in query:
+            if isinstance(q, str):
+                raise ValueError('Query string for filter should not be a string')
+            parts = []
+            for part in q:
+                if len(part) == 1:
+                    parts.append(part[0])
+                elif part[1] is None or len(part[1]) == 0:
+                    parts.append('{}='.format(part[0]))
+                elif part[1][0] == '~':
+                    raise ValueError('Cannot use regular expressions in filter')
+                elif '|' in part[1] or ';' in part[1]:
+                    raise ValueError('"|" and ";" symbols is not allowed in query values')
+                else:
+                    parts.append('='.join(part))
+            result.append('|'.join(parts))
+        return result
+
+    def tags_to_query(tags):
+        return [(k, v) for k, v in tags.items()]
+
+    categories = profile.get('categories', {})
+    p_query = profile.get('query', None)
+    if p_query is not None:
+        categories[None] = {'query': p_query}
+    cat_map = {}
+    i = 0
+    try:
+        for name, query in categories.items():
+            for tags in query_to_tag_strings(query.get('query', tags_to_query(query.get('tags')))):
+                f.write('{},{},{}\n'.format(i, name or '', tags))
+            cat_map[name] = i
+            i += 1
+    except ValueError as e:
+        logging.error(e)
+        return False
+    f.write('\n')
+    for d in dataset:
+        if d.category in cat_map:
+            f.write('{},{},{}\n'.format(d.lon, d.lat, cat_map[d.category]))
+    return True
+
+
 def run(profile=None):
    parser = argparse.ArgumentParser(
        description='''{}.
@ -928,15 +1052,17 @@ def run(profile=None):
    parser.add_argument('-i', '--source', type=argparse.FileType('rb'), help='Source file to pass to the profile dataset() function')
    parser.add_argument('-a', '--audit', type=argparse.FileType('r'), help='Conflation validation result as a JSON file')
    parser.add_argument('-o', '--output', type=argparse.FileType('w'), help='Output OSM XML file name')
+    parser.add_argument('-p', '--param', help='Optional parameter for the profile')
    parser.add_argument('--osc', action='store_true', help='Produce an osmChange file instead of JOSM XML')
    parser.add_argument('--osm', help='Instead of querying Overpass API, use this unpacked osm file. Create one from Overpass data if not found')
    parser.add_argument('-c', '--changes', type=argparse.FileType('w'), help='Write changes as GeoJSON for visualization')
    parser.add_argument('-m', '--check-move', action='store_true', help='Check for moveability of modified modes')
+    parser.add_argument('-f', '--for-filter', type=argparse.FileType('w'), help='Prepare a file for the filtering script')
    parser.add_argument('--verbose', '-v', action='store_true', help='Display debug messages')
    parser.add_argument('--quiet', '-q', action='store_true', help='Do not display informational messages')
    options = parser.parse_args()

-    if not options.output and not options.changes:
+    if not options.output and not options.changes and not options.for_filter:
        parser.print_help()
        return

@ -952,6 +1078,8 @@ def run(profile=None):

    if not profile:
        logging.debug('Loading profile %s', options.profile)
+    global param
+    param = options.param
    profile = Profile(profile or options.profile)

    dataset = read_dataset(profile, options.source)
@ -959,8 +1087,14 @@ def run(profile=None):
        logging.error('Empty source dataset')
        sys.exit(2)
    transform_dataset(profile, dataset)
+    add_categories_to_dataset(profile, dataset)
    logging.info('Read %s items from the dataset', len(dataset))

+    if options.for_filter:
+        if write_for_filter(profile, dataset, options.for_filter):
+            logging.info('Prepared data for filtering, exitting')
+        return
+
    audit = None
    if options.audit:
        audit = json.load(options.audit)
--- a/conflate/version.py
+++ b/conflate/version.py
@ -1 +1 @@
-__version__ = '1.2.3'
+__version__ = '1.3.0'
--- a/filter/CMakeLists.txt
+++ b/filter/CMakeLists.txt
@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 2.8)
+set(NAME filter_planet_by_cats)
+project(${NAME} C CXX)
+set(CMAKE_CXX_STANDARD 11)
+message(STATUS "Configuring ${NAME}")
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}")
+find_package(Osmium REQUIRED COMPONENTS io)
+include_directories(SYSTEM ${OSMIUM_INCLUDE_DIRS})
+add_executable(
+  ${NAME}
+  ${NAME}.cpp
+  RTree.h
+  xml_centers_output.hpp
+)
+target_link_libraries(${NAME} ${OSMIUM_IO_LIBRARIES})
--- a/filter/FindOsmium.cmake
+++ b/filter/FindOsmium.cmake
@ -0,0 +1,354 @@
+#----------------------------------------------------------------------
+#
+#  FindOsmium.cmake
+#
+#  Find the Libosmium headers and, optionally, several components needed
+#  for different Libosmium functions.
+#
+#----------------------------------------------------------------------
+#
+#  Usage:
+#
+#    Copy this file somewhere into your project directory, where cmake can
+#    find it. Usually this will be a directory called "cmake" which you can
+#    add to the CMake module search path with the following line in your
+#    CMakeLists.txt:
+#
+#      list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
+#
+#    Then add the following in your CMakeLists.txt:
+#
+#      find_package(Osmium [version] REQUIRED COMPONENTS <XXX>)
+#      include_directories(SYSTEM ${OSMIUM_INCLUDE_DIRS})
+#
+#    The version number is optional. If it is not set, any version of
+#    libosmium will do.
+#
+#    For the <XXX> substitute a space separated list of one or more of the
+#    following components:
+#
+#      pbf        - include libraries needed for PBF input and output
+#      xml        - include libraries needed for XML input and output
+#      io         - include libraries needed for any type of input/output
+#      geos       - include if you want to use any of the GEOS functions
+#      gdal       - include if you want to use any of the OGR functions
+#      proj       - include if you want to use any of the Proj.4 functions
+#      sparsehash - include if you use the sparsehash index
+#
+#    You can check for success with something like this:
+#
+#      if(NOT OSMIUM_FOUND)
+#          message(WARNING "Libosmium not found!\n")
+#      endif()
+#
+#----------------------------------------------------------------------
+#
+#  Variables:
+#
+#    OSMIUM_FOUND         - True if Osmium found.
+#    OSMIUM_INCLUDE_DIRS  - Where to find include files.
+#    OSMIUM_XML_LIBRARIES - Libraries needed for XML I/O.
+#    OSMIUM_PBF_LIBRARIES - Libraries needed for PBF I/O.
+#    OSMIUM_IO_LIBRARIES  - Libraries needed for XML or PBF I/O.
+#    OSMIUM_LIBRARIES     - All libraries Osmium uses somewhere.
+#
+#----------------------------------------------------------------------
+
+# This is the list of directories where we look for osmium includes.
+set(_osmium_include_path
+        ../libosmium
+        ~/Library/Frameworks
+        /Library/Frameworks
+        /opt/local # DarwinPorts
+        /opt
+)
+
+# Look for the header file.
+find_path(OSMIUM_INCLUDE_DIR osmium/version.hpp
+    PATH_SUFFIXES include
+    PATHS ${_osmium_include_path}
+)
+
+# Check libosmium version number
+if(Osmium_FIND_VERSION)
+    file(STRINGS "${OSMIUM_INCLUDE_DIR}/osmium/version.hpp" _libosmium_version_define REGEX "#define LIBOSMIUM_VERSION_STRING")
+    if("${_libosmium_version_define}" MATCHES "#define LIBOSMIUM_VERSION_STRING \"([0-9.]+)\"")
+        set(_libosmium_version "${CMAKE_MATCH_1}")
+    else()
+        set(_libosmium_version "unknown")
+    endif()
+endif()
+
+set(OSMIUM_INCLUDE_DIRS "${OSMIUM_INCLUDE_DIR}")
+
+#----------------------------------------------------------------------
+#
+#  Check for optional components
+#
+#----------------------------------------------------------------------
+if(Osmium_FIND_COMPONENTS)
+    foreach(_component ${Osmium_FIND_COMPONENTS})
+        string(TOUPPER ${_component} _component_uppercase)
+        set(Osmium_USE_${_component_uppercase} TRUE)
+    endforeach()
+endif()
+
+#----------------------------------------------------------------------
+# Component 'io' is an alias for 'pbf' and 'xml'
+if(Osmium_USE_IO)
+    set(Osmium_USE_PBF TRUE)
+    set(Osmium_USE_XML TRUE)
+endif()
+
+#----------------------------------------------------------------------
+# Component 'ogr' is an alias for 'gdal'
+if(Osmium_USE_OGR)
+    set(Osmium_USE_GDAL TRUE)
+endif()
+
+#----------------------------------------------------------------------
+# Component 'pbf'
+if(Osmium_USE_PBF)
+    find_package(ZLIB)
+    find_package(Threads)
+    find_package(Protozero 1.5.1)
+
+    list(APPEND OSMIUM_EXTRA_FIND_VARS ZLIB_FOUND Threads_FOUND PROTOZERO_INCLUDE_DIR)
+    if(ZLIB_FOUND AND Threads_FOUND AND PROTOZERO_FOUND)
+        list(APPEND OSMIUM_PBF_LIBRARIES
+            ${ZLIB_LIBRARIES}
+            ${CMAKE_THREAD_LIBS_INIT}
+        )
+        list(APPEND OSMIUM_INCLUDE_DIRS
+            ${ZLIB_INCLUDE_DIR}
+            ${PROTOZERO_INCLUDE_DIR}
+        )
+    else()
+        message(WARNING "Osmium: Can not find some libraries for PBF input/output, please install them or configure the paths.")
+    endif()
+endif()
+
+#----------------------------------------------------------------------
+# Component 'xml'
+if(Osmium_USE_XML)
+    find_package(EXPAT)
+    find_package(BZip2)
+    find_package(ZLIB)
+    find_package(Threads)
+
+    list(APPEND OSMIUM_EXTRA_FIND_VARS EXPAT_FOUND BZIP2_FOUND ZLIB_FOUND Threads_FOUND)
+    if(EXPAT_FOUND AND BZIP2_FOUND AND ZLIB_FOUND AND Threads_FOUND)
+        list(APPEND OSMIUM_XML_LIBRARIES
+            ${EXPAT_LIBRARIES}
+            ${BZIP2_LIBRARIES}
+            ${ZLIB_LIBRARIES}
+            ${CMAKE_THREAD_LIBS_INIT}
+        )
+        list(APPEND OSMIUM_INCLUDE_DIRS
+            ${EXPAT_INCLUDE_DIR}
+            ${BZIP2_INCLUDE_DIR}
+            ${ZLIB_INCLUDE_DIR}
+        )
+    else()
+        message(WARNING "Osmium: Can not find some libraries for XML input/output, please install them or configure the paths.")
+    endif()
+endif()
+
+#----------------------------------------------------------------------
+list(APPEND OSMIUM_IO_LIBRARIES
+    ${OSMIUM_PBF_LIBRARIES}
+    ${OSMIUM_XML_LIBRARIES}
+)
+
+list(APPEND OSMIUM_LIBRARIES
+    ${OSMIUM_IO_LIBRARIES}
+)
+
+#----------------------------------------------------------------------
+# Component 'geos'
+if(Osmium_USE_GEOS)
+    find_path(GEOS_INCLUDE_DIR geos/geom.h)
+    find_library(GEOS_LIBRARY NAMES geos)
+
+    list(APPEND OSMIUM_EXTRA_FIND_VARS GEOS_INCLUDE_DIR GEOS_LIBRARY)
+    if(GEOS_INCLUDE_DIR AND GEOS_LIBRARY)
+        SET(GEOS_FOUND 1)
+        list(APPEND OSMIUM_LIBRARIES ${GEOS_LIBRARY})
+        list(APPEND OSMIUM_INCLUDE_DIRS ${GEOS_INCLUDE_DIR})
+    else()
+        message(WARNING "Osmium: GEOS library is required but not found, please install it or configure the paths.")
+    endif()
+endif()
+
+#----------------------------------------------------------------------
+# Component 'gdal' (alias 'ogr')
+if(Osmium_USE_GDAL)
+    find_package(GDAL)
+
+    list(APPEND OSMIUM_EXTRA_FIND_VARS GDAL_FOUND)
+    if(GDAL_FOUND)
+        list(APPEND OSMIUM_LIBRARIES ${GDAL_LIBRARIES})
+        list(APPEND OSMIUM_INCLUDE_DIRS ${GDAL_INCLUDE_DIRS})
+    else()
+        message(WARNING "Osmium: GDAL library is required but not found, please install it or configure the paths.")
+    endif()
+endif()
+
+#----------------------------------------------------------------------
+# Component 'proj'
+if(Osmium_USE_PROJ)
+    find_path(PROJ_INCLUDE_DIR proj_api.h)
+    find_library(PROJ_LIBRARY NAMES proj)
+
+    list(APPEND OSMIUM_EXTRA_FIND_VARS PROJ_INCLUDE_DIR PROJ_LIBRARY)
+    if(PROJ_INCLUDE_DIR AND PROJ_LIBRARY)
+        set(PROJ_FOUND 1)
+        list(APPEND OSMIUM_LIBRARIES ${PROJ_LIBRARY})
+        list(APPEND OSMIUM_INCLUDE_DIRS ${PROJ_INCLUDE_DIR})
+    else()
+        message(WARNING "Osmium: PROJ.4 library is required but not found, please install it or configure the paths.")
+    endif()
+endif()
+
+#----------------------------------------------------------------------
+# Component 'sparsehash'
+if(Osmium_USE_SPARSEHASH)
+    find_path(SPARSEHASH_INCLUDE_DIR google/sparsetable)
+
+    list(APPEND OSMIUM_EXTRA_FIND_VARS SPARSEHASH_INCLUDE_DIR)
+    if(SPARSEHASH_INCLUDE_DIR)
+        # Find size of sparsetable::size_type. This does not work on older
+        # CMake versions because they can do this check only in C, not in C++.
+        if(NOT CMAKE_VERSION VERSION_LESS 3.0)
+           include(CheckTypeSize)
+           set(CMAKE_REQUIRED_INCLUDES ${SPARSEHASH_INCLUDE_DIR})
+           set(CMAKE_EXTRA_INCLUDE_FILES "google/sparsetable")
+           check_type_size("google::sparsetable<int>::size_type" SPARSETABLE_SIZE_TYPE LANGUAGE CXX)
+           set(CMAKE_EXTRA_INCLUDE_FILES)
+           set(CMAKE_REQUIRED_INCLUDES)
+        else()
+           set(SPARSETABLE_SIZE_TYPE ${CMAKE_SIZEOF_VOID_P})
+        endif()
+
+        # Sparsetable::size_type must be at least 8 bytes (64bit), otherwise
+        # OSM object IDs will not fit.
+        if(SPARSETABLE_SIZE_TYPE GREATER 7)
+            set(SPARSEHASH_FOUND 1)
+            add_definitions(-DOSMIUM_WITH_SPARSEHASH=${SPARSEHASH_FOUND})
+            list(APPEND OSMIUM_INCLUDE_DIRS ${SPARSEHASH_INCLUDE_DIR})
+        else()
+            message(WARNING "Osmium: Disabled Google SparseHash library on 32bit system (size_type=${SPARSETABLE_SIZE_TYPE}).")
+        endif()
+    else()
+        message(WARNING "Osmium: Google SparseHash library is required but not found, please install it or configure the paths.")
+    endif()
+endif()
+
+#----------------------------------------------------------------------
+
+list(REMOVE_DUPLICATES OSMIUM_INCLUDE_DIRS)
+
+if(OSMIUM_XML_LIBRARIES)
+    list(REMOVE_DUPLICATES OSMIUM_XML_LIBRARIES)
+endif()
+
+if(OSMIUM_PBF_LIBRARIES)
+    list(REMOVE_DUPLICATES OSMIUM_PBF_LIBRARIES)
+endif()
+
+if(OSMIUM_IO_LIBRARIES)
+    list(REMOVE_DUPLICATES OSMIUM_IO_LIBRARIES)
+endif()
+
+if(OSMIUM_LIBRARIES)
+    list(REMOVE_DUPLICATES OSMIUM_LIBRARIES)
+endif()
+
+#----------------------------------------------------------------------
+#
+#  Check that all required libraries are available
+#
+#----------------------------------------------------------------------
+if(OSMIUM_EXTRA_FIND_VARS)
+    list(REMOVE_DUPLICATES OSMIUM_EXTRA_FIND_VARS)
+endif()
+# Handle the QUIETLY and REQUIRED arguments and the optional version check
+# and set OSMIUM_FOUND to TRUE if all listed variables are TRUE.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(Osmium
+                                  REQUIRED_VARS OSMIUM_INCLUDE_DIR ${OSMIUM_EXTRA_FIND_VARS}
+                                  VERSION_VAR _libosmium_version)
+unset(OSMIUM_EXTRA_FIND_VARS)
+
+#----------------------------------------------------------------------
+#
+#  A function for setting the -pthread option in compilers/linkers
+#
+#----------------------------------------------------------------------
+function(set_pthread_on_target _target)
+    if(NOT MSVC)
+        set_target_properties(${_target} PROPERTIES COMPILE_FLAGS "-pthread")
+        if(NOT APPLE)
+            set_target_properties(${_target} PROPERTIES LINK_FLAGS "-pthread")
+        endif()
+    endif()
+endfunction()
+
+#----------------------------------------------------------------------
+#
+#  Add compiler flags
+#
+#----------------------------------------------------------------------
+add_definitions(-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64)
+
+if(MSVC)
+    add_definitions(-wd4996)
+
+    # Disable warning C4068: "unknown pragma" because we want it to ignore
+    # pragmas for other compilers.
+    add_definitions(-wd4068)
+
+    # Disable warning C4715: "not all control paths return a value" because
+    # it generates too many false positives.
+    add_definitions(-wd4715)
+
+    # Disable warning C4351: new behavior: elements of array '...' will be
+    # default initialized. The new behaviour is correct and we don't support
+    # old compilers anyway.
+    add_definitions(-wd4351)
+
+    # Disable warning C4503: "decorated name length exceeded, name was truncated".
+    # libosmium generates more than 150 names longer than the 4096 characters supported by MSVC.
+    add_definitions(-wd4503)
+
+    add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN -D_CRT_SECURE_NO_WARNINGS)
+endif()
+
+if(APPLE)
+# following only available from cmake 2.8.12:
+#   add_compile_options(-stdlib=libc++)
+# so using this instead:
+    add_definitions(-stdlib=libc++)
+    set(LDFLAGS ${LDFLAGS} -stdlib=libc++)
+endif()
+
+#----------------------------------------------------------------------
+
+# This is a set of recommended warning options that can be added when compiling
+# libosmium code.
+if(MSVC)
+    set(OSMIUM_WARNING_OPTIONS "/W3 /wd4514" CACHE STRING "Recommended warning options for libosmium")
+else()
+    set(OSMIUM_WARNING_OPTIONS "-Wall -Wextra -pedantic -Wredundant-decls -Wdisabled-optimization -Wctor-dtor-privacy -Wnon-virtual-dtor -Woverloaded-virtual -Wsign-promo -Wold-style-cast" CACHE STRING "Recommended warning options for libosmium")
+endif()
+
+set(OSMIUM_DRACONIC_CLANG_OPTIONS "-Wdocumentation -Wunused-exception-parameter -Wmissing-declarations -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-unused-macros -Wno-exit-time-destructors -Wno-global-constructors -Wno-padded -Wno-switch-enum -Wno-missing-prototypes -Wno-weak-vtables -Wno-cast-align -Wno-float-equal")
+
+if(Osmium_DEBUG)
+    message(STATUS "OSMIUM_XML_LIBRARIES=" ${OSMIUM_XML_LIBRARIES})
+    message(STATUS "OSMIUM_PBF_LIBRARIES=" ${OSMIUM_PBF_LIBRARIES})
+    message(STATUS "OSMIUM_IO_LIBRARIES=" ${OSMIUM_IO_LIBRARIES})
+    message(STATUS "OSMIUM_LIBRARIES=" ${OSMIUM_LIBRARIES})
+    message(STATUS "OSMIUM_INCLUDE_DIRS=" ${OSMIUM_INCLUDE_DIRS})
+endif()
+
--- a/filter/FindProtozero.cmake
+++ b/filter/FindProtozero.cmake
@ -0,0 +1,63 @@
+#----------------------------------------------------------------------
+#
+#  FindProtozero.cmake
+#
+#  Find the protozero headers.
+#
+#----------------------------------------------------------------------
+#
+#  Usage:
+#
+#    Copy this file somewhere into your project directory, where cmake can
+#    find it. Usually this will be a directory called "cmake" which you can
+#    add to the CMake module search path with the following line in your
+#    CMakeLists.txt:
+#
+#      list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
+#
+#    Then add the following in your CMakeLists.txt:
+#
+#      find_package(Protozero [version] [REQUIRED])
+#      include_directories(SYSTEM ${PROTOZERO_INCLUDE_DIR})
+#
+#    The version number is optional. If it is not set, any version of
+#    protozero will do.
+#
+#      if(NOT PROTOZERO_FOUND)
+#          message(WARNING "Protozero not found!\n")
+#      endif()
+#
+#----------------------------------------------------------------------
+#
+#  Variables:
+#
+#    PROTOZERO_FOUND        - True if Protozero was found.
+#    PROTOZERO_INCLUDE_DIR  - Where to find include files.
+#
+#----------------------------------------------------------------------
+
+# find include path
+find_path(PROTOZERO_INCLUDE_DIR protozero/version.hpp
+    PATH_SUFFIXES include
+    PATHS ${CMAKE_SOURCE_DIR}/../protozero
+)
+
+# Check version number
+if(Protozero_FIND_VERSION)
+    file(STRINGS "${PROTOZERO_INCLUDE_DIR}/protozero/version.hpp" _version_define REGEX "#define PROTOZERO_VERSION_STRING")
+    if("${_version_define}" MATCHES "#define PROTOZERO_VERSION_STRING \"([0-9.]+)\"")
+        set(_version "${CMAKE_MATCH_1}")
+    else()
+        set(_version "unknown")
+    endif()
+endif()
+
+#set(PROTOZERO_INCLUDE_DIRS "${PROTOZERO_INCLUDE_DIR}")
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(Protozero
+                                  REQUIRED_VARS PROTOZERO_INCLUDE_DIR
+                                  VERSION_VAR _version)
+
+
+#----------------------------------------------------------------------
--- a/filter/README.md
+++ b/filter/README.md
@ -0,0 +1,35 @@
+# Filtering OSM by external dataset
+
+When you have points in multiple categories, an Overpass API request may fail
+because of the number of query clauses. In that case, you need to filter the planet
+file yourself. First, prepare a list of categories and dataset points:
+
+    conflate.py profile.py -f points.lst
+
+Then compile the filtering tool:
+
+    mkdir build && cd build
+    cmake ..
+    make
+
+Download a planet file or an extract for the country of import, update it to the minute,
+and feed it to the filtering tool:
+
+    ./filter_planet_by_cats points.lst planet-latest.osm.pbf > filtered.osm
+
+This will take an hour or two. The resulting OSM file should be used as an input to
+the conflation tool:
+
+    conflate.py profile.py --osm filtered.osm -c changes.json
+
+## Authors and License
+
+The `filter_planet_by_cats` script was written by Ilya Zverev for MAPS.ME and
+published under Apache License 2.0.
+
+The `xml_centers_output.hpp` and `*.cmake` files are based on
+[libosmium](https://github.com/osmcode/libosmium) code and hence published
+under the Boost License terms.
+
+`RTree.h` is in the public domain, downloaded from
+[this repository](https://github.com/nushoin/RTree).
--- a/filter/RTree.h
+++ b/filter/RTree.h
--- a/filter/filter_planet_by_cats.cpp
+++ b/filter/filter_planet_by_cats.cpp
@ -0,0 +1,282 @@
+/*
+    Filters a planet file by categories and location.
+
+    Serves as a replacement for Overpass API for the OSM Conflator.
+    Takes two parameters: a list of coordinates and categories prepared by
+    conflate.py and an OSM PBF/XML file. Prints an OSM XML file with
+    objects that will then be conflated with the external dataset.
+    The XML is written to standard output, so redirect the output to a file
+    to save it.
+
+    Based on the osmium_amenity_list.cpp from libosmium.
+
+    Published under Apache Public License 2.0.
+
+    Written by Ilya Zverev for MAPS.ME.
+*/
+
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <map>
+
+#include <osmium/geom/coordinates.hpp>
+#include <osmium/handler/node_locations_for_ways.hpp>
+#include <osmium/index/map/flex_mem.hpp>
+#include <osmium/io/any_input.hpp>
+#include <osmium/io/xml_output.hpp>
+#include <osmium/relations/relations_manager.hpp>
+#include <osmium/visitor.hpp>
+
+#include "RTree.h"
+#include "xml_centers_output.hpp"
+
+using index_type = osmium::index::map::FlexMem<osmium::unsigned_object_id_type,
+                                               osmium::Location>;
+using location_handler_type = osmium::handler::NodeLocationsForWays<index_type>;
+
+// Search callback for the dataset tree: appends the reported category id to
+// the std::vector<uint16_t> passed through the opaque `vec` pointer.
+// Always returns true (presumably "keep searching" -- confirm in RTree.h).
+bool AppendToVector(uint16_t cat_id, void *vec) {
+  auto *ids = static_cast<std::vector<uint16_t>*>(vec);
+  ids->push_back(cat_id);
+  return true;
+}
+
+class AmenityHandler : public osmium::handler::Handler {
+
+  constexpr static double kSearchRadius = 0.01;
+
+  typedef RTree<uint16_t, int32_t, 2, double> DatasetTree;
+  typedef std::vector<std::vector<std::string>> TQuery;
+  typedef std::vector<TQuery> TCategory;
+
+  DatasetTree m_tree;
+  osmium::io::xmlcenters::XMLCentersOutput m_centers;
+  std::map<uint16_t, std::vector<TQuery>> m_categories;
+  std::map<uint16_t, std::string> m_category_names;
+
+  // Serializes `obj` (with the supplied center) to OSM XML through m_centers
+  // and writes the resulting text to standard output.
+  void print_object(const osmium::OSMObject &obj,
+                    const osmium::Location &center) {
+    const std::string xml = m_centers.apply(obj, center);
+    std::cout << xml;
+  }
+
+  // Calculate the center point of a NodeRefList as the arithmetic mean of
+  // the fixed-point coordinates of its node references.
+  // Returns a default-constructed Location when the list is empty.
+  osmium::Location calc_center(const osmium::NodeRefList &nr_list) {
+    if (nr_list.empty())
+      return osmium::Location{};
+
+    int64_t x = 0;
+    int64_t y = 0;
+    for (const auto &nr : nr_list) {
+      x += nr.x();
+      y += nr.y();
+    }
+
+    // Divide by a signed count: dividing an int64_t by the unsigned size()
+    // would convert negative coordinate sums to huge unsigned values.
+    const int64_t count = static_cast<int64_t>(nr_list.size());
+    return osmium::Location{x / count, y / count};
+  }
+
+  // Checks a tag list against a single query. Every clause of the query
+  // must hold for the function to return true. A clause is a vector
+  // {key, value1, value2, ...}:
+  //   * {key}         -- the key must be present, with any value;
+  //   * {key, ""}     -- the key must be absent;
+  //   * {key, v1, ..} -- the key must be present and equal to one of v1...
+  // Empty clauses are ignored.
+  bool TestTags(osmium::TagList const & tags, TQuery const & query) {
+    for (std::vector<std::string> const & pair : query) {
+      if (pair.empty())
+        continue;
+
+      const char *value = tags[pair[0].c_str()];
+      if (pair.size() == 2 && pair[1].empty()) {
+        if (value != nullptr)
+          return false;
+        continue;
+      }
+
+      if (value == nullptr)
+        return false;
+      if (pair.size() == 1)
+        continue;
+
+      // TODO: substrings?
+      // std::string comparison is used so no C string header is required.
+      bool found = false;
+      for (std::size_t i = 1; i < pair.size() && !found; i++)
+        found = (pair[i] == value);
+      if (!found)
+        return false;
+    }
+    return true;
+  }
+
+  // Decides whether an OSM object should be printed. The object must have
+  // tags, and at least one dataset point found by the tree search within
+  // kSearchRadius (converted to fixed-point coordinates) around `loc` must
+  // belong to a category that has a query matching `tags`.
+  bool IsEligible(const osmium::Location & loc, osmium::TagList const & tags) {
+    if (tags.empty())
+      return false;
+
+    const int32_t radius = osmium::Location::double_to_fix(kSearchRadius);
+    int32_t min[] = {loc.x() - radius, loc.y() - radius};
+    int32_t max[] = {loc.x() + radius, loc.y() + radius};
+    std::vector<uint16_t> found;
+    if (!m_tree.Search(min, max, &AppendToVector, &found))
+      return false;
+
+    for (uint16_t cat_id : found) {
+      // find() instead of operator[]: do not create empty entries for
+      // category ids that appear in points but have no query.
+      const auto it = m_categories.find(cat_id);
+      if (it == m_categories.end())
+        continue;
+      // Iterate by reference; copying a whole query per test is wasteful.
+      for (TQuery const & query : it->second)
+        if (TestTags(tags, query))
+          return true;
+    }
+    return false;
+  }
+
+  // Splits `s` on `delimiter` into `target` (cleared first), stripping
+  // leading and trailing whitespace from every item.
+  // `limit` is the maximum number of splits: once `target` holds `limit`
+  // items, the rest of the string becomes the last item, so the result has
+  // at most limit + 1 items. An empty item at the very end of the string
+  // (e.g. after a trailing delimiter) is not produced.
+  void SplitTrim(std::string const & s, char delimiter, std::size_t limit, std::vector<std::string> & target) {
+    target.clear();
+    std::size_t start = 0;
+    while (start < s.length()) {
+      std::size_t end = s.find(delimiter, start);
+      if (end == std::string::npos || target.size() == limit)
+        end = s.length();
+
+      // Skip leading whitespace. The cast keeps std::isspace defined for
+      // characters with negative char values.
+      std::size_t first = start;
+      while (first < end && std::isspace(static_cast<unsigned char>(s[first])))
+        first++;
+
+      // Skip trailing whitespace; `last` is one past the final kept char,
+      // which avoids underflow when the item is empty at index 0.
+      std::size_t last = end;
+      while (last > first && std::isspace(static_cast<unsigned char>(s[last - 1])))
+        last--;
+
+      target.push_back(s.substr(first, last - first));
+      start = end + 1;
+    }
+  }
+
+  // Parses a textual query into a TQuery: the text is split on '|' into
+  // clauses, and each clause is split on '=' into {key, value...}.
+  // Clauses that yield no items are dropped.
+  TQuery ParseQuery(std::string const & query) {
+    std::vector<std::string> clauses;
+    SplitTrim(query, '|', 100, clauses);
+
+    TQuery result;
+    std::vector<std::string> tokens;  // SplitTrim clears it on every call
+    for (std::string const & clause : clauses) {
+      SplitTrim(clause, '=', 100, tokens);
+      if (!tokens.empty())
+        result.push_back(tokens);
+    }
+    return result;
+  }
+
+  // Loads the dataset list file. The file has two sections separated by a
+  // blank line:
+  //   1. category lines "cat_id, name, query", stored in m_category_names
+  //      and m_categories (a category may have several query lines);
+  //   2. point lines "lon, lat, cat_id", inserted into m_tree.
+  // Blank lines after the separator are ignored and lines with fewer than
+  // three fields are skipped with a warning on stderr. Non-numeric ids or
+  // coordinates still raise the std::stoi / std::stod exceptions.
+  void LoadCategories(const char *filename) {
+    std::ifstream infile(filename);
+    if (!infile) {
+      std::cerr << "Could not open dataset list " << filename << "\n";
+      return;
+    }
+
+    std::string line;
+    std::vector<std::string> parts;
+    bool parsingPoints = false;
+    while (std::getline(infile, line)) {
+      if (line.find_first_not_of(" \t\r\n") == std::string::npos) {
+        // The first blank line ends the categories section.
+        parsingPoints = true;
+        continue;
+      }
+
+      SplitTrim(line, ',', 3, parts);
+      if (parts.size() < 3) {
+        std::cerr << "Skipping malformed line: " << line << "\n";
+        continue;
+      }
+
+      if (!parsingPoints) {
+        // cat_id, name, query
+        uint16_t cat_id = std::stoi(parts[0]);
+        m_category_names[cat_id] = parts[1];
+        m_categories[cat_id].push_back(ParseQuery(parts[2]));
+      } else {
+        // lon, lat, cat_id
+        const osmium::Location loc(std::stod(parts[0]), std::stod(parts[1]));
+        int32_t coords[] = {loc.x(), loc.y()};
+        uint16_t cat_id = std::stoi(parts[2]);
+        m_tree.Insert(coords, coords, cat_id);
+      }
+    }
+  }
+
+public:
+  // Builds the handler from the list file named by `categories`: category
+  // queries and dataset points are loaded immediately.
+  AmenityHandler(const char *categories) {
+    this->LoadCategories(categories);
+  }
+
+  // Node callback: prints the node when it passes IsEligible at its own
+  // location.
+  void node(osmium::Node const & node) {
+    const osmium::Location location = node.location();
+    if (!IsEligible(location, node.tags()))
+      return;
+    print_object(node, location);
+  }
+
+  // Way callback: only closed ways are considered. The center is the mean
+  // of the coordinates of all node references that have a location; ways
+  // without any located node are skipped. Eligible ways are printed
+  // together with that center.
+  void way(osmium::Way const & way) {
+    if (!way.is_closed())
+      return;
+
+    int64_t sum_x = 0;
+    int64_t sum_y = 0;
+    int64_t located = 0;
+    for (const auto& ref : way.nodes()) {
+      if (!ref.location())
+        continue;
+      sum_x += ref.x();
+      sum_y += ref.y();
+      ++located;
+    }
+    if (located == 0)
+      return;
+
+    const osmium::Location center(sum_x / located, sum_y / located);
+    if (!IsEligible(center, way.tags()))
+      return;
+    print_object(way, center);
+  }
+
+  // Called with a relation and its precomputed center: prints the relation
+  // when it passes IsEligible at that center.
+  void multi(osmium::Relation const & rel, osmium::Location const & center) {
+    if (!IsEligible(center, rel.tags()))
+      return;
+    print_object(rel, center);
+  }
+
+}; // class AmenityHandler
+
+// Collects multipolygon relations together with their member ways and, once
+// a relation is complete, passes it with a computed center to
+// AmenityHandler::multi().
+// NOTE(review): the <false, true, false> template arguments presumably select
+// way members only -- confirm against the RelationsManager documentation.
+class AmenityRelationsManager : public osmium::relations::RelationsManager<AmenityRelationsManager, false, true, false> {
+
+  // Not owned; the handler must outlive the manager.
+  AmenityHandler *m_handler;
+
+public:
+
+  AmenityRelationsManager(AmenityHandler & handler) :
+      RelationsManager(),
+      m_handler(&handler) {
+  }
+
+  // Accepts only relations tagged type=multipolygon.
+  bool new_relation(osmium::Relation const & rel) noexcept {
+    const char *rel_type = rel.tags().get_value_by_key("type");
+    return rel_type && !std::strcmp(rel_type, "multipolygon");
+  }
+
+  // Averages the coordinates of all located nodes of the relation's member
+  // ways and forwards the relation with that center to the handler.
+  // Relations without any located node are dropped.
+  void complete_relation(osmium::Relation const & rel) {
+    int64_t x = 0, y = 0, cnt = 0;
+    for (auto const & member : rel.members()) {
+      // Only way members can be looked up in the way database.
+      if (member.ref() == 0 || member.type() != osmium::item_type::way)
+        continue;
+
+      const osmium::Way* way = this->get_member_way(member.ref());
+      if (way == nullptr)
+        continue;  // member way is not available, e.g. missing from an extract
+
+      for (const auto& node_ref : way->nodes()) {
+        if (node_ref.location()) {
+          x += node_ref.x();
+          y += node_ref.y();
+          cnt++;
+        }
+      }
+    }
+    if (cnt > 0)
+      m_handler->multi(rel, osmium::Location{x / cnt, y / cnt});
+  }
+}; // class AmenityRelationsManager
+
+// Entry point: argv[1] is the dataset list file and argv[2] the OSM input
+// file. Matching objects are printed to standard output as OSM XML.
+int main(int argc, char *argv[]) {
+  if (argc < 3) {
+    std::cerr << "Usage: " << argv[0]
+              << " <dataset.lst> <osmfile>\n";
+    std::exit(1);
+  }
+
+  const osmium::io::File input_file{argv[2]};
+  // An empty file name with format "osm"; presumably this means XML on
+  // standard output -- confirm against the osmium::io::File documentation.
+  const osmium::io::File output_file{"", "osm"};
+
+  // First pass over the input: let the manager read the relations.
+  AmenityHandler data_handler(argv[1]);
+  AmenityRelationsManager manager(data_handler);
+  osmium::relations::read_relations(input_file, manager);
+
+  // NOTE(review): nothing is written through `writer` in this function; the
+  // objects themselves are printed via std::cout by the handler. Presumably
+  // the writer only supplies the XML header and footer -- confirm, since the
+  // two output paths are not synchronized here.
+  osmium::io::Header header;
+  header.set("generator", argv[0]);
+  osmium::io::Writer writer{output_file, header, osmium::io::overwrite::allow};
+
+  // Second pass: the location handler runs before the data handler and the
+  // relations manager's handler in the apply() call below.
+  index_type index;
+  location_handler_type location_handler{index};
+  location_handler.ignore_errors();
+  osmium::io::Reader reader{input_file};
+
+  osmium::apply(reader, location_handler, data_handler, manager.handler());
+
+  // Flush the objects printed via std::cout before closing the writer.
+  std::cout.flush();
+  reader.close();
+  writer.close();
+}
--- a/filter/xml_centers_output.hpp
+++ b/filter/xml_centers_output.hpp
@ -0,0 +1,279 @@
+/*
+
+This file is based on xml_output_format.hpp from the Osmium library
+(http://osmcode.org/libosmium).
+
+Copyright 2013-2017 Jochen Topf <jochen@topf.org> and others (see README).
+Copyright 2017 Ilya Zverev <ilya@zverev.info>, MAPS.ME
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+*/
+
+#include <osmium/io/detail/string_util.hpp>
+#include <osmium/osm/box.hpp>
+#include <osmium/osm/item_type.hpp>
+#include <osmium/osm/location.hpp>
+#include <osmium/osm/node.hpp>
+#include <osmium/osm/node_ref.hpp>
+#include <osmium/osm/object.hpp>
+#include <osmium/osm/relation.hpp>
+#include <osmium/osm/tag.hpp>
+#include <osmium/osm/timestamp.hpp>
+#include <osmium/osm/types.hpp>
+#include <osmium/osm/way.hpp>
+
+#include <iterator>
+#include <memory>
+#include <string>
+#include <utility>
+
+namespace osmium {
+
+    namespace io {
+
+        namespace xmlcenters {
+
+            namespace detail {
+
+                // Appends two XML attributes to `out`: one named `lat` holding
+                // location.y() and one named `lon` holding location.x(), each
+                // preceded by a space and formatted by
+                // append_location_coordinate_to_string.
+                inline void append_lat_lon_attributes(std::string& out, const char* lat, const char* lon, const osmium::Location& location) {
+                    out.push_back(' ');
+                    out.append(lat);
+                    out.append("=\"");
+                    osmium::detail::append_location_coordinate_to_string(std::back_inserter(out), location.y());
+
+                    out.append("\" ");
+                    out.append(lon);
+                    out.append("=\"");
+                    osmium::detail::append_location_coordinate_to_string(std::back_inserter(out), location.x());
+                    out.push_back('"');
+                }
+
+            } // namespace detail
+
+            class XMLCentersOutput {
+
+                std::shared_ptr<std::string> m_out;
+
+                // Thin wrapper: appends `data` to `out` using
+                // osmium::io::detail::append_xml_encoded_string.
+                void append_xml_encoded_string(std::string & out, const char *data) {
+                    namespace osmium_detail = osmium::io::detail;
+                    osmium_detail::append_xml_encoded_string(out, data);
+                }
+
+                // Appends the decimal representation of `value` to the output
+                // buffer, with a leading '-' for negative numbers.
+                void output_int(int64_t value) {
+                    // Work on the unsigned magnitude: negating the most negative
+                    // int64_t as a signed value would overflow.
+                    uint64_t magnitude = static_cast<uint64_t>(value);
+                    if (value < 0) {
+                        *m_out += '-';
+                        magnitude = 0 - magnitude;
+                    }
+
+                    // Collect the digits least significant first; 20 characters
+                    // are enough for any 64-bit value.
+                    char temp[20];
+                    char *t = temp;
+                    do {
+                        *t++ = static_cast<char>('0' + magnitude % 10);
+                        magnitude /= 10;
+                    } while (magnitude > 0);
+
+                    // Append the digits in reverse, i.e. most significant first.
+                    while (t != temp) {
+                        m_out->push_back(*--t);
+                    }
+                }
+
+                // Appends `num` spaces to the output buffer. A count that is
+                // zero or negative appends nothing.
+                void write_spaces(int num) {
+                    if (num > 0) {
+                        m_out->append(static_cast<std::string::size_type>(num), ' ');
+                    }
+                }
+
+                // Writes the two-space indentation used for top-level elements.
+                void write_prefix() {
+                    constexpr int indent = 2;
+                    write_spaces(indent);
+                }
+
+                // Appends ` name="value"` to the output buffer; the value is
+                // formatted as an integer by output_int.
+                template <typename T>
+                void write_attribute(const char* name, T value) {
+                    std::string& out = *m_out;
+                    out += ' ';
+                    out += name;
+                    out += "=\"";
+                    output_int(value);
+                    out += '"';
+                }
+
+                // Writes the metadata attributes of an object: id always;
+                // version, timestamp and changeset only when set; uid and user
+                // only when the user is not anonymous.
+                void write_meta(const osmium::OSMObject& object) {
+                    std::string& out = *m_out;
+
+                    write_attribute("id", object.id());
+
+                    const auto version = object.version();
+                    if (version) {
+                        write_attribute("version", version);
+                    }
+
+                    const osmium::Timestamp timestamp = object.timestamp();
+                    if (timestamp) {
+                        out += " timestamp=\"";
+                        out += timestamp.to_iso();
+                        out += "\"";
+                    }
+
+                    if (!object.user_is_anonymous()) {
+                        write_attribute("uid", object.uid());
+                        out += " user=\"";
+                        append_xml_encoded_string(out, object.user());
+                        out += "\"";
+                    }
+
+                    const auto changeset = object.changeset();
+                    if (changeset) {
+                        write_attribute("changeset", changeset);
+                    }
+                }
+
+                // Writes one indented <tag k="..." v="..."/> line per tag,
+                // with XML-encoded key and value.
+                void write_tags(const osmium::TagList& tags) {
+                    std::string& out = *m_out;
+                    for (const auto& kv : tags) {
+                        write_spaces(2);
+                        out += "  <tag k=\"";
+                        append_xml_encoded_string(out, kv.key());
+                        out += "\" v=\"";
+                        append_xml_encoded_string(out, kv.value());
+                        out += "\"/>\n";
+                    }
+                }
+
+            public:
+
+                // Starts with an empty output buffer.
+                XMLCentersOutput() :
+                    m_out(std::make_shared<std::string>()) {
+                }
+
+                // Serializes one object to XML and returns the text, leaving
+                // the internal buffer empty for the next call. `center` is
+                // used for ways and relations; nodes are written with their own
+                // location. Throws osmium::unknown_type for any other item type.
+                std::string apply(osmium::OSMObject const & item, osmium::Location const & center) {
+                    const osmium::item_type type = item.type();
+                    if (type == osmium::item_type::node) {
+                        node(static_cast<const osmium::Node&>(item));
+                    } else if (type == osmium::item_type::way) {
+                        way(static_cast<const osmium::Way&>(item), center);
+                    } else if (type == osmium::item_type::relation) {
+                        relation(static_cast<const osmium::Relation&>(item), center);
+                    } else {
+                        throw osmium::unknown_type{};
+                    }
+
+                    // Move the accumulated text out of the shared buffer.
+                    std::string result;
+                    result.swap(*m_out);
+                    return result;
+                }
+
+                // Appends a <node> element: metadata, lat/lon when the node
+                // has a location, and its tags. A node without tags is written
+                // as a self-closing element.
+                void node(const osmium::Node& node) {
+                    write_prefix();
+                    *m_out += "<node";
+                    write_meta(node);
+
+                    const osmium::Location location = node.location();
+                    if (location) {
+                        detail::append_lat_lon_attributes(*m_out, "lat", "lon", location);
+                    }
+
+                    const osmium::TagList& tags = node.tags();
+                    if (!tags.empty()) {
+                        *m_out += ">\n";
+                        write_tags(tags);
+                        write_prefix();
+                        *m_out += "</node>\n";
+                    } else {
+                        *m_out += "/>\n";
+                    }
+                }
+
+                // Appends a <way> element: metadata, a <center> child with the
+                // given coordinates, one <nd> per node reference, then the tags.
+                // A way with neither tags nor nodes is written as a self-closing
+                // element without a center.
+                void way(const osmium::Way& way, osmium::Location const & center) {
+                    const osmium::TagList& tags = way.tags();
+                    const auto& nodes = way.nodes();
+
+                    write_prefix();
+                    *m_out += "<way";
+                    write_meta(way);
+
+                    const bool has_children = !(tags.empty() && nodes.empty());
+                    if (!has_children) {
+                        *m_out += "/>\n";
+                        return;
+                    }
+                    *m_out += ">\n";
+
+                    write_prefix();
+                    *m_out += "  <center";
+                    detail::append_lat_lon_attributes(*m_out, "lat", "lon", center);
+                    *m_out += "/>\n";
+
+                    for (const auto& nd : nodes) {
+                        write_prefix();
+                        *m_out += "  <nd";
+                        write_attribute("ref", nd.ref());
+                        *m_out += "/>\n";
+                    }
+
+                    write_tags(tags);
+
+                    write_prefix();
+                    *m_out += "</way>\n";
+                }
+
+                // Appends a <relation> element: metadata, a <center> child with
+                // the given coordinates, one <member> per member (type, ref,
+                // XML-encoded role), then the tags. A relation with neither tags
+                // nor members is written as a self-closing element without a
+                // center.
+                void relation(const osmium::Relation& relation, osmium::Location const & center) {
+                    const osmium::TagList& tags = relation.tags();
+                    const auto& members = relation.members();
+
+                    write_prefix();
+                    *m_out += "<relation";
+                    write_meta(relation);
+
+                    const bool has_children = !(tags.empty() && members.empty());
+                    if (!has_children) {
+                        *m_out += "/>\n";
+                        return;
+                    }
+                    *m_out += ">\n";
+
+                    write_prefix();
+                    *m_out += "  <center";
+                    detail::append_lat_lon_attributes(*m_out, "lat", "lon", center);
+                    *m_out += "/>\n";
+
+                    for (const auto& entry : members) {
+                        write_prefix();
+                        *m_out += "  <member type=\"";
+                        *m_out += item_type_to_name(entry.type());
+                        *m_out += '"';
+                        write_attribute("ref", entry.ref());
+                        *m_out += " role=\"";
+                        append_xml_encoded_string(*m_out, entry.role());
+                        *m_out += "\"/>\n";
+                    }
+
+                    write_tags(tags);
+
+                    write_prefix();
+                    *m_out += "</relation>\n";
+                }
+
+            }; // class XMLCentersOutput
+
+        } // namespace xmlcenters
+
+    } // namespace io
+
+} // namespace osmium
--- a/profiles/auchan_moscow.py
+++ b/profiles/auchan_moscow.py
@ -8,7 +8,7 @@ source = 'auchan.ru'
 # Not adding a ref:auchan tag, since we don't have good identifiers
 no_dataset_id = True
 # Using a name query with regular expressions
-query = [('shop', '~supermarket|mall'), ('name', '~Ашан|АШАН')]
+query = [('shop', 'supermarket', 'mall'), ('name', '~Ашан|АШАН')]
 master_tags = ('name', 'opening_hours', 'phone', 'website')
 # Empty dict so we don't add a fixme tag to unmatched objects
 tag_unmatched = {}
@ -44,7 +44,7 @@ def dataset(fileobj):

    # We are parsing HTML, and for that we need an lxml package
    from lxml import html
-    global download_url_copy
+    global download_url_copy, re
    h = html.fromstring(fileobj.read().decode('utf-8'))
    shops = h.find_class('shops-in-the-city-holder')[0]
    shops.make_links_absolute(download_url_copy)
--- a/profiles/burgerking.py
+++ b/profiles/burgerking.py
@ -1,3 +1,7 @@
+# Note: the json file at the burgerking website was restructured
+# and does not contain any useful data now.
+# So this profile is here solely for demonstration purposes.
+
 import json
 import codecs
 import re
@ -20,6 +24,7 @@ tag_unmatched = {

 def dataset(fileobj):
    def parse_hours(s):
+        global re
        s = re.sub('^зал:? *', '', s.lower())
        s = s.replace('<br />', ';').replace('<br>', ';').replace('\n', ';').replace(' ', '').replace(',', ';').replace('–', '-')
        s = s.replace('-00:', '-24:')
@ -66,7 +71,11 @@ def dataset(fileobj):
        346: 'Передвинуть к кафе',

    }
-    source = json.load(codecs.getreader('utf-8')(fileobj))
+    json_src = codecs.getreader('utf-8')(fileobj).read()
+    p = json_src.find('<div')
+    if p > 0:
+        json_src = json_src[:p]
+    source = json.loads(json_src)
    data = []
    for el in source:
        gid = int(el['origID'])
--- a/profiles/minkult.py
+++ b/profiles/minkult.py
@ -11,7 +11,8 @@ master_tags = ('official_name', 'phone', 'opening_hours', 'website')


 # Reading the dataset passport to determine an URL of the latest dataset version
-def download_url(dataset_id='7705851331-theaters'):
+def download_url():
+    dataset_id = '7705851331-' + (param or 'museums')
    r = requests.get('http://opendata.mkrf.ru/opendata/{}/meta.json'.format(dataset_id))
    if r.status_code != 200 or len(r.content) == 0:
        logging.error('Could not get URL for dataset: %s %s', r.status_code, r.text)
@ -22,6 +23,22 @@ def download_url(dataset_id='7705851331-theaters'):
    logging.info('Downloading %s from %s', result['title'], latest['created'])
    return latest['source']

+source = 'opendata.mkrf.ru'
+dataset_id = 'mkrf_'+(param or 'museums')
+if not param or param == 'museums':
+    query = [('tourism', 'museum')]
+elif param == 'theaters':
+    query = [('amenity', 'theatre')]
+elif param == 'circuses':
+    query = [('amenity', 'circus')]
+elif param == 'philharmonic':
+    query = [('amenity', 'theatre')]
+else:
+    raise ValueError('Unknown param value: {}'.format(param))
+
+max_distance = 300
+master_tags = ('official_name', 'phone', 'opening_hours', 'website')
+

 def dataset(fileobj):
    def make_wd_ranges(r):
--- a/profiles/moscow_parkomats.py
+++ b/profiles/moscow_parkomats.py
@ -1,12 +1,10 @@
-# Available modules: codecs, logging, requests, json, re, etree. But importing these helps catch other errors
+# Available modules: codecs, logging, requests, json, etree. But importing these helps catch other errors
 import json
-import re
 import logging
-import requests
-import zipfile


 def download_url(mos_dataset_id=1421):
+    import requests
    r = requests.get('https://data.mos.ru/api/datasets/expformats/?datasetId={}'.format(mos_dataset_id))
    if r.status_code != 200 or len(r.content) == 0:
        logging.error('Could not get URL for dataset: %s %s', r.status_code, r.text)
@ -15,7 +13,7 @@ def download_url(mos_dataset_id=1421):
    url = [x for x in r.json() if x['Format'] == 'json'][0]
    version = '?'
    title = 'dataset'
-    r = requests.get('https://data.mos.ru/apiproxy/opendata/1421/meta.json'.format(mos_dataset_id))
+    r = requests.get('https://data.mos.ru/apiproxy/opendata/{}/meta.json'.format(mos_dataset_id))
    if r.status_code == 200:
        title = r.json()['Title']
        version = r.json()['VersionNumber']
@ -50,6 +48,8 @@ master_tags = ('zone:parking', 'ref', 'contact:phone', 'contact:website', 'opera

 # A list of SourcePoint objects. Initialize with (id, lat, lon, {tags}).
 def dataset(fileobj):
+    import zipfile
+    import re
    zf = zipfile.ZipFile(fileobj)
    source = json.loads(zf.read(zf.namelist()[0]).decode('cp1251'))
    RE_NUM4 = re.compile(r'\d{4,6}')
--- a/profiles/navads_shell_json.py
+++ b/profiles/navads_shell_json.py
@ -56,6 +56,7 @@ def dataset(fileobj):
            return '24/7'
        return '; '.join(res).replace('23:59', '24:00')

+    global re, defaultdict
    source = json.load(codecs.getreader('utf-8-sig')(fileobj))
    data = []
    for el in source['Locations']: