diff --git a/.gitignore b/.gitignore index 9f81f34..cd7e773 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ *.gz *.csv *.pyc +*.pbf +*.lst +*.user private/ data/ __pycache__/ diff --git a/CHANGELOG.md b/CHANGELOG.md index f8be4cb..680b129 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ ## master branch +* Support for categories: `category_tag` and `categories` parameters in a profile. +* LibOsmium-based C++ filtering script for categories. +* More than one tag value works as "one of": `[('amenity', 'cafe', 'restaurant')]`. +* Query can be a list of queries, providing for "OR" clause. An example: + + `[[('amenity', 'swimming_pool')], [('leisure', 'swimming_pool')]]` + +* Parameters for profiles, using `-p` argument. +* No more default imports solely for profiles, import `re` and `zipfile` youself now. + ## 1.2.3 _Released 2017-12-29_ diff --git a/README.rst b/README.rst index a9ac227..ca46f3f 100644 --- a/README.rst +++ b/README.rst @@ -29,7 +29,7 @@ For a simplest case, run: conflate -o result.osm -You might want to add ``-v`` to get status messages, and other arguments +You might want to add other arguments to pass a dataset file or write the resulting osmChange somewhere. Run ``conflate -h`` to see a list of arguments. diff --git a/conflate/conflate.py b/conflate/conflate.py index d79daee..bfc0c36 100755 --- a/conflate/conflate.py +++ b/conflate/conflate.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import argparse import codecs +import json import kdtree import logging import math @@ -8,10 +9,6 @@ import requests import os import sys from io import BytesIO -import json # for profiles -import re # for profiles -import zipfile # for profiles -from collections import defaultdict # for profiles try: from .version import __version__ except ImportError: @@ -81,6 +78,7 @@ class OSMPoint(SourcePoint): self.version = version self.members = None self.action = None + self.categories = categories or set() self.remarks = None def copy(self): @@ -210,27 +208,38 @@ class OsmConflator: (k, v) turns into [k=v], (k,) into [k], (k, None) into [!k], (k, "~v") into [k~v].""" tags = self.profile.get( 'query', required="a list of tuples. E.g. [('amenity', 'cafe'), ('name', '~Mc.*lds')]") + tag_strs = [] if isinstance(tags, str): - tag_str = tags + tag_strs = [tags] else: - tag_str = '' - for t in tags: - if len(t) == 1: - q = '"{}"'.format(t[0]) - elif t[1] is None or len(t[1]) == 0: - q = '"!{}"'.format(t[0]) - elif t[1][0] == '~': - q = '"{}"~"{}"'.format(t[0], t[1][1:]) - else: - q = '"{}"="{}"'.format(t[0], t[1]) - tag_str += '[' + q + ']' + if not isinstance(tags[0], str) and isinstance(tags[0][0], str): + tags = [tags] + for tags_q in tags: + if isinstance(tags_q, str): + tag_strs.append(tags_q) + continue + tag_str = '' + for t in tags_q: + if len(t) == 1: + q = '"{}"'.format(t[0]) + elif t[1] is None or len(t[1]) == 0: + q = '"!{}"'.format(t[0]) + elif t[1][0] == '~': + q = '"{}"~"{}",i'.format(t[0], t[1][1:]) + elif len(t) > 2: + q = '"{}"~"^({})$"'.format(t[0], '|'.join(t[1:])) + else: + q = '"{}"="{}"'.format(t[0], t[1]) + tag_str += '[' + q + ']' + tag_strs.append(tag_str) timeout = self.profile.get('overpass_timeout', 120) query = '[out:xml]{};('.format('' if timeout is None else '[timeout:{}]'.format(timeout)) for bbox in bboxes: bbox_str = '' if bbox is None else '(' + ','.join([str(x) for x in bbox]) + ')' - for t in ('node', 'way', 'relation["type"="multipolygon"]'): - query += t + tag_str + bbox_str + ';' + for tag_str in tag_strs: + for t in ('node', 'way', 'relation["type"="multipolygon"]'): + query += t + tag_str + bbox_str + ';' if self.ref is not None: for t in ('node', 'way', 'relation'): query += t + '["' + self.ref + '"];' @@ -349,21 +358,68 @@ class OsmConflator: padding = self.profile.get('bbox_padding', BBOX_PADDING) return [get_bbox(b, padding) for b in boxes] - def check_against_profile_tags(self, tags): + def get_categories(self, tags): + def match_query(tags, query): + for tag in query: + if len(tag) == 1: + if tag[0] in tags: + return False + elif tag[1] is None or tag[1] == '': + if tag[0] not in tags: + return False + else: + value = tags.get(tag[0], None) + if value is None: + return False + found = False + for t2 in tag[1:]: + if t2[0] == '~': + m = re.search(t2[1:], value) + if not m: + return False + elif t2[0] == '!': + if t2[1:].lower() in value.lower(): + found = True + elif t2 == value: + found = True + if found: + break + if not found: + return False + return True + + def tags_to_query(tags): + return [(k, v) for k, v in tags.items()] + + result = set() qualifies = self.profile.get('qualifies', args=tags) if qualifies is not None: - return qualifies + if qualifies: + result.add(None) + return result + # First check default query query = self.profile.get('query', None) - if query is not None and not isinstance(query, str): - for tag in query: - if len(tag) >= 1: - if tag[0] not in tags: - return False - if len(tag) >= 2 and tag[1][0] != '~': - if tag[1] != tags[tag[0]]: - return False - return True + if query is not None: + if isinstance(query, str): + result.add(None) + else: + if isinstance(query[0][0], str): + query = [query] + for q in query: + if match_query(tags, q): + result.add(None) + break + + # Then check each category if we got these + categories = self.profile.get('categories', {}) + for name, params in categories.items(): + if 'tags' not in params and 'query' not in params: + raise ValueError('No tags and query attributes for category "{}"'.format(name)) + if match_query(tags, params.get('query', tags_to_query(params.get('tags')))): + result.add(name) + + return result def download_osm(self): """Constructs an Overpass API query and requests objects @@ -424,7 +480,8 @@ class OsmConflator: tags = {} for tag in el.findall('tag'): tags[tag.get('k')] = tag.get('v') - if not self.check_against_profile_tags(tags): + categories = self.get_categories(tags) + if categories is False or categories is None or len(categories) == 0: continue if el.tag == 'node': @@ -458,7 +515,7 @@ class OsmConflator: continue pt = OSMPoint( el.tag, int(el.get('id')), int(el.get('version')), - coord[0], coord[1], tags) + coord[0], coord[1], tags, categories) pt.members = members if pt.is_poi(): if callable(weight_fn): @@ -649,7 +706,8 @@ class OsmConflator: nearest = [p for p in nearest if match_func(p[0].data.tags, point.tags)] if not nearest: return None, None - nearest = [(n[0], n[0].data.distance(point)) for n in nearest] + nearest = [(n[0], n[0].data.distance(point)) + for n in nearest if point.category in n[0].data.categories] return sorted(nearest, key=lambda kv: kv[1])[0] if not self.osmdata: @@ -849,6 +907,22 @@ def read_dataset(profile, fileobj): required='returns a list of SourcePoints with the dataset') +def add_categories_to_dataset(profile, dataset): + categories = profile.get('categories') + if not categories: + return + tag = profile.get('category_tag') + other = categories.get('other', {}) + for d in dataset: + if tag and tag in d.tags: + d.category = d.tags[tag] + del d.tags[tag] + if d.category: + cat_tags = categories.get(d.category, other).get('tags', None) + if cat_tags: + d.tags.update(cat_tags) + + def transform_dataset(profile, dataset): """Transforms tags in the dataset using the "transform" method in the profile or the instructions in that field in string or dict form.""" @@ -918,6 +992,56 @@ def transform_dataset(profile, dataset): d.tags[key] = value +def write_for_filter(profile, dataset, f): + def query_to_tag_strings(query): + if isinstance(query, str): + raise ValueError('Query string for filter should not be a string') + result = [] + if not isinstance(query[0], str) and isinstance(query[0][0], str): + query = [query] + for q in query: + if isinstance(q, str): + raise ValueError('Query string for filter should not be a string') + parts = [] + for part in q: + if len(part) == 1: + parts.append(part[0]) + elif part[1] is None or len(part[1]) == 0: + parts.append('{}='.format(part[0])) + elif part[1][0] == '~': + raise ValueError('Cannot use regular expressions in filter') + elif '|' in part[1] or ';' in part[1]: + raise ValueError('"|" and ";" symbols is not allowed in query values') + else: + parts.append('='.join(part)) + result.append('|'.join(parts)) + return result + + def tags_to_query(tags): + return [(k, v) for k, v in tags.items()] + + categories = profile.get('categories', {}) + p_query = profile.get('query', None) + if p_query is not None: + categories[None] = {'query': p_query} + cat_map = {} + i = 0 + try: + for name, query in categories.items(): + for tags in query_to_tag_strings(query.get('query', tags_to_query(query.get('tags')))): + f.write('{},{},{}\n'.format(i, name or '', tags)) + cat_map[name] = i + i += 1 + except ValueError as e: + logging.error(e) + return False + f.write('\n') + for d in dataset: + if d.category in cat_map: + f.write('{},{},{}\n'.format(d.lon, d.lat, cat_map[d.category])) + return True + + def run(profile=None): parser = argparse.ArgumentParser( description='''{}. @@ -928,15 +1052,17 @@ def run(profile=None): parser.add_argument('-i', '--source', type=argparse.FileType('rb'), help='Source file to pass to the profile dataset() function') parser.add_argument('-a', '--audit', type=argparse.FileType('r'), help='Conflation validation result as a JSON file') parser.add_argument('-o', '--output', type=argparse.FileType('w'), help='Output OSM XML file name') + parser.add_argument('-p', '--param', help='Optional parameter for the profile') parser.add_argument('--osc', action='store_true', help='Produce an osmChange file instead of JOSM XML') parser.add_argument('--osm', help='Instead of querying Overpass API, use this unpacked osm file. Create one from Overpass data if not found') parser.add_argument('-c', '--changes', type=argparse.FileType('w'), help='Write changes as GeoJSON for visualization') parser.add_argument('-m', '--check-move', action='store_true', help='Check for moveability of modified modes') + parser.add_argument('-f', '--for-filter', type=argparse.FileType('w'), help='Prepare a file for the filtering script') parser.add_argument('--verbose', '-v', action='store_true', help='Display debug messages') parser.add_argument('--quiet', '-q', action='store_true', help='Do not display informational messages') options = parser.parse_args() - if not options.output and not options.changes: + if not options.output and not options.changes and not options.for_filter: parser.print_help() return @@ -952,6 +1078,8 @@ def run(profile=None): if not profile: logging.debug('Loading profile %s', options.profile) + global param + param = options.param profile = Profile(profile or options.profile) dataset = read_dataset(profile, options.source) @@ -959,8 +1087,14 @@ def run(profile=None): logging.error('Empty source dataset') sys.exit(2) transform_dataset(profile, dataset) + add_categories_to_dataset(profile, dataset) logging.info('Read %s items from the dataset', len(dataset)) + if options.for_filter: + if write_for_filter(profile, dataset, options.for_filter): + logging.info('Prepared data for filtering, exitting') + return + audit = None if options.audit: audit = json.load(options.audit) diff --git a/conflate/version.py b/conflate/version.py index 5a5df3b..19b4f1d 100644 --- a/conflate/version.py +++ b/conflate/version.py @@ -1 +1 @@ -__version__ = '1.2.3' +__version__ = '1.3.0' diff --git a/filter/CMakeLists.txt b/filter/CMakeLists.txt new file mode 100644 index 0000000..990db22 --- /dev/null +++ b/filter/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 2.8) +set(NAME filter_planet_by_cats) +project(${NAME} C CXX) +set(CMAKE_CXX_STANDARD 11) +message(STATUS "Configuring ${NAME}") +list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}") +find_package(Osmium REQUIRED COMPONENTS io) +include_directories(SYSTEM ${OSMIUM_INCLUDE_DIRS}) +add_executable( + ${NAME} + ${NAME}.cpp + RTree.h + xml_centers_output.hpp +) +target_link_libraries(${NAME} ${OSMIUM_IO_LIBRARIES}) diff --git a/filter/FindOsmium.cmake b/filter/FindOsmium.cmake new file mode 100644 index 0000000..513ff21 --- /dev/null +++ b/filter/FindOsmium.cmake @@ -0,0 +1,354 @@ +#---------------------------------------------------------------------- +# +# FindOsmium.cmake +# +# Find the Libosmium headers and, optionally, several components needed +# for different Libosmium functions. +# +#---------------------------------------------------------------------- +# +# Usage: +# +# Copy this file somewhere into your project directory, where cmake can +# find it. Usually this will be a directory called "cmake" which you can +# add to the CMake module search path with the following line in your +# CMakeLists.txt: +# +# list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") +# +# Then add the following in your CMakeLists.txt: +# +# find_package(Osmium [version] REQUIRED COMPONENTS ) +# include_directories(SYSTEM ${OSMIUM_INCLUDE_DIRS}) +# +# The version number is optional. If it is not set, any version of +# libosmium will do. +# +# For the substitute a space separated list of one or more of the +# following components: +# +# pbf - include libraries needed for PBF input and output +# xml - include libraries needed for XML input and output +# io - include libraries needed for any type of input/output +# geos - include if you want to use any of the GEOS functions +# gdal - include if you want to use any of the OGR functions +# proj - include if you want to use any of the Proj.4 functions +# sparsehash - include if you use the sparsehash index +# +# You can check for success with something like this: +# +# if(NOT OSMIUM_FOUND) +# message(WARNING "Libosmium not found!\n") +# endif() +# +#---------------------------------------------------------------------- +# +# Variables: +# +# OSMIUM_FOUND - True if Osmium found. +# OSMIUM_INCLUDE_DIRS - Where to find include files. +# OSMIUM_XML_LIBRARIES - Libraries needed for XML I/O. +# OSMIUM_PBF_LIBRARIES - Libraries needed for PBF I/O. +# OSMIUM_IO_LIBRARIES - Libraries needed for XML or PBF I/O. +# OSMIUM_LIBRARIES - All libraries Osmium uses somewhere. +# +#---------------------------------------------------------------------- + +# This is the list of directories where we look for osmium includes. +set(_osmium_include_path + ../libosmium + ~/Library/Frameworks + /Library/Frameworks + /opt/local # DarwinPorts + /opt +) + +# Look for the header file. +find_path(OSMIUM_INCLUDE_DIR osmium/version.hpp + PATH_SUFFIXES include + PATHS ${_osmium_include_path} +) + +# Check libosmium version number +if(Osmium_FIND_VERSION) + file(STRINGS "${OSMIUM_INCLUDE_DIR}/osmium/version.hpp" _libosmium_version_define REGEX "#define LIBOSMIUM_VERSION_STRING") + if("${_libosmium_version_define}" MATCHES "#define LIBOSMIUM_VERSION_STRING \"([0-9.]+)\"") + set(_libosmium_version "${CMAKE_MATCH_1}") + else() + set(_libosmium_version "unknown") + endif() +endif() + +set(OSMIUM_INCLUDE_DIRS "${OSMIUM_INCLUDE_DIR}") + +#---------------------------------------------------------------------- +# +# Check for optional components +# +#---------------------------------------------------------------------- +if(Osmium_FIND_COMPONENTS) + foreach(_component ${Osmium_FIND_COMPONENTS}) + string(TOUPPER ${_component} _component_uppercase) + set(Osmium_USE_${_component_uppercase} TRUE) + endforeach() +endif() + +#---------------------------------------------------------------------- +# Component 'io' is an alias for 'pbf' and 'xml' +if(Osmium_USE_IO) + set(Osmium_USE_PBF TRUE) + set(Osmium_USE_XML TRUE) +endif() + +#---------------------------------------------------------------------- +# Component 'ogr' is an alias for 'gdal' +if(Osmium_USE_OGR) + set(Osmium_USE_GDAL TRUE) +endif() + +#---------------------------------------------------------------------- +# Component 'pbf' +if(Osmium_USE_PBF) + find_package(ZLIB) + find_package(Threads) + find_package(Protozero 1.5.1) + + list(APPEND OSMIUM_EXTRA_FIND_VARS ZLIB_FOUND Threads_FOUND PROTOZERO_INCLUDE_DIR) + if(ZLIB_FOUND AND Threads_FOUND AND PROTOZERO_FOUND) + list(APPEND OSMIUM_PBF_LIBRARIES + ${ZLIB_LIBRARIES} + ${CMAKE_THREAD_LIBS_INIT} + ) + list(APPEND OSMIUM_INCLUDE_DIRS + ${ZLIB_INCLUDE_DIR} + ${PROTOZERO_INCLUDE_DIR} + ) + else() + message(WARNING "Osmium: Can not find some libraries for PBF input/output, please install them or configure the paths.") + endif() +endif() + +#---------------------------------------------------------------------- +# Component 'xml' +if(Osmium_USE_XML) + find_package(EXPAT) + find_package(BZip2) + find_package(ZLIB) + find_package(Threads) + + list(APPEND OSMIUM_EXTRA_FIND_VARS EXPAT_FOUND BZIP2_FOUND ZLIB_FOUND Threads_FOUND) + if(EXPAT_FOUND AND BZIP2_FOUND AND ZLIB_FOUND AND Threads_FOUND) + list(APPEND OSMIUM_XML_LIBRARIES + ${EXPAT_LIBRARIES} + ${BZIP2_LIBRARIES} + ${ZLIB_LIBRARIES} + ${CMAKE_THREAD_LIBS_INIT} + ) + list(APPEND OSMIUM_INCLUDE_DIRS + ${EXPAT_INCLUDE_DIR} + ${BZIP2_INCLUDE_DIR} + ${ZLIB_INCLUDE_DIR} + ) + else() + message(WARNING "Osmium: Can not find some libraries for XML input/output, please install them or configure the paths.") + endif() +endif() + +#---------------------------------------------------------------------- +list(APPEND OSMIUM_IO_LIBRARIES + ${OSMIUM_PBF_LIBRARIES} + ${OSMIUM_XML_LIBRARIES} +) + +list(APPEND OSMIUM_LIBRARIES + ${OSMIUM_IO_LIBRARIES} +) + +#---------------------------------------------------------------------- +# Component 'geos' +if(Osmium_USE_GEOS) + find_path(GEOS_INCLUDE_DIR geos/geom.h) + find_library(GEOS_LIBRARY NAMES geos) + + list(APPEND OSMIUM_EXTRA_FIND_VARS GEOS_INCLUDE_DIR GEOS_LIBRARY) + if(GEOS_INCLUDE_DIR AND GEOS_LIBRARY) + SET(GEOS_FOUND 1) + list(APPEND OSMIUM_LIBRARIES ${GEOS_LIBRARY}) + list(APPEND OSMIUM_INCLUDE_DIRS ${GEOS_INCLUDE_DIR}) + else() + message(WARNING "Osmium: GEOS library is required but not found, please install it or configure the paths.") + endif() +endif() + +#---------------------------------------------------------------------- +# Component 'gdal' (alias 'ogr') +if(Osmium_USE_GDAL) + find_package(GDAL) + + list(APPEND OSMIUM_EXTRA_FIND_VARS GDAL_FOUND) + if(GDAL_FOUND) + list(APPEND OSMIUM_LIBRARIES ${GDAL_LIBRARIES}) + list(APPEND OSMIUM_INCLUDE_DIRS ${GDAL_INCLUDE_DIRS}) + else() + message(WARNING "Osmium: GDAL library is required but not found, please install it or configure the paths.") + endif() +endif() + +#---------------------------------------------------------------------- +# Component 'proj' +if(Osmium_USE_PROJ) + find_path(PROJ_INCLUDE_DIR proj_api.h) + find_library(PROJ_LIBRARY NAMES proj) + + list(APPEND OSMIUM_EXTRA_FIND_VARS PROJ_INCLUDE_DIR PROJ_LIBRARY) + if(PROJ_INCLUDE_DIR AND PROJ_LIBRARY) + set(PROJ_FOUND 1) + list(APPEND OSMIUM_LIBRARIES ${PROJ_LIBRARY}) + list(APPEND OSMIUM_INCLUDE_DIRS ${PROJ_INCLUDE_DIR}) + else() + message(WARNING "Osmium: PROJ.4 library is required but not found, please install it or configure the paths.") + endif() +endif() + +#---------------------------------------------------------------------- +# Component 'sparsehash' +if(Osmium_USE_SPARSEHASH) + find_path(SPARSEHASH_INCLUDE_DIR google/sparsetable) + + list(APPEND OSMIUM_EXTRA_FIND_VARS SPARSEHASH_INCLUDE_DIR) + if(SPARSEHASH_INCLUDE_DIR) + # Find size of sparsetable::size_type. This does not work on older + # CMake versions because they can do this check only in C, not in C++. + if(NOT CMAKE_VERSION VERSION_LESS 3.0) + include(CheckTypeSize) + set(CMAKE_REQUIRED_INCLUDES ${SPARSEHASH_INCLUDE_DIR}) + set(CMAKE_EXTRA_INCLUDE_FILES "google/sparsetable") + check_type_size("google::sparsetable::size_type" SPARSETABLE_SIZE_TYPE LANGUAGE CXX) + set(CMAKE_EXTRA_INCLUDE_FILES) + set(CMAKE_REQUIRED_INCLUDES) + else() + set(SPARSETABLE_SIZE_TYPE ${CMAKE_SIZEOF_VOID_P}) + endif() + + # Sparsetable::size_type must be at least 8 bytes (64bit), otherwise + # OSM object IDs will not fit. + if(SPARSETABLE_SIZE_TYPE GREATER 7) + set(SPARSEHASH_FOUND 1) + add_definitions(-DOSMIUM_WITH_SPARSEHASH=${SPARSEHASH_FOUND}) + list(APPEND OSMIUM_INCLUDE_DIRS ${SPARSEHASH_INCLUDE_DIR}) + else() + message(WARNING "Osmium: Disabled Google SparseHash library on 32bit system (size_type=${SPARSETABLE_SIZE_TYPE}).") + endif() + else() + message(WARNING "Osmium: Google SparseHash library is required but not found, please install it or configure the paths.") + endif() +endif() + +#---------------------------------------------------------------------- + +list(REMOVE_DUPLICATES OSMIUM_INCLUDE_DIRS) + +if(OSMIUM_XML_LIBRARIES) + list(REMOVE_DUPLICATES OSMIUM_XML_LIBRARIES) +endif() + +if(OSMIUM_PBF_LIBRARIES) + list(REMOVE_DUPLICATES OSMIUM_PBF_LIBRARIES) +endif() + +if(OSMIUM_IO_LIBRARIES) + list(REMOVE_DUPLICATES OSMIUM_IO_LIBRARIES) +endif() + +if(OSMIUM_LIBRARIES) + list(REMOVE_DUPLICATES OSMIUM_LIBRARIES) +endif() + +#---------------------------------------------------------------------- +# +# Check that all required libraries are available +# +#---------------------------------------------------------------------- +if(OSMIUM_EXTRA_FIND_VARS) + list(REMOVE_DUPLICATES OSMIUM_EXTRA_FIND_VARS) +endif() +# Handle the QUIETLY and REQUIRED arguments and the optional version check +# and set OSMIUM_FOUND to TRUE if all listed variables are TRUE. +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Osmium + REQUIRED_VARS OSMIUM_INCLUDE_DIR ${OSMIUM_EXTRA_FIND_VARS} + VERSION_VAR _libosmium_version) +unset(OSMIUM_EXTRA_FIND_VARS) + +#---------------------------------------------------------------------- +# +# A function for setting the -pthread option in compilers/linkers +# +#---------------------------------------------------------------------- +function(set_pthread_on_target _target) + if(NOT MSVC) + set_target_properties(${_target} PROPERTIES COMPILE_FLAGS "-pthread") + if(NOT APPLE) + set_target_properties(${_target} PROPERTIES LINK_FLAGS "-pthread") + endif() + endif() +endfunction() + +#---------------------------------------------------------------------- +# +# Add compiler flags +# +#---------------------------------------------------------------------- +add_definitions(-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64) + +if(MSVC) + add_definitions(-wd4996) + + # Disable warning C4068: "unknown pragma" because we want it to ignore + # pragmas for other compilers. + add_definitions(-wd4068) + + # Disable warning C4715: "not all control paths return a value" because + # it generates too many false positives. + add_definitions(-wd4715) + + # Disable warning C4351: new behavior: elements of array '...' will be + # default initialized. The new behaviour is correct and we don't support + # old compilers anyway. + add_definitions(-wd4351) + + # Disable warning C4503: "decorated name length exceeded, name was truncated" + # there are more than 150 of generated names in libosmium longer than 4096 symbols supported in MSVC + add_definitions(-wd4503) + + add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN -D_CRT_SECURE_NO_WARNINGS) +endif() + +if(APPLE) +# following only available from cmake 2.8.12: +# add_compile_options(-stdlib=libc++) +# so using this instead: + add_definitions(-stdlib=libc++) + set(LDFLAGS ${LDFLAGS} -stdlib=libc++) +endif() + +#---------------------------------------------------------------------- + +# This is a set of recommended warning options that can be added when compiling +# libosmium code. +if(MSVC) + set(OSMIUM_WARNING_OPTIONS "/W3 /wd4514" CACHE STRING "Recommended warning options for libosmium") +else() + set(OSMIUM_WARNING_OPTIONS "-Wall -Wextra -pedantic -Wredundant-decls -Wdisabled-optimization -Wctor-dtor-privacy -Wnon-virtual-dtor -Woverloaded-virtual -Wsign-promo -Wold-style-cast" CACHE STRING "Recommended warning options for libosmium") +endif() + +set(OSMIUM_DRACONIC_CLANG_OPTIONS "-Wdocumentation -Wunused-exception-parameter -Wmissing-declarations -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-unused-macros -Wno-exit-time-destructors -Wno-global-constructors -Wno-padded -Wno-switch-enum -Wno-missing-prototypes -Wno-weak-vtables -Wno-cast-align -Wno-float-equal") + +if(Osmium_DEBUG) + message(STATUS "OSMIUM_XML_LIBRARIES=" ${OSMIUM_XML_LIBRARIES}) + message(STATUS "OSMIUM_PBF_LIBRARIES=" ${OSMIUM_PBF_LIBRARIES}) + message(STATUS "OSMIUM_IO_LIBRARIES=" ${OSMIUM_IO_LIBRARIES}) + message(STATUS "OSMIUM_LIBRARIES=" ${OSMIUM_LIBRARIES}) + message(STATUS "OSMIUM_INCLUDE_DIRS=" ${OSMIUM_INCLUDE_DIRS}) +endif() + diff --git a/filter/FindProtozero.cmake b/filter/FindProtozero.cmake new file mode 100644 index 0000000..ad16cab --- /dev/null +++ b/filter/FindProtozero.cmake @@ -0,0 +1,63 @@ +#---------------------------------------------------------------------- +# +# FindProtozero.cmake +# +# Find the protozero headers. +# +#---------------------------------------------------------------------- +# +# Usage: +# +# Copy this file somewhere into your project directory, where cmake can +# find it. Usually this will be a directory called "cmake" which you can +# add to the CMake module search path with the following line in your +# CMakeLists.txt: +# +# list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") +# +# Then add the following in your CMakeLists.txt: +# +# find_package(Protozero [version] [REQUIRED]) +# include_directories(SYSTEM ${PROTOZERO_INCLUDE_DIR}) +# +# The version number is optional. If it is not set, any version of +# protozero will do. +# +# if(NOT PROTOZERO_FOUND) +# message(WARNING "Protozero not found!\n") +# endif() +# +#---------------------------------------------------------------------- +# +# Variables: +# +# PROTOZERO_FOUND - True if Protozero was found. +# PROTOZERO_INCLUDE_DIR - Where to find include files. +# +#---------------------------------------------------------------------- + +# find include path +find_path(PROTOZERO_INCLUDE_DIR protozero/version.hpp + PATH_SUFFIXES include + PATHS ${CMAKE_SOURCE_DIR}/../protozero +) + +# Check version number +if(Protozero_FIND_VERSION) + file(STRINGS "${PROTOZERO_INCLUDE_DIR}/protozero/version.hpp" _version_define REGEX "#define PROTOZERO_VERSION_STRING") + if("${_version_define}" MATCHES "#define PROTOZERO_VERSION_STRING \"([0-9.]+)\"") + set(_version "${CMAKE_MATCH_1}") + else() + set(_version "unknown") + endif() +endif() + +#set(PROTOZERO_INCLUDE_DIRS "${PROTOZERO_INCLUDE_DIR}") + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Protozero + REQUIRED_VARS PROTOZERO_INCLUDE_DIR + VERSION_VAR _version) + + +#---------------------------------------------------------------------- diff --git a/filter/README.md b/filter/README.md new file mode 100644 index 0000000..7d07be5 --- /dev/null +++ b/filter/README.md @@ -0,0 +1,35 @@ +# Filtering OSM by external dataset + +When you got points of multiple categories, an Overpass API request may fail +from the number of query clauses. For that, you would need to filter the planet +file yourself. First, prepare a list of categories and dataset points: + + conflate.py profile.py -f points.lst + +Then compile the filtering tool: + + mkdir build + cmake .. + make + +Download a planet file or an extract for the country of import, update it to the minute, +and feed it to the filtering tool: + + ./filter_planet_by_cats points.lst planet-latest.osm.pbf > filtered.osm + +This will take an hour or two. The resulting OSM file should be used as an input to +the conflation tool: + + conflate.py profile.py --osm filtered.osm -c changes.json + +## Authors and License + +The `filter_planet_by_cats` script was written by Ilya Zverev for MAPS.ME and +published under Apache License 2.0. + +The `xml_centers_output.hpp` and `*.cmake` files are based on +[libosmium](https://github.com/osmcode/libosmium) code and hence published +under the Boost License terms. + +`RTree.h` is under public domain, downloaded from +[this repository](https://github.com/nushoin/RTree). diff --git a/filter/RTree.h b/filter/RTree.h new file mode 100644 index 0000000..14667c4 --- /dev/null +++ b/filter/RTree.h @@ -0,0 +1,1602 @@ +#ifndef RTREE_H +#define RTREE_H + +// NOTE This file compiles under MSVC 6 SP5 and MSVC .Net 2003 it may not work on other compilers without modification. + +// NOTE These next few lines may be win32 specific, you may need to modify them to compile on other platform +#include +#include +#include +#include + +#include + +#define ASSERT assert // RTree uses ASSERT( condition ) +#ifndef Min + #define Min std::min +#endif //Min +#ifndef Max + #define Max std::max +#endif //Max + +// +// RTree.h +// + +#define RTREE_TEMPLATE template +#define RTREE_QUAL RTree + +#define RTREE_DONT_USE_MEMPOOLS // This version does not contain a fixed memory allocator, fill in lines with EXAMPLE to implement one. +#define RTREE_USE_SPHERICAL_VOLUME // Better split classification, may be slower on some systems + +// Fwd decl +class RTFileStream; // File I/O helper class, look below for implementation and notes. + + +/// \class RTree +/// Implementation of RTree, a multidimensional bounding rectangle tree. +/// Example usage: For a 3-dimensional tree use RTree myTree; +/// +/// This modified, templated C++ version by Greg Douglas at Auran (http://www.auran.com) +/// +/// DATATYPE Referenced data, should be int, void*, obj* etc. no larger than sizeof and simple type +/// ELEMTYPE Type of element such as int or float +/// NUMDIMS Number of dimensions such as 2 or 3 +/// ELEMTYPEREAL Type of element that allows fractional and large values such as float or double, for use in volume calcs +/// +/// NOTES: Inserting and removing data requires the knowledge of its constant Minimal Bounding Rectangle. +/// This version uses new/delete for nodes, I recommend using a fixed size allocator for efficiency. +/// Instead of using a callback function for returned results, I recommend and efficient pre-sized, grow-only memory +/// array similar to MFC CArray or STL Vector for returning search query result. +/// +template +class RTree +{ +protected: + + struct Node; // Fwd decl. Used by other internal structs and iterator + +public: + + // These constant must be declared after Branch and before Node struct + // Stuck up here for MSVC 6 compiler. NSVC .NET 2003 is much happier. + enum + { + MAXNODES = TMAXNODES, ///< Max elements in node + MINNODES = TMINNODES, ///< Min elements in node + }; + + typedef bool (*t_resultCallback)(DATATYPE, void*); + +public: + + RTree(); + virtual ~RTree(); + + /// Insert entry + /// \param a_min Min of bounding rect + /// \param a_max Max of bounding rect + /// \param a_dataId Positive Id of data. Maybe zero, but negative numbers not allowed. + void Insert(const ELEMTYPE a_min[NUMDIMS], const ELEMTYPE a_max[NUMDIMS], const DATATYPE& a_dataId); + + /// Remove entry + /// \param a_min Min of bounding rect + /// \param a_max Max of bounding rect + /// \param a_dataId Positive Id of data. Maybe zero, but negative numbers not allowed. + void Remove(const ELEMTYPE a_min[NUMDIMS], const ELEMTYPE a_max[NUMDIMS], const DATATYPE& a_dataId); + + /// Find all within search rectangle + /// \param a_min Min of search bounding rect + /// \param a_max Max of search bounding rect + /// \param a_searchResult Search result array. Caller should set grow size. Function will reset, not append to array. + /// \param a_resultCallback Callback function to return result. Callback should return 'true' to continue searching + /// \param a_context User context to pass as parameter to a_resultCallback + /// \return Returns the number of entries found + int Search(const ELEMTYPE a_min[NUMDIMS], const ELEMTYPE a_max[NUMDIMS], t_resultCallback a_resultCallback, void* a_context); + + /// Remove all entries from tree + void RemoveAll(); + + /// Count the data elements in this container. This is slow as no internal counter is maintained. + int Count(); + + /// Load tree contents from file + bool Load(const char* a_fileName); + /// Load tree contents from stream + bool Load(RTFileStream& a_stream); + + + /// Save tree contents to file + bool Save(const char* a_fileName); + /// Save tree contents to stream + bool Save(RTFileStream& a_stream); + + /// Iterator is not remove safe. + class Iterator + { + private: + + enum { MAX_STACK = 32 }; // Max stack size. Allows almost n^32 where n is number of branches in node + + struct StackElement + { + Node* m_node; + int m_branchIndex; + }; + + public: + + Iterator() { Init(); } + + ~Iterator() { } + + /// Is iterator invalid + bool IsNull() { return (m_tos <= 0); } + + /// Is iterator pointing to valid data + bool IsNotNull() { return (m_tos > 0); } + + /// Access the current data element. Caller must be sure iterator is not NULL first. + DATATYPE& operator*() + { + ASSERT(IsNotNull()); + StackElement& curTos = m_stack[m_tos - 1]; + return curTos.m_node->m_branch[curTos.m_branchIndex].m_data; + } + + /// Access the current data element. Caller must be sure iterator is not NULL first. + const DATATYPE& operator*() const + { + ASSERT(IsNotNull()); + StackElement& curTos = m_stack[m_tos - 1]; + return curTos.m_node->m_branch[curTos.m_branchIndex].m_data; + } + + /// Find the next data element + bool operator++() { return FindNextData(); } + + /// Get the bounds for this node + void GetBounds(ELEMTYPE a_min[NUMDIMS], ELEMTYPE a_max[NUMDIMS]) + { + ASSERT(IsNotNull()); + StackElement& curTos = m_stack[m_tos - 1]; + Branch& curBranch = curTos.m_node->m_branch[curTos.m_branchIndex]; + + for(int index = 0; index < NUMDIMS; ++index) + { + a_min[index] = curBranch.m_rect.m_min[index]; + a_max[index] = curBranch.m_rect.m_max[index]; + } + } + + private: + + /// Reset iterator + void Init() { m_tos = 0; } + + /// Find the next data element in the tree (For internal use only) + bool FindNextData() + { + for(;;) + { + if(m_tos <= 0) + { + return false; + } + StackElement curTos = Pop(); // Copy stack top cause it may change as we use it + + if(curTos.m_node->IsLeaf()) + { + // Keep walking through data while we can + if(curTos.m_branchIndex+1 < curTos.m_node->m_count) + { + // There is more data, just point to the next one + Push(curTos.m_node, curTos.m_branchIndex + 1); + return true; + } + // No more data, so it will fall back to previous level + } + else + { + if(curTos.m_branchIndex+1 < curTos.m_node->m_count) + { + // Push sibling on for future tree walk + // This is the 'fall back' node when we finish with the current level + Push(curTos.m_node, curTos.m_branchIndex + 1); + } + // Since cur node is not a leaf, push first of next level to get deeper into the tree + Node* nextLevelnode = curTos.m_node->m_branch[curTos.m_branchIndex].m_child; + Push(nextLevelnode, 0); + + // If we pushed on a new leaf, exit as the data is ready at TOS + if(nextLevelnode->IsLeaf()) + { + return true; + } + } + } + } + + /// Push node and branch onto iteration stack (For internal use only) + void Push(Node* a_node, int a_branchIndex) + { + m_stack[m_tos].m_node = a_node; + m_stack[m_tos].m_branchIndex = a_branchIndex; + ++m_tos; + ASSERT(m_tos <= MAX_STACK); + } + + /// Pop element off iteration stack (For internal use only) + StackElement& Pop() + { + ASSERT(m_tos > 0); + --m_tos; + return m_stack[m_tos]; + } + + StackElement m_stack[MAX_STACK]; ///< Stack as we are doing iteration instead of recursion + int m_tos; ///< Top Of Stack index + + friend class RTree; // Allow hiding of non-public functions while allowing manipulation by logical owner + }; + + /// Get 'first' for iteration + void GetFirst(Iterator& a_it) + { + a_it.Init(); + Node* first = m_root; + while(first) + { + if(first->IsInternalNode() && first->m_count > 1) + { + a_it.Push(first, 1); // Descend sibling branch later + } + else if(first->IsLeaf()) + { + if(first->m_count) + { + a_it.Push(first, 0); + } + break; + } + first = first->m_branch[0].m_child; + } + } + + /// Get Next for iteration + void GetNext(Iterator& a_it) { ++a_it; } + + /// Is iterator NULL, or at end? + bool IsNull(Iterator& a_it) { return a_it.IsNull(); } + + /// Get object at iterator position + DATATYPE& GetAt(Iterator& a_it) { return *a_it; } + +protected: + + /// Minimal bounding rectangle (n-dimensional) + struct Rect + { + ELEMTYPE m_min[NUMDIMS]; ///< Min dimensions of bounding box + ELEMTYPE m_max[NUMDIMS]; ///< Max dimensions of bounding box + }; + + /// May be data or may be another subtree + /// The parents level determines this. + /// If the parents level is 0, then this is data + struct Branch + { + Rect m_rect; ///< Bounds + Node* m_child; ///< Child node + DATATYPE m_data; ///< Data Id + }; + + /// Node for each branch level + struct Node + { + bool IsInternalNode() { return (m_level > 0); } // Not a leaf, but a internal node + bool IsLeaf() { return (m_level == 0); } // A leaf, contains data + + int m_count; ///< Count + int m_level; ///< Leaf is zero, others positive + Branch m_branch[MAXNODES]; ///< Branch + }; + + /// A link list of nodes for reinsertion after a delete operation + struct ListNode + { + ListNode* m_next; ///< Next in list + Node* m_node; ///< Node + }; + + /// Variables for finding a split partition + struct PartitionVars + { + enum { NOT_TAKEN = -1 }; // indicates that position + + int m_partition[MAXNODES+1]; + int m_total; + int m_minFill; + int m_count[2]; + Rect m_cover[2]; + ELEMTYPEREAL m_area[2]; + + Branch m_branchBuf[MAXNODES+1]; + int m_branchCount; + Rect m_coverSplit; + ELEMTYPEREAL m_coverSplitArea; + }; + + Node* AllocNode(); + void FreeNode(Node* a_node); + void InitNode(Node* a_node); + void InitRect(Rect* a_rect); + bool InsertRectRec(const Branch& a_branch, Node* a_node, Node** a_newNode, int a_level); + bool InsertRect(const Branch& a_branch, Node** a_root, int a_level); + Rect NodeCover(Node* a_node); + bool AddBranch(const Branch* a_branch, Node* a_node, Node** a_newNode); + void DisconnectBranch(Node* a_node, int a_index); + int PickBranch(const Rect* a_rect, Node* a_node); + Rect CombineRect(const Rect* a_rectA, const Rect* a_rectB); + void SplitNode(Node* a_node, const Branch* a_branch, Node** a_newNode); + ELEMTYPEREAL RectSphericalVolume(Rect* a_rect); + ELEMTYPEREAL RectVolume(Rect* a_rect); + ELEMTYPEREAL CalcRectVolume(Rect* a_rect); + void GetBranches(Node* a_node, const Branch* a_branch, PartitionVars* a_parVars); + void ChoosePartition(PartitionVars* a_parVars, int a_minFill); + void LoadNodes(Node* a_nodeA, Node* a_nodeB, PartitionVars* a_parVars); + void InitParVars(PartitionVars* a_parVars, int a_maxRects, int a_minFill); + void PickSeeds(PartitionVars* a_parVars); + void Classify(int a_index, int a_group, PartitionVars* a_parVars); + bool RemoveRect(Rect* a_rect, const DATATYPE& a_id, Node** a_root); + bool RemoveRectRec(Rect* a_rect, const DATATYPE& a_id, Node* a_node, ListNode** a_listNode); + ListNode* AllocListNode(); + void FreeListNode(ListNode* a_listNode); + bool Overlap(Rect* a_rectA, Rect* a_rectB); + void ReInsert(Node* a_node, ListNode** a_listNode); + bool Search(Node* a_node, Rect* a_rect, int& a_foundCount, t_resultCallback a_resultCallback, void* a_context); + void RemoveAllRec(Node* a_node); + void Reset(); + void CountRec(Node* a_node, int& a_count); + + bool SaveRec(Node* a_node, RTFileStream& a_stream); + bool LoadRec(Node* a_node, RTFileStream& a_stream); + + Node* m_root; ///< Root of tree + ELEMTYPEREAL m_unitSphereVolume; ///< Unit sphere constant for required number of dimensions +}; + + +// Because there is not stream support, this is a quick and dirty file I/O helper. +// Users will likely replace its usage with a Stream implementation from their favorite API. +class RTFileStream +{ + FILE* m_file; + +public: + + + RTFileStream() + { + m_file = NULL; + } + + ~RTFileStream() + { + Close(); + } + + bool OpenRead(const char* a_fileName) + { + m_file = fopen(a_fileName, "rb"); + if(!m_file) + { + return false; + } + return true; + } + + bool OpenWrite(const char* a_fileName) + { + m_file = fopen(a_fileName, "wb"); + if(!m_file) + { + return false; + } + return true; + } + + void Close() + { + if(m_file) + { + fclose(m_file); + m_file = NULL; + } + } + + template< typename TYPE > + size_t Write(const TYPE& a_value) + { + ASSERT(m_file); + return fwrite((void*)&a_value, sizeof(a_value), 1, m_file); + } + + template< typename TYPE > + size_t WriteArray(const TYPE* a_array, int a_count) + { + ASSERT(m_file); + return fwrite((void*)a_array, sizeof(TYPE) * a_count, 1, m_file); + } + + template< typename TYPE > + size_t Read(TYPE& a_value) + { + ASSERT(m_file); + return fread((void*)&a_value, sizeof(a_value), 1, m_file); + } + + template< typename TYPE > + size_t ReadArray(TYPE* a_array, int a_count) + { + ASSERT(m_file); + return fread((void*)a_array, sizeof(TYPE) * a_count, 1, m_file); + } +}; + + +RTREE_TEMPLATE +RTREE_QUAL::RTree() +{ + ASSERT(MAXNODES > MINNODES); + ASSERT(MINNODES > 0); + + // Precomputed volumes of the unit spheres for the first few dimensions + const float UNIT_SPHERE_VOLUMES[] = { + 0.000000f, 2.000000f, 3.141593f, // Dimension 0,1,2 + 4.188790f, 4.934802f, 5.263789f, // Dimension 3,4,5 + 5.167713f, 4.724766f, 4.058712f, // Dimension 6,7,8 + 3.298509f, 2.550164f, 1.884104f, // Dimension 9,10,11 + 1.335263f, 0.910629f, 0.599265f, // Dimension 12,13,14 + 0.381443f, 0.235331f, 0.140981f, // Dimension 15,16,17 + 0.082146f, 0.046622f, 0.025807f, // Dimension 18,19,20 + }; + + m_root = AllocNode(); + m_root->m_level = 0; + m_unitSphereVolume = (ELEMTYPEREAL)UNIT_SPHERE_VOLUMES[NUMDIMS]; +} + + +RTREE_TEMPLATE +RTREE_QUAL::~RTree() +{ + Reset(); // Free, or reset node memory +} + + +RTREE_TEMPLATE +void RTREE_QUAL::Insert(const ELEMTYPE a_min[NUMDIMS], const ELEMTYPE a_max[NUMDIMS], const DATATYPE& a_dataId) +{ +#ifdef _DEBUG + for(int index=0; indexIsInternalNode()) // not a leaf node + { + for(int index = 0; index < a_node->m_count; ++index) + { + CountRec(a_node->m_branch[index].m_child, a_count); + } + } + else // A leaf node + { + a_count += a_node->m_count; + } +} + + +RTREE_TEMPLATE +bool RTREE_QUAL::Load(const char* a_fileName) +{ + RemoveAll(); // Clear existing tree + + RTFileStream stream; + if(!stream.OpenRead(a_fileName)) + { + return false; + } + + bool result = Load(stream); + + stream.Close(); + + return result; +} + + + +RTREE_TEMPLATE +bool RTREE_QUAL::Load(RTFileStream& a_stream) +{ + // Write some kind of header + int _dataFileId = ('R'<<0)|('T'<<8)|('R'<<16)|('E'<<24); + int _dataSize = sizeof(DATATYPE); + int _dataNumDims = NUMDIMS; + int _dataElemSize = sizeof(ELEMTYPE); + int _dataElemRealSize = sizeof(ELEMTYPEREAL); + int _dataMaxNodes = TMAXNODES; + int _dataMinNodes = TMINNODES; + + int dataFileId = 0; + int dataSize = 0; + int dataNumDims = 0; + int dataElemSize = 0; + int dataElemRealSize = 0; + int dataMaxNodes = 0; + int dataMinNodes = 0; + + a_stream.Read(dataFileId); + a_stream.Read(dataSize); + a_stream.Read(dataNumDims); + a_stream.Read(dataElemSize); + a_stream.Read(dataElemRealSize); + a_stream.Read(dataMaxNodes); + a_stream.Read(dataMinNodes); + + bool result = false; + + // Test if header was valid and compatible + if( (dataFileId == _dataFileId) + && (dataSize == _dataSize) + && (dataNumDims == _dataNumDims) + && (dataElemSize == _dataElemSize) + && (dataElemRealSize == _dataElemRealSize) + && (dataMaxNodes == _dataMaxNodes) + && (dataMinNodes == _dataMinNodes) + ) + { + // Recursively load tree + result = LoadRec(m_root, a_stream); + } + + return result; +} + + +RTREE_TEMPLATE +bool RTREE_QUAL::LoadRec(Node* a_node, RTFileStream& a_stream) +{ + a_stream.Read(a_node->m_level); + a_stream.Read(a_node->m_count); + + if(a_node->IsInternalNode()) // not a leaf node + { + for(int index = 0; index < a_node->m_count; ++index) + { + Branch* curBranch = &a_node->m_branch[index]; + + a_stream.ReadArray(curBranch->m_rect.m_min, NUMDIMS); + a_stream.ReadArray(curBranch->m_rect.m_max, NUMDIMS); + + curBranch->m_child = AllocNode(); + LoadRec(curBranch->m_child, a_stream); + } + } + else // A leaf node + { + for(int index = 0; index < a_node->m_count; ++index) + { + Branch* curBranch = &a_node->m_branch[index]; + + a_stream.ReadArray(curBranch->m_rect.m_min, NUMDIMS); + a_stream.ReadArray(curBranch->m_rect.m_max, NUMDIMS); + + a_stream.Read(curBranch->m_data); + } + } + + return true; // Should do more error checking on I/O operations +} + + +RTREE_TEMPLATE +bool RTREE_QUAL::Save(const char* a_fileName) +{ + RTFileStream stream; + if(!stream.OpenWrite(a_fileName)) + { + return false; + } + + bool result = Save(stream); + + stream.Close(); + + return result; +} + + +RTREE_TEMPLATE +bool RTREE_QUAL::Save(RTFileStream& a_stream) +{ + // Write some kind of header + int dataFileId = ('R'<<0)|('T'<<8)|('R'<<16)|('E'<<24); + int dataSize = sizeof(DATATYPE); + int dataNumDims = NUMDIMS; + int dataElemSize = sizeof(ELEMTYPE); + int dataElemRealSize = sizeof(ELEMTYPEREAL); + int dataMaxNodes = TMAXNODES; + int dataMinNodes = TMINNODES; + + a_stream.Write(dataFileId); + a_stream.Write(dataSize); + a_stream.Write(dataNumDims); + a_stream.Write(dataElemSize); + a_stream.Write(dataElemRealSize); + a_stream.Write(dataMaxNodes); + a_stream.Write(dataMinNodes); + + // Recursively save tree + bool result = SaveRec(m_root, a_stream); + + return result; +} + + +RTREE_TEMPLATE +bool RTREE_QUAL::SaveRec(Node* a_node, RTFileStream& a_stream) +{ + a_stream.Write(a_node->m_level); + a_stream.Write(a_node->m_count); + + if(a_node->IsInternalNode()) // not a leaf node + { + for(int index = 0; index < a_node->m_count; ++index) + { + Branch* curBranch = &a_node->m_branch[index]; + + a_stream.WriteArray(curBranch->m_rect.m_min, NUMDIMS); + a_stream.WriteArray(curBranch->m_rect.m_max, NUMDIMS); + + SaveRec(curBranch->m_child, a_stream); + } + } + else // A leaf node + { + for(int index = 0; index < a_node->m_count; ++index) + { + Branch* curBranch = &a_node->m_branch[index]; + + a_stream.WriteArray(curBranch->m_rect.m_min, NUMDIMS); + a_stream.WriteArray(curBranch->m_rect.m_max, NUMDIMS); + + a_stream.Write(curBranch->m_data); + } + } + + return true; // Should do more error checking on I/O operations +} + + +RTREE_TEMPLATE +void RTREE_QUAL::RemoveAll() +{ + // Delete all existing nodes + Reset(); + + m_root = AllocNode(); + m_root->m_level = 0; +} + + +RTREE_TEMPLATE +void RTREE_QUAL::Reset() +{ +#ifdef RTREE_DONT_USE_MEMPOOLS + // Delete all existing nodes + RemoveAllRec(m_root); +#else // RTREE_DONT_USE_MEMPOOLS + // Just reset memory pools. We are not using complex types + // EXAMPLE +#endif // RTREE_DONT_USE_MEMPOOLS +} + + +RTREE_TEMPLATE +void RTREE_QUAL::RemoveAllRec(Node* a_node) +{ + ASSERT(a_node); + ASSERT(a_node->m_level >= 0); + + if(a_node->IsInternalNode()) // This is an internal node in the tree + { + for(int index=0; index < a_node->m_count; ++index) + { + RemoveAllRec(a_node->m_branch[index].m_child); + } + } + FreeNode(a_node); +} + + +RTREE_TEMPLATE +typename RTREE_QUAL::Node* RTREE_QUAL::AllocNode() +{ + Node* newNode; +#ifdef RTREE_DONT_USE_MEMPOOLS + newNode = new Node; +#else // RTREE_DONT_USE_MEMPOOLS + // EXAMPLE +#endif // RTREE_DONT_USE_MEMPOOLS + InitNode(newNode); + return newNode; +} + + +RTREE_TEMPLATE +void RTREE_QUAL::FreeNode(Node* a_node) +{ + ASSERT(a_node); + +#ifdef RTREE_DONT_USE_MEMPOOLS + delete a_node; +#else // RTREE_DONT_USE_MEMPOOLS + // EXAMPLE +#endif // RTREE_DONT_USE_MEMPOOLS +} + + +// Allocate space for a node in the list used in DeletRect to +// store Nodes that are too empty. +RTREE_TEMPLATE +typename RTREE_QUAL::ListNode* RTREE_QUAL::AllocListNode() +{ +#ifdef RTREE_DONT_USE_MEMPOOLS + return new ListNode; +#else // RTREE_DONT_USE_MEMPOOLS + // EXAMPLE +#endif // RTREE_DONT_USE_MEMPOOLS +} + + +RTREE_TEMPLATE +void RTREE_QUAL::FreeListNode(ListNode* a_listNode) +{ +#ifdef RTREE_DONT_USE_MEMPOOLS + delete a_listNode; +#else // RTREE_DONT_USE_MEMPOOLS + // EXAMPLE +#endif // RTREE_DONT_USE_MEMPOOLS +} + + +RTREE_TEMPLATE +void RTREE_QUAL::InitNode(Node* a_node) +{ + a_node->m_count = 0; + a_node->m_level = -1; +} + + +RTREE_TEMPLATE +void RTREE_QUAL::InitRect(Rect* a_rect) +{ + for(int index = 0; index < NUMDIMS; ++index) + { + a_rect->m_min[index] = (ELEMTYPE)0; + a_rect->m_max[index] = (ELEMTYPE)0; + } +} + + +// Inserts a new data rectangle into the index structure. +// Recursively descends tree, propagates splits back up. +// Returns 0 if node was not split. Old node updated. +// If node was split, returns 1 and sets the pointer pointed to by +// new_node to point to the new node. Old node updated to become one of two. +// The level argument specifies the number of steps up from the leaf +// level to insert; e.g. a data rectangle goes in at level = 0. +RTREE_TEMPLATE +bool RTREE_QUAL::InsertRectRec(const Branch& a_branch, Node* a_node, Node** a_newNode, int a_level) +{ + ASSERT(a_node && a_newNode); + ASSERT(a_level >= 0 && a_level <= a_node->m_level); + + // recurse until we reach the correct level for the new record. data records + // will always be called with a_level == 0 (leaf) + if(a_node->m_level > a_level) + { + // Still above level for insertion, go down tree recursively + Node* otherNode; + + // find the optimal branch for this record + int index = PickBranch(&a_branch.m_rect, a_node); + + // recursively insert this record into the picked branch + bool childWasSplit = InsertRectRec(a_branch, a_node->m_branch[index].m_child, &otherNode, a_level); + + if (!childWasSplit) + { + // Child was not split. Merge the bounding box of the new record with the + // existing bounding box + a_node->m_branch[index].m_rect = CombineRect(&a_branch.m_rect, &(a_node->m_branch[index].m_rect)); + return false; + } + else + { + // Child was split. The old branches are now re-partitioned to two nodes + // so we have to re-calculate the bounding boxes of each node + a_node->m_branch[index].m_rect = NodeCover(a_node->m_branch[index].m_child); + Branch branch; + branch.m_child = otherNode; + branch.m_rect = NodeCover(otherNode); + + // The old node is already a child of a_node. Now add the newly-created + // node to a_node as well. a_node might be split because of that. + return AddBranch(&branch, a_node, a_newNode); + } + } + else if(a_node->m_level == a_level) + { + // We have reached level for insertion. Add rect, split if necessary + return AddBranch(&a_branch, a_node, a_newNode); + } + else + { + // Should never occur + ASSERT(0); + return false; + } +} + + +// Insert a data rectangle into an index structure. +// InsertRect provides for splitting the root; +// returns 1 if root was split, 0 if it was not. +// The level argument specifies the number of steps up from the leaf +// level to insert; e.g. a data rectangle goes in at level = 0. +// InsertRect2 does the recursion. +// +RTREE_TEMPLATE +bool RTREE_QUAL::InsertRect(const Branch& a_branch, Node** a_root, int a_level) +{ + ASSERT(a_root); + ASSERT(a_level >= 0 && a_level <= (*a_root)->m_level); +#ifdef _DEBUG + for(int index=0; index < NUMDIMS; ++index) + { + ASSERT(a_branch.m_rect.m_min[index] <= a_branch.m_rect.m_max[index]); + } +#endif //_DEBUG + + Node* newNode; + + if(InsertRectRec(a_branch, *a_root, &newNode, a_level)) // Root split + { + // Grow tree taller and new root + Node* newRoot = AllocNode(); + newRoot->m_level = (*a_root)->m_level + 1; + + Branch branch; + + // add old root node as a child of the new root + branch.m_rect = NodeCover(*a_root); + branch.m_child = *a_root; + AddBranch(&branch, newRoot, NULL); + + // add the split node as a child of the new root + branch.m_rect = NodeCover(newNode); + branch.m_child = newNode; + AddBranch(&branch, newRoot, NULL); + + // set the new root as the root node + *a_root = newRoot; + + return true; + } + + return false; +} + + +// Find the smallest rectangle that includes all rectangles in branches of a node. +RTREE_TEMPLATE +typename RTREE_QUAL::Rect RTREE_QUAL::NodeCover(Node* a_node) +{ + ASSERT(a_node); + + Rect rect = a_node->m_branch[0].m_rect; + for(int index = 1; index < a_node->m_count; ++index) + { + rect = CombineRect(&rect, &(a_node->m_branch[index].m_rect)); + } + + return rect; +} + + +// Add a branch to a node. Split the node if necessary. +// Returns 0 if node not split. Old node updated. +// Returns 1 if node split, sets *new_node to address of new node. +// Old node updated, becomes one of two. +RTREE_TEMPLATE +bool RTREE_QUAL::AddBranch(const Branch* a_branch, Node* a_node, Node** a_newNode) +{ + ASSERT(a_branch); + ASSERT(a_node); + + if(a_node->m_count < MAXNODES) // Split won't be necessary + { + a_node->m_branch[a_node->m_count] = *a_branch; + ++a_node->m_count; + + return false; + } + else + { + ASSERT(a_newNode); + + SplitNode(a_node, a_branch, a_newNode); + return true; + } +} + + +// Disconnect a dependent node. +// Caller must return (or stop using iteration index) after this as count has changed +RTREE_TEMPLATE +void RTREE_QUAL::DisconnectBranch(Node* a_node, int a_index) +{ + ASSERT(a_node && (a_index >= 0) && (a_index < MAXNODES)); + ASSERT(a_node->m_count > 0); + + // Remove element by swapping with the last element to prevent gaps in array + a_node->m_branch[a_index] = a_node->m_branch[a_node->m_count - 1]; + + --a_node->m_count; +} + + +// Pick a branch. Pick the one that will need the smallest increase +// in area to accomodate the new rectangle. This will result in the +// least total area for the covering rectangles in the current node. +// In case of a tie, pick the one which was smaller before, to get +// the best resolution when searching. +RTREE_TEMPLATE +int RTREE_QUAL::PickBranch(const Rect* a_rect, Node* a_node) +{ + ASSERT(a_rect && a_node); + + bool firstTime = true; + ELEMTYPEREAL increase; + ELEMTYPEREAL bestIncr = (ELEMTYPEREAL)-1; + ELEMTYPEREAL area; + ELEMTYPEREAL bestArea; + int best; + Rect tempRect; + + for(int index=0; index < a_node->m_count; ++index) + { + Rect* curRect = &a_node->m_branch[index].m_rect; + area = CalcRectVolume(curRect); + tempRect = CombineRect(a_rect, curRect); + increase = CalcRectVolume(&tempRect) - area; + if((increase < bestIncr) || firstTime) + { + best = index; + bestArea = area; + bestIncr = increase; + firstTime = false; + } + else if((increase == bestIncr) && (area < bestArea)) + { + best = index; + bestArea = area; + bestIncr = increase; + } + } + return best; +} + + +// Combine two rectangles into larger one containing both +RTREE_TEMPLATE +typename RTREE_QUAL::Rect RTREE_QUAL::CombineRect(const Rect* a_rectA, const Rect* a_rectB) +{ + ASSERT(a_rectA && a_rectB); + + Rect newRect; + + for(int index = 0; index < NUMDIMS; ++index) + { + newRect.m_min[index] = Min(a_rectA->m_min[index], a_rectB->m_min[index]); + newRect.m_max[index] = Max(a_rectA->m_max[index], a_rectB->m_max[index]); + } + + return newRect; +} + + + +// Split a node. +// Divides the nodes branches and the extra one between two nodes. +// Old node is one of the new ones, and one really new one is created. +// Tries more than one method for choosing a partition, uses best result. +RTREE_TEMPLATE +void RTREE_QUAL::SplitNode(Node* a_node, const Branch* a_branch, Node** a_newNode) +{ + ASSERT(a_node); + ASSERT(a_branch); + + // Could just use local here, but member or external is faster since it is reused + PartitionVars localVars; + PartitionVars* parVars = &localVars; + + // Load all the branches into a buffer, initialize old node + GetBranches(a_node, a_branch, parVars); + + // Find partition + ChoosePartition(parVars, MINNODES); + + // Create a new node to hold (about) half of the branches + *a_newNode = AllocNode(); + (*a_newNode)->m_level = a_node->m_level; + + // Put branches from buffer into 2 nodes according to the chosen partition + a_node->m_count = 0; + LoadNodes(a_node, *a_newNode, parVars); + + ASSERT((a_node->m_count + (*a_newNode)->m_count) == parVars->m_total); +} + + +// Calculate the n-dimensional volume of a rectangle +RTREE_TEMPLATE +ELEMTYPEREAL RTREE_QUAL::RectVolume(Rect* a_rect) +{ + ASSERT(a_rect); + + ELEMTYPEREAL volume = (ELEMTYPEREAL)1; + + for(int index=0; indexm_max[index] - a_rect->m_min[index]; + } + + ASSERT(volume >= (ELEMTYPEREAL)0); + + return volume; +} + + +// The exact volume of the bounding sphere for the given Rect +RTREE_TEMPLATE +ELEMTYPEREAL RTREE_QUAL::RectSphericalVolume(Rect* a_rect) +{ + ASSERT(a_rect); + + ELEMTYPEREAL sumOfSquares = (ELEMTYPEREAL)0; + ELEMTYPEREAL radius; + + for(int index=0; index < NUMDIMS; ++index) + { + ELEMTYPEREAL halfExtent = ((ELEMTYPEREAL)a_rect->m_max[index] - (ELEMTYPEREAL)a_rect->m_min[index]) * 0.5f; + sumOfSquares += halfExtent * halfExtent; + } + + radius = (ELEMTYPEREAL)sqrt(sumOfSquares); + + // Pow maybe slow, so test for common dims like 2,3 and just use x*x, x*x*x. + if(NUMDIMS == 3) + { + return (radius * radius * radius * m_unitSphereVolume); + } + else if(NUMDIMS == 2) + { + return (radius * radius * m_unitSphereVolume); + } + else + { + return (ELEMTYPEREAL)(pow(radius, NUMDIMS) * m_unitSphereVolume); + } +} + + +// Use one of the methods to calculate retangle volume +RTREE_TEMPLATE +ELEMTYPEREAL RTREE_QUAL::CalcRectVolume(Rect* a_rect) +{ +#ifdef RTREE_USE_SPHERICAL_VOLUME + return RectSphericalVolume(a_rect); // Slower but helps certain merge cases +#else // RTREE_USE_SPHERICAL_VOLUME + return RectVolume(a_rect); // Faster but can cause poor merges +#endif // RTREE_USE_SPHERICAL_VOLUME +} + + +// Load branch buffer with branches from full node plus the extra branch. +RTREE_TEMPLATE +void RTREE_QUAL::GetBranches(Node* a_node, const Branch* a_branch, PartitionVars* a_parVars) +{ + ASSERT(a_node); + ASSERT(a_branch); + + ASSERT(a_node->m_count == MAXNODES); + + // Load the branch buffer + for(int index=0; index < MAXNODES; ++index) + { + a_parVars->m_branchBuf[index] = a_node->m_branch[index]; + } + a_parVars->m_branchBuf[MAXNODES] = *a_branch; + a_parVars->m_branchCount = MAXNODES + 1; + + // Calculate rect containing all in the set + a_parVars->m_coverSplit = a_parVars->m_branchBuf[0].m_rect; + for(int index=1; index < MAXNODES+1; ++index) + { + a_parVars->m_coverSplit = CombineRect(&a_parVars->m_coverSplit, &a_parVars->m_branchBuf[index].m_rect); + } + a_parVars->m_coverSplitArea = CalcRectVolume(&a_parVars->m_coverSplit); +} + + +// Method #0 for choosing a partition: +// As the seeds for the two groups, pick the two rects that would waste the +// most area if covered by a single rectangle, i.e. evidently the worst pair +// to have in the same group. +// Of the remaining, one at a time is chosen to be put in one of the two groups. +// The one chosen is the one with the greatest difference in area expansion +// depending on which group - the rect most strongly attracted to one group +// and repelled from the other. +// If one group gets too full (more would force other group to violate min +// fill requirement) then other group gets the rest. +// These last are the ones that can go in either group most easily. +RTREE_TEMPLATE +void RTREE_QUAL::ChoosePartition(PartitionVars* a_parVars, int a_minFill) +{ + ASSERT(a_parVars); + + ELEMTYPEREAL biggestDiff; + int group, chosen, betterGroup; + + InitParVars(a_parVars, a_parVars->m_branchCount, a_minFill); + PickSeeds(a_parVars); + + while (((a_parVars->m_count[0] + a_parVars->m_count[1]) < a_parVars->m_total) + && (a_parVars->m_count[0] < (a_parVars->m_total - a_parVars->m_minFill)) + && (a_parVars->m_count[1] < (a_parVars->m_total - a_parVars->m_minFill))) + { + biggestDiff = (ELEMTYPEREAL) -1; + for(int index=0; indexm_total; ++index) + { + if(PartitionVars::NOT_TAKEN == a_parVars->m_partition[index]) + { + Rect* curRect = &a_parVars->m_branchBuf[index].m_rect; + Rect rect0 = CombineRect(curRect, &a_parVars->m_cover[0]); + Rect rect1 = CombineRect(curRect, &a_parVars->m_cover[1]); + ELEMTYPEREAL growth0 = CalcRectVolume(&rect0) - a_parVars->m_area[0]; + ELEMTYPEREAL growth1 = CalcRectVolume(&rect1) - a_parVars->m_area[1]; + ELEMTYPEREAL diff = growth1 - growth0; + if(diff >= 0) + { + group = 0; + } + else + { + group = 1; + diff = -diff; + } + + if(diff > biggestDiff) + { + biggestDiff = diff; + chosen = index; + betterGroup = group; + } + else if((diff == biggestDiff) && (a_parVars->m_count[group] < a_parVars->m_count[betterGroup])) + { + chosen = index; + betterGroup = group; + } + } + } + Classify(chosen, betterGroup, a_parVars); + } + + // If one group too full, put remaining rects in the other + if((a_parVars->m_count[0] + a_parVars->m_count[1]) < a_parVars->m_total) + { + if(a_parVars->m_count[0] >= a_parVars->m_total - a_parVars->m_minFill) + { + group = 1; + } + else + { + group = 0; + } + for(int index=0; indexm_total; ++index) + { + if(PartitionVars::NOT_TAKEN == a_parVars->m_partition[index]) + { + Classify(index, group, a_parVars); + } + } + } + + ASSERT((a_parVars->m_count[0] + a_parVars->m_count[1]) == a_parVars->m_total); + ASSERT((a_parVars->m_count[0] >= a_parVars->m_minFill) && + (a_parVars->m_count[1] >= a_parVars->m_minFill)); +} + + +// Copy branches from the buffer into two nodes according to the partition. +RTREE_TEMPLATE +void RTREE_QUAL::LoadNodes(Node* a_nodeA, Node* a_nodeB, PartitionVars* a_parVars) +{ + ASSERT(a_nodeA); + ASSERT(a_nodeB); + ASSERT(a_parVars); + + for(int index=0; index < a_parVars->m_total; ++index) + { + ASSERT(a_parVars->m_partition[index] == 0 || a_parVars->m_partition[index] == 1); + + int targetNodeIndex = a_parVars->m_partition[index]; + Node* targetNodes[] = {a_nodeA, a_nodeB}; + + // It is assured that AddBranch here will not cause a node split. + bool nodeWasSplit = AddBranch(&a_parVars->m_branchBuf[index], targetNodes[targetNodeIndex], NULL); + ASSERT(!nodeWasSplit); + } +} + + +// Initialize a PartitionVars structure. +RTREE_TEMPLATE +void RTREE_QUAL::InitParVars(PartitionVars* a_parVars, int a_maxRects, int a_minFill) +{ + ASSERT(a_parVars); + + a_parVars->m_count[0] = a_parVars->m_count[1] = 0; + a_parVars->m_area[0] = a_parVars->m_area[1] = (ELEMTYPEREAL)0; + a_parVars->m_total = a_maxRects; + a_parVars->m_minFill = a_minFill; + for(int index=0; index < a_maxRects; ++index) + { + a_parVars->m_partition[index] = PartitionVars::NOT_TAKEN; + } +} + + +RTREE_TEMPLATE +void RTREE_QUAL::PickSeeds(PartitionVars* a_parVars) +{ + int seed0, seed1; + ELEMTYPEREAL worst, waste; + ELEMTYPEREAL area[MAXNODES+1]; + + for(int index=0; indexm_total; ++index) + { + area[index] = CalcRectVolume(&a_parVars->m_branchBuf[index].m_rect); + } + + worst = -a_parVars->m_coverSplitArea - 1; + for(int indexA=0; indexA < a_parVars->m_total-1; ++indexA) + { + for(int indexB = indexA+1; indexB < a_parVars->m_total; ++indexB) + { + Rect oneRect = CombineRect(&a_parVars->m_branchBuf[indexA].m_rect, &a_parVars->m_branchBuf[indexB].m_rect); + waste = CalcRectVolume(&oneRect) - area[indexA] - area[indexB]; + if(waste > worst) + { + worst = waste; + seed0 = indexA; + seed1 = indexB; + } + } + } + + Classify(seed0, 0, a_parVars); + Classify(seed1, 1, a_parVars); +} + + +// Put a branch in one of the groups. +RTREE_TEMPLATE +void RTREE_QUAL::Classify(int a_index, int a_group, PartitionVars* a_parVars) +{ + ASSERT(a_parVars); + ASSERT(PartitionVars::NOT_TAKEN == a_parVars->m_partition[a_index]); + + a_parVars->m_partition[a_index] = a_group; + + // Calculate combined rect + if (a_parVars->m_count[a_group] == 0) + { + a_parVars->m_cover[a_group] = a_parVars->m_branchBuf[a_index].m_rect; + } + else + { + a_parVars->m_cover[a_group] = CombineRect(&a_parVars->m_branchBuf[a_index].m_rect, &a_parVars->m_cover[a_group]); + } + + // Calculate volume of combined rect + a_parVars->m_area[a_group] = CalcRectVolume(&a_parVars->m_cover[a_group]); + + ++a_parVars->m_count[a_group]; +} + + +// Delete a data rectangle from an index structure. +// Pass in a pointer to a Rect, the tid of the record, ptr to ptr to root node. +// Returns 1 if record not found, 0 if success. +// RemoveRect provides for eliminating the root. +RTREE_TEMPLATE +bool RTREE_QUAL::RemoveRect(Rect* a_rect, const DATATYPE& a_id, Node** a_root) +{ + ASSERT(a_rect && a_root); + ASSERT(*a_root); + + ListNode* reInsertList = NULL; + + if(!RemoveRectRec(a_rect, a_id, *a_root, &reInsertList)) + { + // Found and deleted a data item + // Reinsert any branches from eliminated nodes + while(reInsertList) + { + Node* tempNode = reInsertList->m_node; + + for(int index = 0; index < tempNode->m_count; ++index) + { + // TODO go over this code. should I use (tempNode->m_level - 1)? + InsertRect(tempNode->m_branch[index], + a_root, + tempNode->m_level); + } + + ListNode* remLNode = reInsertList; + reInsertList = reInsertList->m_next; + + FreeNode(remLNode->m_node); + FreeListNode(remLNode); + } + + // Check for redundant root (not leaf, 1 child) and eliminate TODO replace + // if with while? In case there is a whole branch of redundant roots... + if((*a_root)->m_count == 1 && (*a_root)->IsInternalNode()) + { + Node* tempNode = (*a_root)->m_branch[0].m_child; + + ASSERT(tempNode); + FreeNode(*a_root); + *a_root = tempNode; + } + return false; + } + else + { + return true; + } +} + + +// Delete a rectangle from non-root part of an index structure. +// Called by RemoveRect. Descends tree recursively, +// merges branches on the way back up. +// Returns 1 if record not found, 0 if success. +RTREE_TEMPLATE +bool RTREE_QUAL::RemoveRectRec(Rect* a_rect, const DATATYPE& a_id, Node* a_node, ListNode** a_listNode) +{ + ASSERT(a_rect && a_node && a_listNode); + ASSERT(a_node->m_level >= 0); + + if(a_node->IsInternalNode()) // not a leaf node + { + for(int index = 0; index < a_node->m_count; ++index) + { + if(Overlap(a_rect, &(a_node->m_branch[index].m_rect))) + { + if(!RemoveRectRec(a_rect, a_id, a_node->m_branch[index].m_child, a_listNode)) + { + if(a_node->m_branch[index].m_child->m_count >= MINNODES) + { + // child removed, just resize parent rect + a_node->m_branch[index].m_rect = NodeCover(a_node->m_branch[index].m_child); + } + else + { + // child removed, not enough entries in node, eliminate node + ReInsert(a_node->m_branch[index].m_child, a_listNode); + DisconnectBranch(a_node, index); // Must return after this call as count has changed + } + return false; + } + } + } + return true; + } + else // A leaf node + { + for(int index = 0; index < a_node->m_count; ++index) + { + if(a_node->m_branch[index].m_data == a_id) + { + DisconnectBranch(a_node, index); // Must return after this call as count has changed + return false; + } + } + return true; + } +} + + +// Decide whether two rectangles overlap. +RTREE_TEMPLATE +bool RTREE_QUAL::Overlap(Rect* a_rectA, Rect* a_rectB) +{ + ASSERT(a_rectA && a_rectB); + + for(int index=0; index < NUMDIMS; ++index) + { + if (a_rectA->m_min[index] > a_rectB->m_max[index] || + a_rectB->m_min[index] > a_rectA->m_max[index]) + { + return false; + } + } + return true; +} + + +// Add a node to the reinsertion list. All its branches will later +// be reinserted into the index structure. +RTREE_TEMPLATE +void RTREE_QUAL::ReInsert(Node* a_node, ListNode** a_listNode) +{ + ListNode* newListNode; + + newListNode = AllocListNode(); + newListNode->m_node = a_node; + newListNode->m_next = *a_listNode; + *a_listNode = newListNode; +} + + +// Search in an index tree or subtree for all data retangles that overlap the argument rectangle. +RTREE_TEMPLATE +bool RTREE_QUAL::Search(Node* a_node, Rect* a_rect, int& a_foundCount, t_resultCallback a_resultCallback, void* a_context) +{ + ASSERT(a_node); + ASSERT(a_node->m_level >= 0); + ASSERT(a_rect); + + if(a_node->IsInternalNode()) + { + // This is an internal node in the tree + for(int index=0; index < a_node->m_count; ++index) + { + if(Overlap(a_rect, &a_node->m_branch[index].m_rect)) + { + if(!Search(a_node->m_branch[index].m_child, a_rect, a_foundCount, a_resultCallback, a_context)) + { + // The callback indicated to stop searching + return false; + } + } + } + } + else + { + // This is a leaf node + for(int index=0; index < a_node->m_count; ++index) + { + if(Overlap(a_rect, &a_node->m_branch[index].m_rect)) + { + DATATYPE& id = a_node->m_branch[index].m_data; + ++a_foundCount; + + // NOTE: There are different ways to return results. Here's where to modify + if(a_resultCallback) + { + if(!a_resultCallback(id, a_context)) + { + return false; // Don't continue searching + } + } + } + } + } + + return true; // Continue searching +} + + +#undef RTREE_TEMPLATE +#undef RTREE_QUAL + +#endif //RTREE_H + diff --git a/filter/filter_planet_by_cats.cpp b/filter/filter_planet_by_cats.cpp new file mode 100644 index 0000000..8ed9304 --- /dev/null +++ b/filter/filter_planet_by_cats.cpp @@ -0,0 +1,282 @@ +/* + Filters a planet file by categories and location. + + Serves as a replacement for Overpass API for the OSM Conflator. + Takes two parameters: a list of coordinates and categories prepared by + conflate.py and an OSM PBF/XML file. Prints an OSM XML file with + objects that will then be conflated with the external dataset. + Either specify that XML file name as the third parameter, or redirect + the output. + + Based on the osmium_amenity_list.cpp from libosmium. + + Published under Apache Public License 2.0. + + Written by Ilya Zverev for MAPS.ME. +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "RTree.h" +#include "xml_centers_output.hpp" + +using index_type = osmium::index::map::FlexMem; +using location_handler_type = osmium::handler::NodeLocationsForWays; + +bool AppendToVector(uint16_t cat_id, void *vec) { + static_cast*>(vec)->push_back(cat_id); + return true; +} + +class AmenityHandler : public osmium::handler::Handler { + + constexpr static double kSearchRadius = 0.01; + + typedef RTree DatasetTree; + typedef std::vector> TQuery; + typedef std::vector TCategory; + + DatasetTree m_tree; + osmium::io::xmlcenters::XMLCentersOutput m_centers; + std::map> m_categories; + std::map m_category_names; + + void print_object(const osmium::OSMObject &obj, + const osmium::Location ¢er) { + std::cout << m_centers.apply(obj, center); + } + + // Calculate the center point of a NodeRefList. + osmium::Location calc_center(const osmium::NodeRefList &nr_list) { + int64_t x = 0; + int64_t y = 0; + + for (const auto &nr : nr_list) { + x += nr.x(); + y += nr.y(); + } + + x /= nr_list.size(); + y /= nr_list.size(); + + return osmium::Location{x, y}; + } + + bool TestTags(osmium::TagList const & tags, TQuery const & query) { + for (std::vector const & pair : query) { + const char *value = tags[pair[0].c_str()]; + if (pair.size() == 2 && pair[1].empty()) { + if (value != nullptr) + return false; + } else { + if (value == nullptr) + return false; + if (pair.size() > 1) { + // TODO: substrings? + bool found = false; + for (size_t i = 1; i < pair.size(); i++) { + if (!strcmp(value, pair[i].c_str())) { + found = true; + break; + } + } + if (!found) + return false; + } + } + } + return true; + } + + bool IsEligible(const osmium::Location & loc, osmium::TagList const & tags) { + if (tags.empty()) + return false; + + int32_t radius = osmium::Location::double_to_fix(kSearchRadius); + int32_t min[] = {loc.x() - radius, loc.y() - radius}; + int32_t max[] = {loc.x() + radius, loc.y() + radius}; + std::vector found; + if (!m_tree.Search(min, max, &AppendToVector, &found)) + return false; + for (uint16_t cat_id : found) + for (TQuery query : m_categories[cat_id]) + if (TestTags(tags, query)) + return true; + return false; + } + + void SplitTrim(std::string const & s, char delimiter, std::size_t limit, std::vector & target) { + target.clear(); + std::size_t start = 0, end = 0; + while (start < s.length()) { + end = s.find(delimiter, start); + if (end == std::string::npos || target.size() == limit) + end = s.length(); + while (start < end && std::isspace(s[start])) + start++; + + std::size_t tmpend = end - 1; + while (tmpend > start && std::isspace(s[tmpend])) + tmpend++; + target.push_back(s.substr(start, tmpend - start + 1)); + start = end + 1; + } + } + + TQuery ParseQuery(std::string const & query) { + TQuery q; + std::vector parts; + SplitTrim(query, '|', 100, parts); + for (std::string const & part : parts) { + std::vector keys; + SplitTrim(part, '=', 100, keys); + if (keys.size() > 0) + q.push_back(keys); + } + return q; + } + + void LoadCategories(const char *filename) { + std::ifstream infile(filename); + std::string line; + std::vector parts; + bool parsingPoints = false; + while (std::getline(infile, line)) { + if (!parsingPoints) { + if (!line.size()) + parsingPoints = true; + else { + SplitTrim(line, ',', 3, parts); // cat_id, name, query + uint16_t cat_id = std::stoi(parts[0]); + m_category_names[cat_id] = parts[1]; + m_categories[cat_id].push_back(ParseQuery(parts[2])); + } + } else { + SplitTrim(line, ',', 3, parts); // lon, lat, cat_id + const osmium::Location loc(std::stod(parts[0]), std::stod(parts[1])); + int32_t coords[] = {loc.x(), loc.y()}; + uint16_t cat_id = std::stoi(parts[2]); + m_tree.Insert(coords, coords, cat_id); + } + } + } + +public: + AmenityHandler(const char *categories) { + LoadCategories(categories); + } + + void node(osmium::Node const & node) { + if (IsEligible(node.location(), node.tags())) { + print_object(node, node.location()); + } + } + + void way(osmium::Way const & way) { + if (!way.is_closed()) + return; + + int64_t x = 0, y = 0, cnt = 0; + for (const auto& node_ref : way.nodes()) { + if (node_ref.location()) { + x += node_ref.x(); + y += node_ref.y(); + cnt++; + } + } + if (!cnt) + return; + + const osmium::Location center(x / cnt, y / cnt); + if (IsEligible(center, way.tags())) { + print_object(way, center); + } + } + + void multi(osmium::Relation const & rel, osmium::Location const & center) { + if (IsEligible(center, rel.tags())) { + print_object(rel, center); + } + } + +}; // class AmenityHandler + +class AmenityRelationsManager : public osmium::relations::RelationsManager { + + AmenityHandler *m_handler; + +public: + + AmenityRelationsManager(AmenityHandler & handler) : + RelationsManager(), + m_handler(&handler) { + } + + bool new_relation(osmium::Relation const & rel) noexcept { + const char *rel_type = rel.tags().get_value_by_key("type"); + return rel_type && !std::strcmp(rel_type, "multipolygon"); + } + + void complete_relation(osmium::Relation const & rel) { + int64_t x = 0, y = 0, cnt = 0; + for (auto const & member : rel.members()) { + if (member.ref() != 0) { + const osmium::Way* way = this->get_member_way(member.ref()); + for (const auto& node_ref : way->nodes()) { + if (node_ref.location()) { + x += node_ref.x(); + y += node_ref.y(); + cnt++; + } + } + } + } + if (cnt > 0) + m_handler->multi(rel, osmium::Location{x / cnt, y / cnt}); + } +}; // class AmenityRelationsManager + +int main(int argc, char *argv[]) { + if (argc < 3) { + std::cerr << "Usage: " << argv[0] + << " \n"; + std::exit(1); + } + + const osmium::io::File input_file{argv[2]}; + const osmium::io::File output_file{"", "osm"}; + + AmenityHandler data_handler(argv[1]); + AmenityRelationsManager manager(data_handler); + osmium::relations::read_relations(input_file, manager); + + osmium::io::Header header; + header.set("generator", argv[0]); + osmium::io::Writer writer{output_file, header, osmium::io::overwrite::allow}; + + index_type index; + location_handler_type location_handler{index}; + location_handler.ignore_errors(); + osmium::io::Reader reader{input_file}; + + osmium::apply(reader, location_handler, data_handler, manager.handler()); + + std::cout.flush(); + reader.close(); + writer.close(); +} diff --git a/filter/xml_centers_output.hpp b/filter/xml_centers_output.hpp new file mode 100644 index 0000000..5694be3 --- /dev/null +++ b/filter/xml_centers_output.hpp @@ -0,0 +1,279 @@ +/* + +This file is based on xml_output_format.hpp from the Osmium library +(http://osmcode.org/libosmium). + +Copyright 2013-2017 Jochen Topf and others (see README). +Copyright 2017 Ilya Zverev , MAPS.ME + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace osmium { + + namespace io { + + namespace xmlcenters { + + namespace detail { + + inline void append_lat_lon_attributes(std::string& out, const char* lat, const char* lon, const osmium::Location& location) { + out += ' '; + out += lat; + out += "=\""; + osmium::detail::append_location_coordinate_to_string(std::back_inserter(out), location.y()); + out += "\" "; + out += lon; + out += "=\""; + osmium::detail::append_location_coordinate_to_string(std::back_inserter(out), location.x()); + out += "\""; + } + + } // namespace detail + + class XMLCentersOutput { + + std::shared_ptr m_out; + + inline void append_xml_encoded_string(std::string & out, const char *data) { + osmium::io::detail::append_xml_encoded_string(out, data); + } + + void output_int(int64_t value) { + if (value < 0) { + *m_out += '-'; + value = -value; + } + + char temp[20]; + char *t = temp; + do { + *t++ = char(value % 10) + '0'; + value /= 10; + } while (value > 0); + + const auto old_size = m_out->size(); + m_out->resize(old_size + (t - temp)); + char* data = &(*m_out)[old_size]; + do { + *data++ += *--t; + } while (t != temp); + } + + void write_spaces(int num) { + for (; num != 0; --num) { + *m_out += ' '; + } + } + + void write_prefix() { + write_spaces(2); + } + + template + void write_attribute(const char* name, T value) { + *m_out += ' '; + *m_out += name; + *m_out += "=\""; + output_int(value); + *m_out += '"'; + } + + void write_meta(const osmium::OSMObject& object) { + write_attribute("id", object.id()); + + if (object.version()) { + write_attribute("version", object.version()); + } + + if (object.timestamp()) { + *m_out += " timestamp=\""; + *m_out += object.timestamp().to_iso(); + *m_out += "\""; + } + + if (!object.user_is_anonymous()) { + write_attribute("uid", object.uid()); + *m_out += " user=\""; + append_xml_encoded_string(*m_out, object.user()); + *m_out += "\""; + } + + if (object.changeset()) { + write_attribute("changeset", object.changeset()); + } + } + + void write_tags(const osmium::TagList& tags) { + for (const auto& tag : tags) { + write_spaces(2); + *m_out += " \n"; + } + } + + public: + + XMLCentersOutput() : m_out(std::make_shared()) { + } + + std::string apply(osmium::OSMObject const & item, osmium::Location const & center) { + switch(item.type()) { + case osmium::item_type::node: + node(static_cast(item)); + break; + case osmium::item_type::way: + way(static_cast(item), center); + break; + case osmium::item_type::relation: + relation(static_cast(item), center); + break; + default: + throw osmium::unknown_type{}; + } + + std::string out; + using std::swap; + swap(out, *m_out); + + return out; + } + + void node(const osmium::Node& node) { + write_prefix(); + *m_out += "\n"; + } + + write_tags(relation.tags()); + + write_prefix(); + *m_out += "\n"; + } + + }; // class XMLCentersOutputBlock + + } // namespace xmlcenters + + } // namespace io + +} // namespace osmium diff --git a/profiles/auchan_moscow.py b/profiles/auchan_moscow.py index a6e8625..735e089 100644 --- a/profiles/auchan_moscow.py +++ b/profiles/auchan_moscow.py @@ -8,7 +8,7 @@ source = 'auchan.ru' # Not adding a ref:auchan tag, since we don't have good identifiers no_dataset_id = True # Using a name query with regular expressions -query = [('shop', '~supermarket|mall'), ('name', '~Ашан|АШАН')] +query = [('shop', 'supermarket', 'mall'), ('name', '~Ашан|АШАН')] master_tags = ('name', 'opening_hours', 'phone', 'website') # Empty dict so we don't add a fixme tag to unmatched objects tag_unmatched = {} @@ -44,7 +44,7 @@ def dataset(fileobj): # We are parsing HTML, and for that we need an lxml package from lxml import html - global download_url_copy + global download_url_copy, re h = html.fromstring(fileobj.read().decode('utf-8')) shops = h.find_class('shops-in-the-city-holder')[0] shops.make_links_absolute(download_url_copy) diff --git a/profiles/burgerking.py b/profiles/burgerking.py index fac35d8..e5d76fa 100644 --- a/profiles/burgerking.py +++ b/profiles/burgerking.py @@ -1,3 +1,7 @@ +# Note: the json file at the burgerking website was restructured +# and does not contain any useful data now. +# So this profile is here solely for demonstration purposes. + import json import codecs import re @@ -20,6 +24,7 @@ tag_unmatched = { def dataset(fileobj): def parse_hours(s): + global re s = re.sub('^зал:? *', '', s.lower()) s = s.replace('
', ';').replace('
', ';').replace('\n', ';').replace(' ', '').replace(',', ';').replace('–', '-') s = s.replace('-00:', '-24:') @@ -66,7 +71,11 @@ def dataset(fileobj): 346: 'Передвинуть к кафе', } - source = json.load(codecs.getreader('utf-8')(fileobj)) + json_src = codecs.getreader('utf-8')(fileobj).read() + p = json_src.find(' 0: + json_src = json_src[:p] + source = json.loads(json_src) data = [] for el in source: gid = int(el['origID']) diff --git a/profiles/minkult.py b/profiles/minkult.py index ba8efe4..98dcb1a 100644 --- a/profiles/minkult.py +++ b/profiles/minkult.py @@ -11,7 +11,8 @@ master_tags = ('official_name', 'phone', 'opening_hours', 'website') # Reading the dataset passport to determine an URL of the latest dataset version -def download_url(dataset_id='7705851331-theaters'): +def download_url(): + dataset_id = '7705851331-' + (param or 'museums') r = requests.get('http://opendata.mkrf.ru/opendata/{}/meta.json'.format(dataset_id)) if r.status_code != 200 or len(r.content) == 0: logging.error('Could not get URL for dataset: %s %s', r.status_code, r.text) @@ -22,6 +23,22 @@ def download_url(dataset_id='7705851331-theaters'): logging.info('Downloading %s from %s', result['title'], latest['created']) return latest['source'] +source = 'opendata.mkrf.ru' +dataset_id = 'mkrf_'+(param or 'museums') +if not param or param == 'museums': + query = [('tourism', 'museum')] +elif param == 'theaters': + query = [('amenity', 'theatre')] +elif param == 'circuses': + query = [('amenity', 'circus')] +elif param == 'philharmonic': + query = [('amenity', 'theatre')] +else: + raise ValueError('Unknown param value: {}'.format(param)) + +max_distance = 300 +master_tags = ('official_name', 'phone', 'opening_hours', 'website') + def dataset(fileobj): def make_wd_ranges(r): diff --git a/profiles/moscow_parkomats.py b/profiles/moscow_parkomats.py index 615db98..98752b8 100644 --- a/profiles/moscow_parkomats.py +++ b/profiles/moscow_parkomats.py @@ -1,12 +1,10 @@ -# Available modules: codecs, logging, requests, json, re, etree. But importing these helps catch other errors +# Available modules: codecs, logging, requests, json, etree. But importing these helps catch other errors import json -import re import logging -import requests -import zipfile def download_url(mos_dataset_id=1421): + import requests r = requests.get('https://data.mos.ru/api/datasets/expformats/?datasetId={}'.format(mos_dataset_id)) if r.status_code != 200 or len(r.content) == 0: logging.error('Could not get URL for dataset: %s %s', r.status_code, r.text) @@ -15,7 +13,7 @@ def download_url(mos_dataset_id=1421): url = [x for x in r.json() if x['Format'] == 'json'][0] version = '?' title = 'dataset' - r = requests.get('https://data.mos.ru/apiproxy/opendata/1421/meta.json'.format(mos_dataset_id)) + r = requests.get('https://data.mos.ru/apiproxy/opendata/{}/meta.json'.format(mos_dataset_id)) if r.status_code == 200: title = r.json()['Title'] version = r.json()['VersionNumber'] @@ -50,6 +48,8 @@ master_tags = ('zone:parking', 'ref', 'contact:phone', 'contact:website', 'opera # A list of SourcePoint objects. Initialize with (id, lat, lon, {tags}). def dataset(fileobj): + import zipfile + import re zf = zipfile.ZipFile(fileobj) source = json.loads(zf.read(zf.namelist()[0]).decode('cp1251')) RE_NUM4 = re.compile(r'\d{4,6}') diff --git a/profiles/navads_shell_json.py b/profiles/navads_shell_json.py index 3457199..4396189 100644 --- a/profiles/navads_shell_json.py +++ b/profiles/navads_shell_json.py @@ -56,6 +56,7 @@ def dataset(fileobj): return '24/7' return '; '.join(res).replace('23:59', '24:00') + global re, defaultdict source = json.load(codecs.getreader('utf-8-sig')(fileobj)) data = [] for el in source['Locations']: