391 lines
16 KiB
Python
391 lines
16 KiB
Python
import logging
|
|
import requests
|
|
import re
|
|
from .data import OSMPoint
|
|
from . import etree
|
|
|
|
|
|
OVERPASS_SERVER = 'https://overpass-api.de/api/'
|
|
ALT_OVERPASS_SERVER = 'https://overpass.kumi.systems/api/'
|
|
OSM_API_SERVER = 'https://api.openstreetmap.org/api/0.6/'
|
|
BBOX_PADDING = 0.003 # in degrees, ~330 m default
|
|
|
|
|
|
class OsmDownloader:
|
|
def __init__(self, profile):
|
|
self.profile = profile
|
|
|
|
def set_overpass(self, server='alt'):
|
|
global OVERPASS_SERVER
|
|
if server == 'alt':
|
|
OVERPASS_SERVER = ALT_OVERPASS_SERVER
|
|
else:
|
|
OVERPASS_SERVER = server
|
|
|
|
def construct_overpass_query(self, bboxes):
|
|
"""Constructs an Overpass API query from the "query" list in the profile.
|
|
(k, v) turns into [k=v], (k,) into [k], (k, None) into [!k], (k, "~v") into [k~v]."""
|
|
tags = self.profile.get(
|
|
'query', required="a list of tuples. E.g. [('amenity', 'cafe'), ('name', '~Mc.*lds')]")
|
|
tag_strs = []
|
|
if isinstance(tags, str):
|
|
tag_strs = [tags]
|
|
else:
|
|
if not isinstance(tags[0], str) and isinstance(tags[0][0], str):
|
|
tags = [tags]
|
|
for tags_q in tags:
|
|
if isinstance(tags_q, str):
|
|
tag_strs.append(tags_q)
|
|
continue
|
|
tag_str = ''
|
|
for t in tags_q:
|
|
if len(t) == 1:
|
|
q = '"{}"'.format(t[0])
|
|
elif t[1] is None or len(t[1]) == 0:
|
|
q = '"!{}"'.format(t[0])
|
|
elif t[1][0] == '~':
|
|
q = '"{}"~"{}",i'.format(t[0], t[1][1:])
|
|
elif len(t) > 2:
|
|
q = '"{}"~"^({})$"'.format(t[0], '|'.join(t[1:]))
|
|
else:
|
|
q = '"{}"="{}"'.format(t[0], t[1])
|
|
tag_str += '[' + q + ']'
|
|
tag_strs.append(tag_str)
|
|
|
|
if self.profile.get('no_dataset_id', False):
|
|
ref = None
|
|
else:
|
|
ref = 'nwr["ref:' + self.profile.get(
|
|
'dataset_id', required='A fairly unique id of the dataset to query OSM') + '"]'
|
|
timeout = self.profile.get('overpass_timeout', 120)
|
|
query = '[out:xml]{};('.format('' if timeout is None else '[timeout:{}]'.format(timeout))
|
|
for bbox in bboxes:
|
|
bbox_str = '' if bbox is None else '(' + ','.join([str(x) for x in bbox]) + ')'
|
|
for tag_str in tag_strs:
|
|
query += 'nwr' + tag_str + bbox_str + ';'
|
|
if ref is not None:
|
|
if not self.profile.get('bounded_update', False):
|
|
query += ref + ';'
|
|
else:
|
|
for bbox in bboxes:
|
|
bbox_str = '' if bbox is None else '(' + ','.join(
|
|
[str(x) for x in bbox]) + ')'
|
|
query += ref + bbox_str + ';'
|
|
query += '); out meta qt center;'
|
|
return query
|
|
|
|
def get_bbox(self, points):
|
|
"""Plain iterates over the dataset and returns the bounding box
|
|
that encloses it."""
|
|
padding = self.profile.get('bbox_padding', BBOX_PADDING)
|
|
bbox = [90.0, 180.0, -90.0, -180.0]
|
|
for p in points:
|
|
bbox[0] = min(bbox[0], p.lat - padding)
|
|
bbox[1] = min(bbox[1], p.lon - padding)
|
|
bbox[2] = max(bbox[2], p.lat + padding)
|
|
bbox[3] = max(bbox[3], p.lon + padding)
|
|
return bbox
|
|
|
|
def split_into_bboxes(self, points):
|
|
"""
|
|
Splits the dataset into multiple bboxes to lower load on the overpass api.
|
|
|
|
Returns a list of tuples (minlat, minlon, maxlat, maxlon).
|
|
"""
|
|
max_bboxes = self.profile.get('max_request_boxes', 4)
|
|
if max_bboxes <= 1 or len(points) <= 1:
|
|
return [self.get_bbox(points)]
|
|
|
|
# coord, alt coord, total w/h to the left/bottom, total w/h to the right/top
|
|
lons = sorted([[d.lon, d.lat, 0, 0] for d in points])
|
|
lats = sorted([[d.lat, d.lon, 0, 0] for d in points])
|
|
|
|
def update_side_dimensions(ar):
|
|
"""For each point, calculates the maximum and
|
|
minimum bound for all points left and right."""
|
|
fwd_top = fwd_bottom = ar[0][1]
|
|
back_top = back_bottom = ar[-1][1]
|
|
for i in range(len(ar)):
|
|
fwd_top = max(fwd_top, ar[i][1])
|
|
fwd_bottom = min(fwd_bottom, ar[i][1])
|
|
ar[i][2] = fwd_top - fwd_bottom
|
|
back_top = max(back_top, ar[-i-1][1])
|
|
back_bottom = min(back_bottom, ar[-i-1][1])
|
|
ar[-i-1][3] = back_top - back_bottom
|
|
|
|
def find_max_gap(ar, h):
|
|
"""Select an interval between points, which would give
|
|
the maximum area if split there."""
|
|
max_id = None
|
|
max_gap = 0
|
|
for i in range(len(ar) - 1):
|
|
# "Extra" variables are for area to the left and right
|
|
# that would be freed after splitting.
|
|
extra_left = (ar[i][0]-ar[0][0]) * (h-ar[i][2])
|
|
extra_right = (ar[-1][0]-ar[i+1][0]) * (h-ar[i+1][3])
|
|
# Gap is the area of the column between points i and i+1
|
|
# plus extra areas to the left and right.
|
|
gap = (ar[i+1][0] - ar[i][0]) * h + extra_left + extra_right
|
|
if gap > max_gap:
|
|
max_id = i
|
|
max_gap = gap
|
|
return max_id, max_gap
|
|
|
|
def get_bbox(b, pad=0):
|
|
"""Returns a list of [min_lat, min_lon, max_lat, max_lon] for a box."""
|
|
return [b[2][0][0]-pad, b[3][0][0]-pad, b[2][-1][0]+pad, b[3][-1][0]+pad]
|
|
|
|
def split(box, point_array, point_id):
|
|
"""Split the box over axis point_array at point point_id...point_id+1.
|
|
Modifies the box in-place and returns a new box."""
|
|
alt_array = 5 - point_array # 3->2, 2->3
|
|
points = box[point_array][point_id+1:]
|
|
del box[point_array][point_id+1:]
|
|
alt = {True: [], False: []} # True means point is in new box
|
|
for p in box[alt_array]:
|
|
alt[(p[1], p[0]) >= (points[0][0], points[0][1])].append(p)
|
|
|
|
new_box = [None] * 4
|
|
new_box[point_array] = points
|
|
new_box[alt_array] = alt[True]
|
|
box[alt_array] = alt[False]
|
|
for i in range(2):
|
|
box[i] = box[i+2][-1][0] - box[i+2][0][0]
|
|
new_box[i] = new_box[i+2][-1][0] - new_box[i+2][0][0]
|
|
return new_box
|
|
|
|
# height, width, lats, lons
|
|
boxes = [[lats[-1][0]-lats[0][0], lons[-1][0]-lons[0][0], lats, lons]]
|
|
initial_area = boxes[0][0] * boxes[0][1]
|
|
while len(boxes) < max_bboxes and len(boxes) <= len(points):
|
|
candidate_box = None
|
|
area = 0
|
|
point_id = None
|
|
point_array = None
|
|
for box in boxes:
|
|
for ar in (2, 3):
|
|
# Find a box and an axis for splitting that would decrease the area the most
|
|
update_side_dimensions(box[ar])
|
|
max_id, max_area = find_max_gap(box[ar], box[3-ar])
|
|
if max_area > area:
|
|
area = max_area
|
|
candidate_box = box
|
|
point_id = max_id
|
|
point_array = ar
|
|
if area * 100 < initial_area:
|
|
# Stop splitting when the area decrease is less than 1%
|
|
break
|
|
logging.debug('Splitting bbox %s at %s %s..%s; area decrease %s%%',
|
|
get_bbox(candidate_box),
|
|
'longs' if point_array == 3 else 'lats',
|
|
candidate_box[point_array][point_id][0],
|
|
candidate_box[point_array][point_id+1][0],
|
|
round(100*area/initial_area))
|
|
boxes.append(split(candidate_box, point_array, point_id))
|
|
|
|
padding = self.profile.get('bbox_padding', BBOX_PADDING)
|
|
return [get_bbox(b, padding) for b in boxes]
|
|
|
|
def get_categories(self, tags):
|
|
def match_query(tags, query):
|
|
for tag in query:
|
|
if len(tag) == 1:
|
|
return tag[0] in tags
|
|
else:
|
|
value = tags.get(tag[0], None)
|
|
if tag[1] is None or tag[1] == '':
|
|
return value is None
|
|
if value is None:
|
|
return False
|
|
found = False
|
|
for t2 in tag[1:]:
|
|
if t2[0] == '~':
|
|
if re.search(t2[1:], value):
|
|
found = True
|
|
elif t2[0] == '!':
|
|
if t2[1:].lower() in value.lower():
|
|
found = True
|
|
elif t2 == value:
|
|
found = True
|
|
if found:
|
|
break
|
|
if not found:
|
|
return False
|
|
return True
|
|
|
|
def tags_to_query(tags):
|
|
return [(k, v) for k, v in tags.items()]
|
|
|
|
result = set()
|
|
qualifies = self.profile.get('qualifies', args=tags)
|
|
if qualifies is not None:
|
|
if qualifies:
|
|
result.add(None)
|
|
return result
|
|
|
|
# First check default query
|
|
query = self.profile.get('query', None)
|
|
if query is not None:
|
|
if isinstance(query, str):
|
|
result.add(None)
|
|
else:
|
|
if isinstance(query[0][0], str):
|
|
query = [query]
|
|
for q in query:
|
|
if match_query(tags, q):
|
|
result.add(None)
|
|
break
|
|
|
|
# Then check each category if we got these
|
|
categories = self.profile.get('categories', {})
|
|
for name, params in categories.items():
|
|
if 'tags' not in params and 'query' not in params:
|
|
raise ValueError('No tags and query attributes for category "{}"'.format(name))
|
|
if match_query(tags, params.get('query', tags_to_query(params.get('tags')))):
|
|
result.add(name)
|
|
|
|
return result
|
|
|
|
def calc_boxes(self, dataset_points):
|
|
profile_bbox = self.profile.get('bbox', True)
|
|
if not profile_bbox:
|
|
bboxes = [None]
|
|
elif hasattr(profile_bbox, '__len__') and len(profile_bbox) == 4:
|
|
bboxes = [profile_bbox]
|
|
else:
|
|
bboxes = self.split_into_bboxes(dataset_points)
|
|
return bboxes
|
|
|
|
def download(self, bboxes=None):
|
|
"""Constructs an Overpass API query and requests objects
|
|
to match from a server."""
|
|
if not bboxes:
|
|
pbbox = self.profile.get('bbox', True)
|
|
if pbbox and hasattr(pbbox, '__len__') and len(pbbox) == 4:
|
|
bboxes = [pbbox]
|
|
else:
|
|
bboxes = [None]
|
|
|
|
query = self.construct_overpass_query(bboxes)
|
|
logging.debug('Overpass query: %s', query)
|
|
r = requests.get(OVERPASS_SERVER + 'interpreter', {'data': query})
|
|
if r.encoding is None:
|
|
r.encoding = 'utf-8'
|
|
if r.status_code != 200:
|
|
logging.error('Failed to download data from Overpass API: %s', r.status_code)
|
|
if 'rate_limited' in r.text:
|
|
r = requests.get(OVERPASS_SERVER + 'status')
|
|
logging.warning('Seems like you are rate limited. API status:\n%s', r.text)
|
|
else:
|
|
logging.error('Error message: %s', r.text)
|
|
raise IOError()
|
|
if 'runtime error: ' in r.text:
|
|
m = re.search(r'runtime error: ([^<]+)', r.text)
|
|
error = 'unknown' if not m else m.group(1)
|
|
if 'Query timed out' in error:
|
|
logging.error(
|
|
'Query timed out, try increasing the "overpass_timeout" profile variable')
|
|
else:
|
|
logging.error('Runtime error: %s', error)
|
|
raise IOError()
|
|
return self.parse_xml(r.content)
|
|
|
|
def parse_xml(self, fileobj):
|
|
"""Parses an OSM XML file into the "osmdata" field. For ways and relations,
|
|
finds the center. Drops objects that do not match the overpass query tags
|
|
(see "check_against_profile_tags" method)."""
|
|
if isinstance(fileobj, bytes):
|
|
xml = etree.fromstring(fileobj)
|
|
else:
|
|
xml = etree.parse(fileobj).getroot()
|
|
nodes = {}
|
|
for nd in xml.findall('node'):
|
|
nodes[nd.get('id')] = (float(nd.get('lat')), float(nd.get('lon')))
|
|
ways = {}
|
|
for way in xml.findall('way'):
|
|
center = way.find('center')
|
|
if center is not None:
|
|
ways[way.get('id')] = [float(center.get('lat')), float(center.get('lon'))]
|
|
else:
|
|
logging.debug('Way %s does not have a center', way.get('id'))
|
|
coord = [0, 0]
|
|
count = 0
|
|
for nd in way.findall('nd'):
|
|
if nd.get('ref') in nodes:
|
|
count += 1
|
|
for i in range(len(coord)):
|
|
coord[i] += nodes[nd.get('ref')][i]
|
|
ways[way.get('id')] = [coord[0] / count, coord[1] / count]
|
|
|
|
# For calculating weight of OSM objects
|
|
weight_fn = self.profile.get_raw('weight')
|
|
osmdata = {}
|
|
|
|
for el in xml:
|
|
tags = {}
|
|
for tag in el.findall('tag'):
|
|
tags[tag.get('k')] = tag.get('v')
|
|
categories = self.get_categories(tags)
|
|
if categories is False or categories is None or len(categories) == 0:
|
|
continue
|
|
|
|
if el.tag == 'node':
|
|
coord = nodes[el.get('id')]
|
|
members = None
|
|
elif el.tag == 'way':
|
|
coord = ways[el.get('id')]
|
|
members = [nd.get('ref') for nd in el.findall('nd')]
|
|
elif el.tag == 'relation':
|
|
center = el.find('center')
|
|
if center is not None:
|
|
coord = [float(center.get('lat')), float(center.get('lon'))]
|
|
else:
|
|
logging.debug('Relation %s does not have a center', el.get('id'))
|
|
coord = [0, 0]
|
|
count = 0
|
|
for m in el.findall('member'):
|
|
if m.get('type') == 'node' and m.get('ref') in nodes:
|
|
count += 1
|
|
for i in range(len(coord)):
|
|
coord[i] += nodes[m.get('ref')][i]
|
|
elif m.get('type') == 'way' and m.get('ref') in ways:
|
|
count += 1
|
|
for i in range(len(coord)):
|
|
coord[i] += ways[m.get('ref')][i]
|
|
if count > 0:
|
|
coord = [coord[0] / count, coord[1] / count]
|
|
members = [
|
|
(m.get('type'), m.get('ref'), m.get('role'))
|
|
for m in el.findall('member')
|
|
]
|
|
else:
|
|
continue
|
|
if not coord or coord == [0, 0]:
|
|
continue
|
|
pt = OSMPoint(
|
|
el.tag, int(el.get('id')), int(el.get('version')),
|
|
coord[0], coord[1], tags, categories)
|
|
pt.members = members
|
|
if pt.is_poi():
|
|
if callable(weight_fn):
|
|
weight = weight_fn(pt)
|
|
if weight:
|
|
if abs(weight) > 3:
|
|
pt.dist_offset = weight
|
|
else:
|
|
pt.dist_offset = weight * self.profile.max_distance
|
|
osmdata[pt.id] = pt
|
|
return osmdata
|
|
|
|
|
|
def check_moveability(changes):
|
|
to_check = [x for x in changes if x['properties']['osm_type'] == 'node' and
|
|
x['properties']['action'] == 'modify']
|
|
logging.info('Checking moveability of %s modified nodes', len(to_check))
|
|
for c in to_check:
|
|
p = c['properties']
|
|
p['can_move'] = False
|
|
r = requests.get('{}node/{}/ways'.format(OSM_API_SERVER, p['osm_id']))
|
|
if r.status_code == 200:
|
|
xml = etree.fromstring(r.content)
|
|
p['can_move'] = xml.find('way') is None
|