commit 1fccd16fc21e35230d1684372f6af488cc6ac98d Author: Ilya Zverev Date: Wed Oct 4 15:58:24 2017 +0300 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9c602a0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +__pycache__/ +*.log +*.osm +validate.sh diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..67db858 --- /dev/null +++ b/LICENSE @@ -0,0 +1,175 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. diff --git a/README.md b/README.md new file mode 100644 index 0000000..0262ee8 --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# Subway Preprocessor + +Here you see a list of scripts that can be used for preprocessing all the metro +systems in the world from OpenStreetMap. `subway_structure.py` produces +a list of disjunct systems that can be used for routing and for displaying +of metro maps. + +## How To Validate + +* Download or update a planet file in o5m format (using `osmconvert` and `osmupdate`). +* Use `filter_all_subways.sh` to extract a portion of data for all subways. 
+* Run `mapsme_subways.py -x filtered_data.osm` to build metro structures and receive a validation log. +* Run `validation_to_html.py` on that log to create readable HTML tables. + +## Adding Stop Areas To OSM + +To quickly add `stop_area` relations for the entire city, use the `make_stop_areas.py` script +from the `stop_area` directory. Give it a bounding box or a `.json` file downloaded from Overpass API. +It will produce a JOSM XML file that you should manually check in JOSM. After that +just upload it. + +## Author and License + +All scripts were written by Ilya Zverev for MAPS.ME. Published under Apache License 2.0. diff --git a/download_all_subways.sh b/download_all_subways.sh new file mode 100755 index 0000000..2797520 --- /dev/null +++ b/download_all_subways.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Still times out, do not use unless you want to be blocked for some hours on Overpass API +TIMEOUT=2000 +QUERY='[out:json][timeout:'$TIMEOUT'];(rel["route"="subway"];rel["route"="light_rail"];rel["public_transport"="stop_area"];rel["public_transport"="stop_area_group"];node["station"="subway"];node["station"="light_rail"];node["railway"="subway_entrance"];);(._;>;);out body center qt;' +http http://overpass-api.de/api/interpreter "data==$QUERY" --timeout $TIMEOUT > subways-$(date +%y%m%d).json +http https://overpass-api.de/api/status | grep available diff --git a/filter_all_subways.sh b/filter_all_subways.sh new file mode 100755 index 0000000..1f7d980 --- /dev/null +++ b/filter_all_subways.sh @@ -0,0 +1,4 @@ +#!/bin/bash +[ $# -lt 1 ] && echo 'Usage: $0 []' && exit 1 +OSMFILTER=${2-./osmfilter} +"$OSMFILTER" "$1" --keep= --keep-relations="route=subway or route=light_rail or route=monorail or route_master=subway or route_master=light_rail or route_master=monorail or public_transport=stop_area or public_transport=stop_area_group" --keep-nodes="station=subway or station=light_rail or station=monorail or railway=subway_entrance" --drop-author -o=subways-$(date +%y%m%d).osm 
diff --git a/mapsme_subways.py b/mapsme_subways.py new file mode 100755 index 0000000..697bf38 --- /dev/null +++ b/mapsme_subways.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 +import argparse +import json +import logging +import os +import sys +import time +import urllib.parse +import urllib.request +from subway_structure import download_cities + + +def overpass_request(bboxes=None): + query = '[out:json][timeout:1000];(' + if bboxes is None: + bboxes = [None] + for bbox in bboxes: + bbox_part = '' if not bbox else '({})'.format(','.join(bbox)) + for t, k, v in (('rel', 'route', 'subway'), + ('rel', 'route', 'light_rail'), + ('rel', 'route_master', 'subway'), + ('rel', 'route_master', 'light_rail'), + ('rel', 'public_transport', 'stop_area'), + ('rel', 'public_transport', 'stop_area_group'), + ('node', 'railway', 'subway_entrance')): + query += '{}["{}"="{}"]{};'.format(t, k, v, bbox_part) + query += ');(._;>);out body center qt;' + logging.debug('Query: %s', query) + url = 'http://overpass-api.de/api/interpreter?data={}'.format(urllib.parse.quote(query)) + response = urllib.request.urlopen(url, timeout=1000) + if response.getcode() != 200: + raise Exception('Failed to query Overpass API: HTTP {}'.format(response.getcode())) + return json.load(response)['elements'] + + +def multi_overpass(bboxes): + if not bboxes: + return overpass_request(None) + SLICE_SIZE = 10 + result = [] + for i in range(0, len(bboxes) + SLICE_SIZE - 1, SLICE_SIZE): + if i > 0: + time.sleep(5) + result.append(overpass_request(bboxes[i:i+SLICE_SIZE])) + return result + + +def load_xml(f): + try: + from lxml import etree + except ImportError: + import xml.etree.ElementTree as etree + + elements = [] + nodes = {} + for event, element in etree.iterparse(f): + if element.tag in ('node', 'way', 'relation'): + el = {'type': element.tag, 'id': int(element.get('id'))} + if element.tag == 'node': + for n in ('lat', 'lon'): + el[n] = float(element.get(n)) + nodes[el['id']] = (el['lat'], el['lon']) + 
tags = {} + nd = [] + members = [] + for sub in element: + if sub.tag == 'tag': + tags[sub.get('k')] = sub.get('v') + elif sub.tag == 'nd': + nd.append(int(sub.get('ref'))) + elif sub.tag == 'member': + members.append({'type': sub.get('type'), + 'ref': int(sub.get('ref')), + 'role': sub.get('role', '')}) + if tags: + el['tags'] = tags + if nd: + el['nodes'] = nd + if members: + el['members'] = members + elements.append(el) + element.clear() + logging.info('Read %s elements, now finding centers of ways and relations', len(elements)) + + # Now make centers, assuming relations go after ways + ways = {} + relations = {} + for el in elements: + if el['type'] == 'way' and 'nodes' in el: + center = [0, 0] + count = 0 + for nd in el['nodes']: + if nd in nodes: + center[0] += nodes[nd][0] + center[1] += nodes[nd][1] + count += 1 + if count > 0: + el['center'] = {'lat': center[0]/count, 'lon': center[1]/count} + ways[el['id']] = (el['center']['lat'], el['center']['lon']) + elif el['type'] == 'relation' and 'members' in el: + center = [0, 0] + count = 0 + for m in el['members']: + if m['type'] == 'node' and m['ref'] in nodes: + center[0] += nodes[m['ref']][0] + center[1] += nodes[m['ref']][1] + count += 1 + elif m['type'] == 'way' and m['ref'] in ways: + center[0] += ways[m['ref']][0] + center[1] += ways[m['ref']][1] + count += 1 + if count > 0: + el['center'] = {'lat': center[0]/count, 'lon': center[1]/count} + relations[el['id']] = (el['center']['lat'], el['center']['lon']) + + # Iterating again, now filling relations that contain only relations + for el in elements: + if el['type'] == 'relation' and 'members' in el: + center = [0, 0] + count = 0 + for m in el['members']: + if m['type'] == 'node' and m['ref'] in nodes: + center[0] += nodes[m['ref']][0] + center[1] += nodes[m['ref']][1] + count += 1 + elif m['type'] == 'way' and m['ref'] in ways: + center[0] += ways[m['ref']][0] + center[1] += ways[m['ref']][1] + count += 1 + elif m['type'] == 'relation' and m['ref'] in 
relations: + center[0] += relations[m['ref']][0] + center[1] += relations[m['ref']][1] + count += 1 + if count > 0: + el['center'] = {'lat': center[0]/count, 'lon': center[1]/count} + relations[el['id']] = (el['center']['lat'], el['center']['lon']) + return elements + + +def merge_mapsme_networks(networks): + result = {} + for k in ('stops', 'transfers', 'networks'): + result[k] = sum([n[k] for n in networks], []) + return result + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '-i', '--source', help='File to write backup of OSM data, or to read data from') + parser.add_argument('-x', '--xml', help='OSM extract with routes, to read data from') + parser.add_argument( + '-b', '--bbox', action='store_true', + help='Use city boundaries to query Overpass API instead of querying the world') + parser.add_argument('-q', '--quiet', action='store_true', help='Show only warnings and errors') + parser.add_argument('-c', '--city', help='Validate only a single city') + parser.add_argument('-o', '--output', help='JSON file for MAPS.ME') + parser.add_argument('-n', '--networks', type=argparse.FileType('w'), help='File to write the networks statistics') + options = parser.parse_args() + + if options.quiet: + log_level = logging.WARNING + else: + log_level = logging.INFO + logging.basicConfig(level=logging.INFO, datefmt='%H:%M:%S', + format='%(asctime)s %(levelname)-7s %(message)s') + + # Downloading cities from Google Spreadsheets + cities = download_cities() + if options.city: + cities = [c for c in cities if c.name == options.city] + logging.info('Read %s metro networks', len(cities)) + if not cities: + sys.exit(2) + + # Reading cached json, loading XML or querying Overpass API + if options.source and os.path.exists(options.source): + logging.info('Reading %s', options.source) + with open(options.source, 'r') as f: + osm = json.load(f) + if 'elements' in osm: + osm = osm['elements'] + elif options.xml: + logging.info('Reading %s', 
options.xml) + osm = load_xml(options.xml) + if options.source: + with open(options.source, 'w') as f: + json.dump(osm, f) + else: + if options.bbox: + bboxes = [c.bbox for c in cities] + else: + bboxes = None + logging.info('Downloading data from Overpass API') + osm = multi_overpass(bboxes) + if options.source: + with open(options.source, 'w') as f: + json.dump(osm, f) + logging.info('Downloaded %s elements, sorting by city', len(osm)) + + # Sorting elements by city and prepare a dict + for el in osm: + for c in cities: + if c.contains(el): + c.add(el) + + logging.info('Building routes for each city') + good_cities = [] + for c in cities: + c.extract_routes() + c.validate() + if c.errors == 0: + good_cities.append(c) + + logging.info('%s good cities: %s', len(good_cities), ', '.join([c.name for c in good_cities])) + + if options.networks: + from collections import Counter + for c in cities: + networks = Counter() + for r in c.routes.values(): + networks[str(r.network)] += 1 + print('{}: {}'.format(c.name, '; '.join( + ['{} ({})'.format(k, v) for k, v in networks.items()])), file=options.networks) + + # Finally, preparing a JSON file for MAPS.ME + if options.output: + networks = [c.for_mapsme() for c in cities] + with open(options.output, 'w') as f: + json.dump(merge_mapsme_networks(networks), f) diff --git a/stop_areas/make_stop_areas.py b/stop_areas/make_stop_areas.py new file mode 100755 index 0000000..b804d72 --- /dev/null +++ b/stop_areas/make_stop_areas.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 +import json +from lxml import etree +import sys +import kdtree +import math +import re +import urllib.parse +import urllib.request + + +QUERY = """ +[out:json][timeout:250][bbox:{{bbox}}]; +( + node["railway"="subway_entrance"]; + node["station"="subway"]; + node["station"="light_rail"]; + node["public_transport"="stop_position"]["train"="yes"]; + node["public_transport"="stop_position"]["subway"="yes"]; + way["station"="subway"]; + relation["station"="subway"]; + 
way["railway"="platform"]; + relation["railway"="platform"]; + relation[route="subway"]; + relation[route="light_rail"]; +); +(._;>;); +(._;rel(bn);); +out meta center qt; +""" + + +def el_id(el): + return el['type'][0] + str(el.get('id', el.get('ref', ''))) + + +class StationWrapper: + def __init__(self, st): + self.coords = (st['lon'], st['lat']) + self.station = st + + def __len__(self): + return 2 + + def __getitem__(self, i): + return self.coords[i] + + def distance(self, other): + """Calculate distance in meters.""" + dx = math.radians(self[0] - other['lon']) * math.cos( + 0.5 * math.radians(self[1] + other['lat'])) + dy = math.radians(self[1] - other['lat']) + return 6378137 * math.sqrt(dx*dx + dy*dy) + + +def overpass_request(bbox): + url = 'http://overpass-api.de/api/interpreter?data={}'.format( + urllib.parse.quote(QUERY.replace('{{bbox}}', bbox))) + response = urllib.request.urlopen(url, timeout=1000) + if response.getcode() != 200: + raise Exception('Failed to query Overpass API: HTTP {}'.format(response.getcode())) + return json.load(response)['elements'] + + +def add_stop_areas(src): + if not src: + raise Exception('Empty dataset provided to add_stop_areas') + + # Add station=* tags to stations in subway and light_rail routes + stations = {} + for el in src: + if 'tags' in el and el['tags'].get('railway', None) == 'station': + stations[el_id(el)] = el + + for el in src: + if (el['type'] == 'relation' and 'tags' in el and + el['tags'].get('route', None) in ('subway', 'light_rail')): + for m in el['members']: + st = stations.get(el_id(m), None) + if st and 'station' not in st['tags']: + st['tags']['station'] = el['tags']['route'] + st['modified'] = True + + # Create a kd-tree out of subway stations + stations = kdtree.create(dimensions=2) + for el in src: + if 'tags' in el and el['tags'].get('station', None) in ('subway', 'light_rail'): + stations.add(StationWrapper(el)) + + # Populate a list of nearby subway exits and platforms for each station + 
MAX_DISTANCE = 300 # meters + stop_areas = {} + for el in src: + if 'tags' not in el: + continue + if (el['tags'].get('railway', None) not in ('subway_entrance', 'platform') and + el['tags'].get('public_transport', None) not in ('platform', 'stop_position')): + continue + coords = el.get('center', el) + station = stations.search_nn((coords['lon'], coords['lat']))[0].data + if station.distance(coords) < MAX_DISTANCE: + k = (station.station['id'], station.station['tags']['name']) + # Disregard exits and platforms that are differently named + if el['tags'].get('name', k[1]) == k[1]: + if k not in stop_areas: + stop_areas[k] = {el_id(station.station): station.station} + stop_areas[k][el_id(el)] = el + + # Find existing stop_area relations for stations and remove these stations + for el in src: + if el['type'] == 'relation' and el['tags'].get('public_transport', None) == 'stop_area': + found = False + for m in el['members']: + if found: + break + for st in stop_areas: + if el_id(m) in stop_areas[st]: + del stop_areas[st] + found = True + break + + # Create OSM XML for new stop_area relations + root = etree.Element('osm', version='0.6') + rid = -1 + for st, members in stop_areas.items(): + rel = etree.SubElement(root, 'relation', id=str(rid)) + rid -= 1 + etree.SubElement(rel, 'tag', k='type', v='public_transport') + etree.SubElement(rel, 'tag', k='public_transport', v='stop_area') + etree.SubElement(rel, 'tag', k='name', v=st[1]) + for m in members.values(): + if m['tags'].get('railway', m['tags'].get('public_transport', None)) == 'platform': + role = 'platform' + elif m['tags'].get('public_transport', None) == 'stop_position': + role = 'stop' + else: + role = '' + etree.SubElement(rel, 'member', ref=str(m['id']), type=m['type'], role=role) + + # Add all downloaded elements + for el in src: + obj = etree.SubElement(root, el['type']) + for a in ('id', 'type', 'user', 'uid', 'version', 'changeset', 'timestamp', 'lat', 'lon'): + if a in el: + obj.set(a, str(el[a])) + if 
'modified' in el: + obj.set('action', 'modify') + if 'tags' in el: + for k, v in el['tags'].items(): + etree.SubElement(obj, 'tag', k=k, v=v) + if 'members' in el: + for m in el['members']: + etree.SubElement(obj, 'member', ref=str(m['ref']), + type=m['type'], role=m.get('role', '')) + if 'nodes' in el: + for n in el['nodes']: + etree.SubElement(obj, 'nd', ref=str(n)) + + return etree.tostring(root, pretty_print=True) + + +if __name__ == '__main__': + if len(sys.argv) < 2: + print('Read a JSON from Overpass and output JOSM OSM XML with added stop_area relations') + print('Usage: {} {|} [output.osm]'.format(sys.argv[0])) + sys.exit(1) + + if re.match(r'', sys.argv[1]): + src = overpass_request(sys.argv[1]) + else: + with open(sys.argv[1], 'r') as f: + src = json.load(f)['elements'] + + result = add_stop_areas(src) + + if len(sys.argv) < 3: + print(result.decode('utf-8')) + else: + with open(sys.argv[2], 'wb') as f: + f.write(result) diff --git a/stop_areas/serve.py b/stop_areas/serve.py new file mode 100755 index 0000000..264516c --- /dev/null +++ b/stop_areas/serve.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +from flask import Flask, request, make_response, render_template +from make_stop_areas import add_stop_areas, overpass_request + +app = Flask(__name__) +app.debug = True + + +@app.route('/') +def form(): + return render_template('index.html') + + +@app.route('/process', methods=['GET']) +def convert(): + bbox = request.args.get('bbox').split(',') + bbox_r = ','.join([bbox[i] for i in (1, 0, 3, 2)]) + src = overpass_request(bbox_r) + if not src: + return 'No data from overpass, sorry.' 
+ result = add_stop_areas(src) + response = make_response(result) + response.headers['Content-Disposition'] = 'attachment; filename="stop_areas.osm"' + return response + +if __name__ == '__main__': + app.run() diff --git a/stop_areas/templates/index.html b/stop_areas/templates/index.html new file mode 100644 index 0000000..e9b7738 --- /dev/null +++ b/stop_areas/templates/index.html @@ -0,0 +1,68 @@ + + + + + + Make Stop Areas + + + + + + + +
+ + + diff --git a/subway_structure.py b/subway_structure.py new file mode 100644 index 0000000..457e33d --- /dev/null +++ b/subway_structure.py @@ -0,0 +1,408 @@ +import csv +import logging +import math +import urllib.parse +import urllib.request + + +SPREADSHEET_ID = '1-UHDzfBwHdeyFxgC5cE_MaNQotF3-Y0r1nW9IwpIEj8' +MAX_DISTANCE_NEARBY = 150 # in meters + + +def el_id(el): + if 'type' not in el: + raise Exception('What is this element? {}'.format(el)) + return el['type'][0] + str(el.get('id', el.get('ref', ''))) + + +def el_center(el): + if 'lat' in el: + return (el['lon'], el['lat']) + elif 'center' in el: + if el['center']['lat'] == 0.0: + # Some relations don't have centers. We need route_masters and stop_area_groups. + if el['type'] == 'relation' and 'tags' in el and ( + el['tags'].get('type', None) == 'route_master' or + el['tags'].get('public_transport', None) == 'stop_area_group'): + return None + return (el['center']['lon'], el['center']['lat']) + return None + + +def distance(p1, p2): + if p1 is None or p2 is None: + return None + dx = math.radians(p1[0] - p2[0]) * math.cos( + 0.5 * math.radians(p1[1] + p2[1])) + dy = math.radians(p1[1] - p2[1]) + return 6378137 * math.sqrt(dx*dx + dy*dy) + + +class Station: + @staticmethod + def is_station(el): + if el.get('tags', {}).get('railway', None) != 'station': + return False + if 'construction' in el['tags'] or 'proposed' in el['tags']: + return False + if (el['tags'].get('station', None) not in ('subway', 'light_rail') and + el['tags'].get('subway', None) != 'yes' and + el['tags'].get('light_rail', None) != 'yes'): + return False + return True + + def __init__(self, el, city): + """Call this with a railway=station node.""" + if el.get('tags', {}).get('railway', None) != 'station': + raise Exception( + 'Station object should be instantiated from a station node. 
Got: {}'.format(el)) + if not Station.is_station(el): + raise Exception('Processing only subway and light rail stations') + + if el['type'] != 'node': + city.warn('Station is not a node', el) + self.element = el + self.is_light = (el['tags'].get('station', None) == 'light_rail' or + el['tags'].get('light_rail', None) == 'yes') + self.id = el_id(el) + self.elements = set([self.id]) + if self.id in city.stations: + city.error('Station {} {} is listed in two stop_areas, first one:'.format( + el['type'], el['id']), city.stations[self.id].element) + + # Find a stop_area relation + self.stop_area = None + nearby = [] + center = el_center(el) + if center is None: + raise Exception('Could not find center of {}'.format(el)) + for d in city.elements.values(): + if 'tags' not in d: + continue + # If it's a stop_area relation containing "el", record it + if d['type'] == 'relation' and d['tags'].get('public_transport', None) == 'stop_area': + for m in d['members']: + if m['type'] == el['type'] and m['ref'] == el['id']: + self.stop_area = d + break + # Otherwise record all platforms, stops and entrances nearby + elif d['type'] != 'relation' and ( + d['tags'].get('railway', None) in ('platform', 'subway_entrance') or + d['tags'].get('public_transport', None) in ('platform', 'stop_position')): + # Take care to not add other stations + if 'station' not in d['tags']: + d_center = el_center(d) + if d_center is not None and distance(center, d_center) <= MAX_DISTANCE_NEARBY: + nearby.append(d) + if self.stop_area: + break + + if self.stop_area: + # If we have a stop area, add all elements from it + self.elements.add(el_id(self.stop_area)) + for m in self.stop_area['members']: + k = el_id(m) + if k in city.elements: + self.elements.add(k) + else: + # Otherwise add nearby entrances and stop positions + for k in nearby: + self.elements.add(el_id(k)) + + # TODO: Set name, colour etc. 
+ self.name = el['tags'].get('name', 'Unknown') + self.colour = el['tags'].get('colour', None) + + def contains(self, el): + return el_id(el) in self.elements + + +class Route: + """The longest route for a city with a unique ref.""" + @staticmethod + def is_route(el): + if el['type'] != 'relation' or el.get('tags', {}).get('type', None) != 'route': + return False + if 'members' not in el: + return False + if el['tags'].get('route', None) not in ('subway', 'light_rail'): + return False + if 'construction' in el['tags'] or 'proposed' in el['tags']: + return False + if 'ref' not in el['tags'] and 'name' not in el['tags']: + return False + return True + + @staticmethod + def get_network(relation): + return relation['tags'].get('network', relation['tags'].get('operator', None)) + + def __init__(self, relation, city): + if not Route.is_route(relation): + raise Exception('The relation does not seem a route: {}'.format(relation)) + self.element = relation + self.id = el_id(relation) + if 'ref' not in relation['tags']: + city.warn('Missing ref on a route', relation) + self.ref = relation['tags'].get('ref', relation['tags'].get('name', None)) + if 'colour' not in relation['tags']: + city.warn('Missing colour on a route', relation) + self.colour = relation['tags'].get('colour', None) + self.network = Route.get_network(relation) + self.is_light = relation['tags']['route'] == 'light_rail' + self.rails = [] + self.stops = [] + enough_stops = False + for m in relation['members']: + k = el_id(m) + if k in city.stations: + st = city.stations[k] + if not self.stops or self.stops[-1] != st: + if enough_stops: + if st not in self.stops: + city.warn('Inconsistent platform-stop "{}" in route'.format(st.name), + relation) + elif st not in self.stops: + self.stops.append(st) + if self.is_light and not st.is_light: + city.warn('Subway station "{}" in light rail route'.format(st.name), + relation) + elif st.is_light and not self.is_light: + city.warn('Light rail station "{}" in subway 
class RouteMaster:
    """A group of route variants sharing one ref (usually the two directions).

    Keeps every variant in self.routes and tracks the variant with the most
    stops in self.best, which is later used for station counting.
    """

    def __init__(self, route):
        self.routes = [route]
        self.best = route
        self.ref = route.ref
        self.network = route.network
        self.is_light = route.is_light

    def add(self, route, city):
        """Adds a route variant after checking it is compatible with the master.

        Reports mismatching network/ref via the city logger; an is_light
        mismatch is a hard error and the route is not added at all.
        """
        if route.network != self.network:
            city.error('Route has different network ("{}") from master "{}"'.format(
                route.network, self.network), route.element)
        if route.ref != self.ref:
            city.warn('Route "{}" has different ref from master "{}"'.format(
                route.ref, self.ref), route.element)
        if route.is_light != self.is_light:
            city.error('Incompatible is_light flag: master has {} and route has {}'.format(
                self.is_light, route.is_light), route.element)
            # Mixing subway and light rail variants would corrupt line
            # counting, so refuse the route.
            return
        self.routes.append(route)
        # The longest variant becomes the reference route of the master.
        if len(route.stops) > len(self.best.stops):
            self.best = route

    def __len__(self):
        return len(self.routes)

    def __getitem__(self, i):
        # Was mistakenly named __get__ (the descriptor protocol method),
        # which made indexing a RouteMaster fail; __getitem__ enables
        # master[i] as intended.
        return self.routes[i]
    def warn(self, message, el=None):
        # Counts and logs a non-fatal data problem for this city.
        self.warnings += 1
        self.log(logging.WARNING, message, el)

    def error(self, message, el=None):
        # Counts and logs a data error; errors feed into validation totals.
        self.errors += 1
        self.log(logging.ERROR, message, el)

    def make_transfer(self, sag):
        """Records an interchange from a stop_area_group relation.

        All members must resolve to known stations, otherwise the whole
        group is silently skipped (it likely spans outside this city).
        """
        transfer = set()
        for m in sag['members']:
            k = el_id(m)
            if k not in self.stations:
                return
            transfer.add(self.stations[k])
        if transfer:
            self.transfers.append(transfer)

    def extract_routes(self):
        """Builds stations, route masters and transfers from self.elements.

        Must be called after all elements have been fed through add();
        populates self.stations, self.station_ids, self.routes and
        self.transfers.
        """
        # First pass: wrap every station element and index it by each member
        # of its stop area, so route members can resolve to a Station.
        for el in self.elements.values():
            if Station.is_station(el):
                station = Station(el, self)
                self.station_ids.add(station.id)
                # All elements of the station's stop_area map to the same
                # Station object.
                for e in station.elements:
                    self.stations[e] = station

        # Second pass: group route relations into RouteMaster objects,
        # keyed by the route_master relation id or, lacking one, by ref;
        # also collect stop_area_group relations as transfers.
        for el in self.elements.values():
            if Route.is_route(el):
                # Skip routes of foreign operators when the city spreadsheet
                # lists specific networks.
                if self.networks and Route.get_network(el) not in self.networks:
                    continue
                route = Route(el, self)
                k = self.masters.get(route.id, route.ref)
                if k not in self.routes:
                    self.routes[k] = RouteMaster(route)
                else:
                    self.routes[k].add(route, self)

            if (el['type'] == 'relation' and
                    el.get('tags', {}).get('public_transport', None) == 'stop_area_group'):
                self.make_transfer(el)
def download_cities():
    """Downloads the reference city list from the Google spreadsheet.

    Returns a list of City objects built from rows that have a bounding
    box in column 8; warns about duplicate city names in the sheet.
    Raises an exception when the spreadsheet cannot be fetched.
    """
    url = 'https://docs.google.com/spreadsheets/d/{}/export?format=csv'.format(SPREADSHEET_ID)
    # Use the response as a context manager so the connection is closed
    # even if reading or decoding fails.
    with urllib.request.urlopen(url) as response:
        if response.getcode() != 200:
            raise Exception('Failed to download cities spreadsheet: HTTP {}'.format(
                response.getcode()))
        data = response.read().decode('utf-8')
    r = csv.reader(data.splitlines())
    next(r)  # skipping the header
    names = set()
    cities = []
    for row in r:
        # Column 7 holds the bounding box; rows without it are reference
        # entries and produce no City.
        if len(row) > 7 and row[7]:
            cities.append(City(row))
            name = row[0].strip()
            if name in names:
                logging.warning('Duplicate city name in the google spreadsheet: %s', row[0])
            names.add(name)
    return cities
--git a/v2h_templates.py b/v2h_templates.py new file mode 100644 index 0000000..a90f4ad --- /dev/null +++ b/v2h_templates.py @@ -0,0 +1,143 @@ +# These are templates for validation_to_html.py +# Variables should be in curly braces + +STYLE = ''' + +''' + +INDEX_HEADER = ''' + + + +Subway Validator + +(s) + + +

Subway Validation Results

+ +'''.replace('(s)', STYLE) + +INDEX_CONTINENT = ''' + + + + + + + + + + + + + + + + + + + + + + +{content} +''' + +INDEX_COUNTRY = ''' + + + + + + + + + + + +''' + +INDEX_FOOTER = ''' +
 
ContinentCountryGood CitiesSubway LinesLight Rail LinesStationsInterchangesErrorsWarnings
{continent}{good_cities} / {total_cities}{subwayl_found} / {subwayl_expected}{lightrl_found} / {lightrl_expected}{stations_found} / {stations_expected}{transfers_found} / {transfers_expected}{num_errors}{num_warnings}
 {country}{good_cities} / {total_cities}{subwayl_found} / {subwayl_expected}{lightrl_found} / {lightrl_expected}{stations_found} / {stations_expected}{transfers_found} / {transfers_expected}{num_errors}{num_warnings}
+

Produced by Subway Preprocessor on {date}. +See this spreadsheet for the reference metro statistics and +this wiki page for a list +of all metro systems.

+ + +''' + +COUNTRY_HEADER = ''' + + + +Subway Validator: {country} + +(s) + + +

Subway Validation Results for {country}

+

Return to the countries list.

+ + + + + + + + + +'''.replace('(s)', STYLE) + +COUNTRY_CITY = ''' + + + + + + + + + +''' + +COUNTRY_FOOTER = ''' +
CitySubway LinesLight Rail LinesStationsInterchangesUnused Entrances
{city}sub: {subwayl_found} / {subwayl_expected}lr: {lightrl_found} / {lightrl_expected}st: {stations_found} / {stations_expected}int: {transfers_found} / {transfers_expected}e: {unused_entrances}
+
+{errors} +
+{warnings} +
+
+

Produced by Subway Preprocessor on {date}.

class CityData:
    """Aggregated validation counters for one city (or a sum of cities).

    Instances support "+" so per-city data can be summed into country and
    continent totals, and format(), which substitutes {key} values and
    {=key} equality flags into an HTML template.
    """
    # Validator error messages that carry found/expected numbers.
    REGEXPS = (
        ('subwayl', re.compile(r'Found (\d+) subway lines, expected (\d+)')),
        ('lightrl', re.compile(r'Found (\d+) light rail.*expected (\d+)')),
        ('stations', re.compile(r'Found (\d+) stations.*expected (\d+)')),
        ('transfers', re.compile(r'Found (\d+) interch.*expected (\d+)')),
    )

    def __init__(self, city=None):
        # A falsy city makes an all-zero accumulator for summing.
        self.city = city is not None
        # Keep the message lists unconditional so summed accumulators
        # cannot hit an AttributeError.
        self.errors = []
        self.warnings = []
        if city:
            self.country = city.country
            self.continent = city.continent
        self.data = {
            'stations_expected': city.num_stations if city else 0,
            'subwayl_expected': city.num_lines if city else 0,
            'lightrl_expected': city.num_light_lines if city else 0,
            'transfers_expected': city.num_interchanges if city else 0,
            'unused_entrances': 0,
            'good_cities': 1 if city else 0,
            'total_cities': 1 if city else 0,
            'num_errors': 0,
            'num_warnings': 0,
        }
        # Until the log proves otherwise, assume found == expected.
        for k, _ in CityData.REGEXPS:
            self.data[k + '_found'] = self.data[k + '_expected']

    def __getitem__(self, i):
        # Was mistakenly named __get__ (descriptor protocol); __getitem__
        # makes data[key] item access work as intended.
        return self.data[i]

    def __setitem__(self, i, value):
        # Counterpart fix: was __set__, which never applied to item access.
        self.data[i] = value

    def __add__(self, other):
        """Returns a new CityData with per-key sums of both counters."""
        d = CityData()
        for k in d.data:
            d.data[k] = self.data[k] + other.data[k]
        return d

    def format(self, s):
        """Substitutes {key} values and {=key} '1'/'0' equality flags in s."""
        def test_eq(v1, v2):
            return '1' if v1 == v2 else '0'

        for k in self.data:
            s = s.replace('{' + k + '}', str(self.data[k]))
        for k in ('subwayl', 'lightrl', 'stations', 'transfers'):
            s = s.replace('{=' + k + '}',
                          test_eq(self.data[k + '_found'], self.data[k + '_expected']))
        s = s.replace('{=cities}',
                      test_eq(self.data['good_cities'], self.data['total_cities']))
        s = s.replace('{=entrances}', test_eq(self.data['unused_entrances'], 0))
        for k in ('errors', 'warnings'):
            s = s.replace('{=' + k + '}', test_eq(self.data['num_' + k], 0))
        return s

    def add_warning(self, msg):
        # Warnings do not disqualify the city from the "good" count.
        self.warnings.append(msg)
        self.data['num_warnings'] += 1

    def add_error(self, msg):
        """Records an error and harvests found/expected numbers from it."""
        for k, reg in CityData.REGEXPS:
            m = reg.search(msg)
            if m:
                self.data[k + '_found'] = int(m[1])
                self.data[k + '_expected'] = int(m[2])
        m = re.search(r'Found (\d+) unused subway e', msg)
        if m:
            self.data['unused_entrances'] = int(m[1])
        self.errors.append(msg)
        self.data['num_errors'] += 1
        # Any error disqualifies the city from the "good" count.
        self.data['good_cities'] = 0
Exception('City {} not found in the cities list'.format(city_name)) + city = data[city_name] + if level == 'WARNING': + city.add_warning(msg) + elif level == 'ERROR': + city.add_error(msg) + +countries = {} +continents = {} +c_by_c = {} # continent → set of countries +for c in data.values(): + countries[c.country] = c + countries.get(c.country, CityData()) + continents[c.continent] = c + continents.get(c.continent, CityData()) + if c.continent not in c_by_c: + c_by_c[c.continent] = set() + c_by_c[c.continent].add(c.country) + +date = datetime.datetime.now().strftime('%d.%m.%Y %H:%M') +path = '.' if len(sys.argv) < 3 else sys.argv[2] +index = open(os.path.join(path, 'index.html'), 'w') +index.write(tmpl(INDEX_HEADER)) + +for continent in sorted(continents.keys()): + content = '' + for country in sorted(c_by_c[continent]): + country_file_name = country.lower().replace(' ', '-') + '.html' + content += tmpl(INDEX_COUNTRY, countries[country], file=country_file_name, + country=country, continent=continent) + country_file = open(os.path.join(path, country_file_name), 'w') + country_file.write(tmpl(COUNTRY_HEADER, country=country, continent=continent)) + for name, city in sorted(data.items()): + if city.country == country: + e = '
'.join([osm_links(esc(e)) for e in city.errors]) + w = '
'.join([osm_links(esc(w)) for w in city.warnings]) + country_file.write(tmpl(COUNTRY_CITY, city, + city=name, country=country, continent=continent, + errors=e, warnings=w)) + country_file.write(tmpl(COUNTRY_FOOTER, country=country, continent=continent)) + country_file.close() + index.write(tmpl(INDEX_CONTINENT, continents[continent], + content=content, continent=continent)) + +index.write(tmpl(INDEX_FOOTER)) +index.close()