diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml new file mode 100644 index 0000000..fbf35d3 --- /dev/null +++ b/.github/workflows/python-app.yml @@ -0,0 +1,39 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python application + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +permissions: + contents: read + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.8 + uses: actions/setup-python@v3 + with: + python-version: "3.8" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8==6.0.0 black==23.1.0 + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + flake8 + - name: Check with black + run: | + black --check --line-length 79 . + - name: Test with unittest + run: | + python -m unittest discover tests diff --git a/.gitignore b/.gitignore index 29ef132..f2fb32f 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ html/ *.yaml *.pyc *.txt +*.zip diff --git a/README.md b/README.md index 4b9beeb..0802e45 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ systems in the world from OpenStreetMap. `subway_structure.py` produces a list of disjunct systems that can be used for routing and for displaying of metro maps. +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + ## How To Validate @@ -51,7 +53,7 @@ a city's bbox has been extended. A single city or a country with few metro networks can be validated much faster if you allow the `process_subway.py` to fetch data from Overpass API. Here are the steps: -1. Python3 interpreter required (3.5+) +1. Python3 interpreter required (3.8+) 2. Clone the repo ``` git clone https://github.com/alexey-zakharenkov/subways.git subways_validator diff --git a/checkers/common.py b/checkers/common.py index b760b62..d336435 100644 --- a/checkers/common.py +++ b/checkers/common.py @@ -1,6 +1,6 @@ +import functools import logging import math -import functools """A coordinate of a station precision of which we must take into account @@ -16,42 +16,48 @@ def coords_eq(lon1, lat1, lon2, lat2): def osm_id_comparator(el): """This function is used as key for sorting lists of - OSM-originated objects + OSM-originated objects """ - return (el['osm_type'], el['osm_id']) + return (el["osm_type"], el["osm_id"]) def itinerary_comparator(itinerary): - "This function is used as key for sorting itineraries in a route""" - return (itinerary['stops'], itinerary['interval']) + """This function is used as key for sorting itineraries in a route""" + return (itinerary["stops"], itinerary["interval"]) def compare_stops(stop0, stop1): """Compares json of two stops in route""" - stop_keys = ('name', 'int_name', 'id', 'osm_id', 'osm_type') + stop_keys = ("name", "int_name", "id", "osm_id", "osm_type") stop0_props = tuple(stop0[k] for k in stop_keys) stop1_props = tuple(stop1[k] for k in stop_keys) if stop0_props != stop1_props: - logging.debug("Different stops properties: %s, %s", - stop0_props, stop1_props) + logging.debug( + "Different stops properties: %s, %s", stop0_props, stop1_props + ) return False - if not coords_eq(stop0['lon'], stop0['lat'], - stop1['lon'], stop1['lat']): - logging.debug("Different stops coordinates: %s (%f, %f), %s (%f, %f)", - stop0_props, stop0['lon'], stop0['lat'], - stop1_props, stop1['lon'], stop1['lat']) + if not coords_eq(stop0["lon"], stop0["lat"], stop1["lon"], stop1["lat"]): + logging.debug( + "Different stops coordinates: %s (%f, %f), %s (%f, %f)", + stop0_props, + stop0["lon"], + stop0["lat"], + stop1_props, + stop1["lon"], + stop1["lat"], + ) return False - entrances0 = sorted(stop0['entrances'], key=osm_id_comparator) - entrances1 = sorted(stop1['entrances'], key=osm_id_comparator) + entrances0 = sorted(stop0["entrances"], key=osm_id_comparator) + entrances1 = sorted(stop1["entrances"], key=osm_id_comparator) if entrances0 != entrances1: logging.debug("Different stop entrances") return False - exits0 = sorted(stop0['exits'], key=osm_id_comparator) - exits1 = sorted(stop1['exits'], key=osm_id_comparator) + exits0 = sorted(stop0["exits"], key=osm_id_comparator) + exits1 = sorted(stop1["exits"], key=osm_id_comparator) if exits0 != exits1: logging.debug("Different stop exits") return False @@ -61,21 +67,24 @@ def compare_stops(stop0, stop1): def compare_transfers(transfers0, transfers1): """Compares two arrays of transfers of the form - [(stop1_uid, stop2_uid, time), ...] + [(stop1_uid, stop2_uid, time), ...] """ if len(transfers0) != len(transfers1): - logging.debug("Different len(transfers): %d != %d", - len(transfers0), len(transfers1)) + logging.debug( + "Different len(transfers): %d != %d", + len(transfers0), + len(transfers1), + ) return False - transfers0 = [tuple([t[0], t[1], t[2]]) - if t[0] < t[1] else - tuple([t[1], t[0], t[2]]) - for t in transfers0] - transfers1 = [tuple([t[0], t[1], t[2]]) - if t[0] < t[1] else - tuple([t[1], t[0], t[2]]) - for t in transfers1] + transfers0 = [ + tuple([t[0], t[1], t[2]]) if t[0] < t[1] else tuple([t[1], t[0], t[2]]) + for t in transfers0 + ] + transfers1 = [ + tuple([t[0], t[1], t[2]]) if t[0] < t[1] else tuple([t[1], t[0], t[2]]) + for t in transfers1 + ] transfers0.sort() transfers1.sort() @@ -84,8 +93,9 @@ def compare_transfers(transfers0, transfers1): for tr0, tr1 in zip(transfers0, transfers1): if tr0 != tr1: if diff_cnt == 0: - logging.debug("First pair of different transfers: %s, %s", - tr0, tr1) + logging.debug( + "First pair of different transfers: %s, %s", tr0, tr1 + ) diff_cnt += 1 if diff_cnt: logging.debug("Different transfers number = %d", diff_cnt) @@ -95,46 +105,55 @@ def compare_transfers(transfers0, transfers1): def compare_networks(network0, network1): - if network0['agency_id'] != network1['agency_id']: - logging.debug("Different agency_id at route '%s'", - network0['network']) + if network0["agency_id"] != network1["agency_id"]: + logging.debug("Different agency_id at route '%s'", network0["network"]) return False - route_ids0 = sorted(x['route_id'] for x in network0['routes']) - route_ids1 = sorted(x['route_id'] for x in network1['routes']) + route_ids0 = sorted(x["route_id"] for x in network0["routes"]) + route_ids1 = sorted(x["route_id"] for x in network1["routes"]) if route_ids0 != route_ids1: - logging.debug("Different route_ids: %s != %s", - route_ids0, route_ids1) + logging.debug("Different route_ids: %s != %s", route_ids0, route_ids1) return False - routes0 = sorted(network0['routes'], key=lambda x: x['route_id']) - routes1 = sorted(network1['routes'], key=lambda x: x['route_id']) + routes0 = sorted(network0["routes"], key=lambda x: x["route_id"]) + routes1 = sorted(network1["routes"], key=lambda x: x["route_id"]) # Keys to compare routes. 'name' key is omitted since RouteMaster # can get its name from one of its Routes unpredictably. - route_keys = ('type', 'ref', 'colour', 'route_id') + route_keys = ("type", "ref", "colour", "route_id") for route0, route1 in zip(routes0, routes1): route0_props = tuple(route0[k] for k in route_keys) route1_props = tuple(route1[k] for k in route_keys) if route0_props != route1_props: - logging.debug("Route props of '%s' are different: %s, %s", - route0['route_id'], route0_props, route1_props) + logging.debug( + "Route props of '%s' are different: %s, %s", + route0["route_id"], + route0_props, + route1_props, + ) return False - itineraries0 = sorted(route0['itineraries'], key=itinerary_comparator) - itineraries1 = sorted(route1['itineraries'], key=itinerary_comparator) + itineraries0 = sorted(route0["itineraries"], key=itinerary_comparator) + itineraries1 = sorted(route1["itineraries"], key=itinerary_comparator) for itin0, itin1 in zip(itineraries0, itineraries1): - if itin0['interval'] != itin1['interval']: - logging.debug("Different interval: %d != %d at route %s '%s'", - itin0['interval'], itin1['interval'], - route0['route_id'], route0['name']) + if itin0["interval"] != itin1["interval"]: + logging.debug( + "Different interval: %d != %d at route %s '%s'", + itin0["interval"], + itin1["interval"], + route0["route_id"], + route0["name"], + ) return False - if itin0['stops'] != itin1['stops']: - logging.debug("Different stops at route %s '%s'", - route0['route_id'], route0['name']) + if itin0["stops"] != itin1["stops"]: + logging.debug( + "Different stops at route %s '%s'", + route0["route_id"], + route0["name"], + ) return False return True diff --git a/checkers/compare_city_caches.py b/checkers/compare_city_caches.py index c6c81bc..edba7d5 100644 --- a/checkers/compare_city_caches.py +++ b/checkers/compare_city_caches.py @@ -10,10 +10,11 @@ affect the process_subways.py output really doesn't change it. """ -import sys import json import logging -from common import compare_stops, compare_transfers, compare_networks +import sys + +from common import compare_networks, compare_stops, compare_transfers def compare_jsons(cache0, cache1): @@ -28,21 +29,21 @@ def compare_jsons(cache0, cache1): for name in city_names0: city0 = cache0[name] city1 = cache1[name] - if not compare_networks(city0['network'], city1['network']): + if not compare_networks(city0["network"], city1["network"]): return False - stop_ids0 = sorted(city0['stops'].keys()) - stop_ids1 = sorted(city1['stops'].keys()) + stop_ids0 = sorted(city0["stops"].keys()) + stop_ids1 = sorted(city1["stops"].keys()) if stop_ids0 != stop_ids1: logging.debug("Different stop_ids") return False - stops0 = [v for k, v in sorted(city0['stops'].items())] - stops1 = [v for k, v in sorted(city1['stops'].items())] + stops0 = [v for k, v in sorted(city0["stops"].items())] + stops1 = [v for k, v in sorted(city1["stops"].items())] for stop0, stop1 in zip(stops0, stops1): if not compare_stops(stop0, stop1): return False - if not compare_transfers(city0['transfers'], city1['transfers']): + if not compare_transfers(city0["transfers"], city1["transfers"]): return False return True @@ -57,8 +58,8 @@ if __name__ == "__main__": path0, path1 = sys.argv[1:3] - j0 = json.load(open(path0, encoding='utf-8')) - j1 = json.load(open(path1, encoding='utf-8')) + j0 = json.load(open(path0, encoding="utf-8")) + j1 = json.load(open(path1, encoding="utf-8")) equal = compare_jsons(j0, j1) diff --git a/checkers/compare_json_outputs.py b/checkers/compare_json_outputs.py index 8ded974..2c68f4b 100644 --- a/checkers/compare_json_outputs.py +++ b/checkers/compare_json_outputs.py @@ -10,38 +10,39 @@ affect the process_subways.py output really doesn't change it. """ -import sys import json import logging -from common import compare_stops, compare_transfers, compare_networks +import sys + +from common import compare_networks, compare_stops, compare_transfers def compare_jsons(result0, result1): """Compares two objects which are results of subway generation""" - network_names0 = sorted([x['network'] for x in result0['networks']]) - network_names1 = sorted([x['network'] for x in result1['networks']]) + network_names0 = sorted([x["network"] for x in result0["networks"]]) + network_names1 = sorted([x["network"] for x in result1["networks"]]) if network_names0 != network_names1: logging.debug("Different list of network names!") return False - networks0 = sorted(result0['networks'], key=lambda x: x['network']) - networks1 = sorted(result1['networks'], key=lambda x: x['network']) + networks0 = sorted(result0["networks"], key=lambda x: x["network"]) + networks1 = sorted(result1["networks"], key=lambda x: x["network"]) for network0, network1 in zip(networks0, networks1): if not compare_networks(network0, network1): return False - stop_ids0 = sorted(x['id'] for x in result0['stops']) - stop_ids1 = sorted(x['id'] for x in result1['stops']) + stop_ids0 = sorted(x["id"] for x in result0["stops"]) + stop_ids1 = sorted(x["id"] for x in result1["stops"]) if stop_ids0 != stop_ids1: logging.debug("Different stop_ids") return False - stops0 = sorted(result0['stops'], key=lambda x: x['id']) - stops1 = sorted(result1['stops'], key=lambda x: x['id']) + stops0 = sorted(result0["stops"], key=lambda x: x["id"]) + stops1 = sorted(result1["stops"], key=lambda x: x["id"]) for stop0, stop1 in zip(stops0, stops1): if not compare_stops(stop0, stop1): return False - if not compare_transfers(result0['transfers'], result1['transfers']): + if not compare_transfers(result0["transfers"], result1["transfers"]): return False return True @@ -56,8 +57,8 @@ if __name__ == "__main__": path0, path1 = sys.argv[1:3] - j0 = json.load(open(path0, encoding='utf-8')) - j1 = json.load(open(path1, encoding='utf-8')) + j0 = json.load(open(path0, encoding="utf-8")) + j1 = json.load(open(path1, encoding="utf-8")) equal = compare_jsons(j0, j1) diff --git a/css_colours.py b/css_colours.py index 0dea3e9..7218054 100644 --- a/css_colours.py +++ b/css_colours.py @@ -2,153 +2,153 @@ import re # Source: https://www.w3.org/TR/css3-color/#svg-color CSS_COLOURS = { - 'aliceblue': '#f0f8ff', - 'antiquewhite': '#faebd7', - 'aqua': '#00ffff', - 'aquamarine': '#7fffd4', - 'azure': '#f0ffff', - 'beige': '#f5f5dc', - 'bisque': '#ffe4c4', - 'black': '#000000', - 'blanchedalmond': '#ffebcd', - 'blue': '#0000ff', - 'blueviolet': '#8a2be2', - 'brown': '#a52a2a', - 'burlywood': '#deb887', - 'cadetblue': '#5f9ea0', - 'chartreuse': '#7fff00', - 'chocolate': '#d2691e', - 'coral': '#ff7f50', - 'cornflowerblue': '#6495ed', - 'cornsilk': '#fff8dc', - 'crimson': '#dc143c', - 'cyan': '#00ffff', - 'darkblue': '#00008b', - 'darkcyan': '#008b8b', - 'darkgoldenrod': '#b8860b', - 'darkgray': '#a9a9a9', - 'darkgreen': '#006400', - 'darkgrey': '#a9a9a9', - 'darkkhaki': '#bdb76b', - 'darkmagenta': '#8b008b', - 'darkolivegreen': '#556b2f', - 'darkorange': '#ff8c00', - 'darkorchid': '#9932cc', - 'darkred': '#8b0000', - 'darksalmon': '#e9967a', - 'darkseagreen': '#8fbc8f', - 'darkslateblue': '#483d8b', - 'darkslategray': '#2f4f4f', - 'darkslategrey': '#2f4f4f', - 'darkturquoise': '#00ced1', - 'darkviolet': '#9400d3', - 'deeppink': '#ff1493', - 'deepskyblue': '#00bfff', - 'dimgray': '#696969', - 'dimgrey': '#696969', - 'dodgerblue': '#1e90ff', - 'firebrick': '#b22222', - 'floralwhite': '#fffaf0', - 'forestgreen': '#228b22', - 'fuchsia': '#ff00ff', - 'gainsboro': '#dcdcdc', - 'ghostwhite': '#f8f8ff', - 'gold': '#ffd700', - 'goldenrod': '#daa520', - 'gray': '#808080', - 'green': '#008000', - 'greenyellow': '#adff2f', - 'grey': '#808080', - 'honeydew': '#f0fff0', - 'hotpink': '#ff69b4', - 'indianred': '#cd5c5c', - 'indigo': '#4b0082', - 'ivory': '#fffff0', - 'khaki': '#f0e68c', - 'lavender': '#e6e6fa', - 'lavenderblush': '#fff0f5', - 'lawngreen': '#7cfc00', - 'lemonchiffon': '#fffacd', - 'lightblue': '#add8e6', - 'lightcoral': '#f08080', - 'lightcyan': '#e0ffff', - 'lightgoldenrodyellow': '#fafad2', - 'lightgray': '#d3d3d3', - 'lightgreen': '#90ee90', - 'lightgrey': '#d3d3d3', - 'lightpink': '#ffb6c1', - 'lightsalmon': '#ffa07a', - 'lightseagreen': '#20b2aa', - 'lightskyblue': '#87cefa', - 'lightslategray': '#778899', - 'lightslategrey': '#778899', - 'lightsteelblue': '#b0c4de', - 'lightyellow': '#ffffe0', - 'lime': '#00ff00', - 'limegreen': '#32cd32', - 'linen': '#faf0e6', - 'magenta': '#ff00ff', - 'maroon': '#800000', - 'mediumaquamarine': '#66cdaa', - 'mediumblue': '#0000cd', - 'mediumorchid': '#ba55d3', - 'mediumpurple': '#9370db', - 'mediumseagreen': '#3cb371', - 'mediumslateblue': '#7b68ee', - 'mediumspringgreen': '#00fa9a', - 'mediumturquoise': '#48d1cc', - 'mediumvioletred': '#c71585', - 'midnightblue': '#191970', - 'mintcream': '#f5fffa', - 'mistyrose': '#ffe4e1', - 'moccasin': '#ffe4b5', - 'navajowhite': '#ffdead', - 'navy': '#000080', - 'oldlace': '#fdf5e6', - 'olive': '#808000', - 'olivedrab': '#6b8e23', - 'orange': '#ffa500', - 'orangered': '#ff4500', - 'orchid': '#da70d6', - 'palegoldenrod': '#eee8aa', - 'palegreen': '#98fb98', - 'paleturquoise': '#afeeee', - 'palevioletred': '#db7093', - 'papayawhip': '#ffefd5', - 'peachpuff': '#ffdab9', - 'peru': '#cd853f', - 'pink': '#ffc0cb', - 'plum': '#dda0dd', - 'powderblue': '#b0e0e6', - 'purple': '#800080', - 'red': '#ff0000', - 'rosybrown': '#bc8f8f', - 'royalblue': '#4169e1', - 'saddlebrown': '#8b4513', - 'salmon': '#fa8072', - 'sandybrown': '#f4a460', - 'seagreen': '#2e8b57', - 'seashell': '#fff5ee', - 'sienna': '#a0522d', - 'silver': '#c0c0c0', - 'skyblue': '#87ceeb', - 'slateblue': '#6a5acd', - 'slategray': '#708090', - 'slategrey': '#708090', - 'snow': '#fffafa', - 'springgreen': '#00ff7f', - 'steelblue': '#4682b4', - 'tan': '#d2b48c', - 'teal': '#008080', - 'thistle': '#d8bfd8', - 'tomato': '#ff6347', - 'turquoise': '#40e0d0', - 'violet': '#ee82ee', - 'wheat': '#f5deb3', - 'white': '#ffffff', - 'whitesmoke': '#f5f5f5', - 'yellow': '#ffff00', - 'yellowgreen': '#9acd32', + "aliceblue": "#f0f8ff", + "antiquewhite": "#faebd7", + "aqua": "#00ffff", + "aquamarine": "#7fffd4", + "azure": "#f0ffff", + "beige": "#f5f5dc", + "bisque": "#ffe4c4", + "black": "#000000", + "blanchedalmond": "#ffebcd", + "blue": "#0000ff", + "blueviolet": "#8a2be2", + "brown": "#a52a2a", + "burlywood": "#deb887", + "cadetblue": "#5f9ea0", + "chartreuse": "#7fff00", + "chocolate": "#d2691e", + "coral": "#ff7f50", + "cornflowerblue": "#6495ed", + "cornsilk": "#fff8dc", + "crimson": "#dc143c", + "cyan": "#00ffff", + "darkblue": "#00008b", + "darkcyan": "#008b8b", + "darkgoldenrod": "#b8860b", + "darkgray": "#a9a9a9", + "darkgreen": "#006400", + "darkgrey": "#a9a9a9", + "darkkhaki": "#bdb76b", + "darkmagenta": "#8b008b", + "darkolivegreen": "#556b2f", + "darkorange": "#ff8c00", + "darkorchid": "#9932cc", + "darkred": "#8b0000", + "darksalmon": "#e9967a", + "darkseagreen": "#8fbc8f", + "darkslateblue": "#483d8b", + "darkslategray": "#2f4f4f", + "darkslategrey": "#2f4f4f", + "darkturquoise": "#00ced1", + "darkviolet": "#9400d3", + "deeppink": "#ff1493", + "deepskyblue": "#00bfff", + "dimgray": "#696969", + "dimgrey": "#696969", + "dodgerblue": "#1e90ff", + "firebrick": "#b22222", + "floralwhite": "#fffaf0", + "forestgreen": "#228b22", + "fuchsia": "#ff00ff", + "gainsboro": "#dcdcdc", + "ghostwhite": "#f8f8ff", + "gold": "#ffd700", + "goldenrod": "#daa520", + "gray": "#808080", + "green": "#008000", + "greenyellow": "#adff2f", + "grey": "#808080", + "honeydew": "#f0fff0", + "hotpink": "#ff69b4", + "indianred": "#cd5c5c", + "indigo": "#4b0082", + "ivory": "#fffff0", + "khaki": "#f0e68c", + "lavender": "#e6e6fa", + "lavenderblush": "#fff0f5", + "lawngreen": "#7cfc00", + "lemonchiffon": "#fffacd", + "lightblue": "#add8e6", + "lightcoral": "#f08080", + "lightcyan": "#e0ffff", + "lightgoldenrodyellow": "#fafad2", + "lightgray": "#d3d3d3", + "lightgreen": "#90ee90", + "lightgrey": "#d3d3d3", + "lightpink": "#ffb6c1", + "lightsalmon": "#ffa07a", + "lightseagreen": "#20b2aa", + "lightskyblue": "#87cefa", + "lightslategray": "#778899", + "lightslategrey": "#778899", + "lightsteelblue": "#b0c4de", + "lightyellow": "#ffffe0", + "lime": "#00ff00", + "limegreen": "#32cd32", + "linen": "#faf0e6", + "magenta": "#ff00ff", + "maroon": "#800000", + "mediumaquamarine": "#66cdaa", + "mediumblue": "#0000cd", + "mediumorchid": "#ba55d3", + "mediumpurple": "#9370db", + "mediumseagreen": "#3cb371", + "mediumslateblue": "#7b68ee", + "mediumspringgreen": "#00fa9a", + "mediumturquoise": "#48d1cc", + "mediumvioletred": "#c71585", + "midnightblue": "#191970", + "mintcream": "#f5fffa", + "mistyrose": "#ffe4e1", + "moccasin": "#ffe4b5", + "navajowhite": "#ffdead", + "navy": "#000080", + "oldlace": "#fdf5e6", + "olive": "#808000", + "olivedrab": "#6b8e23", + "orange": "#ffa500", + "orangered": "#ff4500", + "orchid": "#da70d6", + "palegoldenrod": "#eee8aa", + "palegreen": "#98fb98", + "paleturquoise": "#afeeee", + "palevioletred": "#db7093", + "papayawhip": "#ffefd5", + "peachpuff": "#ffdab9", + "peru": "#cd853f", + "pink": "#ffc0cb", + "plum": "#dda0dd", + "powderblue": "#b0e0e6", + "purple": "#800080", + "red": "#ff0000", + "rosybrown": "#bc8f8f", + "royalblue": "#4169e1", + "saddlebrown": "#8b4513", + "salmon": "#fa8072", + "sandybrown": "#f4a460", + "seagreen": "#2e8b57", + "seashell": "#fff5ee", + "sienna": "#a0522d", + "silver": "#c0c0c0", + "skyblue": "#87ceeb", + "slateblue": "#6a5acd", + "slategray": "#708090", + "slategrey": "#708090", + "snow": "#fffafa", + "springgreen": "#00ff7f", + "steelblue": "#4682b4", + "tan": "#d2b48c", + "teal": "#008080", + "thistle": "#d8bfd8", + "tomato": "#ff6347", + "turquoise": "#40e0d0", + "violet": "#ee82ee", + "wheat": "#f5deb3", + "white": "#ffffff", + "whitesmoke": "#f5f5f5", + "yellow": "#ffff00", + "yellowgreen": "#9acd32", } @@ -158,8 +158,8 @@ def normalize_colour(c): c = c.strip().lower() if c in CSS_COLOURS: return CSS_COLOURS[c] - if re.match(r'^#?[0-9a-f]{3}([0-9a-f]{3})?$', c): + if re.match(r"^#?[0-9a-f]{3}([0-9a-f]{3})?$", c): if len(c) == 4: - return c[0]+c[1]+c[1]+c[2]+c[2]+c[3]+c[3] + return c[0] + c[1] + c[1] + c[2] + c[2] + c[3] + c[3] return c - raise ValueError('Unknown colour code: {}'.format(c)) + raise ValueError("Unknown colour code: {}".format(c)) diff --git a/make_all_metro_poly.py b/make_all_metro_poly.py index 610892d..00281a7 100644 --- a/make_all_metro_poly.py +++ b/make_all_metro_poly.py @@ -1,20 +1,23 @@ +import argparse + import shapely.geometry import shapely.ops -from process_subways import download_cities +from process_subways import DEFAULT_CITIES_INFO_URL, get_cities_info -def make_disjoint_metro_polygons(): - cities = download_cities() +def make_disjoint_metro_polygons(cities_info_url: str) -> None: + cities_info = get_cities_info(cities_info_url) polygons = [] - for c in cities: + for ci in cities_info: + bbox = tuple(map(float, ci["bbox"].split(","))) polygon = shapely.geometry.Polygon( [ - (c.bbox[1], c.bbox[0]), - (c.bbox[1], c.bbox[2]), - (c.bbox[3], c.bbox[2]), - (c.bbox[3], c.bbox[0]), + (bbox[0], bbox[1]), + (bbox[0], bbox[3]), + (bbox[2], bbox[3]), + (bbox[2], bbox[1]), ] ) polygons.append(polygon) @@ -31,5 +34,19 @@ def make_disjoint_metro_polygons(): print("END") -if __name__ == '__main__': - make_disjoint_metro_polygons() +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + "--cities-info-url", + default=DEFAULT_CITIES_INFO_URL, + help=( + "URL of CSV file with reference information about rapid transit " + "networks. file:// protocol is also supported." + ), + ) + options = parser.parse_args() + make_disjoint_metro_polygons(options.cities_info_url) + + +if __name__ == "__main__": + main() diff --git a/mapsme_json_to_cities.py b/mapsme_json_to_cities.py index 4b8fea8..1c69a77 100644 --- a/mapsme_json_to_cities.py +++ b/mapsme_json_to_cities.py @@ -1,28 +1,41 @@ import argparse import json -from process_subways import download_cities +from process_subways import DEFAULT_CITIES_INFO_URL, get_cities_info -if __name__ == '__main__': +if __name__ == "__main__": arg_parser = argparse.ArgumentParser( - description=""" - This script generates a list of good/all network names. - It is used by subway render to generate the list of network at frontend. - It uses two sources: a mapsme.json validator output with good networks, and - a google spreadsheet with networks for the process_subways.download_cities() - function.""", + description=( + """This script generates a list of good/all network names. It is + used by subway render to generate the list of network at frontend. + It uses two sources: a mapsme.json validator output with good + networks, and a google spreadsheet with networks for the + process_subways.download_cities() function.""" + ), formatter_class=argparse.RawTextHelpFormatter, ) arg_parser.add_argument( - 'subway_json_file', - type=argparse.FileType('r'), - help="Validator output defined by -o option of process_subways.py script", + "subway_json_file", + type=argparse.FileType("r"), + help=( + "Validator output defined by -o option " + "of process_subways.py script", + ), ) arg_parser.add_argument( - '--with-bad', + "--cities-info-url", + default=DEFAULT_CITIES_INFO_URL, + help=( + "URL of CSV file with reference information about rapid transit " + "networks. file:// protocol is also supported." + ), + ) + + arg_parser.add_argument( + "--with-bad", action="store_true", help="Whether to include cities validation of which was failed", ) @@ -34,16 +47,16 @@ if __name__ == '__main__': subway_json = json.load(subway_json_file) good_cities = set( - n.get('network', n.get('title')) for n in subway_json['networks'] + n.get("network", n.get("title")) for n in subway_json["networks"] ) - cities = download_cities() + cities_info = get_cities_info(args.cities_info_url) lines = [] - for c in cities: - if c.name in good_cities: - lines.append(f"{c.name}, {c.country}") + for ci in cities_info: + if ci["name"] in good_cities: + lines.append(f"{ci['name']}, {ci['country']}") elif with_bad: - lines.append(f"{c.name}, {c.country} (Bad)") + lines.append(f"{ci['name']}, {ci['country']} (Bad)") for line in sorted(lines): print(line) diff --git a/process_subways.py b/process_subways.py index 1a25207..89e1021 100755 --- a/process_subways.py +++ b/process_subways.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 import argparse +import csv +import inspect import json import logging import os @@ -8,7 +10,10 @@ import sys import time import urllib.parse import urllib.request -from processors import processor +from functools import partial +from typing import Dict, List, Optional, Tuple + +import processors from subway_io import ( dump_yaml, load_xml, @@ -17,8 +22,8 @@ from subway_io import ( write_recovery_data, ) from subway_structure import ( + City, CriticalValidationError, - download_cities, find_transfers, get_unused_entrances_geojson, MODES_OVERGROUND, @@ -26,31 +31,38 @@ from subway_structure import ( ) +DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k" +DEFAULT_CITIES_INFO_URL = ( + "https://docs.google.com/spreadsheets/d/" + f"{DEFAULT_SPREADSHEET_ID}/export?format=csv" +) + +Point = Tuple[float, float] + + def overpass_request(overground, overpass_api, bboxes): - query = '[out:json][timeout:1000];(' + query = "[out:json][timeout:1000];(" modes = MODES_OVERGROUND if overground else MODES_RAPID for bbox in bboxes: - bbox_part = '({})'.format(','.join(str(coord) for coord in bbox)) - query += '(' + bbox_part = "({})".format(",".join(str(coord) for coord in bbox)) + query += "(" for mode in modes: query += 'rel[route="{}"]{};'.format(mode, bbox_part) - query += ');' - query += 'rel(br)[type=route_master];' + query += ");" + query += "rel(br)[type=route_master];" if not overground: - query += 'node[railway=subway_entrance]{};'.format(bbox_part) - query += 'rel[public_transport=stop_area]{};'.format(bbox_part) + query += "node[railway=subway_entrance]{};".format(bbox_part) + query += "rel[public_transport=stop_area]{};".format(bbox_part) query += ( - 'rel(br)[type=public_transport][public_transport=stop_area_group];' + "rel(br)[type=public_transport][public_transport=stop_area_group];" ) - query += ');(._;>>;);out body center qt;' - logging.debug('Query: %s', query) - url = '{}?data={}'.format(overpass_api, urllib.parse.quote(query)) + query += ");(._;>>;);out body center qt;" + logging.debug("Query: %s", query) + url = "{}?data={}".format(overpass_api, urllib.parse.quote(query)) response = urllib.request.urlopen(url, timeout=1000) - if response.getcode() != 200: - raise Exception( - 'Failed to query Overpass API: HTTP {}'.format(response.getcode()) - ) - return json.load(response)['elements'] + if (r_code := response.getcode()) != 200: + raise Exception(f"Failed to query Overpass API: HTTP {r_code}") + return json.load(response)["elements"] def multi_overpass(overground, overpass_api, bboxes): @@ -60,16 +72,108 @@ def multi_overpass(overground, overpass_api, bboxes): for i in range(0, len(bboxes) + SLICE_SIZE - 1, SLICE_SIZE): if i > 0: time.sleep(INTERREQUEST_WAIT) - result.extend( - overpass_request( - overground, overpass_api, bboxes[i : i + SLICE_SIZE] - ) - ) + bboxes_i = bboxes[i : i + SLICE_SIZE] # noqa E203 + result.extend(overpass_request(overground, overpass_api, bboxes_i)) return result def slugify(name): - return re.sub(r'[^a-z0-9_-]+', '', name.lower().replace(' ', '_')) + return re.sub(r"[^a-z0-9_-]+", "", name.lower().replace(" ", "_")) + + +def get_way_center( + element: dict, node_centers: Dict[int, Point] +) -> Optional[Point]: + """ + :param element: dict describing OSM element + :param node_centers: osm_id => (lat, lon) + :return: tuple with center coordinates, or None + """ + + # If elements have been queried via overpass-api with + # 'out center;' clause then ways already have 'center' attribute + if "center" in element: + return element["center"]["lat"], element["center"]["lon"] + + if "nodes" not in element: + return None + + center = [0, 0] + count = 0 + way_nodes = element["nodes"] + way_nodes_len = len(element["nodes"]) + for i, nd in enumerate(way_nodes): + if nd not in node_centers: + continue + # Don't count the first node of a closed way twice + if ( + i == way_nodes_len - 1 + and way_nodes_len > 1 + and way_nodes[0] == way_nodes[-1] + ): + break + center[0] += node_centers[nd][0] + center[1] += node_centers[nd][1] + count += 1 + if count == 0: + return None + element["center"] = {"lat": center[0] / count, "lon": center[1] / count} + return element["center"]["lat"], element["center"]["lon"] + + +def get_relation_center( + element: dict, + node_centers: Dict[int, Point], + way_centers: Dict[int, Point], + relation_centers: Dict[int, Point], + ignore_unlocalized_child_relations: bool = False, +) -> Optional[Point]: + """ + :param element: dict describing OSM element + :param node_centers: osm_id => (lat, lon) + :param way_centers: osm_id => (lat, lon) + :param relation_centers: osm_id => (lat, lon) + :param ignore_unlocalized_child_relations: if a member that is a relation + has no center, skip it and calculate center based on member nodes, + ways and other, "localized" (with known centers), relations + :return: tuple with center coordinates, or None + """ + + # If elements have been queried via overpass-api with + # 'out center;' clause then some relations already have 'center' + # attribute. But this is not the case for relations composed only + # of other relations (e.g., route_master, stop_area_group or + # stop_area with only members that are multipolygons) + if "center" in element: + return element["center"]["lat"], element["center"]["lon"] + + center = [0, 0] + count = 0 + for m in element.get("members", list()): + m_id = m["ref"] + m_type = m["type"] + if m_type == "relation" and m_id not in relation_centers: + if ignore_unlocalized_child_relations: + continue + else: + # Cannot calculate fair center because the center + # of a child relation is not known yet + return None + member_container = ( + node_centers + if m_type == "node" + else way_centers + if m_type == "way" + else relation_centers + ) + if m_id in member_container: + center[0] += member_container[m_id][0] + center[1] += member_container[m_id][1] + count += 1 + if count == 0: + return None + element["center"] = {"lat": center[0] / count, "lon": center[1] / count} + return element["center"]["lat"], element["center"]["lon"] def calculate_centers(elements): @@ -77,163 +181,219 @@ def calculate_centers(elements): except for empty ways or relations. Relies on nodes-ways-relations order in the elements list. """ - nodes = {} # id(int) => (lat, lon) - ways = {} # id(int) => (lat, lon) - relations = {} # id(int) => (lat, lon) - empty_relations = set() # ids(int) of relations without members - # or containing only empty relations + nodes: Dict[int, Point] = {} # id => (lat, lon) + ways: Dict[int, Point] = {} # id => (lat, lon) + relations: Dict[int, Point] = {} # id => (lat, lon) - def calculate_way_center(el): - # If element has been queried via overpass-api with 'out center;' - # clause then ways already have 'center' attribute - if 'center' in el: - ways[el['id']] = (el['center']['lat'], el['center']['lon']) - return - center = [0, 0] - count = 0 - for nd in el['nodes']: - if nd in nodes: - center[0] += nodes[nd][0] - center[1] += nodes[nd][1] - count += 1 - if count > 0: - el['center'] = {'lat': center[0] / count, 'lon': center[1] / count} - ways[el['id']] = (el['center']['lat'], el['center']['lon']) - - def calculate_relation_center(el): - # If element has been queried via overpass-api with 'out center;' - # clause then some relations already have 'center' attribute - if 'center' in el: - relations[el['id']] = (el['center']['lat'], el['center']['lon']) - return True - center = [0, 0] - count = 0 - for m in el.get('members', []): - if m['type'] == 'relation' and m['ref'] not in relations: - if m['ref'] in empty_relations: - # Ignore empty child relations - continue - else: - # Center of child relation is not known yet - return False - member_container = ( - nodes - if m['type'] == 'node' - else ways - if m['type'] == 'way' - else relations - ) - if m['ref'] in member_container: - center[0] += member_container[m['ref']][0] - center[1] += member_container[m['ref']][1] - count += 1 - if count == 0: - empty_relations.add(el['id']) - else: - el['center'] = {'lat': center[0] / count, 'lon': center[1] / count} - relations[el['id']] = (el['center']['lat'], el['center']['lon']) - return True - - relations_without_center = [] + unlocalized_relations = [] # 'unlocalized' means the center of the + # relation has not been calculated yet for el in elements: - if el['type'] == 'node': - nodes[el['id']] = (el['lat'], el['lon']) - elif el['type'] == 'way': - if 'nodes' in el: - calculate_way_center(el) - elif el['type'] == 'relation': - if not calculate_relation_center(el): - relations_without_center.append(el) + if el["type"] == "node": + nodes[el["id"]] = (el["lat"], el["lon"]) + elif el["type"] == "way": + if center := get_way_center(el, nodes): + ways[el["id"]] = center + elif el["type"] == "relation": + if center := get_relation_center(el, nodes, ways, relations): + relations[el["id"]] = center + else: + unlocalized_relations.append(el) + + def iterate_relation_centers_calculation( + ignore_unlocalized_child_relations: bool, + ) -> List[int]: + unlocalized_relations_upd = [] + for rel in unlocalized_relations: + if center := get_relation_center( + rel, nodes, ways, relations, ignore_unlocalized_child_relations + ): + relations[rel["id"]] = center + else: + unlocalized_relations_upd.append(rel) + return unlocalized_relations_upd # Calculate centers for relations that have no one yet - while relations_without_center: - new_relations_without_center = [] - for rel in relations_without_center: - if not calculate_relation_center(rel): - new_relations_without_center.append(rel) - if len(new_relations_without_center) == len(relations_without_center): - break - relations_without_center = new_relations_without_center + while unlocalized_relations: + unlocalized_relations_upd = iterate_relation_centers_calculation(False) + progress = len(unlocalized_relations_upd) < len(unlocalized_relations) + if not progress: + unlocalized_relations_upd = iterate_relation_centers_calculation( + True + ) + progress = len(unlocalized_relations_upd) < len( + unlocalized_relations + ) + if not progress: + break + unlocalized_relations = unlocalized_relations_upd - if relations_without_center: - logging.error( - "Cannot calculate center for the relations (%d in total): %s%s", - len(relations_without_center), - ', '.join(str(rel['id']) for rel in relations_without_center[:20]), - ", ..." if len(relations_without_center) > 20 else "", - ) - if empty_relations: - logging.warning( - "Empty relations (%d in total): %s%s", - len(empty_relations), - ', '.join(str(x) for x in list(empty_relations)[:20]), - ", ..." if len(empty_relations) > 20 else "", + +def add_osm_elements_to_cities(osm_elements, cities): + for el in osm_elements: + for c in cities: + if c.contains(el): + c.add(el) + + +def validate_cities(cities): + """Validate cities. Return list of good cities.""" + good_cities = [] + for c in cities: + try: + c.extract_routes() + except CriticalValidationError as e: + logging.error( + "Critical validation error while processing %s: %s", + c.name, + e, + ) + c.error(str(e)) + except AssertionError as e: + logging.error( + "Validation logic error while processing %s: %s", + c.name, + e, + ) + c.error(f"Validation logic error: {e}") + else: + c.validate() + if c.is_good: + good_cities.append(c) + + return good_cities + + +def get_cities_info( + cities_info_url: str = DEFAULT_CITIES_INFO_URL, +) -> List[dict]: + response = urllib.request.urlopen(cities_info_url) + if ( + not cities_info_url.startswith("file://") + and (r_code := response.getcode()) != 200 + ): + raise Exception( + f"Failed to download cities spreadsheet: HTTP {r_code}" ) + data = response.read().decode("utf-8") + reader = csv.DictReader( + data.splitlines(), + fieldnames=( + "id", + "name", + "country", + "continent", + "num_stations", + "num_lines", + "num_light_lines", + "num_interchanges", + "bbox", + "networks", + ), + ) + + cities_info = list() + names = set() + next(reader) # skipping the header + for city_info in reader: + if city_info["id"] and city_info["bbox"]: + cities_info.append(city_info) + name = city_info["name"].strip() + if name in names: + logging.warning( + "Duplicate city name in city list: %s", + city_info, + ) + names.add(name) + return cities_info -if __name__ == '__main__': +def prepare_cities( + cities_info_url: str = DEFAULT_CITIES_INFO_URL, overground: bool = False +) -> List[City]: + if overground: + raise NotImplementedError("Overground transit not implemented yet") + cities_info = get_cities_info(cities_info_url) + return list(map(partial(City, overground=overground), cities_info)) + + +def main(): parser = argparse.ArgumentParser() parser.add_argument( - '-i', - '--source', - help='File to write backup of OSM data, or to read data from', + "--cities-info-url", + default=DEFAULT_CITIES_INFO_URL, + help=( + "URL of CSV file with reference information about rapid transit " + "networks. file:// protocol is also supported." + ), ) parser.add_argument( - '-x', '--xml', help='OSM extract with routes, to read data from' + "-i", + "--source", + help="File to write backup of OSM data, or to read data from", ) parser.add_argument( - '--overpass-api', - default='http://overpass-api.de/api/interpreter', + "-x", "--xml", help="OSM extract with routes, to read data from" + ) + parser.add_argument( + "--overpass-api", + default="http://overpass-api.de/api/interpreter", help="Overpass API URL", ) parser.add_argument( - '-q', - '--quiet', - action='store_true', - help='Show only warnings and errors', + "-q", + "--quiet", + action="store_true", + help="Show only warnings and errors", ) parser.add_argument( - '-c', '--city', help='Validate only a single city or a country' + "-c", "--city", help="Validate only a single city or a country" ) parser.add_argument( - '-t', - '--overground', - action='store_true', - help='Process overground transport instead of subways', + "-t", + "--overground", + action="store_true", + help="Process overground transport instead of subways", ) parser.add_argument( - '-e', - '--entrances', - type=argparse.FileType('w', encoding='utf-8'), - help='Export unused subway entrances as GeoJSON here', + "-e", + "--entrances", + type=argparse.FileType("w", encoding="utf-8"), + help="Export unused subway entrances as GeoJSON here", ) parser.add_argument( - '-l', - '--log', - type=argparse.FileType('w', encoding='utf-8'), - help='Validation JSON file name', + "-l", + "--log", + type=argparse.FileType("w", encoding="utf-8"), + help="Validation JSON file name", + ) + + for processor_name, processor in inspect.getmembers( + processors, inspect.ismodule + ): + if not processor_name.startswith("_"): + parser.add_argument( + f"--output-{processor_name}", + help=( + "Processed metro systems output filename " + f"in {processor_name.upper()} format" + ), + ) + + parser.add_argument("--cache", help="Cache file name for processed data") + parser.add_argument( + "-r", "--recovery-path", help="Cache file name for error recovery" ) parser.add_argument( - '-o', - '--output', - type=argparse.FileType('w', encoding='utf-8'), - help='Processed metro systems output', - ) - parser.add_argument('--cache', help='Cache file name for processed data') - parser.add_argument( - '-r', '--recovery-path', help='Cache file name for error recovery' + "-d", "--dump", help="Make a YAML file for a city data" ) parser.add_argument( - '-d', '--dump', help='Make a YAML file for a city data' + "-j", "--geojson", help="Make a GeoJSON file for a city data" ) parser.add_argument( - '-j', '--geojson', help='Make a GeoJSON file for a city data' - ) - parser.add_argument( - '--crude', - action='store_true', - help='Do not use OSM railway geometry for GeoJSON', + "--crude", + action="store_true", + help="Do not use OSM railway geometry for GeoJSON", ) options = parser.parse_args() @@ -243,12 +403,11 @@ if __name__ == '__main__': log_level = logging.INFO logging.basicConfig( level=log_level, - datefmt='%H:%M:%S', - format='%(asctime)s %(levelname)-7s %(message)s', + datefmt="%H:%M:%S", + format="%(asctime)s %(levelname)-7s %(message)s", ) - # Downloading cities from Google Spreadsheets - cities = download_cities(options.overground) + cities = prepare_cities(options.cities_info_url, options.overground) if options.city: cities = [ c @@ -256,7 +415,7 @@ if __name__ == '__main__': if c.name == options.city or c.country == options.city ] if not cities: - logging.error('No cities to process') + logging.error("No cities to process") sys.exit(2) # Augment cities with recovery data @@ -266,83 +425,59 @@ if __name__ == '__main__': for city in cities: city.recovery_data = recovery_data.get(city.name, None) - logging.info('Read %s metro networks', len(cities)) + logging.info("Read %s metro networks", len(cities)) # Reading cached json, loading XML or querying Overpass API if options.source and os.path.exists(options.source): - logging.info('Reading %s', options.source) - with open(options.source, 'r') as f: + logging.info("Reading %s", options.source) + with open(options.source, "r") as f: osm = json.load(f) - if 'elements' in osm: - osm = osm['elements'] + if "elements" in osm: + osm = osm["elements"] calculate_centers(osm) elif options.xml: - logging.info('Reading %s', options.xml) + logging.info("Reading %s", options.xml) osm = load_xml(options.xml) calculate_centers(osm) if options.source: - with open(options.source, 'w', encoding='utf-8') as f: + with open(options.source, "w", encoding="utf-8") as f: json.dump(osm, f) else: if len(cities) > 10: logging.error( - 'Would not download that many cities from Overpass API, ' - 'choose a smaller set' + "Would not download that many cities from Overpass API, " + "choose a smaller set" ) sys.exit(3) bboxes = [c.bbox for c in cities] - logging.info('Downloading data from Overpass API') + logging.info("Downloading data from Overpass API") osm = multi_overpass(options.overground, options.overpass_api, bboxes) calculate_centers(osm) if options.source: - with open(options.source, 'w', encoding='utf-8') as f: + with open(options.source, "w", encoding="utf-8") as f: json.dump(osm, f) - logging.info('Downloaded %s elements, sorting by city', len(osm)) + logging.info("Downloaded %s elements", len(osm)) - # Sorting elements by city and prepare a dict - for el in osm: - for c in cities: - if c.contains(el): - c.add(el) + logging.info("Sorting elements by city") + add_osm_elements_to_cities(osm, cities) - logging.info('Building routes for each city') - good_cities = [] - for c in cities: - try: - c.extract_routes() - except CriticalValidationError as e: - logging.error( - "Critical validation error while processing %s: %s", - c.name, - str(e), - ) - c.error(str(e)) - except AssertionError as e: - logging.error( - "Validation logic error while processing %s: %s", - c.name, - str(e), - ) - c.error("Validation logic error: {}".format(str(e))) - else: - c.validate() - if c.is_good(): - good_cities.append(c) + logging.info("Building routes for each city") + good_cities = validate_cities(cities) - logging.info('Finding transfer stations') + logging.info("Finding transfer stations") transfers = find_transfers(osm, cities) good_city_names = set(c.name for c in good_cities) logging.info( - '%s good cities: %s', + "%s good cities: %s", len(good_city_names), - ', '.join(sorted(good_city_names)), + ", ".join(sorted(good_city_names)), ) bad_city_names = set(c.name for c in cities) - good_city_names logging.info( - '%s bad cities: %s', + "%s bad cities: %s", len(bad_city_names), - ', '.join(sorted(bad_city_names)), + ", ".join(sorted(bad_city_names)), ) if options.recovery_path: @@ -355,48 +490,55 @@ if __name__ == '__main__': if os.path.isdir(options.dump): for c in cities: with open( - os.path.join(options.dump, slugify(c.name) + '.yaml'), - 'w', - encoding='utf-8', + os.path.join(options.dump, slugify(c.name) + ".yaml"), + "w", + encoding="utf-8", ) as f: dump_yaml(c, f) elif len(cities) == 1: - with open(options.dump, 'w', encoding='utf-8') as f: + with open(options.dump, "w", encoding="utf-8") as f: dump_yaml(cities[0], f) else: - logging.error('Cannot dump %s cities at once', len(cities)) + logging.error("Cannot dump %s cities at once", len(cities)) if options.geojson: if os.path.isdir(options.geojson): for c in cities: with open( os.path.join( - options.geojson, slugify(c.name) + '.geojson' + options.geojson, slugify(c.name) + ".geojson" ), - 'w', - encoding='utf-8', + "w", + encoding="utf-8", ) as f: json.dump(make_geojson(c, not options.crude), f) elif len(cities) == 1: - with open(options.geojson, 'w', encoding='utf-8') as f: + with open(options.geojson, "w", encoding="utf-8") as f: json.dump(make_geojson(cities[0], not options.crude), f) else: logging.error( - 'Cannot make a geojson of %s cities at once', len(cities) + "Cannot make a geojson of %s cities at once", len(cities) ) if options.log: res = [] for c in cities: v = c.get_validation_result() - v['slug'] = slugify(c.name) + v["slug"] = slugify(c.name) res.append(v) json.dump(res, options.log, indent=2, ensure_ascii=False) - if options.output: - json.dump( - processor.process(cities, transfers, options.cache), - options.output, - indent=1, - ensure_ascii=False, - ) + for processor_name, processor in inspect.getmembers( + processors, inspect.ismodule + ): + option_name = f"output_{processor_name}" + + if not getattr(options, option_name, None): + continue + + filename = getattr(options, option_name) + processor.process(cities, transfers, filename, options.cache) + + +if __name__ == "__main__": + main() diff --git a/processors/__init__.py b/processors/__init__.py index 8040f15..4f5ed84 100644 --- a/processors/__init__.py +++ b/processors/__init__.py @@ -1,2 +1,4 @@ -# Here you can change the processor -from . import mapsme as processor +# Import only those processors (modules) you want to use. +# Ignore F401 "module imported but unused" violation since these modules +# are addressed via introspection. +from . import mapsme, gtfs # noqa F401 diff --git a/processors/_common.py b/processors/_common.py new file mode 100644 index 0000000..e933719 --- /dev/null +++ b/processors/_common.py @@ -0,0 +1,108 @@ +from typing import List, Set + +from subway_structure import City, el_center, StopArea + +DEFAULT_INTERVAL = 2.5 * 60 # seconds +KMPH_TO_MPS = 1 / 3.6 # km/h to m/s conversion multiplier +SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS # m/s +TRANSFER_PENALTY = 30 # seconds + + +def format_colour(colour): + """Truncate leading # sign.""" + return colour[1:] if colour else None + + +def transit_to_dict( + cities: List[City], transfers: List[Set[StopArea]] +) -> dict: + """Get data for good cities as a dictionary.""" + data = { + "stopareas": {}, # stoparea id => stoparea data + "networks": {}, # city name => city data + "transfers": {}, # set(tuple(stoparea_id1, stoparea_id2)), id1 list: + """Given object stored in a dict and an array of columns, + return a row to use in CSV. + """ + return [ + "" if (v := dict_data.get(column)) is None else v + for column in GTFS_COLUMNS[record_type] + ] + + +def make_gtfs( + filename: str, gtfs_data: dict, fmt: Optional[str] = None +) -> None: + if not fmt: + fmt = "tar" if filename.endswith(".tar") else "zip" + + if fmt == "zip": + make_gtfs_zip(filename, gtfs_data) + else: + make_gtfs_tar(filename, gtfs_data) + + +def make_gtfs_zip(filename: str, gtfs_data: dict) -> None: + if not filename.lower().endswith(".zip"): + filename = f"{filename}.zip" + + with ZipFile(filename, "w") as zf: + for gtfs_feature, columns in GTFS_COLUMNS.items(): + with StringIO(newline="") as string_io: + writer = csv.writer(string_io, delimiter=",") + writer.writerow(columns) + writer.writerows( + map( + partial(dict_to_row, record_type=gtfs_feature), + gtfs_data[gtfs_feature], + ) + ) + zf.writestr(f"{gtfs_feature}.txt", string_io.getvalue()) + + +def make_gtfs_tar(filename: str, gtfs_data: dict) -> None: + if not filename.lower().endswith(".tar"): + filename = f"{filename}.tar" + + with TarFile(filename, "w") as tf: + for gtfs_feature, columns in GTFS_COLUMNS.items(): + with StringIO(newline="") as string_io: + writer = csv.writer(string_io, delimiter=",") + writer.writerow(columns) + writer.writerows( + map( + partial(dict_to_row, record_type=gtfs_feature), + gtfs_data[gtfs_feature], + ) + ) + tarinfo = TarInfo(f"{gtfs_feature}.txt") + data = string_io.getvalue().encode() + tarinfo.size = len(data) + tf.addfile(tarinfo, BytesIO(data)) diff --git a/processors/mapsme.py b/processors/mapsme.py index c33c97f..b8818ea 100755 --- a/processors/mapsme.py +++ b/processors/mapsme.py @@ -1,23 +1,27 @@ import json -import os import logging +import os from collections import defaultdict + from subway_structure import ( + DISPLACEMENT_TOLERANCE, distance, el_center, Station, - DISPLACEMENT_TOLERANCE, +) +from ._common import ( + DEFAULT_INTERVAL, + format_colour, + KMPH_TO_MPS, + SPEED_ON_TRANSFER, + TRANSFER_PENALTY, ) -OSM_TYPES = {'n': (0, 'node'), 'w': (2, 'way'), 'r': (3, 'relation')} +OSM_TYPES = {"n": (0, "node"), "w": (2, "way"), "r": (3, "relation")} ENTRANCE_PENALTY = 60 # seconds -TRANSFER_PENALTY = 30 # seconds -KMPH_TO_MPS = 1 / 3.6 # km/h to m/s conversion multiplier SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS # m/s -SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS # m/s SPEED_ON_LINE = 40 * KMPH_TO_MPS # m/s -DEFAULT_INTERVAL = 2.5 # minutes def uid(elid, typ=None): @@ -26,7 +30,7 @@ def uid(elid, typ=None): if not typ: osm_id = (osm_id << 2) + OSM_TYPES[t][0] elif typ != t: - raise Exception('Got {}, expected {}'.format(elid, typ)) + raise Exception("Got {}, expected {}".format(elid, typ)) return osm_id << 1 @@ -61,7 +65,8 @@ def if_object_is_used(method): class MapsmeCache: def __init__(self, cache_path, cities): if not cache_path: - # cache is not used, all actions with cache must be silently skipped + # Cache is not used, + # all actions with cache must be silently skipped self.is_used = False return self.cache_path = cache_path @@ -69,7 +74,7 @@ class MapsmeCache: self.cache = {} if os.path.exists(cache_path): try: - with open(cache_path, 'r', encoding='utf-8') as f: + with open(cache_path, "r", encoding="utf-8") as f: self.cache = json.load(f) except json.decoder.JSONDecodeError: logging.warning( @@ -81,16 +86,16 @@ class MapsmeCache: # One stoparea may participate in routes of different cities self.stop_cities = defaultdict(set) # stoparea id -> city names self.city_dict = {c.name: c for c in cities} - self.good_city_names = {c.name for c in cities if c.is_good()} + self.good_city_names = {c.name for c in cities if c.is_good} def _is_cached_city_usable(self, city): """Check if cached stations still exist in osm data and not moved far away. """ city_cache_data = self.cache[city.name] - for stoparea_id, cached_stoparea in city_cache_data['stops'].items(): - station_id = cached_stoparea['osm_type'][0] + str( - cached_stoparea['osm_id'] + for stoparea_id, cached_stoparea in city_cache_data["stops"].items(): + station_id = cached_stoparea["osm_type"][0] + str( + cached_stoparea["osm_id"] ) city_station = city.elements.get(station_id) if not city_station or not Station.is_station( @@ -99,7 +104,7 @@ class MapsmeCache: return False station_coords = el_center(city_station) cached_station_coords = tuple( - cached_stoparea[coord] for coord in ('lon', 'lat') + cached_stoparea[coord] for coord in ("lon", "lat") ) displacement = distance(station_coords, cached_station_coords) if displacement > DISPLACEMENT_TOLERANCE: @@ -112,11 +117,11 @@ class MapsmeCache: """Put stops and networks for bad cities into containers passed as arguments.""" for city in self.city_dict.values(): - if not city.is_good() and city.name in self.cache: + if not city.is_good and city.name in self.cache: city_cached_data = self.cache[city.name] if self._is_cached_city_usable(city): - stops.update(city_cached_data['stops']) - networks.append(city_cached_data['network']) + stops.update(city_cached_data["stops"]) + networks.append(city_cached_data["network"]) logging.info("Taking %s from cache", city.name) self.recovered_city_names.add(city.name) @@ -125,7 +130,7 @@ class MapsmeCache: """Add transfers from usable cached cities to 'transfers' dict passed as argument.""" for city_name in self.recovered_city_names: - city_cached_transfers = self.cache[city_name]['transfers'] + city_cached_transfers = self.cache[city_name]["transfers"] for stop1_uid, stop2_uid, transfer_time in city_cached_transfers: if (stop1_uid, stop2_uid) not in transfers: transfers[(stop1_uid, stop2_uid)] = transfer_time @@ -135,9 +140,10 @@ class MapsmeCache: """Create/replace one cache element with new data container. This should be done for each good city.""" self.cache[city_name] = { - 'network': network, - 'stops': {}, # stoparea el_id -> jsonified stop data - 'transfers': [], # list of tuples (stoparea1_uid, stoparea2_uid, time); uid1 < uid2 + "network": network, + "stops": {}, # stoparea el_id -> jsonified stop data + "transfers": [], # list of tuples + # (stoparea1_uid, stoparea2_uid, time); uid1 < uid2 } @if_object_is_used @@ -151,7 +157,7 @@ class MapsmeCache: """Add stoparea to the cache of each city the stoparea is in.""" stoparea_uid = uid(stoparea_id) for city_name in self.stop_cities[stoparea_uid]: - self.cache[city_name]['stops'][stoparea_id] = st + self.cache[city_name]["stops"][stoparea_id] = st @if_object_is_used def add_transfer(self, stoparea1_uid, stoparea2_uid, transfer_time): @@ -161,40 +167,39 @@ class MapsmeCache: & self.stop_cities[stoparea1_uid] & self.stop_cities[stoparea2_uid] ): - self.cache[city_name]['transfers'].append( + self.cache[city_name]["transfers"].append( (stoparea1_uid, stoparea2_uid, transfer_time) ) @if_object_is_used def save(self): try: - with open(self.cache_path, 'w', encoding='utf-8') as f: + with open(self.cache_path, "w", encoding="utf-8") as f: json.dump(self.cache, f, ensure_ascii=False) except Exception as e: logging.warning("Failed to save cache: %s", str(e)) -def process(cities, transfers, cache_path): - """cities - list of City instances; - transfers - list of sets of StopArea.id; - cache_path - path to json-file with good cities cache or None. +def process(cities, transfers, filename, cache_path): + """Generate all output and save to file. + :param cities: List of City instances + :param transfers: List of sets of StopArea.id + :param filename: Path to file to save the result + :param cache_path: Path to json-file with good cities cache or None. """ - def format_colour(c): - return c[1:] if c else None - def find_exits_for_platform(center, nodes): exits = [] min_distance = None for n in nodes: - d = distance(center, (n['lon'], n['lat'])) + d = distance(center, (n["lon"], n["lat"])) if not min_distance: min_distance = d * 2 / 3 elif d < min_distance: continue too_close = False for e in exits: - d = distance((e['lon'], e['lat']), (n['lon'], n['lat'])) + d = distance((e["lon"], e["lat"]), (n["lon"], n["lat"])) if d < min_distance: too_close = True break @@ -207,25 +212,25 @@ def process(cities, transfers, cache_path): stop_areas = {} # stoparea el_id -> StopArea instance stops = {} # stoparea el_id -> stop jsonified data networks = [] - good_cities = [c for c in cities if c.is_good()] + good_cities = [c for c in cities if c.is_good] platform_nodes = {} cache.provide_stops_and_networks(stops, networks) for city in good_cities: - network = {'network': city.name, 'routes': [], 'agency_id': city.id} + network = {"network": city.name, "routes": [], "agency_id": city.id} cache.initialize_good_city(city.name, network) for route in city: routes = { - 'type': route.mode, - 'ref': route.ref, - 'name': route.name, - 'colour': format_colour(route.colour), - 'route_id': uid(route.id, 'r'), - 'itineraries': [], + "type": route.mode, + "ref": route.ref, + "name": route.name, + "colour": format_colour(route.colour), + "route_id": uid(route.id, "r"), + "itineraries": [], } if route.infill: - routes['casing'] = routes['colour'] - routes['colour'] = format_colour(route.infill) + routes["casing"] = routes["colour"] + routes["colour"] = format_colour(route.infill) for i, variant in enumerate(route): itin = [] for stop in variant: @@ -237,41 +242,42 @@ def process(cities, transfers, cache_path): round(stop.distance / SPEED_ON_LINE), ] ) - # Make exits from platform nodes, if we don't have proper exits + # Make exits from platform nodes, + # if we don't have proper exits if ( len(stop.stoparea.entrances) + len(stop.stoparea.exits) == 0 ): for pl in stop.stoparea.platforms: pl_el = city.elements[pl] - if pl_el['type'] == 'node': + if pl_el["type"] == "node": pl_nodes = [pl_el] - elif pl_el['type'] == 'way': + elif pl_el["type"] == "way": pl_nodes = [ - city.elements.get('n{}'.format(n)) - for n in pl_el['nodes'] + city.elements.get("n{}".format(n)) + for n in pl_el["nodes"] ] else: pl_nodes = [] - for m in pl_el['members']: - if m['type'] == 'way': + for m in pl_el["members"]: + if m["type"] == "way": if ( - '{}{}'.format( - m['type'][0], m['ref'] + "{}{}".format( + m["type"][0], m["ref"] ) in city.elements ): pl_nodes.extend( [ city.elements.get( - 'n{}'.format(n) + "n{}".format(n) ) for n in city.elements[ - '{}{}'.format( - m['type'][0], - m['ref'], + "{}{}".format( + m["type"][0], + m["ref"], ) - ]['nodes'] + ]["nodes"] ] ) pl_nodes = [n for n in pl_nodes if n] @@ -279,39 +285,39 @@ def process(cities, transfers, cache_path): stop.stoparea.centers[pl], pl_nodes ) - routes['itineraries'].append( + routes["itineraries"].append( { - 'stops': itin, - 'interval': round( - (variant.interval or DEFAULT_INTERVAL) * 60 + "stops": itin, + "interval": round( + variant.interval or DEFAULT_INTERVAL ), } ) - network['routes'].append(routes) + network["routes"].append(routes) networks.append(network) for stop_id, stop in stop_areas.items(): st = { - 'name': stop.name, - 'int_name': stop.int_name, - 'lat': stop.center[1], - 'lon': stop.center[0], - 'osm_type': OSM_TYPES[stop.station.id[0]][1], - 'osm_id': int(stop.station.id[1:]), - 'id': uid(stop.id), - 'entrances': [], - 'exits': [], + "name": stop.name, + "int_name": stop.int_name, + "lat": stop.center[1], + "lon": stop.center[0], + "osm_type": OSM_TYPES[stop.station.id[0]][1], + "osm_id": int(stop.station.id[1:]), + "id": uid(stop.id), + "entrances": [], + "exits": [], } - for e_l, k in ((stop.entrances, 'entrances'), (stop.exits, 'exits')): + for e_l, k in ((stop.entrances, "entrances"), (stop.exits, "exits")): for e in e_l: - if e[0] == 'n': + if e[0] == "n": st[k].append( { - 'osm_type': 'node', - 'osm_id': int(e[1:]), - 'lon': stop.centers[e][0], - 'lat': stop.centers[e][1], - 'distance': ENTRANCE_PENALTY + "osm_type": "node", + "osm_id": int(e[1:]), + "lon": stop.centers[e][0], + "lat": stop.centers[e][1], + "distance": ENTRANCE_PENALTY + round( distance(stop.centers[e], stop.center) / SPEED_TO_ENTRANCE @@ -322,31 +328,31 @@ def process(cities, transfers, cache_path): if stop.platforms: for pl in stop.platforms: for n in platform_nodes[pl]: - for k in ('entrances', 'exits'): + for k in ("entrances", "exits"): st[k].append( { - 'osm_type': n['type'], - 'osm_id': n['id'], - 'lon': n['lon'], - 'lat': n['lat'], - 'distance': ENTRANCE_PENALTY + "osm_type": n["type"], + "osm_id": n["id"], + "lon": n["lon"], + "lat": n["lat"], + "distance": ENTRANCE_PENALTY + round( distance( - (n['lon'], n['lat']), stop.center + (n["lon"], n["lat"]), stop.center ) / SPEED_TO_ENTRANCE ), } ) else: - for k in ('entrances', 'exits'): + for k in ("entrances", "exits"): st[k].append( { - 'osm_type': OSM_TYPES[stop.station.id[0]][1], - 'osm_id': int(stop.station.id[1:]), - 'lon': stop.centers[stop.id][0], - 'lat': stop.centers[stop.id][1], - 'distance': 60, + "osm_type": OSM_TYPES[stop.station.id[0]][1], + "osm_id": int(stop.station.id[1:]), + "lon": stop.centers[stop.id][0], + "lat": stop.centers[stop.id][1], + "distance": 60, } ) @@ -382,8 +388,18 @@ def process(cities, transfers, cache_path): ] result = { - 'stops': list(stops.values()), - 'transfers': pairwise_transfers, - 'networks': networks, + "stops": list(stops.values()), + "transfers": pairwise_transfers, + "networks": networks, } - return result + + if not filename.lower().endswith("json"): + filename = f"{filename}.json" + + with open(filename, "w", encoding="utf-8") as f: + json.dump( + result, + f, + indent=1, + ensure_ascii=False, + ) diff --git a/requirements.txt b/requirements.txt index dab0d26..29b232b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1 @@ -Flask==2.0.1 -kdtree==0.16 -lxml==4.6.3 -Shapely==1.7.1 -## The following requirements were added by pip freeze: -click==8.0.1 -itsdangerous==2.0.1 -Jinja2==3.0.1 -MarkupSafe==2.0.1 -Werkzeug==2.0.1 +lxml==4.9.2 diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index 95d881d..1052d51 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -32,14 +32,16 @@ Environment variable reference: - PLANET_METRO: path to a local o5m file with extract of cities having metro It's used instead of \$PLANET if exists otherwise it's created first - PLANET_UPDATE_SERVER: server to get replication data from. Defaults to https://planet.openstreetmap.org/replication/ + - CITIES_INFO_URL: http(s) or "file://" URL to a CSV file with reference information about rapid transit systems. A default value is hammered into python code. - CITY: name of a city/country to process - BBOX: bounding box of an extract; x1,y1,x2,y2. Has precedence over \$POLY - POLY: *.poly file with [multi]polygon comprising cities with metro If neither \$BBOX nor \$POLY is set, then \$POLY is generated - - SKIP_PLANET_UPDATE: skip \$PLANET file update. Any non-empty string is True + - SKIP_PLANET_UPDATE: skip \$PLANET_METRO file update. Any non-empty string is True - SKIP_FILTERING: skip filtering railway data. Any non-empty string is True - FILTERED_DATA: path to filtered data. Defaults to \$TMPDIR/subways.osm - MAPSME: file name for maps.me json output + - GTFS: file name for GTFS output - DUMP: directory/file name to dump YAML city data. Do not set to omit dump - GEOJSON: directory/file name to dump GeoJSON data. Do not set to omit dump - ELEMENTS_CACHE: file name to elements cache. Allows OSM xml processing phase @@ -54,6 +56,7 @@ Environment variable reference: - SERVER: server name and path to upload HTML files (e.g. ilya@osmz.ru:/var/www/) - SERVER_KEY: rsa key to supply for uploading the files - REMOVE_HTML: set to 1 to remove \$HTML_DIR after uploading + - QUIET: set to any non-empty value to use WARNING log level in process_subways.py. Default is INFO. EOF exit fi @@ -88,9 +91,10 @@ function check_poly() { if [ -z "${POLY-}" -o ! -f "${POLY-}" ]; then POLY=${POLY:-$(mktemp "$TMPDIR/all-metro.XXXXXXXX.poly")} if [ -n "$("$PYTHON" -c "import shapely" 2>&1)" ]; then - "$PYTHON" -m pip install shapely + "$PYTHON" -m pip install shapely==1.7.1 fi - "$PYTHON" "$SUBWAYS_PATH"/make_all_metro_poly.py > "$POLY" + "$PYTHON" "$SUBWAYS_PATH"/make_all_metro_poly.py \ + ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} > "$POLY" fi fi POLY_CHECKED=1 @@ -235,10 +239,16 @@ if [ -n "${DUMP-}" ]; then mkdir -p "$DUMP" fi +if [ -n "${DUMP-}" ]; then + mkdir -p "$DUMP" +fi + VALIDATION="$TMPDIR/validation.json" -"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q \ +"$PYTHON" "$SUBWAYS_PATH/process_subways.py" ${QUIET:+-q} \ -x "$FILTERED_DATA" -l "$VALIDATION" \ - ${MAPSME:+-o "$MAPSME"} \ + ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ + ${MAPSME:+--output-mapsme "$MAPSME"} \ + ${GTFS:+--output-gtfs "$GTFS"} \ ${CITY:+-c "$CITY"} ${DUMP:+-d "$DUMP"} ${GEOJSON:+-j "$GEOJSON"} \ ${ELEMENTS_CACHE:+-i "$ELEMENTS_CACHE"} \ ${CITY_CACHE:+--cache "$CITY_CACHE"} \ @@ -257,7 +267,9 @@ fi mkdir -p $HTML_DIR rm -f "$HTML_DIR"/*.html -"$PYTHON" "$SUBWAYS_PATH/validation_to_html.py" "$VALIDATION" "$HTML_DIR" +"$PYTHON" "$SUBWAYS_PATH/validation_to_html.py" \ + ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ + "$VALIDATION" "$HTML_DIR" # Uploading files to the server diff --git a/stop_areas/make_stop_areas.py b/stop_areas/make_stop_areas.py index 43699a9..54b0dd5 100755 --- a/stop_areas/make_stop_areas.py +++ b/stop_areas/make_stop_areas.py @@ -1,14 +1,15 @@ #!/usr/bin/env python3 -import json import codecs -from lxml import etree -import sys -import kdtree +import json import math import re +import sys import urllib.parse import urllib.request +import kdtree +from lxml import etree + QUERY = """ [out:json][timeout:250][bbox:{{bbox}}]; @@ -32,17 +33,17 @@ out meta center qt; def el_id(el): - return el['type'][0] + str(el.get('id', el.get('ref', ''))) + return el["type"][0] + str(el.get("id", el.get("ref", ""))) class StationWrapper: def __init__(self, st): - if 'center' in st: - self.coords = (st['center']['lon'], st['center']['lat']) - elif 'lon' in st: - self.coords = (st['lon'], st['lat']) + if "center" in st: + self.coords = (st["center"]["lon"], st["center"]["lat"]) + elif "lon" in st: + self.coords = (st["lon"], st["lat"]) else: - raise Exception('Coordinates not found for station {}'.format(st)) + raise Exception("Coordinates not found for station {}".format(st)) self.station = st def __len__(self): @@ -53,85 +54,85 @@ class StationWrapper: def distance(self, other): """Calculate distance in meters.""" - dx = math.radians(self[0] - other['lon']) * math.cos( - 0.5 * math.radians(self[1] + other['lat']) + dx = math.radians(self[0] - other["lon"]) * math.cos( + 0.5 * math.radians(self[1] + other["lat"]) ) - dy = math.radians(self[1] - other['lat']) + dy = math.radians(self[1] - other["lat"]) return 6378137 * math.sqrt(dx * dx + dy * dy) def overpass_request(bbox): - url = 'http://overpass-api.de/api/interpreter?data={}'.format( - urllib.parse.quote(QUERY.replace('{{bbox}}', bbox)) + url = "http://overpass-api.de/api/interpreter?data={}".format( + urllib.parse.quote(QUERY.replace("{{bbox}}", bbox)) ) response = urllib.request.urlopen(url, timeout=1000) if response.getcode() != 200: raise Exception( - 'Failed to query Overpass API: HTTP {}'.format(response.getcode()) + "Failed to query Overpass API: HTTP {}".format(response.getcode()) ) - reader = codecs.getreader('utf-8') - return json.load(reader(response))['elements'] + reader = codecs.getreader("utf-8") + return json.load(reader(response))["elements"] def add_stop_areas(src): if not src: - raise Exception('Empty dataset provided to add_stop_areas') + raise Exception("Empty dataset provided to add_stop_areas") # Add station=* tags to stations in subway and light_rail routes stations = {} for el in src: - if 'tags' in el and el['tags'].get('railway', None) == 'station': + if "tags" in el and el["tags"].get("railway", None) == "station": stations[el_id(el)] = el for el in src: if ( - el['type'] == 'relation' - and 'tags' in el - and el['tags'].get('route', None) in ('subway', 'light_rail') + el["type"] == "relation" + and "tags" in el + and el["tags"].get("route", None) in ("subway", "light_rail") ): - for m in el['members']: + for m in el["members"]: st = stations.get(el_id(m), None) - if st and 'station' not in st['tags']: - st['tags']['station'] = el['tags']['route'] - st['modified'] = True + if st and "station" not in st["tags"]: + st["tags"]["station"] = el["tags"]["route"] + st["modified"] = True # Create a kd-tree out of subway stations stations = kdtree.create(dimensions=2) for el in src: - if 'tags' in el and el['tags'].get('station', None) in ( - 'subway', - 'light_rail', + if "tags" in el and el["tags"].get("station", None) in ( + "subway", + "light_rail", ): stations.add(StationWrapper(el)) if stations.is_leaf: - raise Exception('No stations found') + raise Exception("No stations found") # Populate a list of nearby subway exits and platforms for each station MAX_DISTANCE = 300 # meters stop_areas = {} for el in src: - if 'tags' not in el: + if "tags" not in el: continue - if 'station' in el['tags']: + if "station" in el["tags"]: continue - if el['tags'].get('railway', None) not in ( - 'subway_entrance', - 'platform', - ) and el['tags'].get('public_transport', None) not in ( - 'platform', - 'stop_position', + if el["tags"].get("railway", None) not in ( + "subway_entrance", + "platform", + ) and el["tags"].get("public_transport", None) not in ( + "platform", + "stop_position", ): continue - coords = el.get('center', el) - station = stations.search_nn((coords['lon'], coords['lat']))[0].data + coords = el.get("center", el) + station = stations.search_nn((coords["lon"], coords["lat"]))[0].data if station.distance(coords) < MAX_DISTANCE: k = ( - station.station['id'], - station.station['tags'].get('name', 'station_with_no_name'), + station.station["id"], + station.station["tags"].get("name", "station_with_no_name"), ) # Disregard exits and platforms that are differently named - if el['tags'].get('name', k[1]) == k[1]: + if el["tags"].get("name", k[1]) == k[1]: if k not in stop_areas: stop_areas[k] = {el_id(station.station): station.station} stop_areas[k][el_id(el)] = el @@ -139,11 +140,11 @@ def add_stop_areas(src): # Find existing stop_area relations for stations and remove these stations for el in src: if ( - el['type'] == 'relation' - and el['tags'].get('public_transport', None) == 'stop_area' + el["type"] == "relation" + and el["tags"].get("public_transport", None) == "stop_area" ): found = False - for m in el['members']: + for m in el["members"]: if found: break for st in stop_areas: @@ -153,89 +154,90 @@ def add_stop_areas(src): break # Create OSM XML for new stop_area relations - root = etree.Element('osm', version='0.6') + root = etree.Element("osm", version="0.6") rid = -1 for st, members in stop_areas.items(): - rel = etree.SubElement(root, 'relation', id=str(rid)) + rel = etree.SubElement(root, "relation", id=str(rid)) rid -= 1 - etree.SubElement(rel, 'tag', k='type', v='public_transport') - etree.SubElement(rel, 'tag', k='public_transport', v='stop_area') - etree.SubElement(rel, 'tag', k='name', v=st[1]) + etree.SubElement(rel, "tag", k="type", v="public_transport") + etree.SubElement(rel, "tag", k="public_transport", v="stop_area") + etree.SubElement(rel, "tag", k="name", v=st[1]) for m in members.values(): if ( - m['tags'].get( - 'railway', m['tags'].get('public_transport', None) + m["tags"].get( + "railway", m["tags"].get("public_transport", None) ) - == 'platform' + == "platform" ): - role = 'platform' - elif m['tags'].get('public_transport', None) == 'stop_position': - role = 'stop' + role = "platform" + elif m["tags"].get("public_transport", None) == "stop_position": + role = "stop" else: - role = '' + role = "" etree.SubElement( - rel, 'member', ref=str(m['id']), type=m['type'], role=role + rel, "member", ref=str(m["id"]), type=m["type"], role=role ) # Add all downloaded elements for el in src: - obj = etree.SubElement(root, el['type']) + obj = etree.SubElement(root, el["type"]) for a in ( - 'id', - 'type', - 'user', - 'uid', - 'version', - 'changeset', - 'timestamp', - 'lat', - 'lon', + "id", + "type", + "user", + "uid", + "version", + "changeset", + "timestamp", + "lat", + "lon", ): if a in el: obj.set(a, str(el[a])) - if 'modified' in el: - obj.set('action', 'modify') - if 'tags' in el: - for k, v in el['tags'].items(): - etree.SubElement(obj, 'tag', k=k, v=v) - if 'members' in el: - for m in el['members']: + if "modified" in el: + obj.set("action", "modify") + if "tags" in el: + for k, v in el["tags"].items(): + etree.SubElement(obj, "tag", k=k, v=v) + if "members" in el: + for m in el["members"]: etree.SubElement( obj, - 'member', - ref=str(m['ref']), - type=m['type'], - role=m.get('role', ''), + "member", + ref=str(m["ref"]), + type=m["type"], + role=m.get("role", ""), ) - if 'nodes' in el: - for n in el['nodes']: - etree.SubElement(obj, 'nd', ref=str(n)) + if "nodes" in el: + for n in el["nodes"]: + etree.SubElement(obj, "nd", ref=str(n)) return etree.tostring(root, pretty_print=True) -if __name__ == '__main__': +if __name__ == "__main__": if len(sys.argv) < 2: print( - 'Read a JSON from Overpass and output JOSM OSM XML with added stop_area relations' + "Read a JSON from Overpass and output JOSM OSM XML with added " + "stop_area relations" ) print( - 'Usage: {} {{|}} [output.osm]'.format( + "Usage: {} {{|}} [output.osm]".format( sys.argv[0] ) ) sys.exit(1) - if re.match(r'^[-0-9.,]+$', sys.argv[1]): + if re.match(r"^[-0-9.,]+$", sys.argv[1]): src = overpass_request(sys.argv[1]) else: - with open(sys.argv[1], 'r') as f: - src = json.load(f)['elements'] + with open(sys.argv[1], "r") as f: + src = json.load(f)["elements"] result = add_stop_areas(src) if len(sys.argv) < 3: - print(result.decode('utf-8')) + print(result.decode("utf-8")) else: - with open(sys.argv[2], 'wb') as f: + with open(sys.argv[2], "wb") as f: f.write(result) diff --git a/stop_areas/make_tram_areas.py b/stop_areas/make_tram_areas.py index f06fdac..eea244d 100755 --- a/stop_areas/make_tram_areas.py +++ b/stop_areas/make_tram_areas.py @@ -1,14 +1,15 @@ #!/usr/bin/env python3 -import json import codecs -from lxml import etree -import sys -import kdtree +import json import math import re +import sys import urllib.parse import urllib.request +import kdtree +from lxml import etree + QUERY = """ [out:json][timeout:250][bbox:{{bbox}}]; @@ -23,17 +24,17 @@ out meta center qt; def el_id(el): - return el['type'][0] + str(el.get('id', el.get('ref', ''))) + return el["type"][0] + str(el.get("id", el.get("ref", ""))) class StationWrapper: def __init__(self, st): - if 'center' in st: - self.coords = (st['center']['lon'], st['center']['lat']) - elif 'lon' in st: - self.coords = (st['lon'], st['lat']) + if "center" in st: + self.coords = (st["center"]["lon"], st["center"]["lat"]) + elif "lon" in st: + self.coords = (st["lon"], st["lat"]) else: - raise Exception('Coordinates not found for station {}'.format(st)) + raise Exception("Coordinates not found for station {}".format(st)) self.station = st def __len__(self): @@ -44,50 +45,50 @@ class StationWrapper: def distance(self, other): """Calculate distance in meters.""" - dx = math.radians(self[0] - other['lon']) * math.cos( - 0.5 * math.radians(self[1] + other['lat']) + dx = math.radians(self[0] - other["lon"]) * math.cos( + 0.5 * math.radians(self[1] + other["lat"]) ) - dy = math.radians(self[1] - other['lat']) + dy = math.radians(self[1] - other["lat"]) return 6378137 * math.sqrt(dx * dx + dy * dy) def overpass_request(bbox): - url = 'http://overpass-api.de/api/interpreter?data={}'.format( - urllib.parse.quote(QUERY.replace('{{bbox}}', bbox)) + url = "http://overpass-api.de/api/interpreter?data={}".format( + urllib.parse.quote(QUERY.replace("{{bbox}}", bbox)) ) response = urllib.request.urlopen(url, timeout=1000) if response.getcode() != 200: raise Exception( - 'Failed to query Overpass API: HTTP {}'.format(response.getcode()) + "Failed to query Overpass API: HTTP {}".format(response.getcode()) ) - reader = codecs.getreader('utf-8') - return json.load(reader(response))['elements'] + reader = codecs.getreader("utf-8") + return json.load(reader(response))["elements"] def is_part_of_stop(tags): - if tags.get('public_transport') in ('platform', 'stop_position'): + if tags.get("public_transport") in ("platform", "stop_position"): return True - if tags.get('railway') == 'platform': + if tags.get("railway") == "platform": return True return False def add_stop_areas(src): if not src: - raise Exception('Empty dataset provided to add_stop_areas') + raise Exception("Empty dataset provided to add_stop_areas") # Create a kd-tree out of tram stations stations = kdtree.create(dimensions=2) for el in src: - if 'tags' in el and el['tags'].get('railway') == 'tram_stop': + if "tags" in el and el["tags"].get("railway") == "tram_stop": stations.add(StationWrapper(el)) if stations.is_leaf: - raise Exception('No stations found') + raise Exception("No stations found") elements = {} for el in src: - if el.get('tags'): + if el.get("tags"): elements[el_id(el)] = el # Populate a list of nearby subway exits and platforms for each station @@ -96,27 +97,27 @@ def add_stop_areas(src): for el in src: # Only tram routes if ( - 'tags' not in el - or el['type'] != 'relation' - or el['tags'].get('route') != 'tram' + "tags" not in el + or el["type"] != "relation" + or el["tags"].get("route") != "tram" ): continue - for m in el['members']: + for m in el["members"]: if el_id(m) not in elements: continue pel = elements[el_id(m)] - if not is_part_of_stop(pel['tags']): + if not is_part_of_stop(pel["tags"]): continue - if pel['tags'].get('railway') == 'tram_stop': + if pel["tags"].get("railway") == "tram_stop": continue - coords = pel.get('center', pel) - station = stations.search_nn( - (coords['lon'], coords['lat']) - )[0].data + coords = pel.get("center", pel) + station = stations.search_nn((coords["lon"], coords["lat"]))[ + 0 + ].data if station.distance(coords) < MAX_DISTANCE: k = ( - station.station['id'], - station.station['tags'].get('name', None), + station.station["id"], + station.station["tags"].get("name", None), ) if k not in stop_areas: stop_areas[k] = {el_id(station.station): station.station} @@ -125,11 +126,11 @@ def add_stop_areas(src): # Find existing stop_area relations for stations and remove these stations for el in src: if ( - el['type'] == 'relation' - and el['tags'].get('public_transport', None) == 'stop_area' + el["type"] == "relation" + and el["tags"].get("public_transport", None) == "stop_area" ): found = False - for m in el['members']: + for m in el["members"]: if found: break for st in stop_areas: @@ -139,81 +140,81 @@ def add_stop_areas(src): break # Create OSM XML for new stop_area relations - root = etree.Element('osm', version='0.6') + root = etree.Element("osm", version="0.6") rid = -1 for st, members in stop_areas.items(): - rel = etree.SubElement(root, 'relation', id=str(rid)) + rel = etree.SubElement(root, "relation", id=str(rid)) rid -= 1 - etree.SubElement(rel, 'tag', k='type', v='public_transport') - etree.SubElement(rel, 'tag', k='public_transport', v='stop_area') + etree.SubElement(rel, "tag", k="type", v="public_transport") + etree.SubElement(rel, "tag", k="public_transport", v="stop_area") if st[1]: - etree.SubElement(rel, 'tag', k='name', v=st[1]) + etree.SubElement(rel, "tag", k="name", v=st[1]) for m in members.values(): etree.SubElement( - rel, 'member', ref=str(m['id']), type=m['type'], role='' + rel, "member", ref=str(m["id"]), type=m["type"], role="" ) # Add all downloaded elements for el in src: - obj = etree.SubElement(root, el['type']) + obj = etree.SubElement(root, el["type"]) for a in ( - 'id', - 'type', - 'user', - 'uid', - 'version', - 'changeset', - 'timestamp', - 'lat', - 'lon', + "id", + "type", + "user", + "uid", + "version", + "changeset", + "timestamp", + "lat", + "lon", ): if a in el: obj.set(a, str(el[a])) - if 'modified' in el: - obj.set('action', 'modify') - if 'tags' in el: - for k, v in el['tags'].items(): - etree.SubElement(obj, 'tag', k=k, v=v) - if 'members' in el: - for m in el['members']: + if "modified" in el: + obj.set("action", "modify") + if "tags" in el: + for k, v in el["tags"].items(): + etree.SubElement(obj, "tag", k=k, v=v) + if "members" in el: + for m in el["members"]: etree.SubElement( obj, - 'member', - ref=str(m['ref']), - type=m['type'], - role=m.get('role', ''), + "member", + ref=str(m["ref"]), + type=m["type"], + role=m.get("role", ""), ) - if 'nodes' in el: - for n in el['nodes']: - etree.SubElement(obj, 'nd', ref=str(n)) + if "nodes" in el: + for n in el["nodes"]: + etree.SubElement(obj, "nd", ref=str(n)) return etree.tostring(root, pretty_print=True, encoding="utf-8") -if __name__ == '__main__': +if __name__ == "__main__": if len(sys.argv) < 2: print( - 'Read a JSON from Overpass and output JOSM OSM XML ' - 'with added stop_area relations' + "Read a JSON from Overpass and output JOSM OSM XML " + "with added stop_area relations" ) print( - 'Usage: {} {{|}} [output.osm]'.format( + "Usage: {} {{|}} [output.osm]".format( sys.argv[0] ) ) sys.exit(1) - if re.match(r'^[-0-9.,]+$', sys.argv[1]): - bbox = sys.argv[1].split(',') - src = overpass_request(','.join([bbox[i] for i in (1, 0, 3, 2)])) + if re.match(r"^[-0-9.,]+$", sys.argv[1]): + bbox = sys.argv[1].split(",") + src = overpass_request(",".join([bbox[i] for i in (1, 0, 3, 2)])) else: - with open(sys.argv[1], 'r') as f: - src = json.load(f)['elements'] + with open(sys.argv[1], "r") as f: + src = json.load(f)["elements"] result = add_stop_areas(src) if len(sys.argv) < 3: - print(result.decode('utf-8')) + print(result.decode("utf-8")) else: - with open(sys.argv[2], 'wb') as f: + with open(sys.argv[2], "wb") as f: f.write(result) diff --git a/stop_areas/requirements.txt b/stop_areas/requirements.txt new file mode 100644 index 0000000..ad45dce --- /dev/null +++ b/stop_areas/requirements.txt @@ -0,0 +1,12 @@ +Flask==2.2.3 +kdtree==0.16 +lxml==4.9.2 + +## The following requirements were added by pip freeze: +click==8.1.3 +importlib-metadata==6.0.0 +itsdangerous==2.1.2 +Jinja2==3.1.2 +MarkupSafe==2.1.2 +Werkzeug==2.2.3 +zipp==3.13.0 diff --git a/stop_areas/serve.py b/stop_areas/serve.py index e5d695e..3e8dc28 100755 --- a/stop_areas/serve.py +++ b/stop_areas/serve.py @@ -1,28 +1,30 @@ #!/usr/bin/env python3 -from flask import Flask, request, make_response, render_template +from flask import Flask, make_response, render_template, request + from make_stop_areas import add_stop_areas, overpass_request + app = Flask(__name__) app.debug = True -@app.route('/') +@app.route("/") def form(): - return render_template('index.html') + return render_template("index.html") -@app.route('/process', methods=['GET']) +@app.route("/process", methods=["GET"]) def convert(): - src = overpass_request(request.args.get('bbox')) + src = overpass_request(request.args.get("bbox")) if not src: - return 'No data from overpass, sorry.' + return "No data from overpass, sorry." result = add_stop_areas(src) response = make_response(result) - response.headers['Content-Disposition'] = ( - 'attachment; filename="stop_areas.osm"' - ) + response.headers[ + "Content-Disposition" + ] = 'attachment; filename="stop_areas.osm"' return response -if __name__ == '__main__': +if __name__ == "__main__": app.run() diff --git a/subway_io.py b/subway_io.py index 810c02e..cbd252a 100644 --- a/subway_io.py +++ b/subway_io.py @@ -12,33 +12,33 @@ def load_xml(f): elements = [] for event, element in etree.iterparse(f): - if element.tag in ('node', 'way', 'relation'): - el = {'type': element.tag, 'id': int(element.get('id'))} - if element.tag == 'node': - for n in ('lat', 'lon'): + if element.tag in ("node", "way", "relation"): + el = {"type": element.tag, "id": int(element.get("id"))} + if element.tag == "node": + for n in ("lat", "lon"): el[n] = float(element.get(n)) tags = {} nd = [] members = [] for sub in element: - if sub.tag == 'tag': - tags[sub.get('k')] = sub.get('v') - elif sub.tag == 'nd': - nd.append(int(sub.get('ref'))) - elif sub.tag == 'member': + if sub.tag == "tag": + tags[sub.get("k")] = sub.get("v") + elif sub.tag == "nd": + nd.append(int(sub.get("ref"))) + elif sub.tag == "member": members.append( { - 'type': sub.get('type'), - 'ref': int(sub.get('ref')), - 'role': sub.get('role', ''), + "type": sub.get("type"), + "ref": int(sub.get("ref")), + "role": sub.get("role", ""), } ) if tags: - el['tags'] = tags + el["tags"] = tags if nd: - el['nodes'] = nd + el["nodes"] = nd if members: - el['members'] = members + el["members"] = members elements.append(el) element.clear() @@ -55,7 +55,7 @@ def _get_yaml_compatible_string(scalar): if string and ( string[0] in _YAML_SPECIAL_CHARACTERS or any(seq in string for seq in _YAML_SPECIAL_SEQUENCES) - or string.endswith(':') + or string.endswith(":") ): string = string.replace("'", "''") string = "'{}'".format(string) @@ -63,25 +63,25 @@ def _get_yaml_compatible_string(scalar): def dump_yaml(city, f): - def write_yaml(data, f, indent=''): + def write_yaml(data, f, indent=""): if isinstance(data, (set, list)): - f.write('\n') + f.write("\n") for i in data: f.write(indent) - f.write('- ') - write_yaml(i, f, indent + ' ') + f.write("- ") + write_yaml(i, f, indent + " ") elif isinstance(data, dict): - f.write('\n') + f.write("\n") for k, v in data.items(): if v is None: continue - f.write(indent + _get_yaml_compatible_string(k) + ': ') - write_yaml(v, f, indent + ' ') + f.write(indent + _get_yaml_compatible_string(k) + ": ") + write_yaml(v, f, indent + " ") if isinstance(v, (list, set, dict)): - f.write('\n') + f.write("\n") else: f.write(_get_yaml_compatible_string(data)) - f.write('\n') + f.write("\n") INCLUDE_STOP_AREAS = False stops = set() @@ -91,14 +91,14 @@ def dump_yaml(city, f): [(sa.transfer or sa.id, sa.name) for sa in route.stop_areas()] ) rte = { - 'type': route.mode, - 'ref': route.ref, - 'name': route.name, - 'colour': route.colour, - 'infill': route.infill, - 'station_count': len(stations), - 'stations': list(stations.values()), - 'itineraries': {}, + "type": route.mode, + "ref": route.ref, + "name": route.name, + "colour": route.colour, + "infill": route.infill, + "station_count": len(stations), + "stations": list(stations.values()), + "itineraries": {}, } for variant in route: if INCLUDE_STOP_AREAS: @@ -107,38 +107,38 @@ def dump_yaml(city, f): s = st.stoparea if s.id == s.station.id: v_stops.append( - '{} ({})'.format(s.station.name, s.station.id) + "{} ({})".format(s.station.name, s.station.id) ) else: v_stops.append( - '{} ({}) in {} ({})'.format( + "{} ({}) in {} ({})".format( s.station.name, s.station.id, s.name, s.id ) ) else: v_stops = [ - '{} ({})'.format( + "{} ({})".format( s.stoparea.station.name, s.stoparea.station.id ) for s in variant ] - rte['itineraries'][variant.id] = v_stops + rte["itineraries"][variant.id] = v_stops stops.update(v_stops) routes.append(rte) transfers = [] for t in city.transfers: - v_stops = ['{} ({})'.format(s.name, s.id) for s in t] + v_stops = ["{} ({})".format(s.name, s.id) for s in t] transfers.append(sorted(v_stops)) result = { - 'stations': sorted(stops), - 'transfers': sorted(transfers, key=lambda t: t[0]), - 'routes': sorted(routes, key=lambda r: r['ref']), + "stations": sorted(stops), + "transfers": sorted(transfers, key=lambda t: t[0]), + "routes": sorted(routes, key=lambda r: r["ref"]), } write_yaml(result, f) -def make_geojson(city, tracks=True): +def make_geojson(city, include_tracks_geometry=True): transfers = set() for t in city.transfers: transfers.update(t) @@ -147,36 +147,25 @@ def make_geojson(city, tracks=True): stops = set() for rmaster in city: for variant in rmaster: - if not tracks: - features.append( - { - 'type': 'Feature', - 'geometry': { - 'type': 'LineString', - 'coordinates': [s.stop for s in variant], - }, - 'properties': { - 'ref': variant.ref, - 'name': variant.name, - 'stroke': variant.colour, - }, - } - ) - elif variant.tracks: - features.append( - { - 'type': 'Feature', - 'geometry': { - 'type': 'LineString', - 'coordinates': variant.tracks, - }, - 'properties': { - 'ref': variant.ref, - 'name': variant.name, - 'stroke': variant.colour, - }, - } - ) + tracks = ( + variant.get_extended_tracks() + if include_tracks_geometry + else [s.stop for s in variant] + ) + features.append( + { + "type": "Feature", + "geometry": { + "type": "LineString", + "coordinates": tracks, + }, + "properties": { + "ref": variant.ref, + "name": variant.name, + "stroke": variant.colour, + }, + } + ) for st in variant: stops.add(st.stop) stopareas.add(st.stoparea) @@ -184,41 +173,41 @@ def make_geojson(city, tracks=True): for stop in stops: features.append( { - 'type': 'Feature', - 'geometry': { - 'type': 'Point', - 'coordinates': stop, + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": stop, }, - 'properties': { - 'marker-size': 'small', - 'marker-symbol': 'circle', + "properties": { + "marker-size": "small", + "marker-symbol": "circle", }, } ) for stoparea in stopareas: features.append( { - 'type': 'Feature', - 'geometry': { - 'type': 'Point', - 'coordinates': stoparea.center, + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": stoparea.center, }, - 'properties': { - 'name': stoparea.name, - 'marker-size': 'small', - 'marker-color': '#ff2600' + "properties": { + "name": stoparea.name, + "marker-size": "small", + "marker-color": "#ff2600" if stoparea in transfers - else '#797979', + else "#797979", }, } ) - return {'type': 'FeatureCollection', 'features': features} + return {"type": "FeatureCollection", "features": features} def _dumps_route_id(route_id): - """Argument is a route_id that depends on route colour and ref. Name - can be taken from route_master or can be route's own, we don't take it - into consideration. Some of route attributes can be None. The function makes + """Argument is a route_id that depends on route colour and ref. Name can + be taken from route_master or can be route's own, we don't take it into + consideration. Some of route attributes can be None. The function makes route_id json-compatible - dumps it to a string.""" return json.dumps(route_id, ensure_ascii=False) @@ -235,7 +224,7 @@ def read_recovery_data(path): shuffled stations in routes.""" data = None try: - with open(path, 'r') as f: + with open(path, "r") as f: try: data = json.load(f) except json.decoder.JSONDecodeError as e: @@ -269,21 +258,21 @@ def write_recovery_data(path, current_data, cities): itineraries = [] for variant in route: itin = { - 'stations': [], - 'name': variant.name, - 'from': variant.element['tags'].get('from'), - 'to': variant.element['tags'].get('to'), + "stations": [], + "name": variant.name, + "from": variant.element["tags"].get("from"), + "to": variant.element["tags"].get("to"), } for stop in variant: station = stop.stoparea.station station_name = station.name - if station_name == '?' and station.int_name: + if station_name == "?" and station.int_name: station_name = station.int_name - itin['stations'].append( + itin["stations"].append( { - 'oms_id': station.id, - 'name': station_name, - 'center': station.center, + "oms_id": station.id, + "name": station_name, + "center": station.center, } ) if itin is not None: @@ -293,7 +282,7 @@ def write_recovery_data(path, current_data, cities): data = current_data for city in cities: - if city.is_good(): + if city.is_good: data[city.name] = make_city_recovery_data(city) try: @@ -304,7 +293,7 @@ def write_recovery_data(path, current_data, cities): } for city_name, routes in data.items() } - with open(path, 'w', encoding='utf-8') as f: + with open(path, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=2) except Exception as e: logging.warning("Cannot write recovery data to '%s': %s", path, str(e)) diff --git a/subway_structure.py b/subway_structure.py index 07af3a4..63bf63f 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -1,14 +1,10 @@ -import csv -import itertools -import logging import math -import urllib.parse -import urllib.request -from css_colours import normalize_colour +import re from collections import Counter, defaultdict +from css_colours import normalize_colour + -SPREADSHEET_ID = '1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k' MAX_DISTANCE_TO_ENTRANCES = 300 # in meters MAX_DISTANCE_STOP_TO_LINE = 50 # in meters ALLOWED_STATIONS_MISMATCH = 0.02 # part of total station count @@ -20,32 +16,74 @@ DISALLOWED_ANGLE_BETWEEN_STOPS = 20 # in degrees # it is likely the same object DISPLACEMENT_TOLERANCE = 300 # in meters -MODES_RAPID = set(('subway', 'light_rail', 'monorail', 'train')) -MODES_OVERGROUND = set(('tram', 'bus', 'trolleybus', 'aerialway', 'ferry')) -DEFAULT_MODES_RAPID = set(('subway', 'light_rail')) -DEFAULT_MODES_OVERGROUND = set(('tram',)) # TODO: bus and trolleybus? +MODES_RAPID = set(("subway", "light_rail", "monorail", "train")) +MODES_OVERGROUND = set(("tram", "bus", "trolleybus", "aerialway", "ferry")) +DEFAULT_MODES_RAPID = set(("subway", "light_rail")) +DEFAULT_MODES_OVERGROUND = set(("tram",)) # TODO: bus and trolleybus? ALL_MODES = MODES_RAPID | MODES_OVERGROUND RAILWAY_TYPES = set( ( - 'rail', - 'light_rail', - 'subway', - 'narrow_gauge', - 'funicular', - 'monorail', - 'tram', + "rail", + "light_rail", + "subway", + "narrow_gauge", + "funicular", + "monorail", + "tram", ) ) CONSTRUCTION_KEYS = ( - 'construction', - 'proposed', - 'construction:railway', - 'proposed:railway', + "construction", + "proposed", + "construction:railway", + "proposed:railway", ) used_entrances = set() +START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*") + + +def get_start_end_times(opening_hours): + """Very simplified method to parse OSM opening_hours tag. + We simply take the first HH:MM-HH:MM substring which is the most probable + opening hours interval for the most of weekdays. + """ + start_time, end_time = None, None + m = START_END_TIMES_RE.match(opening_hours) + if m: + ints = tuple(map(int, m.groups())) + start_time = (ints[0], ints[1]) + end_time = (ints[2], ints[3]) + return start_time, end_time + + +def osm_interval_to_seconds(interval_str): + """Convert to int an OSM value for 'interval'/'headway' tag + which may be in these formats: + HH:MM:SS, + HH:MM, + MM, + M + (https://wiki.openstreetmap.org/wiki/Key:interval#Format) + """ + hours, minutes, seconds = 0, 0, 0 + semicolon_count = interval_str.count(":") + try: + if semicolon_count == 0: + minutes = int(interval_str) + elif semicolon_count == 1: + hours, minutes = map(int, interval_str.split(":")) + elif semicolon_count == 2: + hours, minutes, seconds = map(int, interval_str.split(":")) + else: + return None + except ValueError: + return None + return seconds + 60 * minutes + 60 * 60 * hours + + class CriticalValidationError(Exception): """Is thrown if an error occurs that prevents further validation of a city.""" @@ -54,25 +92,25 @@ class CriticalValidationError(Exception): def el_id(el): if not el: return None - if 'type' not in el: - raise Exception('What is this element? {}'.format(el)) - return el['type'][0] + str(el.get('id', el.get('ref', ''))) + if "type" not in el: + raise Exception("What is this element? {}".format(el)) + return el["type"][0] + str(el.get("id", el.get("ref", ""))) def el_center(el): if not el: return None - if 'lat' in el: - return (el['lon'], el['lat']) - elif 'center' in el: - return (el['center']['lon'], el['center']['lat']) + if "lat" in el: + return (el["lon"], el["lat"]) + elif "center" in el: + return (el["center"]["lon"], el["center"]["lat"]) return None def distance(p1, p2): if p1 is None or p2 is None: raise Exception( - 'One of arguments to distance({}, {}) is None'.format(p1, p2) + "One of arguments to distance({}, {}) is None".format(p1, p2) ) dx = math.radians(p1[0] - p2[0]) * math.cos( 0.5 * math.radians(p1[1] + p2[1]) @@ -88,26 +126,29 @@ def is_near(p1, p2): ) -def project_on_line(p, line): - def project_on_segment(p, p1, p2): - dp = (p2[0] - p1[0], p2[1] - p1[1]) - d2 = dp[0] * dp[0] + dp[1] * dp[1] - if d2 < 1e-14: - return None - # u is the position of projection of p point on line p1p2 - # regarding point p1 and (p2-p1) direction vector - u = ((p[0] - p1[0]) * dp[0] + (p[1] - p1[1]) * dp[1]) / d2 - if not 0 <= u <= 1: - return None - return u +def project_on_segment(p, p1, p2): + """Given three points, return u - the position of projection of + point p onto segment p1p2 regarding point p1 and (p2-p1) direction vector + """ + dp = (p2[0] - p1[0], p2[1] - p1[1]) + d2 = dp[0] * dp[0] + dp[1] * dp[1] + if d2 < 1e-14: + return None + u = ((p[0] - p1[0]) * dp[0] + (p[1] - p1[1]) * dp[1]) / d2 + if not 0 <= u <= 1: + return None + return u + +def project_on_line(p, line): result = { # In the first approximation, position on rails is the index of the # closest vertex of line to the point p. Fractional value means that - # the projected point lies on a segment between two vertices. More than - # one value can occur if a route follows the same tracks more than once. - 'positions_on_line': None, - 'projected_point': None, # (lon, lat) + # the projected point lies on a segment between two vertices. + # More than one value can occur if a route follows the same tracks + # more than once. + "positions_on_line": None, + "projected_point": None, # (lon, lat) } if len(line) < 2: @@ -118,13 +159,13 @@ def project_on_line(p, line): for i, vertex in enumerate(line): d = distance(p, vertex) if d < d_min: - result['positions_on_line'] = [i] - result['projected_point'] = vertex + result["positions_on_line"] = [i] + result["projected_point"] = vertex d_min = d closest_to_vertex = True - elif vertex == result['projected_point']: + elif vertex == result["projected_point"]: # Repeated occurrence of the track vertex in line, like Oslo Line 5 - result['positions_on_line'].append(i) + result["positions_on_line"].append(i) # And then calculate distances to each segment for seg in range(len(line) - 1): # Check bbox for speed @@ -151,14 +192,15 @@ def project_on_line(p, line): ) d = distance(p, projected_point) if d < d_min: - result['positions_on_line'] = [seg + u] - result['projected_point'] = projected_point + result["positions_on_line"] = [seg + u] + result["projected_point"] = projected_point d_min = d closest_to_vertex = False - elif projected_point == result['projected_point']: - # Repeated occurrence of the track segment in line, like Oslo Line 5 + elif projected_point == result["projected_point"]: + # Repeated occurrence of the track segment in line, + # like Oslo Line 5 if not closest_to_vertex: - result['positions_on_line'].append(seg + u) + result["positions_on_line"].append(seg + u) return result @@ -167,7 +209,7 @@ def find_segment(p, line, start_vertex=0): EPS = 1e-9 for seg in range(start_vertex, len(line) - 1): if is_near(p, line[seg]): - return seg, 0 + return seg, 0.0 if line[seg][0] == line[seg + 1][0]: if not (p[0] - EPS <= line[seg][0] <= p[0] + EPS): continue @@ -192,7 +234,7 @@ def distance_on_line(p1, p2, line, start_vertex=0): of points p1 and p2. Returns a TUPLE of (d, vertex): d is the distance and vertex is the number of the second vertex, to continue calculations for the next point.""" - line_copy = line + line_len = len(line) seg1, pos1 = find_segment(p1, line, start_vertex) if seg1 is None: # logging.warn('p1 %s is not projected, st=%s', p1, start_vertex) @@ -209,7 +251,7 @@ def distance_on_line(p1, p2, line, start_vertex=0): return distance(line[seg1], line[seg1 + 1]) * abs(pos2 - pos1), seg1 if seg2 < seg1: # Should not happen - raise Exception('Pos1 %s is after pos2 %s', seg1, seg2) + raise Exception("Pos1 %s is after pos2 %s", seg1, seg2) d = 0 if pos1 < 1: d += distance(line[seg1], line[seg1 + 1]) * (1 - pos1) @@ -217,7 +259,7 @@ def distance_on_line(p1, p2, line, start_vertex=0): d += distance(line[i], line[i + 1]) if pos2 > 0: d += distance(line[seg2], line[seg2 + 1]) * pos2 - return d, seg2 % len(line_copy) + return d, seg2 % line_len def angle_between(p1, c, p2): @@ -233,19 +275,19 @@ def angle_between(p1, c, p2): def format_elid_list(ids): - msg = ', '.join(sorted(ids)[:20]) + msg = ", ".join(sorted(ids)[:20]) if len(ids) > 20: - msg += ', ...' + msg += ", ..." return msg class Station: @staticmethod def get_modes(el): - mode = el['tags'].get('station') + mode = el["tags"].get("station") modes = [] if not mode else [mode] for m in ALL_MODES: - if el['tags'].get(m) == 'yes': + if el["tags"].get(m) == "yes": modes.append(m) return set(modes) @@ -254,17 +296,17 @@ class Station: # public_transport=station is too ambiguous and unspecific to use, # so we expect for it to be backed by railway=station. if ( - 'tram' in modes - and el.get('tags', {}).get('railway') == 'tram_stop' + "tram" in modes + and el.get("tags", {}).get("railway") == "tram_stop" ): return True - if el.get('tags', {}).get('railway') not in ('station', 'halt'): + if el.get("tags", {}).get("railway") not in ("station", "halt"): return False for k in CONSTRUCTION_KEYS: - if k in el['tags']: + if k in el["tags"]: return False # Not checking for station=train, obviously - if 'train' not in modes and Station.get_modes(el).isdisjoint(modes): + if "train" not in modes and Station.get_modes(el).isdisjoint(modes): return False return True @@ -272,58 +314,58 @@ class Station: """Call this with a railway=station node.""" if not Station.is_station(el, city.modes): raise Exception( - 'Station object should be instantiated from a station node. ' - 'Got: {}'.format(el) + "Station object should be instantiated from a station node. " + "Got: {}".format(el) ) self.id = el_id(el) self.element = el self.modes = Station.get_modes(el) - self.name = el['tags'].get('name', '?') - self.int_name = el['tags'].get( - 'int_name', el['tags'].get('name:en', None) + self.name = el["tags"].get("name", "?") + self.int_name = el["tags"].get( + "int_name", el["tags"].get("name:en", None) ) try: - self.colour = normalize_colour(el['tags'].get('colour', None)) + self.colour = normalize_colour(el["tags"].get("colour", None)) except ValueError as e: self.colour = None city.warn(str(e), el) self.center = el_center(el) if self.center is None: - raise Exception('Could not find center of {}'.format(el)) + raise Exception("Could not find center of {}".format(el)) def __repr__(self): - return 'Station(id={}, modes={}, name={}, center={})'.format( - self.id, ','.join(self.modes), self.name, self.center + return "Station(id={}, modes={}, name={}, center={})".format( + self.id, ",".join(self.modes), self.name, self.center ) class StopArea: @staticmethod def is_stop(el): - if 'tags' not in el: + if "tags" not in el: return False - if el['tags'].get('railway') == 'stop': + if el["tags"].get("railway") == "stop": return True - if el['tags'].get('public_transport') == 'stop_position': + if el["tags"].get("public_transport") == "stop_position": return True return False @staticmethod def is_platform(el): - if 'tags' not in el: + if "tags" not in el: return False - if el['tags'].get('railway') in ('platform', 'platform_edge'): + if el["tags"].get("railway") in ("platform", "platform_edge"): return True - if el['tags'].get('public_transport') == 'platform': + if el["tags"].get("public_transport") == "platform": return True return False @staticmethod def is_track(el): - if el['type'] != 'way' or 'tags' not in el: + if el["type"] != "way" or "tags" not in el: return False - return el['tags'].get('railway') in RAILWAY_TYPES + return el["tags"].get("railway") in RAILWAY_TYPES def __init__(self, station, city, stop_area=None): """Call this with a Station object.""" @@ -335,7 +377,7 @@ class StopArea: self.platforms = set() # set of el_ids of platforms self.exits = set() # el_id of subway_entrance for leaving the platform self.entrances = set() # el_id of subway_entrance for entering - # the platform + # the platform self.center = None # lon, lat of the station centre point self.centers = {} # el_id -> (lon, lat) for all elements self.transfer = None # el_id of a transfer relation @@ -346,13 +388,13 @@ class StopArea: self.colour = station.colour if stop_area: - self.name = stop_area['tags'].get('name', self.name) - self.int_name = stop_area['tags'].get( - 'int_name', stop_area['tags'].get('name:en', self.int_name) + self.name = stop_area["tags"].get("name", self.name) + self.int_name = stop_area["tags"].get( + "int_name", stop_area["tags"].get("name:en", self.int_name) ) try: self.colour = ( - normalize_colour(stop_area['tags'].get('colour')) + normalize_colour(stop_area["tags"].get("colour")) or self.colour ) except ValueError as e: @@ -360,43 +402,43 @@ class StopArea: # If we have a stop area, add all elements from it warned_about_tracks = False - for m in stop_area['members']: + for m in stop_area["members"]: k = el_id(m) m_el = city.elements.get(k) - if m_el and 'tags' in m_el: + if m_el and "tags" in m_el: if Station.is_station(m_el, city.modes): if k != station.id: city.error( - 'Stop area has multiple stations', stop_area + "Stop area has multiple stations", stop_area ) elif StopArea.is_stop(m_el): self.stops.add(k) elif StopArea.is_platform(m_el): self.platforms.add(k) - elif m_el['tags'].get('railway') == 'subway_entrance': - if m_el['type'] != 'node': - city.warn('Subway entrance is not a node', m_el) + elif m_el["tags"].get("railway") == "subway_entrance": + if m_el["type"] != "node": + city.warn("Subway entrance is not a node", m_el) if ( - m_el['tags'].get('entrance') != 'exit' - and m['role'] != 'exit_only' + m_el["tags"].get("entrance") != "exit" + and m["role"] != "exit_only" ): self.entrances.add(k) if ( - m_el['tags'].get('entrance') != 'entrance' - and m['role'] != 'entry_only' + m_el["tags"].get("entrance") != "entrance" + and m["role"] != "entry_only" ): self.exits.add(k) elif StopArea.is_track(m_el): if not warned_about_tracks: city.warn( - 'Tracks in a stop_area relation', stop_area + "Tracks in a stop_area relation", stop_area ) warned_about_tracks = True else: # Otherwise add nearby entrances center = station.center for c_el in city.elements.values(): - if c_el.get('tags', {}).get('railway') == 'subway_entrance': + if c_el.get("tags", {}).get("railway") == "subway_entrance": c_id = el_id(c_el) if c_id not in city.stop_areas: c_center = el_center(c_el) @@ -405,28 +447,28 @@ class StopArea: and distance(center, c_center) <= MAX_DISTANCE_TO_ENTRANCES ): - if c_el['type'] != 'node': + if c_el["type"] != "node": city.warn( - 'Subway entrance is not a node', c_el + "Subway entrance is not a node", c_el ) - etag = c_el['tags'].get('entrance') - if etag != 'exit': + etag = c_el["tags"].get("entrance") + if etag != "exit": self.entrances.add(c_id) - if etag != 'entrance': + if etag != "entrance": self.exits.add(c_id) if self.exits and not self.entrances: city.warn( - 'Only exits for a station, no entrances', + "Only exits for a station, no entrances", stop_area or station.element, ) if self.entrances and not self.exits: - city.warn('No exits for a station', stop_area or station.element) + city.warn("No exits for a station", stop_area or station.element) for el in self.get_elements(): self.centers[el] = el_center(city.elements[el]) - """Calculates the center point of the station. This algorithm + """Calculate the center point of the station. This algorithm cannot rely on a station node, since many stop_areas can share one. Basically it averages center points of all platforms and stop positions.""" @@ -450,8 +492,9 @@ class StopArea: return result def __repr__(self): - return 'StopArea(id={}, name={}, station={}, transfer={}, center={})'.format( - self.id, self.name, self.station, self.transfer, self.center + return ( + f"StopArea(id={self.id}, name={self.name}, station={self.station}," + f" transfer={self.transfer}, center={self.center})" ) @@ -476,34 +519,34 @@ class RouteStop: @staticmethod def get_actual_role(el, role, modes): if StopArea.is_stop(el): - return 'stop' + return "stop" elif StopArea.is_platform(el): - return 'platform' + return "platform" elif Station.is_station(el, modes): - if 'platform' in role: - return 'platform' + if "platform" in role: + return "platform" else: - return 'stop' + return "stop" return None def add(self, member, relation, city): el = city.elements[el_id(member)] - role = member['role'] + role = member["role"] if StopArea.is_stop(el): - if 'platform' in role: - city.warn('Stop position in a platform role in a route', el) - if el['type'] != 'node': - city.error('Stop position is not a node', el) + if "platform" in role: + city.warn("Stop position in a platform role in a route", el) + if el["type"] != "node": + city.error("Stop position is not a node", el) self.stop = el_center(el) - if 'entry_only' not in role: + if "entry_only" not in role: self.can_exit = True - if 'exit_only' not in role: + if "exit_only" not in role: self.can_enter = True elif Station.is_station(el, city.modes): - if el['type'] != 'node': - city.notice('Station in route is not a node', el) + if el["type"] != "node": + city.notice("Station in route is not a node", el) if not self.seen_stop and not self.seen_platform: self.stop = el_center(el) @@ -511,12 +554,12 @@ class RouteStop: self.can_exit = True elif StopArea.is_platform(el): - if 'stop' in role: - city.warn('Platform in a stop role in a route', el) - if 'exit_only' not in role: + if "stop" in role: + city.warn("Platform in a stop role in a route", el) + if "exit_only" not in role: self.platform_entry = el_id(el) self.can_enter = True - if 'entry_only' not in role: + if "entry_only" not in role: self.platform_exit = el_id(el) self.can_exit = True if not self.seen_stop: @@ -524,39 +567,38 @@ class RouteStop: multiple_check = False actual_role = RouteStop.get_actual_role(el, role, city.modes) - if actual_role == 'platform': - if role == 'platform_entry_only': + if actual_role == "platform": + if role == "platform_entry_only": multiple_check = self.seen_platform_entry self.seen_platform_entry = True - elif role == 'platform_exit_only': + elif role == "platform_exit_only": multiple_check = self.seen_platform_exit self.seen_platform_exit = True else: - if role != 'platform' and 'stop' not in role: + if role != "platform" and "stop" not in role: city.warn( - "Platform with invalid role '{}' in a route".format( - role - ), - el, + f'Platform "{el["tags"].get("name", "")}" ' + f'({el_id(el)}) with invalid role "{role}" in route', + relation, ) multiple_check = self.seen_platform self.seen_platform_entry = True self.seen_platform_exit = True - elif actual_role == 'stop': + elif actual_role == "stop": multiple_check = self.seen_stop self.seen_stop = True if multiple_check: - log_function = city.error if actual_role == 'stop' else city.notice + log_function = city.error if actual_role == "stop" else city.notice log_function( - 'Multiple {}s for a station "{}" ({}) in a route relation'.format( - actual_role, el['tags'].get('name', ''), el_id(el) - ), + f'Multiple {actual_role}s for a station "' + f'{el["tags"].get("name", "")} ' + f"({el_id(el)}) in a route relation", relation, ) def __repr__(self): return ( - 'RouteStop(stop={}, pl_entry={}, pl_exit={}, stoparea={})'.format( + "RouteStop(stop={}, pl_entry={}, pl_exit={}, stoparea={})".format( self.stop, self.platform_entry, self.platform_exit, @@ -571,61 +613,88 @@ class Route: @staticmethod def is_route(el, modes): if ( - el['type'] != 'relation' - or el.get('tags', {}).get('type') != 'route' + el["type"] != "relation" + or el.get("tags", {}).get("type") != "route" ): return False - if 'members' not in el: + if "members" not in el: return False - if el['tags'].get('route') not in modes: + if el["tags"].get("route") not in modes: return False for k in CONSTRUCTION_KEYS: - if k in el['tags']: + if k in el["tags"]: return False - if 'ref' not in el['tags'] and 'name' not in el['tags']: + if "ref" not in el["tags"] and "name" not in el["tags"]: return False return True @staticmethod def get_network(relation): - for k in ('network:metro', 'network', 'operator'): - if k in relation['tags']: - return relation['tags'][k] + for k in ("network:metro", "network", "operator"): + if k in relation["tags"]: + return relation["tags"][k] return None @staticmethod def get_interval(tags): v = None - for k in ('interval', 'headway'): + for k in ("interval", "headway"): if k in tags: v = tags[k] break else: for kk in tags: - if kk.startswith(k + ':'): + if kk.startswith(k + ":"): v = tags[kk] break if not v: return None - try: - return float(v) - except ValueError: - return None + return osm_interval_to_seconds(v) - def build_longest_line(self, relation): + def __init__(self, relation, city, master=None): + assert Route.is_route( + relation, city.modes + ), f"The relation does not seem to be a route: {relation}" + self.city = city + self.element = relation + self.id = el_id(relation) + + self.ref = None + self.name = None + self.mode = None + self.colour = None + self.infill = None + self.network = None + self.interval = None + self.start_time = None + self.end_time = None + self.is_circular = False + self.stops = [] # List of RouteStop + # Would be a list of (lon, lat) for the longest stretch. Can be empty. + self.tracks = None + # Index of the fist stop that is located on/near the self.tracks + self.first_stop_on_rails_index = None + # Index of the last stop that is located on/near the self.tracks + self.last_stop_on_rails_index = None + + self.process_tags(master) + stop_position_elements = self.process_stop_members() + self.process_tracks(stop_position_elements) + + def build_longest_line(self): line_nodes = set() last_track = [] track = [] warned_about_holes = False - for m in relation['members']: + for m in self.element["members"]: el = self.city.elements.get(el_id(m), None) if not el or not StopArea.is_track(el): continue - if 'nodes' not in el or len(el['nodes']) < 2: - self.city.error('Cannot find nodes in a railway', el) + if "nodes" not in el or len(el["nodes"]) < 2: + self.city.error("Cannot find nodes in a railway", el) continue - nodes = ['n{}'.format(n) for n in el['nodes']] - if m['role'] == 'backward': + nodes = ["n{}".format(n) for n in el["nodes"]] + if m["role"] == "backward": nodes.reverse() line_nodes.update(nodes) if not track: @@ -651,10 +720,10 @@ class Route: # Store the track if it is long and clean it if not warned_about_holes: self.city.warn( - 'Hole in route rails near node {}'.format( + "Hole in route rails near node {}".format( track[-1] ), - relation, + self.element, ) warned_about_holes = True if len(track) > len(last_track): @@ -671,34 +740,50 @@ class Route: ] return last_track, line_nodes - def project_stops_on_line(self): + def get_stop_projections(self): projected = [project_on_line(x.stop, self.tracks) for x in self.stops] - def is_stop_near_tracks(stop_index): + def stop_near_tracks_criterion(stop_index: int): return ( - projected[stop_index]['projected_point'] is not None + projected[stop_index]["projected_point"] is not None and distance( self.stops[stop_index].stop, - projected[stop_index]['projected_point'], + projected[stop_index]["projected_point"], ) <= MAX_DISTANCE_STOP_TO_LINE ) - start = 0 - while start < len(self.stops) and not is_stop_near_tracks(start): - start += 1 - end = len(self.stops) - 1 - while end > start and not is_stop_near_tracks(end): - end -= 1 - tracks_start = [] - tracks_end = [] - stops_on_longest_line = [] + return projected, stop_near_tracks_criterion + + def project_stops_on_line(self): + projected, stop_near_tracks_criterion = self.get_stop_projections() + + projected_stops_data = { + "first_stop_on_rails_index": None, + "last_stop_on_rails_index": None, + "stops_on_longest_line": [], # list [{'route_stop': RouteStop, + # 'coords': (lon, lat), + # 'positions_on_rails': [] } + } + first_index = 0 + while first_index < len(self.stops) and not stop_near_tracks_criterion( + first_index + ): + first_index += 1 + projected_stops_data["first_stop_on_rails_index"] = first_index + + last_index = len(self.stops) - 1 + while last_index > projected_stops_data[ + "first_stop_on_rails_index" + ] and not stop_near_tracks_criterion(last_index): + last_index -= 1 + projected_stops_data["last_stop_on_rails_index"] = last_index + for i, route_stop in enumerate(self.stops): - if i < start: - tracks_start.append(route_stop.stop) - elif i > end: - tracks_end.append(route_stop.stop) - elif projected[i]['projected_point'] is None: + if not first_index <= i <= last_index: + continue + + if projected[i]["projected_point"] is None: self.city.error( 'Stop "{}" {} is nowhere near the tracks'.format( route_stop.stoparea.name, route_stop.stop @@ -706,7 +791,12 @@ class Route: self.element, ) else: - projected_point = projected[i]['projected_point'] + stop_data = { + "route_stop": route_stop, + "coords": None, + "positions_on_rails": None, + } + projected_point = projected[i]["projected_point"] # We've got two separate stations with a good stretch of # railway tracks between them. Put these on tracks. d = round(distance(route_stop.stop, projected_point)) @@ -718,16 +808,12 @@ class Route: self.element, ) else: - route_stop.stop = projected_point - route_stop.positions_on_rails = projected[i][ - 'positions_on_line' + stop_data["coords"] = projected_point + stop_data["positions_on_rails"] = projected[i][ + "positions_on_line" ] - stops_on_longest_line.append(route_stop) - if start >= len(self.stops): - self.tracks = tracks_start - elif tracks_start or tracks_end: - self.tracks = tracks_start + self.tracks + tracks_end - return stops_on_longest_line + projected_stops_data["stops_on_longest_line"].append(stop_data) + return projected_stops_data def calculate_distances(self): dist = 0 @@ -735,9 +821,15 @@ class Route: for i, stop in enumerate(self.stops): if i > 0: direct = distance(stop.stop, self.stops[i - 1].stop) - d_line = distance_on_line( - self.stops[i - 1].stop, stop.stop, self.tracks, vertex - ) + d_line = None + if ( + self.first_stop_on_rails_index + <= i + <= self.last_stop_on_rails_index + ): + d_line = distance_on_line( + self.stops[i - 1].stop, stop.stop, self.tracks, vertex + ) if d_line and direct - 10 <= d_line[0] <= direct * 2: vertex = d_line[1] dist += round(d_line[0]) @@ -745,100 +837,84 @@ class Route: dist += round(direct) stop.distance = dist - def __init__(self, relation, city, master=None): - if not Route.is_route(relation, city.modes): - raise Exception( - 'The relation does not seem a route: {}'.format(relation) - ) - master_tags = {} if not master else master['tags'] - self.city = city - self.element = relation - self.id = el_id(relation) - if 'ref' not in relation['tags'] and 'ref' not in master_tags: - city.notice('Missing ref on a route', relation) - self.ref = relation['tags'].get( - 'ref', master_tags.get('ref', relation['tags'].get('name', None)) + def process_tags(self, master): + relation = self.element + master_tags = {} if not master else master["tags"] + if "ref" not in relation["tags"] and "ref" not in master_tags: + self.city.notice("Missing ref on a route", relation) + self.ref = relation["tags"].get( + "ref", master_tags.get("ref", relation["tags"].get("name", None)) ) - self.name = relation['tags'].get('name', None) - self.mode = relation['tags']['route'] + self.name = relation["tags"].get("name", None) + self.mode = relation["tags"]["route"] if ( - 'colour' not in relation['tags'] - and 'colour' not in master_tags - and self.mode != 'tram' + "colour" not in relation["tags"] + and "colour" not in master_tags + and self.mode != "tram" ): - city.notice('Missing colour on a route', relation) + self.city.notice("Missing colour on a route", relation) try: self.colour = normalize_colour( - relation['tags'].get('colour', master_tags.get('colour', None)) + relation["tags"].get("colour", master_tags.get("colour", None)) ) except ValueError as e: self.colour = None - city.warn(str(e), relation) + self.city.warn(str(e), relation) try: self.infill = normalize_colour( - relation['tags'].get( - 'colour:infill', master_tags.get('colour:infill', None) + relation["tags"].get( + "colour:infill", master_tags.get("colour:infill", None) ) ) except ValueError as e: self.infill = None - city.warn(str(e), relation) + self.city.warn(str(e), relation) self.network = Route.get_network(relation) self.interval = Route.get_interval( - relation['tags'] + relation["tags"] ) or Route.get_interval(master_tags) - if relation['tags'].get('public_transport:version') == '1': - city.warn( - 'Public transport version is 1, which means the route ' - 'is an unsorted pile of objects', + self.start_time, self.end_time = get_start_end_times( + relation["tags"].get( + "opening_hours", master_tags.get("opening_hours", "") + ) + ) + if relation["tags"].get("public_transport:version") == "1": + self.city.warn( + "Public transport version is 1, which means the route " + "is an unsorted pile of objects", relation, ) - self.is_circular = False - # self.tracks would be a list of (lon, lat) for the longest stretch. Can be empty - tracks, line_nodes = self.build_longest_line(relation) - self.tracks = [el_center(city.elements.get(k)) for k in tracks] - if ( - None in self.tracks - ): # usually, extending BBOX for the city is needed - self.tracks = [] - for n in filter(lambda x: x not in city.elements, tracks): - city.warn( - 'The dataset is missing the railway tracks node {}'.format( - n - ), - relation, - ) - break - self.stops = [] # List of RouteStop + def process_stop_members(self): stations = set() # temporary for recording stations seen_stops = False seen_platforms = False repeat_pos = None - for m in relation['members']: - if 'inactive' in m['role']: + stop_position_elements = [] + for m in self.element["members"]: + if "inactive" in m["role"]: continue k = el_id(m) - if k in city.stations: - st_list = city.stations[k] + if k in self.city.stations: + st_list = self.city.stations[k] st = st_list[0] if len(st_list) > 1: - city.error( - 'Ambiguous station {} in route. Please use stop_position or split ' - 'interchange stations'.format(st.name), - relation, + self.city.error( + f"Ambiguous station {st.name} in route. Please " + "use stop_position or split interchange stations", + self.element, ) - el = city.elements[k] + el = self.city.elements[k] actual_role = RouteStop.get_actual_role( - el, m['role'], city.modes + el, m["role"], self.city.modes ) if actual_role: - if m['role'] and actual_role not in m['role']: - city.warn( + if m["role"] and actual_role not in m["role"]: + self.city.warn( "Wrong role '{}' for {} {}".format( - m['role'], actual_role, k + m["role"], actual_role, k ), - relation, + self.element, ) if repeat_pos is None: if not self.stops or st not in stations: @@ -852,11 +928,11 @@ class Route: if ( (seen_stops and seen_platforms) or ( - actual_role == 'stop' + actual_role == "stop" and not seen_platforms ) or ( - actual_role == 'platform' + actual_role == "platform" and not seen_stops ) ): @@ -870,14 +946,14 @@ class Route: if repeat_pos >= len(self.stops): continue # Check that the type matches - if (actual_role == 'stop' and seen_stops) or ( - actual_role == 'platform' and seen_platforms + if (actual_role == "stop" and seen_stops) or ( + actual_role == "platform" and seen_platforms ): - city.error( + self.city.error( 'Found an out-of-place {}: "{}" ({})'.format( - actual_role, el['tags'].get('name', ''), k + actual_role, el["tags"].get("name", ""), k ), - relation, + self.element, ) continue # Find the matching stop starting with index repeat_pos @@ -887,93 +963,204 @@ class Route: ): repeat_pos += 1 if repeat_pos >= len(self.stops): - city.error( - 'Incorrect order of {}s at {}'.format( + self.city.error( + "Incorrect order of {}s at {}".format( actual_role, k ), - relation, + self.element, ) continue stop = self.stops[repeat_pos] - stop.add(m, relation, city) + stop.add(m, self.element, self.city) if repeat_pos is None: seen_stops |= stop.seen_stop or stop.seen_station seen_platforms |= stop.seen_platform if StopArea.is_stop(el): - if k not in line_nodes: - city.warn( - 'Stop position "{}" ({}) is not on tracks'.format( - el['tags'].get('name', ''), k - ), - relation, - ) + stop_position_elements.append(el) + continue - if k not in city.elements: - if 'stop' in m['role'] or 'platform' in m['role']: + if k not in self.city.elements: + if "stop" in m["role"] or "platform" in m["role"]: raise CriticalValidationError( - '{} {} {} for route relation {} is not in the dataset'.format( - m['role'], m['type'], m['ref'], relation['id'] - ) + f"{m['role']} {m['type']} {m['ref']} for route " + f"relation {self.element['id']} is not in the dataset" ) continue - el = city.elements[k] - if 'tags' not in el: - city.error('Untagged object {} in a route'.format(k), relation) + el = self.city.elements[k] + if "tags" not in el: + self.city.error( + f"Untagged object {k} in a route", self.element + ) continue is_under_construction = False for ck in CONSTRUCTION_KEYS: - if ck in el['tags']: - city.warn( - 'Under construction {} {} in route. Consider ' - 'setting \'inactive\' role or removing construction attributes'.format( - m['role'] or 'feature', k - ), - relation, + if ck in el["tags"]: + self.city.warn( + f"Under construction {m['role'] or 'feature'} {k} " + "in route. Consider setting 'inactive' role or " + "removing construction attributes", + self.element, ) is_under_construction = True break if is_under_construction: continue - if Station.is_station(el, city.modes): + if Station.is_station(el, self.city.modes): # A station may be not included into this route due to previous - # 'stop area has multiple stations' error. No other error message is needed. + # 'stop area has multiple stations' error. No other error + # message is needed. pass - elif el['tags'].get('railway') in ('station', 'halt'): - city.error( - 'Missing station={} on a {}'.format(self.mode, m['role']), + elif el["tags"].get("railway") in ("station", "halt"): + self.city.error( + "Missing station={} on a {}".format(self.mode, m["role"]), el, ) else: actual_role = RouteStop.get_actual_role( - el, m['role'], city.modes + el, m["role"], self.city.modes ) if actual_role: - city.error( - '{} {} {} is not connected to a station in route'.format( - actual_role, m['type'], m['ref'] - ), - relation, + self.city.error( + f"{actual_role} {m['type']} {m['ref']} is not " + "connected to a station in route", + self.element, ) elif not StopArea.is_track(el): - city.warn( - 'Unknown member type for {} {} in route'.format( - m['type'], m['ref'] + self.city.warn( + "Unknown member type for {} {} in route".format( + m["type"], m["ref"] ), - relation, + self.element, ) + return stop_position_elements + + def process_tracks(self, stop_position_elements): + tracks, line_nodes = self.build_longest_line() + + for stop_el in stop_position_elements: + stop_id = el_id(stop_el) + if stop_id not in line_nodes: + self.city.warn( + 'Stop position "{}" ({}) is not on tracks'.format( + stop_el["tags"].get("name", ""), stop_id + ), + self.element, + ) + + # self.tracks would be a list of (lon, lat) for the longest stretch. + # Can be empty. + self.tracks = [el_center(self.city.elements.get(k)) for k in tracks] + if ( + None in self.tracks + ): # usually, extending BBOX for the city is needed + self.tracks = [] + for n in filter(lambda x: x not in self.city.elements, tracks): + self.city.warn( + f"The dataset is missing the railway tracks node {n}", + self.element, + ) + break + if len(self.stops) > 1: self.is_circular = ( self.stops[0].stoparea == self.stops[-1].stoparea ) - stops_on_longest_line = self.project_stops_on_line() - self.check_and_recover_stops_order(stops_on_longest_line) + if ( + self.is_circular + and self.tracks + and self.tracks[0] != self.tracks[-1] + ): + self.city.warn( + "Non-closed rail sequence in a circular route", + self.element, + ) + + projected_stops_data = self.project_stops_on_line() + self.check_and_recover_stops_order(projected_stops_data) + self.apply_projected_stops_data(projected_stops_data) self.calculate_distances() + def apply_projected_stops_data(self, projected_stops_data: dict) -> None: + """Store better stop coordinates and indexes of first/last stops + that lie on a continuous track line, to the instance attributes. + """ + for attr in ("first_stop_on_rails_index", "last_stop_on_rails_index"): + setattr(self, attr, projected_stops_data[attr]) + + for stop_data in projected_stops_data["stops_on_longest_line"]: + route_stop = stop_data["route_stop"] + route_stop.positions_on_rails = stop_data["positions_on_rails"] + if stop_coords := stop_data["coords"]: + route_stop.stop = stop_coords + + def get_extended_tracks(self): + """Amend tracks with points of leading/trailing self.stops + that were not projected onto the longest tracks line. + Return a new array. + """ + if self.first_stop_on_rails_index >= len(self.stops): + tracks = [route_stop.stop for route_stop in self.stops] + else: + tracks = ( + [ + route_stop.stop + for i, route_stop in enumerate(self.stops) + if i < self.first_stop_on_rails_index + ] + + self.tracks + + [ + route_stop.stop + for i, route_stop in enumerate(self.stops) + if i > self.last_stop_on_rails_index + ] + ) + return tracks + + def get_truncated_tracks(self, tracks): + """Truncate leading/trailing segments of `tracks` param + that are beyond the first and last stop locations. + Return a new array. + """ + if self.is_circular: + return tracks.copy() + + first_stop_location = find_segment(self.stops[0].stop, tracks, 0) + last_stop_location = find_segment(self.stops[-1].stop, tracks, 0) + + if last_stop_location != (None, None): + seg2, u2 = last_stop_location + if u2 == 0.0: + # Make seg2 the segment the last_stop_location is + # at the middle or end of + seg2 -= 1 + # u2 = 1.0 + if seg2 + 2 < len(tracks): + tracks = tracks[0 : seg2 + 2] # noqa E203 + tracks[-1] = self.stops[-1].stop + + if first_stop_location != (None, None): + seg1, u1 = first_stop_location + if u1 == 1.0: + # Make seg1 the segment the first_stop_location is + # at the beginning or middle of + seg1 += 1 + # u1 = 0.0 + if seg1 > 0: + tracks = tracks[seg1:] + tracks[0] = self.stops[0].stop + + return tracks + + def get_tracks_geometry(self): + tracks = self.get_extended_tracks() + tracks = self.get_truncated_tracks(tracks) + return tracks + def check_stops_order_by_angle(self): disorder_warnings = [] disorder_errors = [] @@ -984,8 +1171,9 @@ class Route: self.stops[si + 2].stop, ) if angle < ALLOWED_ANGLE_BETWEEN_STOPS: - msg = 'Angle between stops around "{}" is too narrow, {} degrees'.format( - self.stops[si + 1].stoparea.name, angle + msg = ( + f'Angle between stops around "{self.stops[si + 1]}" ' + f"is too narrow, {angle} degrees" ) if angle < DISALLOWED_ANGLE_BETWEEN_STOPS: disorder_errors.append(msg) @@ -996,34 +1184,15 @@ class Route: def check_stops_order_on_tracks_direct(self, stop_sequence): """Checks stops order on tracks, following stop_sequence in direct order only. - :param stop_sequence: list of RouteStop that belong to the - longest contiguous sequence of tracks in a route. + :param stop_sequence: list of dict{'route_stop', 'positions_on_rails', + 'coords'} for RouteStops that belong to the longest contiguous + sequence of tracks in a route. :return: error message on the first order violation or None. """ - - def make_assertion_error_msg(route_stop, error_type): - return ( - "stop_area {} '{}' has {} 'positions_on_rails' " - "attribute in route {}".format( - route_stop.stoparea.id, - route_stop.stoparea.name, - "no" if error_type == 1 else "empty", - self.id, - ) - ) - allowed_order_violations = 1 if self.is_circular else 0 max_position_on_rails = -1 - for route_stop in stop_sequence: - assert hasattr( - route_stop, 'positions_on_rails' - ), make_assertion_error_msg(route_stop, error_type=1) - - positions_on_rails = route_stop.positions_on_rails - assert positions_on_rails, make_assertion_error_msg( - route_stop, error_type=2 - ) - + for stop_data in stop_sequence: + positions_on_rails = stop_data["positions_on_rails"] suitable_occurrence = 0 while ( suitable_occurrence < len(positions_on_rails) @@ -1036,38 +1205,46 @@ class Route: suitable_occurrence -= 1 allowed_order_violations -= 1 else: + route_stop = stop_data["route_stop"] return 'Stops on tracks are unordered near "{}" {}'.format( route_stop.stoparea.name, route_stop.stop ) max_position_on_rails = positions_on_rails[suitable_occurrence] - def check_stops_order_on_tracks(self, stop_sequence): + def check_stops_order_on_tracks(self, projected_stops_data): """Checks stops order on tracks, trying direct and reversed order of stops in the stop_sequence. - :param stop_sequence: list of RouteStop that belong to the - longest contiguous sequence of tracks in a route. + :param projected_stops_data: info about RouteStops that belong to the + longest contiguous sequence of tracks in a route. May be changed + if tracks reversing is performed. :return: error message on the first order violation or None. """ - error_message = self.check_stops_order_on_tracks_direct(stop_sequence) + error_message = self.check_stops_order_on_tracks_direct( + projected_stops_data["stops_on_longest_line"] + ) if error_message: error_message_reversed = self.check_stops_order_on_tracks_direct( - reversed(stop_sequence) + reversed(projected_stops_data["stops_on_longest_line"]) ) if error_message_reversed is None: error_message = None self.city.warn( - 'Tracks seem to go in the opposite direction to stops', + "Tracks seem to go in the opposite direction to stops", self.element, ) + self.tracks.reverse() + new_projected_stops_data = self.project_stops_on_line() + projected_stops_data.update(new_projected_stops_data) + return error_message - def check_stops_order(self, stops_on_longest_line): + def check_stops_order(self, projected_stops_data): ( angle_disorder_warnings, angle_disorder_errors, ) = self.check_stops_order_by_angle() disorder_on_tracks_error = self.check_stops_order_on_tracks( - stops_on_longest_line + projected_stops_data ) disorder_warnings = angle_disorder_warnings disorder_errors = angle_disorder_errors @@ -1075,9 +1252,12 @@ class Route: disorder_errors.append(disorder_on_tracks_error) return disorder_warnings, disorder_errors - def check_and_recover_stops_order(self, stops_on_longest_line): + def check_and_recover_stops_order(self, projected_stops_data: dict): + """ + :param projected_stops_data: may change if we need to reverse tracks + """ disorder_warnings, disorder_errors = self.check_stops_order( - stops_on_longest_line + projected_stops_data ) if disorder_warnings or disorder_errors: resort_success = False @@ -1104,7 +1284,7 @@ class Route: for stop in self.stops: station = stop.stoparea.station stop_name = station.name - if stop_name == '?' and station.int_name: + if stop_name == "?" and station.int_name: stop_name = station.int_name # We won't programmatically recover routes with repeating stations: # such cases are rare and deserves manual verification @@ -1120,7 +1300,7 @@ class Route: suitable_itineraries = [] for itinerary in self.city.recovery_data[route_id]: itinerary_stop_names = [ - stop['name'] for stop in itinerary['stations'] + stop["name"] for stop in itinerary["stations"] ] if not ( len(stop_names) == len(itinerary_stop_names) @@ -1128,9 +1308,9 @@ class Route: ): continue big_station_displacement = False - for it_stop in itinerary['stations']: - name = it_stop['name'] - it_stop_center = it_stop['center'] + for it_stop in itinerary["stations"]: + name = it_stop["name"] + it_stop_center = it_stop["center"] self_stop_center = self_stops[name].stoparea.station.center if ( distance(it_stop_center, self_stop_center) @@ -1146,23 +1326,23 @@ class Route: elif len(suitable_itineraries) == 1: matching_itinerary = suitable_itineraries[0] else: - from_tag = self.element['tags'].get('from') - to_tag = self.element['tags'].get('to') + from_tag = self.element["tags"].get("from") + to_tag = self.element["tags"].get("to") if not from_tag and not to_tag: return False matching_itineraries = [ itin for itin in suitable_itineraries if from_tag - and itin['from'] == from_tag + and itin["from"] == from_tag or to_tag - and itin['to'] == to_tag + and itin["to"] == to_tag ] if len(matching_itineraries) != 1: return False matching_itinerary = matching_itineraries[0] self.stops = [ - self_stops[stop['name']] for stop in matching_itinerary['stations'] + self_stops[stop["name"]] for stop in matching_itinerary["stations"] ] return True @@ -1177,8 +1357,8 @@ class Route: def __repr__(self): return ( - 'Route(id={}, mode={}, ref={}, name={}, network={}, interval={}, ' - 'circular={}, num_stops={}, line_length={} m, from={}, to={}' + "Route(id={}, mode={}, ref={}, name={}, network={}, interval={}, " + "circular={}, num_stops={}, line_length={} m, from={}, to={}" ).format( self.id, self.mode, @@ -1202,27 +1382,27 @@ class RouteMaster: self.has_master = master is not None self.interval_from_master = False if master: - self.ref = master['tags'].get( - 'ref', master['tags'].get('name', None) + self.ref = master["tags"].get( + "ref", master["tags"].get("name", None) ) try: self.colour = normalize_colour( - master['tags'].get('colour', None) + master["tags"].get("colour", None) ) except ValueError: self.colour = None try: self.infill = normalize_colour( - master['tags'].get('colour:infill', None) + master["tags"].get("colour:infill", None) ) except ValueError: - self.colour = None + self.infill = None self.network = Route.get_network(master) - self.mode = master['tags'].get( - 'route_master', None + self.mode = master["tags"].get( + "route_master", None ) # This tag is required, but okay - self.name = master['tags'].get('name', None) - self.interval = Route.get_interval(master['tags']) + self.name = master["tags"].get("name", None) + self.interval = Route.get_interval(master["tags"]) self.interval_from_master = self.interval is not None else: self.ref = None @@ -1258,8 +1438,9 @@ class RouteMaster: self.infill = route.infill elif route.infill and route.infill != self.infill: city.notice( - 'Route "{}" has different infill colour from master "{}"'.format( - route.infill, self.infill + ( + f'Route "{route.infill}" has different infill colour ' + f'from master "{self.infill}"' ), route.element, ) @@ -1281,7 +1462,7 @@ class RouteMaster: self.mode = route.mode elif route.mode != self.mode: city.error( - 'Incompatible PT mode: master has {} and route has {}'.format( + "Incompatible PT mode: master has {} and route has {}".format( self.mode, route.mode ), route.element, @@ -1321,46 +1502,50 @@ class RouteMaster: return iter(self.routes) def __repr__(self): - return 'RouteMaster(id={}, mode={}, ref={}, name={}, network={}, num_variants={}'.format( - self.id, - self.mode, - self.ref, - self.name, - self.network, - len(self.routes), + return ( + f"RouteMaster(id={self.id}, mode={self.mode}, ref={self.ref}, " + f"name={self.name}, network={self.network}, " + f"num_variants={len(self.routes)}" ) class City: - def __init__(self, row, overground=False): + route_class = Route + + def __init__(self, city_data, overground=False): + self.validate_called = False self.errors = [] self.warnings = [] self.notices = [] - self.name = row[1] - self.country = row[2] - self.continent = row[3] - if not row[0]: - self.error('City {} does not have an id'.format(self.name)) - self.id = int(row[0] or '0') + self.id = int(city_data["id"]) + self.name = city_data["name"] + self.country = city_data["country"] + self.continent = city_data["continent"] self.overground = overground if not overground: - self.num_stations = int(row[4]) - self.num_lines = int(row[5] or '0') - self.num_light_lines = int(row[6] or '0') - self.num_interchanges = int(row[7] or '0') + self.num_stations = int(city_data["num_stations"]) + self.num_lines = int(city_data["num_lines"] or "0") + self.num_light_lines = int(city_data["num_light_lines"] or "0") + self.num_interchanges = int(city_data["num_interchanges"] or "0") else: - self.num_tram_lines = int(row[4] or '0') - self.num_trolleybus_lines = int(row[5] or '0') - self.num_bus_lines = int(row[6] or '0') - self.num_other_lines = int(row[7] or '0') + self.num_tram_lines = int(city_data["num_tram_lines"] or "0") + self.num_trolleybus_lines = int( + city_data["num_trolleybus_lines"] or "0" + ) + self.num_bus_lines = int(city_data["num_bus_lines"] or "0") + self.num_other_lines = int(city_data["num_other_lines"] or "0") # Aquiring list of networks and modes - networks = None if len(row) <= 9 else row[9].split(':') + networks = ( + None + if not city_data["networks"] + else city_data["networks"].split(":") + ) if not networks or len(networks[-1]) == 0: self.networks = [] else: self.networks = set( - filter(None, [x.strip() for x in networks[-1].split(';')]) + filter(None, [x.strip() for x in networks[-1].split(";")]) ) if not networks or len(networks) < 2 or len(networks[0]) == 0: if self.overground: @@ -1368,10 +1553,10 @@ class City: else: self.modes = DEFAULT_MODES_RAPID else: - self.modes = set([x.strip() for x in networks[0].split(',')]) + self.modes = set([x.strip() for x in networks[0].split(",")]) # Reversing bbox so it is (xmin, ymin, xmax, ymax) - bbox = row[8].split(',') + bbox = city_data["bbox"].split(",") if len(bbox) == 4: self.bbox = [float(bbox[i]) for i in (1, 0, 3, 2)] else: @@ -1383,7 +1568,7 @@ class City: self.masters = {} # Dict el_id of route → route_master self.stop_areas = defaultdict( list - ) # El_id → list of el_id of stop_area + ) # El_id → list of stop_area elements it belongs to self.transfers = [] # List of lists of stop areas self.station_ids = set() # Set of stations' uid self.stops_and_platforms = set() # Set of stops and platforms el_id @@ -1392,11 +1577,11 @@ class City: @staticmethod def log_message(message, el): if el: - tags = el.get('tags', {}) + tags = el.get("tags", {}) message += ' ({} {}, "{}")'.format( - el['type'], - el.get('id', el.get('ref')), - tags.get('name', tags.get('ref', '')), + el["type"], + el.get("id", el.get("ref")), + tags.get("name", tags.get("ref", "")), ) return message @@ -1427,45 +1612,59 @@ class City: return False def add(self, el): - if el['type'] == 'relation' and 'members' not in el: + if el["type"] == "relation" and "members" not in el: return + self.elements[el_id(el)] = el - if el['type'] == 'relation' and 'tags' in el: - if el['tags'].get('type') == 'route_master': - for m in el['members']: - if m['type'] == 'relation': - if el_id(m) in self.masters: - self.error('Route in two route_masters', m) - self.masters[el_id(m)] = el - elif el['tags'].get('public_transport') == 'stop_area': - warned_about_duplicates = False - for m in el['members']: - stop_areas = self.stop_areas[el_id(m)] - if el in stop_areas: - if not warned_about_duplicates: - self.warn('Duplicate element in a stop area', el) - warned_about_duplicates = True - else: - stop_areas.append(el) + if not (el["type"] == "relation" and "tags" in el): + return + + relation_type = el["tags"].get("type") + if relation_type == "route_master": + for m in el["members"]: + if m["type"] != "relation": + continue + + if el_id(m) in self.masters: + self.error("Route in two route_masters", m) + self.masters[el_id(m)] = el + + elif el["tags"].get("public_transport") == "stop_area": + if relation_type != "public_transport": + self.warn( + "stop_area relation with " + f"type={relation_type}, needed type=public_transport", + el, + ) + return + + warned_about_duplicates = False + for m in el["members"]: + stop_areas = self.stop_areas[el_id(m)] + if el in stop_areas and not warned_about_duplicates: + self.warn("Duplicate element in a stop area", el) + warned_about_duplicates = True + else: + stop_areas.append(el) def make_transfer(self, sag): transfer = set() - for m in sag['members']: + for m in sag["members"]: k = el_id(m) el = self.elements.get(k) if not el: # A sag member may validly not belong to the city while # the sag does - near the city bbox boundary continue - if 'tags' not in el: + if "tags" not in el: self.warn( - 'An untagged object {} in a stop_area_group'.format(k), sag + "An untagged object {} in a stop_area_group".format(k), sag ) continue if ( - el['type'] != 'relation' - or el['tags'].get('type') != 'public_transport' - or el['tags'].get('public_transport') != 'stop_area' + el["type"] != "relation" + or el["tags"].get("type") != "public_transport" + or el["tags"].get("public_transport") != "stop_area" ): continue if k in self.stations: @@ -1477,9 +1676,12 @@ class City: # ChÃĸtelet subway station <-> # "ChÃĸtelet - Les Halles" railway station <-> # Les Halles subway station - # Counterexample 2: Saint-Petersburg, transfers ВиŅ‚ĐĩĐąŅĐēиК вОĐēСаĐģ <-> ПŅƒŅˆĐēиĐŊŅĐēĐ°Ņ <-> ЗвĐĩĐŊиĐŗĐžŅ€ĐžĐ´ŅĐēĐ°Ņ + # Counterexample 2: Saint-Petersburg, transfers + # ВиŅ‚ĐĩĐąŅĐēиК вОĐēСаĐģ <-> + # ПŅƒŅˆĐēиĐŊŅĐēĐ°Ņ <-> + # ЗвĐĩĐŊиĐŗĐžŅ€ĐžĐ´ŅĐēĐ°Ņ self.warn( - 'Stop area {} belongs to multiple interchanges'.format( + "Stop area {} belongs to multiple interchanges".format( k ) ) @@ -1494,13 +1696,13 @@ class City: if Station.is_station(el, self.modes): # See PR https://github.com/mapsme/subways/pull/98 if ( - el['type'] == 'relation' - and el['tags'].get('type') != 'multipolygon' + el["type"] == "relation" + and el["tags"].get("type") != "multipolygon" ): + rel_type = el["tags"].get("type") self.warn( - "A railway station cannot be a relation of type '{}'".format( - el['tags'].get('type') - ), + "A railway station cannot be a relation of type " + f"{rel_type}", el, ) continue @@ -1519,12 +1721,13 @@ class City: for st_el in station.get_elements(): self.stations[st_el].append(station) - # Check that stops and platforms belong to single stop_area + # Check that stops and platforms belong to + # a single stop_area for sp in station.stops | station.platforms: if sp in self.stops_and_platforms: self.notice( - 'A stop or a platform {} belongs to multiple ' - 'stop areas, might be correct'.format(sp) + f"A stop or a platform {sp} belongs to " + "multiple stop areas, might be correct" ) else: self.stops_and_platforms.add(sp) @@ -1532,7 +1735,7 @@ class City: # Extract routes for el in self.elements.values(): if Route.is_route(el, self.modes): - if el['tags'].get('access') in ('no', 'private'): + if el["tags"].get("access") in ("no", "private"): continue route_id = el_id(el) master = self.masters.get(route_id, None) @@ -1548,12 +1751,12 @@ class City: ): continue - route = Route(el, self, master) + route = self.route_class(el, self, master) if not route.stops: - self.warn('Route has no stops', el) + self.warn("Route has no stops", el) continue elif len(route.stops) == 1: - self.warn('Route has only one stop', el) + self.warn("Route has only one stop", el) continue k = el_id(master) if master else route.ref @@ -1561,15 +1764,16 @@ class City: self.routes[k] = RouteMaster(master) self.routes[k].add(route, self) - # Sometimes adding a route to a newly initialized RouteMaster can fail + # Sometimes adding a route to a newly initialized RouteMaster + # can fail if len(self.routes[k]) == 0: del self.routes[k] # And while we're iterating over relations, find interchanges if ( - el['type'] == 'relation' - and el.get('tags', {}).get('public_transport', None) - == 'stop_area_group' + el["type"] == "relation" + and el.get("tags", {}).get("public_transport", None) + == "stop_area_group" ): self.make_transfer(el) @@ -1588,50 +1792,56 @@ class City: def __iter__(self): return iter(self.routes.values()) + @property def is_good(self): + if not (self.errors or self.validate_called): + raise RuntimeError( + "You mustn't refer to City.is_good property before calling " + "the City.validate() method unless an error already occurred." + ) return len(self.errors) == 0 def get_validation_result(self): result = { - 'name': self.name, - 'country': self.country, - 'continent': self.continent, - 'stations_found': getattr(self, 'found_stations', 0), - 'transfers_found': getattr(self, 'found_interchanges', 0), - 'unused_entrances': getattr(self, 'unused_entrances', 0), - 'networks': getattr(self, 'found_networks', 0), + "name": self.name, + "country": self.country, + "continent": self.continent, + "stations_found": getattr(self, "found_stations", 0), + "transfers_found": getattr(self, "found_interchanges", 0), + "unused_entrances": getattr(self, "unused_entrances", 0), + "networks": getattr(self, "found_networks", 0), } if not self.overground: result.update( { - 'subwayl_expected': self.num_lines, - 'lightrl_expected': self.num_light_lines, - 'subwayl_found': getattr(self, 'found_lines', 0), - 'lightrl_found': getattr(self, 'found_light_lines', 0), - 'stations_expected': self.num_stations, - 'transfers_expected': self.num_interchanges, + "subwayl_expected": self.num_lines, + "lightrl_expected": self.num_light_lines, + "subwayl_found": getattr(self, "found_lines", 0), + "lightrl_found": getattr(self, "found_light_lines", 0), + "stations_expected": self.num_stations, + "transfers_expected": self.num_interchanges, } ) else: result.update( { - 'stations_expected': 0, - 'transfers_expected': 0, - 'busl_expected': self.num_bus_lines, - 'trolleybusl_expected': self.num_trolleybus_lines, - 'traml_expected': self.num_tram_lines, - 'otherl_expected': self.num_other_lines, - 'busl_found': getattr(self, 'found_bus_lines', 0), - 'trolleybusl_found': getattr( - self, 'found_trolleybus_lines', 0 + "stations_expected": 0, + "transfers_expected": 0, + "busl_expected": self.num_bus_lines, + "trolleybusl_expected": self.num_trolleybus_lines, + "traml_expected": self.num_tram_lines, + "otherl_expected": self.num_other_lines, + "busl_found": getattr(self, "found_bus_lines", 0), + "trolleybusl_found": getattr( + self, "found_trolleybus_lines", 0 ), - 'traml_found': getattr(self, 'found_tram_lines', 0), - 'otherl_found': getattr(self, 'found_other_lines', 0), + "traml_found": getattr(self, "found_tram_lines", 0), + "otherl_found": getattr(self, "found_other_lines", 0), } ) - result['warnings'] = self.warnings - result['errors'] = self.errors - result['notices'] = self.notices + result["warnings"] = self.warnings + result["errors"] = self.errors + result["notices"] = self.notices return result def count_unused_entrances(self): @@ -1639,19 +1849,19 @@ class City: stop_areas = set() for el in self.elements.values(): if ( - el['type'] == 'relation' - and 'tags' in el - and el['tags'].get('public_transport') == 'stop_area' - and 'members' in el + el["type"] == "relation" + and "tags" in el + and el["tags"].get("public_transport") == "stop_area" + and "members" in el ): - stop_areas.update([el_id(m) for m in el['members']]) + stop_areas.update([el_id(m) for m in el["members"]]) unused = [] not_in_sa = [] for el in self.elements.values(): if ( - el['type'] == 'node' - and 'tags' in el - and el['tags'].get('railway') == 'subway_entrance' + el["type"] == "node" + and "tags" in el + and el["tags"].get("railway") == "subway_entrance" ): i = el_id(el) if i in self.stations: @@ -1664,15 +1874,13 @@ class City: self.entrances_not_in_stop_areas = len(not_in_sa) if unused: self.notice( - '{} subway entrances are not connected to a station: {}'.format( - len(unused), format_elid_list(unused) - ) + f"{len(unused)} subway entrances are not connected to a " + f"station: {format_elid_list(unused)}" ) if not_in_sa: self.notice( - '{} subway entrances are not in stop_area relations: {}'.format( - len(not_in_sa), format_elid_list(not_in_sa) - ) + f"{len(not_in_sa)} subway entrances are not in stop_area " + f"relations: {format_elid_list(not_in_sa)}" ) def check_return_routes(self, rmaster): @@ -1681,9 +1889,10 @@ class City: for variant in rmaster: if len(variant) < 2: continue - # Using transfer ids because a train can arrive at different stations within a transfer - # But disregard transfer that may give an impression of a circular route - # (for example, Simonis / Elisabeth station and route 2 in Brussels) + # Using transfer ids because a train can arrive at different + # stations within a transfer. But disregard transfer that may give + # an impression of a circular route (for example, + # Simonis / Elisabeth station and route 2 in Brussels) if variant[0].stoparea.transfer == variant[-1].stoparea.transfer: t = (variant[0].stoparea.id, variant[-1].stoparea.id) else: @@ -1701,72 +1910,64 @@ class City: if len(variants) == 0: self.error( - 'An empty route master {}. Please set construction:route ' - 'if it is under construction'.format(rmaster.id) + "An empty route master {}. Please set construction:route " + "if it is under construction".format(rmaster.id) ) elif len(variants) == 1: - log_function = self.error if not rmaster.best.is_circular else self.notice + log_function = ( + self.error if not rmaster.best.is_circular else self.notice + ) log_function( - 'Only one route in route_master. ' - 'Please check if it needs a return route', + "Only one route in route_master. " + "Please check if it needs a return route", rmaster.best.element, ) else: for t, rel in variants.items(): if t not in have_return: - self.notice('Route does not have a return direction', rel) - - def validate_route_refs(self): - master_refs = sorted(m.ref for m in self.routes.values()) - for ref, group in itertools.groupby(master_refs): - if len(list(group)) > 1: - # This can occur if some routes with some ref belong to - # a route_master, but other with the same ref doesn't. - self.error("Route masters {} have the same ref".format( - ', '.join( - m.id for m in self.routes.values() if m.ref == ref - ) - )) + self.notice("Route does not have a return direction", rel) def validate_lines(self): self.found_light_lines = len( - [x for x in self.routes.values() if x.mode != 'subway'] + [x for x in self.routes.values() if x.mode != "subway"] ) self.found_lines = len(self.routes) - self.found_light_lines if self.found_lines != self.num_lines: self.error( - 'Found {} subway lines, expected {}'.format( + "Found {} subway lines, expected {}".format( self.found_lines, self.num_lines ) ) if self.found_light_lines != self.num_light_lines: self.error( - 'Found {} light rail lines, expected {}'.format( + "Found {} light rail lines, expected {}".format( self.found_light_lines, self.num_light_lines ) ) def validate_overground_lines(self): self.found_tram_lines = len( - [x for x in self.routes.values() if x.mode == 'tram'] + [x for x in self.routes.values() if x.mode == "tram"] ) self.found_bus_lines = len( - [x for x in self.routes.values() if x.mode == 'bus'] + [x for x in self.routes.values() if x.mode == "bus"] ) self.found_trolleybus_lines = len( - [x for x in self.routes.values() if x.mode == 'trolleybus'] + [x for x in self.routes.values() if x.mode == "trolleybus"] ) self.found_other_lines = len( [ x for x in self.routes.values() - if x.mode not in ('bus', 'trolleybus', 'tram') + if x.mode not in ("bus", "trolleybus", "tram") ] ) if self.found_tram_lines != self.num_tram_lines: - log_function = self.error if self.found_tram_lines == 0 else self.notice + log_function = ( + self.error if self.found_tram_lines == 0 else self.notice + ) log_function( - 'Found {} tram lines, expected {}'.format( + "Found {} tram lines, expected {}".format( self.found_tram_lines, self.num_tram_lines ), ) @@ -1787,58 +1988,58 @@ class City: if unused_stations: self.unused_stations = len(unused_stations) self.notice( - '{} unused stations: {}'.format( + "{} unused stations: {}".format( self.unused_stations, format_elid_list(unused_stations) ) ) self.count_unused_entrances() self.found_interchanges = len(self.transfers) - self.validate_route_refs() - if self.overground: self.validate_overground_lines() else: self.validate_lines() if self.found_stations != self.num_stations: - msg = 'Found {} stations in routes, expected {}'.format( + msg = "Found {} stations in routes, expected {}".format( self.found_stations, self.num_stations ) log_function = ( self.error if not ( - 0 - <= (self.num_stations - self.found_stations) - / self.num_stations - <= ALLOWED_STATIONS_MISMATCH - ) + 0 + <= (self.num_stations - self.found_stations) + / self.num_stations + <= ALLOWED_STATIONS_MISMATCH + ) else self.warn ) log_function(msg) if self.found_interchanges != self.num_interchanges: - msg = 'Found {} interchanges, expected {}'.format( + msg = "Found {} interchanges, expected {}".format( self.found_interchanges, self.num_interchanges ) log_function = ( self.error if self.num_interchanges != 0 - and not ( - (self.num_interchanges - self.found_interchanges) - / self.num_interchanges - <= ALLOWED_TRANSFERS_MISMATCH - ) + and not ( + (self.num_interchanges - self.found_interchanges) + / self.num_interchanges + <= ALLOWED_TRANSFERS_MISMATCH + ) else self.warn ) log_function(msg) self.found_networks = len(networks) if len(networks) > max(1, len(self.networks)): - n_str = '; '.join( - ['{} ({})'.format(k, v) for k, v in networks.items()] + n_str = "; ".join( + ["{} ({})".format(k, v) for k, v in networks.items()] ) - self.notice('More than one network: {}'.format(n_str)) + self.notice("More than one network: {}".format(n_str)) + + self.validate_called = True def find_transfers(elements, cities): @@ -1846,14 +2047,14 @@ def find_transfers(elements, cities): stop_area_groups = [] for el in elements: if ( - el['type'] == 'relation' - and 'members' in el - and el.get('tags', {}).get('public_transport') == 'stop_area_group' + el["type"] == "relation" + and "members" in el + and el.get("tags", {}).get("public_transport") == "stop_area_group" ): stop_area_groups.append(el) - # StopArea.id uniquely identifies a StopArea. - # We must ensure StopArea uniqueness since one stop_area relation may result in + # StopArea.id uniquely identifies a StopArea. We must ensure StopArea + # uniqueness since one stop_area relation may result in # several StopArea instances at inter-city interchanges. stop_area_ids = defaultdict(set) # el_id -> set of StopArea.id stop_area_objects = dict() # StopArea.id -> one of StopArea instances @@ -1864,7 +2065,7 @@ def find_transfers(elements, cities): for sag in stop_area_groups: transfer = set() - for m in sag['members']: + for m in sag["members"]: k = el_id(m) if k not in stop_area_ids: continue @@ -1881,51 +2082,22 @@ def get_unused_entrances_geojson(elements): features = [] for el in elements: if ( - el['type'] == 'node' - and 'tags' in el - and el['tags'].get('railway') == 'subway_entrance' + el["type"] == "node" + and "tags" in el + and el["tags"].get("railway") == "subway_entrance" ): if el_id(el) not in used_entrances: - geometry = {'type': 'Point', 'coordinates': el_center(el)} + geometry = {"type": "Point", "coordinates": el_center(el)} properties = { k: v - for k, v in el['tags'].items() - if k not in ('railway', 'entrance') + for k, v in el["tags"].items() + if k not in ("railway", "entrance") } features.append( { - 'type': 'Feature', - 'geometry': geometry, - 'properties': properties, + "type": "Feature", + "geometry": geometry, + "properties": properties, } ) - return {'type': 'FeatureCollection', 'features': features} - - -def download_cities(overground=False): - url = ( - 'https://docs.google.com/spreadsheets/d/{}/export?format=csv{}'.format( - SPREADSHEET_ID, '&gid=1881416409' if overground else '' - ) - ) - response = urllib.request.urlopen(url) - if response.getcode() != 200: - raise Exception( - 'Failed to download cities spreadsheet: HTTP {}'.format( - response.getcode() - ) - ) - data = response.read().decode('utf-8') - r = csv.reader(data.splitlines()) - next(r) # skipping the header - names = set() - cities = [] - for row in r: - if len(row) > 8 and row[8]: - cities.append(City(row, overground)) - if row[0].strip() in names: - logging.warning( - 'Duplicate city name in the google spreadsheet: %s', row[0] - ) - names.add(row[0].strip()) - return cities + return {"type": "FeatureCollection", "features": features} diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/assets/kuntsevskaya_centers.json b/tests/assets/kuntsevskaya_centers.json new file mode 100644 index 0000000..36317ec --- /dev/null +++ b/tests/assets/kuntsevskaya_centers.json @@ -0,0 +1,28 @@ +{ + "w38836456": { + "lat": 55.73064775, + "lon": 37.446065950000005 + }, + "w489951237": { + "lat": 55.730760724999996, + "lon": 37.44602055 + }, + "r7588527": { + "lat": 55.73066371666667, + "lon": 37.44604881666667 + }, + "r7588528": { + "lat": 55.73075192499999, + "lon": 37.44609837 + }, + "r7588561": { + "lat": 55.73070782083333, + "lon": 37.44607359333334 + }, + "r13426423": { + "lat": 55.730760724999996, + "lon": 37.44602055 + }, + "r100": null, + "r101": null +} diff --git a/tests/assets/kuntsevskaya_transfer.osm b/tests/assets/kuntsevskaya_transfer.osm new file mode 100644 index 0000000..48bf044 --- /dev/null +++ b/tests/assets/kuntsevskaya_transfer.osm @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/sample_data.py b/tests/sample_data.py new file mode 100644 index 0000000..0fffacd --- /dev/null +++ b/tests/sample_data.py @@ -0,0 +1,1491 @@ +sample_networks = { + "Only 2 stations, no rails": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 2, + "tracks": [], + "extended_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + ], + "truncated_tracks": [], + "forward": { + "first_stop_on_rails_index": 2, + "last_stop_on_rails_index": 1, + "positions_on_rails": [], + }, + "backward": { + "first_stop_on_rails_index": 2, + "last_stop_on_rails_index": 1, + "positions_on_rails": [], + }, + }, + "Only 2 stations connected with rails": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 2, + "tracks": [ + (0.0, 0.0), + (1.0, 0.0), + ], + "extended_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + ], + "truncated_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + ], + "forward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 1, + "positions_on_rails": [[0], [1]], + }, + "backward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 1, + "positions_on_rails": [[0], [1]], + }, + }, + "Only 6 stations, no rails": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 6, + "tracks": [], + "extended_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "truncated_tracks": [], + "forward": { + "first_stop_on_rails_index": 6, + "last_stop_on_rails_index": 5, + "positions_on_rails": [], + }, + "backward": { + "first_stop_on_rails_index": 6, + "last_stop_on_rails_index": 5, + "positions_on_rails": [], + }, + }, + "One rail line connecting all stations": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 6, + "tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "extended_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "truncated_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "forward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 5, + "positions_on_rails": [[0], [1], [2], [3], [4], [5]], + }, + "backward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 5, + "positions_on_rails": [[0], [1], [2], [3], [4], [5]], + }, + }, + "One rail line connecting all stations except the last": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 6, + "tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + ], + "extended_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "truncated_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + ], + "forward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 4, + "positions_on_rails": [[0], [1], [2], [3], [4]], + }, + "backward": { + "first_stop_on_rails_index": 1, + "last_stop_on_rails_index": 5, + "positions_on_rails": [[0], [1], [2], [3], [4]], + }, + }, + "One rail line connecting all stations except the fist": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 6, + "tracks": [ + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "extended_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "truncated_tracks": [ + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "forward": { + "first_stop_on_rails_index": 1, + "last_stop_on_rails_index": 5, + "positions_on_rails": [[0], [1], [2], [3], [4]], + }, + "backward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 4, + "positions_on_rails": [[0], [1], [2], [3], [4]], + }, + }, + "One rail line connecting all stations except the fist and the last": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 6, + "tracks": [ + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + ], + "extended_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "truncated_tracks": [ + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + ], + "forward": { + "first_stop_on_rails_index": 1, + "last_stop_on_rails_index": 4, + "positions_on_rails": [[0], [1], [2], [3]], + }, + "backward": { + "first_stop_on_rails_index": 1, + "last_stop_on_rails_index": 4, + "positions_on_rails": [[0], [1], [2], [3]], + }, + }, + "One rail line connecting only 2 first stations": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 6, + "tracks": [ + (0.0, 0.0), + (1.0, 0.0), + ], + "extended_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "truncated_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + ], + "forward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 1, + "positions_on_rails": [[0], [1]], + }, + "backward": { + "first_stop_on_rails_index": 4, + "last_stop_on_rails_index": 5, + "positions_on_rails": [[0], [1]], + }, + }, + "One rail line connecting only 2 last stations": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 6, + "tracks": [ + (4.0, 0.0), + (5.0, 0.0), + ], + "extended_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "truncated_tracks": [ + (4.0, 0.0), + (5.0, 0.0), + ], + "forward": { + "first_stop_on_rails_index": 4, + "last_stop_on_rails_index": 5, + "positions_on_rails": [[0], [1]], + }, + "backward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 1, + "positions_on_rails": [[0], [1]], + }, + }, + "One rail connecting all stations and protruding at both ends": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 6, + "tracks": [ + (-1.0, 0.0), + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + (6.0, 0.0), + ], + "extended_tracks": [ + (-1.0, 0.0), + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + (6.0, 0.0), + ], + "truncated_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "forward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 5, + "positions_on_rails": [[1], [2], [3], [4], [5], [6]], + }, + "backward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 5, + "positions_on_rails": [[1], [2], [3], [4], [5], [6]], + }, + }, + ( + "Several rails with reversed order for backward route, " + "connecting all stations and protruding at both ends" + ): { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 6, + "tracks": [ + (-1.0, 0.0), + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + (6.0, 0.0), + ], + "extended_tracks": [ + (-1.0, 0.0), + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + (6.0, 0.0), + ], + "truncated_tracks": [ + (0.0, 0.0), + (1.0, 0.0), + (2.0, 0.0), + (3.0, 0.0), + (4.0, 0.0), + (5.0, 0.0), + ], + "forward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 5, + "positions_on_rails": [[1], [2], [3], [4], [5], [6]], + }, + "backward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 5, + "positions_on_rails": [[1], [2], [3], [4], [5], [6]], + }, + }, + ( + "One rail laying near all stations requiring station projecting, " + "protruding at both ends" + ): { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 6, + "tracks": [ + (-1.0, 0.0), + (6.0, 0.0), + ], + "extended_tracks": [ + (-1.0, 0.0), + (6.0, 0.0), + ], + "truncated_tracks": [ + (0.0, 0.0), + (5.0, 0.0), + ], + "forward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 5, + "positions_on_rails": [ + [1 / 7], + [2 / 7], + [3 / 7], + [4 / 7], + [5 / 7], + [6 / 7], + ], + }, + "backward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 5, + "positions_on_rails": [ + [1 / 7], + [2 / 7], + [3 / 7], + [4 / 7], + [5 / 7], + [6 / 7], + ], + }, + }, + "One rail laying near all stations except the first and last": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 6, + "tracks": [ + (1.0, 0.0), + (4.0, 0.0), + ], + "extended_tracks": [ + (0.0, 0.0001), + (1.0, 0.0), + (4.0, 0.0), + (5.0, 0.0001), + ], + "truncated_tracks": [ + (1.0, 0.0), + (4.0, 0.0), + ], + "forward": { + "first_stop_on_rails_index": 1, + "last_stop_on_rails_index": 4, + "positions_on_rails": [[0], [1 / 3], [2 / 3], [1]], + }, + "backward": { + "first_stop_on_rails_index": 1, + "last_stop_on_rails_index": 4, + "positions_on_rails": [[0], [1 / 3], [2 / 3], [1]], + }, + }, + "Circle route without rails": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 4, + "tracks": [], + "extended_tracks": [ + (0.0, 0.0), + (0.0, 1.0), + (1.0, 1.0), + (1.0, 0.0), + (0.0, 0.0), + ], + "truncated_tracks": [], + "forward": { + "first_stop_on_rails_index": 5, + "last_stop_on_rails_index": 4, + "positions_on_rails": [], + }, + "backward": { + "first_stop_on_rails_index": 5, + "last_stop_on_rails_index": 4, + "positions_on_rails": [], + }, + }, + "Circle route with closed rail line connecting all stations": { + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "station_count": 4, + "tracks": [ + (0.0, 0.0), + (0.0, 1.0), + (1.0, 1.0), + (1.0, 0.0), + (0.0, 0.0), + ], + "extended_tracks": [ + (0.0, 0.0), + (0.0, 1.0), + (1.0, 1.0), + (1.0, 0.0), + (0.0, 0.0), + ], + "truncated_tracks": [ + (0.0, 0.0), + (0.0, 1.0), + (1.0, 1.0), + (1.0, 0.0), + (0.0, 0.0), + ], + "forward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 4, + "positions_on_rails": [[0, 4], [1], [2], [3], [0, 4]], + }, + "backward": { + "first_stop_on_rails_index": 0, + "last_stop_on_rails_index": 4, + "positions_on_rails": [[0, 4], [1], [2], [3], [0, 4]], + }, + }, +} diff --git a/tests/test_build_tracks.py b/tests/test_build_tracks.py new file mode 100644 index 0000000..da16780 --- /dev/null +++ b/tests/test_build_tracks.py @@ -0,0 +1,153 @@ +""" +To perform tests manually, run this command from the top directory +of the repository: + +> python -m unittest discover tests + +or simply + +> python -m unittest +""" + +import io +import unittest + +from subway_structure import City +from subway_io import load_xml +from tests.sample_data import sample_networks + + +class TestOneRouteTracks(unittest.TestCase): + """Test tracks extending and truncating on one-route networks""" + + CITY_TEMPLATE = { + "id": 1, + "name": "Null Island", + "country": "World", + "continent": "Africa", + "num_stations": None, # Would be taken from the sample network data + "num_lines": 1, + "num_light_lines": 0, + "num_interchanges": 0, + "bbox": "-179, -89, 179, 89", + "networks": "", + } + + def assertListAlmostEqual(self, list1, list2, places=10) -> None: + if not (isinstance(list1, list) and isinstance(list2, list)): + raise RuntimeError( + f"Not lists passed to the '{self.__class__.__name__}." + "assertListAlmostEqual' method" + ) + self.assertEqual(len(list1), len(list2)) + for a, b in zip(list1, list2): + if isinstance(a, list) and isinstance(b, list): + self.assertListAlmostEqual(a, b, places) + else: + self.assertAlmostEqual(a, b, places) + + def prepare_city_routes(self, network) -> tuple: + city_data = self.CITY_TEMPLATE.copy() + city_data["num_stations"] = network["station_count"] + city = City(city_data) + elements = load_xml(io.BytesIO(network["xml"].encode("utf-8"))) + for el in elements: + city.add(el) + city.extract_routes() + city.validate() + + self.assertTrue(city.is_good) + + route_master = list(city.routes.values())[0] + variants = route_master.routes + + fwd_route = [v for v in variants if v.name == "Forward"][0] + bwd_route = [v for v in variants if v.name == "Backward"][0] + + return fwd_route, bwd_route + + def _test_tracks_extending_for_network(self, network_data): + fwd_route, bwd_route = self.prepare_city_routes(network_data) + + self.assertEqual( + fwd_route.tracks, + network_data["tracks"], + "Wrong tracks", + ) + extended_tracks = fwd_route.get_extended_tracks() + self.assertEqual( + extended_tracks, + network_data["extended_tracks"], + "Wrong tracks after extending", + ) + + self.assertEqual( + bwd_route.tracks, + network_data["tracks"][::-1], + "Wrong backward tracks", + ) + extended_tracks = bwd_route.get_extended_tracks() + self.assertEqual( + extended_tracks, + network_data["extended_tracks"][::-1], + "Wrong backward tracks after extending", + ) + + def _test_tracks_truncating_for_network(self, network_data): + fwd_route, bwd_route = self.prepare_city_routes(network_data) + + truncated_tracks = fwd_route.get_truncated_tracks(fwd_route.tracks) + self.assertEqual( + truncated_tracks, + network_data["truncated_tracks"], + "Wrong tracks after truncating", + ) + truncated_tracks = bwd_route.get_truncated_tracks(bwd_route.tracks) + self.assertEqual( + truncated_tracks, + network_data["truncated_tracks"][::-1], + "Wrong backward tracks after truncating", + ) + + def _test_stop_positions_on_rails_for_network(self, network_data): + fwd_route, bwd_route = self.prepare_city_routes(network_data) + + for route, route_label in zip( + (fwd_route, bwd_route), ("forward", "backward") + ): + route_data = network_data[route_label] + + for attr in ( + "first_stop_on_rails_index", + "last_stop_on_rails_index", + ): + self.assertEqual( + getattr(route, attr), + route_data[attr], + f"Wrong {attr} for {route_label} route", + ) + + first_ind = route_data["first_stop_on_rails_index"] + last_ind = route_data["last_stop_on_rails_index"] + positions_on_rails = [ + rs.positions_on_rails + for rs in route.stops[first_ind : last_ind + 1] # noqa E203 + ] + self.assertListAlmostEqual( + positions_on_rails, route_data["positions_on_rails"] + ) + + def test_tracks_extending(self) -> None: + for network_name, network_data in sample_networks.items(): + with self.subTest(msg=network_name): + self._test_tracks_extending_for_network(network_data) + + def test_tracks_truncating(self) -> None: + for network_name, network_data in sample_networks.items(): + with self.subTest(msg=network_name): + self._test_tracks_truncating_for_network(network_data) + + def test_stop_position_on_rails(self) -> None: + for network_name, network_data in sample_networks.items(): + with self.subTest(msg=network_name): + self._test_stop_positions_on_rails_for_network(network_data) diff --git a/tests/test_center_calculation.py b/tests/test_center_calculation.py new file mode 100644 index 0000000..4f01a3c --- /dev/null +++ b/tests/test_center_calculation.py @@ -0,0 +1,55 @@ +import json +from pathlib import Path +from unittest import TestCase + +from process_subways import calculate_centers +from subway_io import load_xml + + +class TestCenterCalculation(TestCase): + """Test center calculation. Test data [should] contain among others + the following edge cases: + - an empty relation. It's element should not obtain "center" key. + - relation as member of relation, the child relation following the parent + in the OSM XML file. + - relation with incomplete members (broken references). + - relations with cyclic references. + """ + + ASSETS_PATH = Path(__file__).resolve().parent / "assets" + OSM_DATA = str(ASSETS_PATH / "kuntsevskaya_transfer.osm") + CORRECT_CENTERS = str(ASSETS_PATH / "kuntsevskaya_centers.json") + + def test__calculate_centers(self) -> None: + elements = load_xml(self.OSM_DATA) + + calculate_centers(elements) + + elements_dict = { + f"{'w' if el['type'] == 'way' else 'r'}{el['id']}": el + for el in elements + } + + calculated_centers = { + k: el["center"] + for k, el in elements_dict.items() + if "center" in el + } + + with open(self.CORRECT_CENTERS) as f: + correct_centers = json.load(f) + + self.assertTrue(set(calculated_centers).issubset(correct_centers)) + + for k, correct_center in correct_centers.items(): + if correct_center is None: + self.assertNotIn("center", elements_dict[k]) + else: + self.assertIn(k, calculated_centers) + calculated_center = calculated_centers[k] + self.assertAlmostEqual( + calculated_center["lat"], correct_center["lat"], places=10 + ) + self.assertAlmostEqual( + calculated_center["lon"], correct_center["lon"], places=10 + ) diff --git a/tests/test_gtfs_processor.py b/tests/test_gtfs_processor.py new file mode 100644 index 0000000..5a234e8 --- /dev/null +++ b/tests/test_gtfs_processor.py @@ -0,0 +1,96 @@ +from unittest import TestCase + +from processors.gtfs import ( + dict_to_row, + GTFS_COLUMNS, +) + + +class TestGTFS(TestCase): + """Test processors/gtfs.py""" + + def test__dict_to_row__Nones_and_absent_keys(self) -> None: + """Test that absent or None values in a GTFS feature item + are converted by dict_to_row() function to empty strings + in right amount. + """ + + if GTFS_COLUMNS["trips"][:3] != ["route_id", "service_id", "trip_id"]: + raise RuntimeError("GTFS column names/order inconsistency") + + test_trips = [ + { + "description": "Absent keys", + "trip_data": { + "route_id": 1, + "service_id": "a", + "trip_id": "tr_123", + }, + }, + { + "description": "None or absent keys", + "trip_data": { + "route_id": 1, + "service_id": "a", + "trip_id": "tr_123", + "trip_headsign": None, + "trip_short_name": None, + "route_pattern_id": None, + }, + }, + { + "description": "None, empty-string or absent keys", + "trip_data": { + "route_id": 1, + "service_id": "a", + "trip_id": "tr_123", + "trip_headsign": "", + "trip_short_name": "", + "route_pattern_id": None, + }, + }, + ] + + answer = [1, "a", "tr_123"] + [""] * (len(GTFS_COLUMNS["trips"]) - 3) + + for test_trip in test_trips: + with self.subTest(msg=test_trip["description"]): + self.assertListEqual( + dict_to_row(test_trip["trip_data"], "trips"), answer + ) + + def test__dict_to_row__numeric_values(self) -> None: + """Test that zero numeric values remain zeros in dict_to_row() + function, and not empty strings or None. + """ + + shapes = [ + { + "description": "Numeric non-zeroes", + "shape_data": { + "shape_id": 1, + "shape_pt_lat": 55.3242425, + "shape_pt_lon": -179.23242, + "shape_pt_sequence": 133, + "shape_dist_traveled": 1.2345, + }, + "answer": [1, 55.3242425, -179.23242, 133, 1.2345], + }, + { + "description": "Numeric zeroes and None keys", + "shape_data": { + "shape_id": 0, + "shape_pt_lat": 0.0, + "shape_pt_lon": 0, + "shape_pt_sequence": 0, + "shape_dist_traveled": None, + }, + "answer": [0, 0.0, 0, 0, ""], + }, + ] + + for shape in shapes: + with self.subTest(shape["description"]): + self.assertListEqual( + dict_to_row(shape["shape_data"], "shapes"), shape["answer"] + ) diff --git a/tests/test_projection.py b/tests/test_projection.py new file mode 100644 index 0000000..b0091aa --- /dev/null +++ b/tests/test_projection.py @@ -0,0 +1,160 @@ +import collections +import itertools +import unittest + +from subway_structure import project_on_segment + + +class TestProjection(unittest.TestCase): + """Test subway_structure.project_on_segment function""" + + PRECISION = 10 # decimal places in assertAlmostEqual + + SHIFT = 1e-6 # Small distance between projected point and segment endpoint + + def _test_projection_in_bulk(self, points, segments, answers): + """Test 'project_on_segment' function for array of points and array + of parallel segments projections on which are equal. + """ + for point, ans in zip(points, answers): + for seg in segments: + for segment, answer in zip( + (seg, seg[::-1]), # What if invert the segment? + (ans, None if ans is None else 1 - ans), + ): + u = project_on_segment(point, segment[0], segment[1]) + + if answer is None: + self.assertIsNone( + u, + f"Project of point {point} onto segment {segment} " + f"should be None, but {u} returned", + ) + else: + self.assertAlmostEqual( + u, + answer, + self.PRECISION, + f"Wrong projection of point {point} onto segment " + f"{segment}: {u} returned, {answer} expected", + ) + + def test_projection_on_horizontal_segments(self): + points = [ + (-2, 0), + (-1 - self.SHIFT, 0), + (-1, 0), + (-1 + self.SHIFT, 0), + (-0.5, 0), + (0, 0), + (0.5, 0), + (1 - self.SHIFT, 0), + (1, 0), + (1 + self.SHIFT, 0), + (2, 0), + ] + horizontal_segments = [ + ((-1, -1), (1, -1)), + ((-1, 0), (1, 0)), + ((-1, 1), (1, 1)), + ] + answers = [ + None, + None, + 0, + self.SHIFT / 2, + 0.25, + 0.5, + 0.75, + 1 - self.SHIFT / 2, + 1, + None, + None, + ] + + self._test_projection_in_bulk(points, horizontal_segments, answers) + + def test_projection_on_vertical_segments(self): + points = [ + (0, -2), + (0, -1 - self.SHIFT), + (0, -1), + (0, -1 + self.SHIFT), + (0, -0.5), + (0, 0), + (0, 0.5), + (0, 1 - self.SHIFT), + (0, 1), + (0, 1 + self.SHIFT), + (0, 2), + ] + vertical_segments = [ + ((-1, -1), (-1, 1)), + ((0, -1), (0, 1)), + ((1, -1), (1, 1)), + ] + answers = [ + None, + None, + 0, + self.SHIFT / 2, + 0.25, + 0.5, + 0.75, + 1 - self.SHIFT / 2, + 1, + None, + None, + ] + + self._test_projection_in_bulk(points, vertical_segments, answers) + + def test_projection_on_inclined_segment(self): + points = [ + (-2, -2), + (-1, -1), + (-0.5, -0.5), + (0, 0), + (0.5, 0.5), + (1, 1), + (2, 2), + ] + segments = [ + ((-2, 0), (0, 2)), + ((-1, -1), (1, 1)), + ((0, -2), (2, 0)), + ] + answers = [None, 0, 0.25, 0.5, 0.75, 1, None] + + self._test_projection_in_bulk(points, segments, answers) + + def test_projection_with_different_collections(self): + """The tested function should accept points as any consecutive + container with index operator. + """ + types = ( + tuple, + list, + collections.deque, + ) + + point = (0, 0.5) + segment_end1 = (0, 0) + segment_end2 = (1, 0) + + for p_type, s1_type, s2_type in itertools.product(types, types, types): + p = p_type(point) + s1 = s1_type(segment_end1) + s2 = s2_type(segment_end2) + project_on_segment(p, s1, s2) + + def test_projection_on_degenerate_segment(self): + coords = [-1, 0, 1] + points = [(x, y) for x, y in itertools.product(coords, coords)] + segments = [ + ((0, 0), (0, 0)), + ((0, 0), (0, 1e-8)), + ] + answers = [None] * len(points) + + self._test_projection_in_bulk(points, segments, answers) diff --git a/v2h_templates.py b/v2h_templates.py index 8198d9b..a1102b4 100644 --- a/v2h_templates.py +++ b/v2h_templates.py @@ -1,7 +1,19 @@ +validator_osm_wiki_url = ( + "https://wiki.openstreetmap.org/wiki/Quality_assurance#subway-preprocessor" +) +github_url = "https://github.com/alexey-zakharenkov/subways" +produced_by = f"""Produced by +Subway Preprocessor on {{date}}""" +metro_mapping_osm_article = "https://wiki.openstreetmap.org/wiki/Metro_Mapping" +list_of_metro_systems_url = ( + "https://en.wikipedia.org/wiki/List_of_metro_systems#List" +) + + # These are templates for validation_to_html.py # Variables should be in curly braces -STYLE = ''' +STYLE = """ -''' +""" -INDEX_HEADER = ''' +INDEX_HEADER = f""" Subway Validator -(s) +{STYLE}

Subway Validation Results

-

{good_cities} of {total_cities} networks validated without errors. -To make a network validate successfully please follow the -metro mapping instructions. -Commit your changes to the OSM and then check back to the updated validation results after the next validation cycle, please. -See the validator instance(s) description -for the schedule and capabilities.

+

{{good_cities}} of {{total_cities}} networks validated without +errors. To make a network validate successfully please follow the +metro mapping +instructions. Commit your changes to the OSM and then check back to the +updated validation results after the next validation cycle, please. +See the validator instance(s) +description for the schedule and capabilities.

View networks on a map

-'''.replace('(s)', STYLE) +""" -INDEX_CONTINENT = ''' +INDEX_CONTINENT = """ @@ -157,9 +170,9 @@ INDEX_CONTINENT = ''' {content} -''' +""" -INDEX_COUNTRY = ''' +INDEX_COUNTRY = """ @@ -172,56 +185,57 @@ INDEX_COUNTRY = ''' -''' +""" -INDEX_FOOTER = ''' +INDEX_FOOTER = f"""
 
Continent{num_notices}
  {country}{num_warnings} {num_notices}
- + -''' +""" -COUNTRY_HEADER = ''' +COUNTRY_HEADER = f""" -Subway Validator: {country} +Subway Validator: {{country}} -(s) +{STYLE}
-

Subway Validation Results for {country}

+

Subway Validation Results for {{country}}

Return to the countries list.

-{?subways} +{{?subways}} -{end}{?overground} +{{end}}{{?overground}} -{end} +{{end}} -'''.replace('(s)', STYLE) +""" -COUNTRY_CITY = ''' +COUNTRY_CITY = """ {?subways} @@ -229,36 +243,55 @@ COUNTRY_CITY = ''' {end}{?overground} - + {end} - + -''' +""" -COUNTRY_FOOTER = ''' +COUNTRY_FOOTER = f"""
CitySubway Lines Light Rail LinesTram Lines Bus Lines T-Bus Lines Other LinesStations Interchanges Unused Entrances
{city} {?yaml}Y{end} {?json}J{end} - {?json}M{end} + {?json}M{end} sub: {subwayl_found} / {subwayl_expected}t: {traml_found} / {traml_expected} b: {busl_found} / {busl_expected}tb: {trolleybusl_found} / {trolleybusl_expected} + tb: {trolleybusl_found} / {trolleybusl_expected} + o: {otherl_found} / {otherl_expected}st: {stations_found} / {stations_expected}int: {transfers_found} / {transfers_expected} + int: {transfers_found} / {transfers_expected} + ent: {unused_entrances}
{?errors} -
🛑 Errors
+
+
+ 🛑 Errors +
{errors}
{end} {?warnings} -
⚠ī¸ Warnings
+
+
+ ⚠ī¸ Warnings +
{warnings}
{end} {?notices} -
ℹī¸ Notices
+
+
+ ℹī¸ Notices +
{notices} {end}
- +
{produced_by}.
-''' +""" diff --git a/validation_to_html.py b/validation_to_html.py index fe21734..f772a4f 100755 --- a/validation_to_html.py +++ b/validation_to_html.py @@ -1,204 +1,229 @@ #!/usr/bin/env python3 +from __future__ import annotations + +import argparse import datetime -import re -import os -import sys import json -from subway_structure import SPREADSHEET_ID -from v2h_templates import * +import os +import re +from collections import defaultdict +from typing import Any, Optional + +from process_subways import DEFAULT_SPREADSHEET_ID +from v2h_templates import ( + COUNTRY_CITY, + COUNTRY_FOOTER, + COUNTRY_HEADER, + INDEX_CONTINENT, + INDEX_COUNTRY, + INDEX_FOOTER, + INDEX_HEADER, +) class CityData: - def __init__(self, city=None): + def __init__(self, city: Optional[str] = None) -> None: self.city = city is not None self.data = { - 'good_cities': 0, - 'total_cities': 1 if city else 0, - 'num_errors': 0, - 'num_warnings': 0, - 'num_notices': 0 + "good_cities": 0, + "total_cities": 1 if city else 0, + "num_errors": 0, + "num_warnings": 0, + "num_notices": 0, } self.slug = None if city: - self.slug = city['slug'] - self.country = city['country'] - self.continent = city['continent'] - self.errors = city['errors'] - self.warnings = city['warnings'] - self.notices = city['notices'] + self.slug = city["slug"] + self.country = city["country"] + self.continent = city["continent"] + self.errors = city["errors"] + self.warnings = city["warnings"] + self.notices = city["notices"] if not self.errors: - self.data['good_cities'] = 1 - self.data['num_errors'] = len(self.errors) - self.data['num_warnings'] = len(self.warnings) - self.data['num_notices'] = len(self.notices) + self.data["good_cities"] = 1 + self.data["num_errors"] = len(self.errors) + self.data["num_warnings"] = len(self.warnings) + self.data["num_notices"] = len(self.notices) for k, v in city.items(): - if 'found' in k or 'expected' in k or 'unused' in k: + if "found" in k or "expected" in k or "unused" in k: self.data[k] = v - def not__get__(self, i): - return self.data.get(i) - - def not__set__(self, i, value): - self.data[i] = value - - def __add__(self, other): + def __add__(self, other: CityData) -> CityData: d = CityData() for k in set(self.data.keys()) | set(other.data.keys()): d.data[k] = self.data.get(k, 0) + other.data.get(k, 0) return d - def format(self, s): - def test_eq(v1, v2): - return '1' if v1 == v2 else '0' + @staticmethod + def test_eq(v1: Any, v2: Any) -> str: + return "1" if v1 == v2 else "0" + def format(self, s: str) -> str: for k in self.data: - s = s.replace('{' + k + '}', str(self.data[k])) - s = s.replace('{slug}', self.slug or '') + s = s.replace("{" + k + "}", str(self.data[k])) + s = s.replace("{slug}", self.slug or "") for k in ( - 'subwayl', - 'lightrl', - 'stations', - 'transfers', - 'busl', - 'trolleybusl', - 'traml', - 'otherl', + "subwayl", + "lightrl", + "stations", + "transfers", + "busl", + "trolleybusl", + "traml", + "otherl", ): - if k + '_expected' in self.data: + if k + "_expected" in self.data: s = s.replace( - '{=' + k + '}', - test_eq( - self.data[k + '_found'], self.data[k + '_expected'] + "{=" + k + "}", + self.test_eq( + self.data[k + "_found"], self.data[k + "_expected"] ), ) s = s.replace( - '{=cities}', - test_eq(self.data['good_cities'], self.data['total_cities']), + "{=cities}", + self.test_eq(self.data["good_cities"], self.data["total_cities"]), ) s = s.replace( - '{=entrances}', test_eq(self.data['unused_entrances'], 0) + "{=entrances}", self.test_eq(self.data["unused_entrances"], 0) ) - for k in ('errors', 'warnings', 'notices'): - s = s.replace('{=' + k + '}', test_eq(self.data['num_' + k], 0)) + for k in ("errors", "warnings", "notices"): + s = s.replace( + "{=" + k + "}", self.test_eq(self.data["num_" + k], 0) + ) return s -def tmpl(s, data=None, **kwargs): +def tmpl(s: str, data: Optional[CityData] = None, **kwargs) -> str: if data: s = data.format(s) if kwargs: for k, v in kwargs.items(): if v is not None: - s = s.replace('{' + k + '}', str(v)) + s = s.replace("{" + k + "}", str(v)) s = re.sub( - r'\{\?' + k + r'\}(.+?)\{end\}', - r'\1' if v else '', + r"\{\?" + k + r"\}(.+?)\{end\}", + r"\1" if v else "", s, flags=re.DOTALL, ) - s = s.replace('{date}', date) - google_url = ( - 'https://docs.google.com/spreadsheets/d/{}/edit?usp=sharing'.format( - SPREADSHEET_ID - ) - ) - s = s.replace('{google}', google_url) return s -EXPAND_OSM_TYPE = {'n': 'node', 'w': 'way', 'r': 'relation'} -RE_SHORT = re.compile(r'\b([nwr])(\d+)\b') -RE_FULL = re.compile(r'\b(node|way|relation) (\d+)\b') -RE_COORDS = re.compile(r'\((-?\d+\.\d+), (-?\d+\.\d+)\)') +EXPAND_OSM_TYPE = {"n": "node", "w": "way", "r": "relation"} +RE_SHORT = re.compile(r"\b([nwr])(\d+)\b") +RE_FULL = re.compile(r"\b(node|way|relation) (\d+)\b") +RE_COORDS = re.compile(r"\((-?\d+\.\d+), (-?\d+\.\d+)\)") -def osm_links(s): +def osm_links(s: str) -> str: """Converts object mentions to HTML links.""" - def link(m): - return '{}'.format( - EXPAND_OSM_TYPE[m.group(1)[0]], m.group(2), m.group(0) + def link(m: re.Match) -> str: + osm_type = EXPAND_OSM_TYPE[m.group(1)[0]] + osm_id = m.group(2) + return ( + '{m.group(0)}' ) s = RE_SHORT.sub(link, s) s = RE_FULL.sub(link, s) s = RE_COORDS.sub( - r'(pos)', + r'(pos)', s, ) return s -def esc(s): - return s.replace('&', '&').replace('<', '<').replace('>', '>') +def esc(s: str) -> str: + return s.replace("&", "&").replace("<", "<").replace(">", ">") -if len(sys.argv) < 2: - print('Reads a log from subway validator and prepares HTML files.') - print( - 'Usage: {} []'.format(sys.argv[0]) +def br_osm_links(elems: list) -> str: + return "
".join(osm_links(esc(elem)) for elem in elems) + + +def main() -> None: + parser = argparse.ArgumentParser( + description=( + "Reads a log from subway validator and prepares HTML files." + ) ) - sys.exit(1) + parser.add_argument("validation_log") + parser.add_argument("target_directory", nargs="?", default=".") + parser.add_argument( + "--cities-info-url", + default=( + "https://docs.google.com/spreadsheets/d/" + f"{DEFAULT_SPREADSHEET_ID}/edit?usp=sharing" + ), + ) + options = parser.parse_args() + target_dir = options.target_directory + cities_info_url = options.cities_info_url -with open(sys.argv[1], 'r', encoding='utf-8') as f: - data = {c['name']: CityData(c) for c in json.load(f)} + with open(options.validation_log, encoding="utf-8") as f: + data = {c["name"]: CityData(c) for c in json.load(f)} -countries = {} -continents = {} -c_by_c = {} # continent → set of countries -for c in data.values(): - countries[c.country] = c + countries.get(c.country, CityData()) - continents[c.continent] = c + continents.get(c.continent, CityData()) - if c.continent not in c_by_c: - c_by_c[c.continent] = set() - c_by_c[c.continent].add(c.country) -world = sum(continents.values(), CityData()) + countries = {} + continents = {} + c_by_c = defaultdict(set) # continent → set of countries + for c in data.values(): + countries[c.country] = c + countries.get(c.country, CityData()) + continents[c.continent] = c + continents.get(c.continent, CityData()) + c_by_c[c.continent].add(c.country) + world = sum(continents.values(), CityData()) -overground = 'traml_expected' in next(iter(data.values())).data -date = datetime.datetime.utcnow().strftime('%d.%m.%Y %H:%M UTC') -path = '.' if len(sys.argv) < 3 else sys.argv[2] -index = open(os.path.join(path, 'index.html'), 'w', encoding='utf-8') -index.write(tmpl(INDEX_HEADER, world)) + overground = "traml_expected" in next(iter(data.values())).data + date = datetime.datetime.utcnow().strftime("%d.%m.%Y %H:%M UTC") + index = open(os.path.join(target_dir, "index.html"), "w", encoding="utf-8") + index.write(tmpl(INDEX_HEADER, world)) -for continent in sorted(continents.keys()): - content = '' - for country in sorted(c_by_c[continent]): - country_file_name = country.lower().replace(' ', '-') + '.html' - content += tmpl( - INDEX_COUNTRY, - countries[country], - file=country_file_name, - country=country, - continent=continent, - ) - country_file = open( - os.path.join(path, country_file_name), 'w', encoding='utf-8' - ) - country_file.write( - tmpl( - COUNTRY_HEADER, + for continent in sorted(continents.keys()): + content = "" + for country in sorted(c_by_c[continent]): + country_file_name = country.lower().replace(" ", "-") + ".html" + content += tmpl( + INDEX_COUNTRY, + countries[country], + file=country_file_name, country=country, continent=continent, - overground=overground, - subways=not overground, ) - ) - for name, city in sorted(data.items()): - if city.country == country: - file_base = os.path.join(path, city.slug) + country_file = open( + os.path.join(target_dir, country_file_name), + "w", + encoding="utf-8", + ) + country_file.write( + tmpl( + COUNTRY_HEADER, + country=country, + continent=continent, + overground=overground, + subways=not overground, + ) + ) + for name, city in sorted( + (name, city) + for name, city in data.items() + if city.country == country + ): + file_base = os.path.join(target_dir, city.slug) yaml_file = ( - city.slug + '.yaml' - if os.path.exists(file_base + '.yaml') + city.slug + ".yaml" + if os.path.exists(file_base + ".yaml") else None ) json_file = ( - city.slug + '.geojson' - if os.path.exists(file_base + '.geojson') + city.slug + ".geojson" + if os.path.exists(file_base + ".geojson") else None ) - errors = '
'.join([osm_links(esc(e)) for e in city.errors]) - warnings = '
'.join([osm_links(esc(w)) for w in city.warnings]) - notices = '
'.join([osm_links(esc(n)) for n in city.notices]) + errors = br_osm_links(city.errors) + warnings = br_osm_links(city.warnings) + notices = br_osm_links(city.notices) country_file.write( tmpl( COUNTRY_CITY, @@ -215,18 +240,27 @@ for continent in sorted(continents.keys()): overground=overground, ) ) - country_file.write( - tmpl(COUNTRY_FOOTER, country=country, continent=continent) + country_file.write( + tmpl( + COUNTRY_FOOTER, + country=country, + continent=continent, + date=date, + ) + ) + country_file.close() + index.write( + tmpl( + INDEX_CONTINENT, + continents[continent], + content=content, + continent=continent, + ) ) - country_file.close() - index.write( - tmpl( - INDEX_CONTINENT, - continents[continent], - content=content, - continent=continent, - ) - ) -index.write(tmpl(INDEX_FOOTER)) -index.close() + index.write(tmpl(INDEX_FOOTER, date=date, cities_info_url=cities_info_url)) + index.close() + + +if __name__ == "__main__": + main()