From fc60bf56fac1604c9aa3356e0a9fc3798efa95b1 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Wed, 11 Jan 2023 16:13:29 +0300 Subject: [PATCH] Add --cities-info-url CLI parameter to the main script and utilities --- make_all_metro_poly.py | 35 ++++-- mapsme_json_to_cities.py | 21 +++- process_subways.py | 82 ++++++++++++-- scripts/process_subways.sh | 9 +- subway_structure.py | 52 --------- v2h_templates.py | 7 +- validation_to_html.py | 217 ++++++++++++++++++++----------------- 7 files changed, 242 insertions(+), 181 deletions(-) diff --git a/make_all_metro_poly.py b/make_all_metro_poly.py index 05a01b1..00281a7 100644 --- a/make_all_metro_poly.py +++ b/make_all_metro_poly.py @@ -1,20 +1,23 @@ +import argparse + import shapely.geometry import shapely.ops -from process_subways import download_cities +from process_subways import DEFAULT_CITIES_INFO_URL, get_cities_info -def make_disjoint_metro_polygons(): - cities = download_cities() +def make_disjoint_metro_polygons(cities_info_url: str) -> None: + cities_info = get_cities_info(cities_info_url) polygons = [] - for c in cities: + for ci in cities_info: + bbox = tuple(map(float, ci["bbox"].split(","))) polygon = shapely.geometry.Polygon( [ - (c.bbox[1], c.bbox[0]), - (c.bbox[1], c.bbox[2]), - (c.bbox[3], c.bbox[2]), - (c.bbox[3], c.bbox[0]), + (bbox[0], bbox[1]), + (bbox[0], bbox[3]), + (bbox[2], bbox[3]), + (bbox[2], bbox[1]), ] ) polygons.append(polygon) @@ -31,5 +34,19 @@ def make_disjoint_metro_polygons(): print("END") +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + "--cities-info-url", + default=DEFAULT_CITIES_INFO_URL, + help=( + "URL of CSV file with reference information about rapid transit " + "networks. file:// protocol is also supported." 
+ ), + ) + options = parser.parse_args() + make_disjoint_metro_polygons(options.cities_info_url) + + if __name__ == "__main__": - make_disjoint_metro_polygons() + main() diff --git a/mapsme_json_to_cities.py b/mapsme_json_to_cities.py index 043d0b6..1c69a77 100644 --- a/mapsme_json_to_cities.py +++ b/mapsme_json_to_cities.py @@ -1,7 +1,7 @@ import argparse import json -from process_subways import download_cities +from process_subways import DEFAULT_CITIES_INFO_URL, get_cities_info if __name__ == "__main__": @@ -25,6 +25,15 @@ if __name__ == "__main__": ), ) + arg_parser.add_argument( + "--cities-info-url", + default=DEFAULT_CITIES_INFO_URL, + help=( + "URL of CSV file with reference information about rapid transit " + "networks. file:// protocol is also supported." + ), + ) + arg_parser.add_argument( "--with-bad", action="store_true", @@ -40,14 +49,14 @@ if __name__ == "__main__": good_cities = set( n.get("network", n.get("title")) for n in subway_json["networks"] ) - cities = download_cities() + cities_info = get_cities_info(args.cities_info_url) lines = [] - for c in cities: - if c.name in good_cities: - lines.append(f"{c.name}, {c.country}") + for ci in cities_info: + if ci["name"] in good_cities: + lines.append(f"{ci['name']}, {ci['country']}") elif with_bad: - lines.append(f"{c.name}, {c.country} (Bad)") + lines.append(f"{ci['name']}, {ci['country']} (Bad)") for line in sorted(lines): print(line) diff --git a/process_subways.py b/process_subways.py index 21a2fb4..89e1021 100755 --- a/process_subways.py +++ b/process_subways.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import argparse +import csv import inspect import json import logging @@ -9,6 +10,7 @@ import sys import time import urllib.parse import urllib.request +from functools import partial from typing import Dict, List, Optional, Tuple import processors @@ -20,8 +22,8 @@ from subway_io import ( write_recovery_data, ) from subway_structure import ( + City, CriticalValidationError, - download_cities, 
find_transfers, get_unused_entrances_geojson, MODES_OVERGROUND, @@ -29,6 +31,12 @@ from subway_structure import ( ) +DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k" +DEFAULT_CITIES_INFO_URL = ( + "https://docs.google.com/spreadsheets/d/" + f"{DEFAULT_SPREADSHEET_ID}/export?format=csv" +) + Point = Tuple[float, float] @@ -49,13 +57,11 @@ def overpass_request(overground, overpass_api, bboxes): "rel(br)[type=public_transport][public_transport=stop_area_group];" ) query += ");(._;>>;);out body center qt;" - logging.info("Query: %s", query) + logging.debug("Query: %s", query) url = "{}?data={}".format(overpass_api, urllib.parse.quote(query)) response = urllib.request.urlopen(url, timeout=1000) - if response.getcode() != 200: - raise Exception( - "Failed to query Overpass API: HTTP {}".format(response.getcode()) - ) + if (r_code := response.getcode()) != 200: + raise Exception(f"Failed to query Overpass API: HTTP {r_code}") return json.load(response)["elements"] @@ -258,8 +264,69 @@ def validate_cities(cities): return good_cities +def get_cities_info( + cities_info_url: str = DEFAULT_CITIES_INFO_URL, +) -> List[dict]: + response = urllib.request.urlopen(cities_info_url) + if ( + not cities_info_url.startswith("file://") + and (r_code := response.getcode()) != 200 + ): + raise Exception( + f"Failed to download cities spreadsheet: HTTP {r_code}" + ) + data = response.read().decode("utf-8") + reader = csv.DictReader( + data.splitlines(), + fieldnames=( + "id", + "name", + "country", + "continent", + "num_stations", + "num_lines", + "num_light_lines", + "num_interchanges", + "bbox", + "networks", + ), + ) + + cities_info = list() + names = set() + next(reader) # skipping the header + for city_info in reader: + if city_info["id"] and city_info["bbox"]: + cities_info.append(city_info) + name = city_info["name"].strip() + if name in names: + logging.warning( + "Duplicate city name in city list: %s", + city_info, + ) + names.add(name) + return cities_info 
+ + +def prepare_cities( + cities_info_url: str = DEFAULT_CITIES_INFO_URL, overground: bool = False +) -> List[City]: + if overground: + raise NotImplementedError("Overground transit not implemented yet") + cities_info = get_cities_info(cities_info_url) + return list(map(partial(City, overground=overground), cities_info)) + + def main(): parser = argparse.ArgumentParser() + parser.add_argument( + "--cities-info-url", + default=DEFAULT_CITIES_INFO_URL, + help=( + "URL of CSV file with reference information about rapid transit " + "networks. file:// protocol is also supported." + ), + ) parser.add_argument( "-i", "--source", @@ -340,8 +407,7 @@ def main(): format="%(asctime)s %(levelname)-7s %(message)s", ) - # Downloading cities from Google Spreadsheets - cities = download_cities(options.overground) + cities = prepare_cities(options.cities_info_url, options.overground) if options.city: cities = [ c diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index 2ee69ab..2068643 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -32,6 +32,7 @@ Environment variable reference: - PLANET_METRO: path to a local o5m file with extract of cities having metro It's used instead of \$PLANET if exists otherwise it's created first - PLANET_UPDATE_SERVER: server to get replication data from. Defaults to https://planet.openstreetmap.org/replication/ + - CITIES_INFO_URL: http(s) or "file://" URL to a CSV file with reference information about rapid transit systems. A default value is hammered into python code. - CITY: name of a city/country to process - BBOX: bounding box of an extract; x1,y1,x2,y2. 
Has precedence over \$POLY - POLY: *.poly file with [multi]polygon comprising cities with metro @@ -92,7 +93,8 @@ function check_poly() { if [ -n "$("$PYTHON" -c "import shapely" 2>&1)" ]; then "$PYTHON" -m pip install shapely fi - "$PYTHON" "$SUBWAYS_PATH"/make_all_metro_poly.py > "$POLY" + "$PYTHON" "$SUBWAYS_PATH"/make_all_metro_poly.py \ + ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} > "$POLY" fi fi POLY_CHECKED=1 @@ -244,6 +246,7 @@ fi VALIDATION="$TMPDIR/validation.json" "$PYTHON" "$SUBWAYS_PATH/process_subways.py" ${QUIET:+-q} \ -x "$FILTERED_DATA" -l "$VALIDATION" \ + ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ ${MAPSME:+--output-mapsme "$MAPSME"} \ ${GTFS:+--output-gtfs "$GTFS"} \ ${CITY:+-c "$CITY"} ${DUMP:+-d "$DUMP"} ${GEOJSON:+-j "$GEOJSON"} \ @@ -264,7 +267,9 @@ fi mkdir -p $HTML_DIR rm -f "$HTML_DIR"/*.html -"$PYTHON" "$SUBWAYS_PATH/validation_to_html.py" "$VALIDATION" "$HTML_DIR" +"$PYTHON" "$SUBWAYS_PATH/validation_to_html.py" \ + ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ + "$VALIDATION" "$HTML_DIR" # Uploading files to the server diff --git a/subway_structure.py b/subway_structure.py index 739d9c5..ef8b5eb 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -1,15 +1,10 @@ -import csv -import logging import math import re -import urllib.parse -import urllib.request from collections import Counter, defaultdict from css_colours import normalize_colour -SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k" MAX_DISTANCE_TO_ENTRANCES = 300 # in meters MAX_DISTANCE_STOP_TO_LINE = 50 # in meters ALLOWED_STATIONS_MISMATCH = 0.02 # part of total station count @@ -2108,50 +2103,3 @@ def get_unused_entrances_geojson(elements): } ) return {"type": "FeatureCollection", "features": features} - - -def download_cities(overground=False): - assert not overground, "Overground transit not implemented yet" - url = ( - "https://docs.google.com/spreadsheets/d/{}/export?format=csv{}".format( - SPREADSHEET_ID, 
"&gid=1881416409" if overground else "" - ) - ) - response = urllib.request.urlopen(url) - if response.getcode() != 200: - raise Exception( - "Failed to download cities spreadsheet: HTTP {}".format( - response.getcode() - ) - ) - data = response.read().decode("utf-8") - reader = csv.DictReader( - data.splitlines(), - fieldnames=( - "id", - "name", - "country", - "continent", - "num_stations", - "num_lines", - "num_light_lines", - "num_interchanges", - "bbox", - "networks", - ), - ) - - next(reader) # skipping the header - names = set() - cities = [] - for city_data in reader: - if city_data["id"] and city_data["bbox"]: - cities.append(City(city_data, overground)) - name = city_data["name"].strip() - if name in names: - logging.warning( - "Duplicate city name in the google spreadsheet: %s", - city_data, - ) - names.add(name) - return cities diff --git a/v2h_templates.py b/v2h_templates.py index 3162180..a1102b4 100644 --- a/v2h_templates.py +++ b/v2h_templates.py @@ -3,7 +3,7 @@ validator_osm_wiki_url = ( ) github_url = "https://github.com/alexey-zakharenkov/subways" produced_by = f"""Produced by -Subway Preprocessor on {{date}}.""" +Subway Preprocessor on {{date}}""" metro_mapping_osm_article = "https://wiki.openstreetmap.org/wiki/Metro_Mapping" list_of_metro_systems_url = ( "https://en.wikipedia.org/wiki/List_of_metro_systems#List" @@ -191,8 +191,7 @@ INDEX_FOOTER = f""" @@ -292,7 +291,7 @@ COUNTRY_CITY = """ COUNTRY_FOOTER = f""" - + """ diff --git a/validation_to_html.py b/validation_to_html.py index 9eca75c..9858d2b 100755 --- a/validation_to_html.py +++ b/validation_to_html.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 +import argparse import datetime import json import os import re -import sys -from subway_structure import SPREADSHEET_ID +from process_subways import DEFAULT_SPREADSHEET_ID from v2h_templates import ( COUNTRY_CITY, COUNTRY_FOOTER, @@ -105,13 +105,6 @@ def tmpl(s, data=None, **kwargs): s, flags=re.DOTALL, ) - s = s.replace("{date}", date) - 
google_url = ( - "https://docs.google.com/spreadsheets/d/{}/edit?usp=sharing".format( - SPREADSHEET_ID - ) - ) - s = s.replace("{google}", google_url) return s @@ -143,104 +136,128 @@ def esc(s): return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;") -if len(sys.argv) < 2: - print("Reads a log from subway validator and prepares HTML files.") - print( - "Usage: {} []".format(sys.argv[0]) +def main(): + parser = argparse.ArgumentParser( + description=( + "Reads a log from subway validator and prepares HTML files." + ) ) - sys.exit(1) + parser.add_argument("validation_log") + parser.add_argument("target_directory", nargs="?", default=".") + parser.add_argument( + "--cities-info-url", + default=( + "https://docs.google.com/spreadsheets/d/" + f"{DEFAULT_SPREADSHEET_ID}/edit?usp=sharing" + ), + ) + options = parser.parse_args() + target_dir = options.target_directory + cities_info_url = options.cities_info_url -with open(sys.argv[1], "r", encoding="utf-8") as f: - data = {c["name"]: CityData(c) for c in json.load(f)} + with open(options.validation_log, "r", encoding="utf-8") as f: + data = {c["name"]: CityData(c) for c in json.load(f)} -countries = {} -continents = {} -c_by_c = {} # continent → set of countries -for c in data.values(): - countries[c.country] = c + countries.get(c.country, CityData()) - continents[c.continent] = c + continents.get(c.continent, CityData()) - if c.continent not in c_by_c: - c_by_c[c.continent] = set() - c_by_c[c.continent].add(c.country) -world = sum(continents.values(), CityData()) + countries = {} + continents = {} + c_by_c = {} # continent → set of countries + for c in data.values(): + countries[c.country] = c + countries.get(c.country, CityData()) + continents[c.continent] = c + continents.get(c.continent, CityData()) + if c.continent not in c_by_c: + c_by_c[c.continent] = set() + c_by_c[c.continent].add(c.country) + world = sum(continents.values(), CityData()) -overground = "traml_expected" in next(iter(data.values())).data 
-date = datetime.datetime.utcnow().strftime("%d.%m.%Y %H:%M UTC") -path = "." if len(sys.argv) < 3 else sys.argv[2] -index = open(os.path.join(path, "index.html"), "w", encoding="utf-8") -index.write(tmpl(INDEX_HEADER, world)) + overground = "traml_expected" in next(iter(data.values())).data + date = datetime.datetime.utcnow().strftime("%d.%m.%Y %H:%M UTC") + index = open(os.path.join(target_dir, "index.html"), "w", encoding="utf-8") + index.write(tmpl(INDEX_HEADER, world)) -for continent in sorted(continents.keys()): - content = "" - for country in sorted(c_by_c[continent]): - country_file_name = country.lower().replace(" ", "-") + ".html" - content += tmpl( - INDEX_COUNTRY, - countries[country], - file=country_file_name, - country=country, - continent=continent, - ) - country_file = open( - os.path.join(path, country_file_name), "w", encoding="utf-8" - ) - country_file.write( - tmpl( - COUNTRY_HEADER, + for continent in sorted(continents.keys()): + content = "" + for country in sorted(c_by_c[continent]): + country_file_name = country.lower().replace(" ", "-") + ".html" + content += tmpl( + INDEX_COUNTRY, + countries[country], + file=country_file_name, country=country, continent=continent, - overground=overground, - subways=not overground, + ) + country_file = open( + os.path.join(target_dir, country_file_name), + "w", + encoding="utf-8", + ) + country_file.write( + tmpl( + COUNTRY_HEADER, + country=country, + continent=continent, + overground=overground, + subways=not overground, + ) + ) + for name, city in sorted(data.items()): + if city.country == country: + file_base = os.path.join(target_dir, city.slug) + yaml_file = ( + city.slug + ".yaml" + if os.path.exists(file_base + ".yaml") + else None + ) + json_file = ( + city.slug + ".geojson" + if os.path.exists(file_base + ".geojson") + else None + ) + errors = "<br>".join( + [osm_links(esc(e)) for e in city.errors] + ) + warnings = "<br>".join( + [osm_links(esc(w)) for w in city.warnings] + ) + notices = "<br>".join( + [osm_links(esc(n)) for n in city.notices] + ) + country_file.write( + tmpl( + COUNTRY_CITY, + city, + city=name, + country=country, + continent=continent, + yaml=yaml_file, + json=json_file, + subways=not overground, + errors=errors, + warnings=warnings, + notices=notices, + overground=overground, + ) + ) + country_file.write( + tmpl( + COUNTRY_FOOTER, + country=country, + continent=continent, + date=date, + ) + ) + country_file.close() + index.write( + tmpl( + INDEX_CONTINENT, + continents[continent], + content=content, + continent=continent, ) ) - for name, city in sorted(data.items()): - if city.country == country: - file_base = os.path.join(path, city.slug) - yaml_file = ( - city.slug + ".yaml" - if os.path.exists(file_base + ".yaml") - else None - ) - json_file = ( - city.slug + ".geojson" - if os.path.exists(file_base + ".geojson") - else None - ) - errors = "<br>".join([osm_links(esc(e)) for e in city.errors]) - warnings = "<br>".join( - [osm_links(esc(w)) for w in city.warnings] - ) - notices = "<br>".join( - [osm_links(esc(n)) for n in city.notices] - ) - country_file.write( - tmpl( - COUNTRY_CITY, - city, - city=name, - country=country, - continent=continent, - yaml=yaml_file, - json=json_file, - subways=not overground, - errors=errors, - warnings=warnings, - notices=notices, - overground=overground, - ) - ) - country_file.write( - tmpl(COUNTRY_FOOTER, country=country, continent=continent) - ) - country_file.close() - index.write( - tmpl( - INDEX_CONTINENT, - continents[continent], - content=content, - continent=continent, - ) - ) -index.write(tmpl(INDEX_FOOTER)) -index.close() + index.write(tmpl(INDEX_FOOTER, date=date, cities_info_url=cities_info_url)) + index.close() + + +if __name__ == "__main__": + main()