Add --cities-info-url CLI parameter to the main script and utilities

Alexey Zakharenkov 2023-01-11 16:13:29 +03:00 committed by Alexey Zakharenkov
parent 92563e6d80
commit 52599fe566
7 changed files with 242 additions and 181 deletions

make_all_metro_poly.py (View file)

@@ -1,20 +1,23 @@
+import argparse
+
 import shapely.geometry
 import shapely.ops
 
-from process_subways import download_cities
+from process_subways import DEFAULT_CITIES_INFO_URL, get_cities_info
 
 
-def make_disjoint_metro_polygons():
-    cities = download_cities()
+def make_disjoint_metro_polygons(cities_info_url: str) -> None:
+    cities_info = get_cities_info(cities_info_url)
 
     polygons = []
-    for c in cities:
+    for ci in cities_info:
+        bbox = tuple(map(float, ci["bbox"].split(",")))
         polygon = shapely.geometry.Polygon(
             [
-                (c.bbox[1], c.bbox[0]),
-                (c.bbox[1], c.bbox[2]),
-                (c.bbox[3], c.bbox[2]),
-                (c.bbox[3], c.bbox[0]),
+                (bbox[0], bbox[1]),
+                (bbox[0], bbox[3]),
+                (bbox[2], bbox[3]),
+                (bbox[2], bbox[1]),
             ]
         )
         polygons.append(polygon)
@@ -31,5 +34,19 @@ def make_disjoint_metro_polygons():
     print("END")
 
 
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--cities-info-url",
+        default=DEFAULT_CITIES_INFO_URL,
+        help=(
+            "URL of CSV file with reference information about rapid transit "
+            "networks. file:// protocol is also supported."
+        ),
+    )
+    options = parser.parse_args()
+    make_disjoint_metro_polygons(options.cities_info_url)
+
+
 if __name__ == "__main__":
-    make_disjoint_metro_polygons()
+    main()
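
For context: the bbox cell in the cities CSV is a comma-separated "x1,y1,x2,y2" string in lon/lat order (the same convention as the BBOX variable documented in the shell script below), so the new loop builds each polygon roughly like this standalone sketch; the sample coordinates are invented for illustration:

# Standalone sketch of the bbox-to-polygon step above; not part of the commit.
import shapely.geometry

bbox = tuple(map(float, "37.36,55.57,37.91,55.92".split(",")))
polygon = shapely.geometry.Polygon(
    [
        (bbox[0], bbox[1]),  # (x1, y1)
        (bbox[0], bbox[3]),  # (x1, y2)
        (bbox[2], bbox[3]),  # (x2, y2)
        (bbox[2], bbox[1]),  # (x2, y1)
    ]
)
print(polygon.wkt)  # POLYGON ((37.36 55.57, 37.36 55.92, ...))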

mapsme_json_to_cities.py (View file)

@@ -1,7 +1,7 @@
 import argparse
 import json
 
-from process_subways import download_cities
+from process_subways import DEFAULT_CITIES_INFO_URL, get_cities_info
 
 
 if __name__ == "__main__":
@@ -25,6 +25,15 @@ if __name__ == "__main__":
         ),
     )
     arg_parser.add_argument(
+        "--cities-info-url",
+        default=DEFAULT_CITIES_INFO_URL,
+        help=(
+            "URL of CSV file with reference information about rapid transit "
+            "networks. file:// protocol is also supported."
+        ),
+    )
+    arg_parser.add_argument(
         "--with-bad",
         action="store_true",
@@ -40,14 +49,14 @@ if __name__ == "__main__":
     good_cities = set(
         n.get("network", n.get("title")) for n in subway_json["networks"]
     )
-    cities = download_cities()
+    cities_info = get_cities_info(args.cities_info_url)
 
     lines = []
-    for c in cities:
-        if c.name in good_cities:
-            lines.append(f"{c.name}, {c.country}")
+    for ci in cities_info:
+        if ci["name"] in good_cities:
+            lines.append(f"{ci['name']}, {ci['country']}")
         elif with_bad:
-            lines.append(f"{c.name}, {c.country} (Bad)")
+            lines.append(f"{ci['name']}, {ci['country']} (Bad)")
 
     for line in sorted(lines):
         print(line)
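
Note the access-pattern change: get_cities_info() returns plain dicts straight from csv.DictReader, so the former City attributes (c.name, c.country) become key lookups. Roughly, one entry looks like this; all values are invented for illustration:

# Illustrative shape of one cities_info entry; not part of the commit.
city_info = {
    "id": "1",
    "name": "Moscow",
    "country": "Russia",
    "continent": "Europe",
    "num_stations": "271",  # DictReader yields strings, not ints
    "num_lines": "15",
    "num_light_lines": "1",
    "num_interchanges": "80",
    "bbox": "37.36,55.57,37.91,55.92",
    "networks": "Moscow Metro",
}
print(f"{city_info['name']}, {city_info['country']}")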

process_subways.py (View file)

@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 import argparse
+import csv
 import inspect
 import json
 import logging
@@ -9,6 +10,7 @@ import sys
 import time
 import urllib.parse
 import urllib.request
+from functools import partial
 from typing import Dict, List, Optional, Tuple
 
 import processors
@@ -20,8 +22,8 @@ from subway_io import (
     write_recovery_data,
 )
 from subway_structure import (
+    City,
     CriticalValidationError,
-    download_cities,
     find_transfers,
     get_unused_entrances_geojson,
     MODES_OVERGROUND,
@@ -29,6 +31,12 @@ from subway_structure import (
 )
 
+DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k"
+DEFAULT_CITIES_INFO_URL = (
+    "https://docs.google.com/spreadsheets/d/"
+    f"{DEFAULT_SPREADSHEET_ID}/export?format=csv"
+)
+
 Point = Tuple[float, float]
@@ -49,13 +57,11 @@ def overpass_request(overground, overpass_api, bboxes):
         "rel(br)[type=public_transport][public_transport=stop_area_group];"
     )
     query += ");(._;>>;);out body center qt;"
-    logging.info("Query: %s", query)
+    logging.debug("Query: %s", query)
     url = "{}?data={}".format(overpass_api, urllib.parse.quote(query))
     response = urllib.request.urlopen(url, timeout=1000)
-    if response.getcode() != 200:
-        raise Exception(
-            "Failed to query Overpass API: HTTP {}".format(response.getcode())
-        )
+    if (r_code := response.getcode()) != 200:
+        raise Exception(f"Failed to query Overpass API: HTTP {r_code}")
     return json.load(response)["elements"]
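
A side change in this hunk: the status check now uses an assignment expression (Python 3.8+), binding the code and testing it in one condition. A minimal standalone equivalent; the stub function and status value are invented:

# Invented stub illustrating the walrus-operator pattern above.
def getcode() -> int:
    return 504  # made-up status for the demo

if (r_code := getcode()) != 200:
    print(f"Failed to query Overpass API: HTTP {r_code}")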
@@ -258,8 +264,69 @@ def validate_cities(cities):
     return good_cities
 
 
+def get_cities_info(
+    cities_info_url: str = DEFAULT_CITIES_INFO_URL,
+) -> List[dict]:
+    response = urllib.request.urlopen(cities_info_url)
+    if (
+        not cities_info_url.startswith("file://")
+        and (r_code := response.getcode()) != 200
+    ):
+        raise Exception(
+            f"Failed to download cities spreadsheet: HTTP {r_code}"
+        )
+    data = response.read().decode("utf-8")
+    reader = csv.DictReader(
+        data.splitlines(),
+        fieldnames=(
+            "id",
+            "name",
+            "country",
+            "continent",
+            "num_stations",
+            "num_lines",
+            "num_light_lines",
+            "num_interchanges",
+            "bbox",
+            "networks",
+        ),
+    )
+
+    cities_info = list()
+    names = set()
+    next(reader)  # skipping the header
+    for city_info in reader:
+        if city_info["id"] and city_info["bbox"]:
+            cities_info.append(city_info)
+            name = city_info["name"].strip()
+            if name in names:
+                logging.warning(
+                    "Duplicate city name in city list: %s",
+                    city_info,
+                )
+            names.add(name)
+    return cities_info
+
+
+def prepare_cities(
+    cities_info_url: str = DEFAULT_CITIES_INFO_URL, overground: bool = False
+) -> List[City]:
+    if overground:
+        raise NotImplementedError("Overground transit not implemented yet")
+    cities_info = get_cities_info(cities_info_url)
+    return list(map(partial(City, overground=overground), cities_info))
+
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
+        "--cities-info-url",
+        default=DEFAULT_CITIES_INFO_URL,
+        help=(
+            "URL of CSV file with reference information about rapid transit "
+            "networks. file:// protocol is also supported."
+        ),
+    )
+    parser.add_argument(
         "-i",
         "--source",
@@ -340,8 +407,7 @@ def main():
         format="%(asctime)s %(levelname)-7s %(message)s",
     )
 
-    # Downloading cities from Google Spreadsheets
-    cities = download_cities(options.overground)
+    cities = prepare_cities(options.cities_info_url, options.overground)
 
     if options.city:
         cities = [
             c
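
A note on the file:// branch in get_cities_info() above: urllib.request.urlopen() happily opens file:// URLs, but such responses carry no HTTP status (getcode() returns None), which is why the status check is skipped for them. A standalone sketch of that path, using a throwaway CSV; paths and data are invented, and unlike the commit, DictReader here infers field names from the header row instead of taking an explicit fieldnames tuple:

# Standalone sketch of the file:// path; not part of the commit.
import csv
import tempfile
import urllib.request

header = (
    "id,name,country,continent,num_stations,num_lines,"
    "num_light_lines,num_interchanges,bbox,networks"
)
row = '1,Moscow,Russia,Europe,271,15,1,80,"37.36,55.57,37.91,55.92",Moscow Metro'
with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False) as f:
    f.write(header + "\n" + row + "\n")
    url = "file://" + f.name

response = urllib.request.urlopen(url)
assert response.getcode() is None  # no HTTP status for file:// URLs
reader = csv.DictReader(response.read().decode("utf-8").splitlines())
print(next(reader)["bbox"])  # 37.36,55.57,37.91,55.92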

scripts/process_subways.sh (View file)

@@ -32,6 +32,7 @@ Environment variable reference:
 - PLANET_METRO: path to a local o5m file with extract of cities having metro
   It's used instead of \$PLANET if exists otherwise it's created first
 - PLANET_UPDATE_SERVER: server to get replication data from. Defaults to https://planet.openstreetmap.org/replication/
+- CITIES_INFO_URL: http(s) or "file://" URL to a CSV file with reference information about rapid transit systems. A default value is hardcoded in the Python code.
 - CITY: name of a city/country to process
 - BBOX: bounding box of an extract; x1,y1,x2,y2. Has precedence over \$POLY
 - POLY: *.poly file with [multi]polygon comprising cities with metro
@@ -92,7 +93,8 @@ function check_poly() {
       if [ -n "$("$PYTHON" -c "import shapely" 2>&1)" ]; then
         "$PYTHON" -m pip install shapely
       fi
-      "$PYTHON" "$SUBWAYS_PATH"/make_all_metro_poly.py > "$POLY"
+      "$PYTHON" "$SUBWAYS_PATH"/make_all_metro_poly.py \
+        ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} > "$POLY"
     fi
   fi
   POLY_CHECKED=1
@@ -241,6 +243,7 @@ fi
 VALIDATION="$TMPDIR/validation.json"
 "$PYTHON" "$SUBWAYS_PATH/process_subways.py" ${QUIET:+-q} \
     -x "$FILTERED_DATA" -l "$VALIDATION" \
+    ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \
    ${MAPSME:+--output-mapsme "$MAPSME"} \
    ${GTFS:+--output-gtfs "$GTFS"} \
    ${CITY:+-c "$CITY"} ${DUMP:+-d "$DUMP"} ${GEOJSON:+-j "$GEOJSON"} \
@@ -261,7 +264,9 @@ fi
     mkdir -p $HTML_DIR
     rm -f "$HTML_DIR"/*.html
-    "$PYTHON" "$SUBWAYS_PATH/validation_to_html.py" "$VALIDATION" "$HTML_DIR"
+    "$PYTHON" "$SUBWAYS_PATH/validation_to_html.py" \
+        ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \
+        "$VALIDATION" "$HTML_DIR"
 
 # Uploading files to the server

subway_structure.py (View file)

@@ -1,15 +1,10 @@
-import csv
 import logging
 import math
 import re
-import urllib.parse
-import urllib.request
 from collections import Counter, defaultdict
 
 from css_colours import normalize_colour
 
-SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k"
-
 MAX_DISTANCE_TO_ENTRANCES = 300  # in meters
 MAX_DISTANCE_STOP_TO_LINE = 50  # in meters
 ALLOWED_STATIONS_MISMATCH = 0.02  # part of total station count
@@ -2108,50 +2103,3 @@ def get_unused_entrances_geojson(elements):
             }
         )
     return {"type": "FeatureCollection", "features": features}
-
-
-def download_cities(overground=False):
-    assert not overground, "Overground transit not implemented yet"
-    url = (
-        "https://docs.google.com/spreadsheets/d/{}/export?format=csv{}".format(
-            SPREADSHEET_ID, "&gid=1881416409" if overground else ""
-        )
-    )
-    response = urllib.request.urlopen(url)
-    if response.getcode() != 200:
-        raise Exception(
-            "Failed to download cities spreadsheet: HTTP {}".format(
-                response.getcode()
-            )
-        )
-    data = response.read().decode("utf-8")
-    reader = csv.DictReader(
-        data.splitlines(),
-        fieldnames=(
-            "id",
-            "name",
-            "country",
-            "continent",
-            "num_stations",
-            "num_lines",
-            "num_light_lines",
-            "num_interchanges",
-            "bbox",
-            "networks",
-        ),
-    )
-    next(reader)  # skipping the header
-    names = set()
-    cities = []
-    for city_data in reader:
-        if city_data["id"] and city_data["bbox"]:
-            cities.append(City(city_data, overground))
-            name = city_data["name"].strip()
-            if name in names:
-                logging.warning(
-                    "Duplicate city name in the google spreadsheet: %s",
-                    city_data,
-                )
-            names.add(name)
-    return cities
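
The per-row City(city_data, overground) construction removed here survives in prepare_cities() above as functools.partial plus map. A standalone sketch of that idiom; the Record class stands in for City and is invented for the demo:

from functools import partial


class Record:  # invented stand-in for City
    def __init__(self, data, overground=False):
        self.name = data["name"]
        self.overground = overground


rows = [{"name": "Moscow"}, {"name": "Lyon"}]
records = list(map(partial(Record, overground=False), rows))
print([r.name for r in records])  # ['Moscow', 'Lyon']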

v2h_templates.py (View file)

@@ -3,7 +3,7 @@ validator_osm_wiki_url = (
 )
 github_url = "https://github.com/alexey-zakharenkov/subways"
 produced_by = f"""Produced by
-<a href="{github_url}">Subway Preprocessor</a> on {{date}}."""
+<a href="{github_url}">Subway Preprocessor</a> on {{date}}"""
 metro_mapping_osm_article = "https://wiki.openstreetmap.org/wiki/Metro_Mapping"
 list_of_metro_systems_url = (
     "https://en.wikipedia.org/wiki/List_of_metro_systems#List"
@@ -191,8 +191,7 @@ INDEX_FOOTER = f"""
 </table>
 </main>
 <footer>{produced_by}
-See <a href="{{google}}">this spreadsheet</a> for the reference
-metro statistics and
+from <a href="{{cities_info_url}}">this reference metro statistics</a>. See
 <a href="{list_of_metro_systems_url}">
 this wiki page</a> for a list of all metro systems.</footer>
 </body>
@@ -292,7 +291,7 @@ COUNTRY_CITY = """
 COUNTRY_FOOTER = f"""
 </table>
 </main>
-<footer>{produced_by}</footer>
+<footer>{produced_by}.</footer>
 </body>
 </html>
 """

validation_to_html.py (View file)

@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
+import argparse
 import datetime
 import json
 import os
 import re
-import sys
 
-from subway_structure import SPREADSHEET_ID
+from process_subways import DEFAULT_SPREADSHEET_ID
 from v2h_templates import (
     COUNTRY_CITY,
     COUNTRY_FOOTER,
@@ -105,13 +105,6 @@ def tmpl(s, data=None, **kwargs):
         s,
         flags=re.DOTALL,
     )
-    s = s.replace("{date}", date)
-    google_url = (
-        "https://docs.google.com/spreadsheets/d/{}/edit?usp=sharing".format(
-            SPREADSHEET_ID
-        )
-    )
-    s = s.replace("{google}", google_url)
     return s
@@ -143,104 +136,128 @@ def esc(s):
     return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
 
 
-if len(sys.argv) < 2:
-    print("Reads a log from subway validator and prepares HTML files.")
-    print(
-        "Usage: {} <validation.log> [<target_directory>]".format(sys.argv[0])
-    )
-    sys.exit(1)
+def main():
+    parser = argparse.ArgumentParser(
+        description=(
+            "Reads a log from subway validator and prepares HTML files."
+        )
+    )
+    parser.add_argument("validation_log")
+    parser.add_argument("target_directory", nargs="?", default=".")
+    parser.add_argument(
+        "--cities-info-url",
+        default=(
+            "https://docs.google.com/spreadsheets/d/"
+            f"{DEFAULT_SPREADSHEET_ID}/edit?usp=sharing"
+        ),
+    )
+    options = parser.parse_args()
+
+    target_dir = options.target_directory
+    cities_info_url = options.cities_info_url
 
-with open(sys.argv[1], "r", encoding="utf-8") as f:
-    data = {c["name"]: CityData(c) for c in json.load(f)}
+    with open(options.validation_log, "r", encoding="utf-8") as f:
+        data = {c["name"]: CityData(c) for c in json.load(f)}
 
-countries = {}
-continents = {}
-c_by_c = {}  # continent → set of countries
-for c in data.values():
-    countries[c.country] = c + countries.get(c.country, CityData())
-    continents[c.continent] = c + continents.get(c.continent, CityData())
-    if c.continent not in c_by_c:
-        c_by_c[c.continent] = set()
-    c_by_c[c.continent].add(c.country)
-world = sum(continents.values(), CityData())
+    countries = {}
+    continents = {}
+    c_by_c = {}  # continent → set of countries
+    for c in data.values():
+        countries[c.country] = c + countries.get(c.country, CityData())
+        continents[c.continent] = c + continents.get(c.continent, CityData())
+        if c.continent not in c_by_c:
+            c_by_c[c.continent] = set()
+        c_by_c[c.continent].add(c.country)
+    world = sum(continents.values(), CityData())
 
-overground = "traml_expected" in next(iter(data.values())).data
-date = datetime.datetime.utcnow().strftime("%d.%m.%Y %H:%M UTC")
-
-path = "." if len(sys.argv) < 3 else sys.argv[2]
-index = open(os.path.join(path, "index.html"), "w", encoding="utf-8")
-index.write(tmpl(INDEX_HEADER, world))
+    overground = "traml_expected" in next(iter(data.values())).data
+    date = datetime.datetime.utcnow().strftime("%d.%m.%Y %H:%M UTC")
+
+    index = open(os.path.join(target_dir, "index.html"), "w", encoding="utf-8")
+    index.write(tmpl(INDEX_HEADER, world))
 
-for continent in sorted(continents.keys()):
-    content = ""
-    for country in sorted(c_by_c[continent]):
-        country_file_name = country.lower().replace(" ", "-") + ".html"
-        content += tmpl(
-            INDEX_COUNTRY,
-            countries[country],
-            file=country_file_name,
-            country=country,
-            continent=continent,
-        )
-        country_file = open(
-            os.path.join(path, country_file_name), "w", encoding="utf-8"
-        )
-        country_file.write(
-            tmpl(
-                COUNTRY_HEADER,
-                country=country,
-                continent=continent,
-                overground=overground,
-                subways=not overground,
-            )
-        )
+    for continent in sorted(continents.keys()):
+        content = ""
+        for country in sorted(c_by_c[continent]):
+            country_file_name = country.lower().replace(" ", "-") + ".html"
+            content += tmpl(
+                INDEX_COUNTRY,
+                countries[country],
+                file=country_file_name,
+                country=country,
+                continent=continent,
+                overground=overground,
+                subways=not overground,
+            )
+            country_file = open(
+                os.path.join(target_dir, country_file_name),
+                "w",
+                encoding="utf-8",
+            )
+            country_file.write(
+                tmpl(
+                    COUNTRY_HEADER,
+                    country=country,
+                    continent=continent,
+                    overground=overground,
+                    subways=not overground,
+                )
+            )
+            for name, city in sorted(data.items()):
+                if city.country == country:
+                    file_base = os.path.join(target_dir, city.slug)
+                    yaml_file = (
+                        city.slug + ".yaml"
+                        if os.path.exists(file_base + ".yaml")
+                        else None
+                    )
+                    json_file = (
+                        city.slug + ".geojson"
+                        if os.path.exists(file_base + ".geojson")
+                        else None
+                    )
+                    errors = "<br>".join(
+                        [osm_links(esc(e)) for e in city.errors]
+                    )
+                    warnings = "<br>".join(
+                        [osm_links(esc(w)) for w in city.warnings]
+                    )
+                    notices = "<br>".join(
+                        [osm_links(esc(n)) for n in city.notices]
+                    )
+                    country_file.write(
+                        tmpl(
+                            COUNTRY_CITY,
+                            city,
+                            city=name,
+                            country=country,
+                            continent=continent,
+                            yaml=yaml_file,
+                            json=json_file,
+                            subways=not overground,
+                            errors=errors,
+                            warnings=warnings,
+                            notices=notices,
+                            overground=overground,
+                        )
+                    )
+            country_file.write(
+                tmpl(
+                    COUNTRY_FOOTER,
+                    country=country,
+                    continent=continent,
+                    date=date,
+                )
+            )
+            country_file.close()
+        index.write(
+            tmpl(
+                INDEX_CONTINENT,
+                continents[continent],
+                content=content,
+                continent=continent,
+            )
+        )
-        for name, city in sorted(data.items()):
-            if city.country == country:
-                file_base = os.path.join(path, city.slug)
-                yaml_file = (
-                    city.slug + ".yaml"
-                    if os.path.exists(file_base + ".yaml")
-                    else None
-                )
-                json_file = (
-                    city.slug + ".geojson"
-                    if os.path.exists(file_base + ".geojson")
-                    else None
-                )
-                errors = "<br>".join([osm_links(esc(e)) for e in city.errors])
-                warnings = "<br>".join(
-                    [osm_links(esc(w)) for w in city.warnings]
-                )
-                notices = "<br>".join(
-                    [osm_links(esc(n)) for n in city.notices]
-                )
-                country_file.write(
-                    tmpl(
-                        COUNTRY_CITY,
-                        city,
-                        city=name,
-                        country=country,
-                        continent=continent,
-                        yaml=yaml_file,
-                        json=json_file,
-                        subways=not overground,
-                        errors=errors,
-                        warnings=warnings,
-                        notices=notices,
-                        overground=overground,
-                    )
-                )
-        country_file.write(
-            tmpl(COUNTRY_FOOTER, country=country, continent=continent)
-        )
-        country_file.close()
-    index.write(
-        tmpl(
-            INDEX_CONTINENT,
-            continents[continent],
-            content=content,
-            continent=continent,
-        )
-    )
 
-index.write(tmpl(INDEX_FOOTER))
-index.close()
+    index.write(tmpl(INDEX_FOOTER, date=date, cities_info_url=cities_info_url))
+    index.close()
+
+
+if __name__ == "__main__":
+    main()
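
For reference, a hedged repro of the new command-line surface above: the validation log stays positional, the target directory becomes optional via nargs="?", and the spreadsheet link is now overridable. The default URL below is shortened to an invented placeholder:

# Standalone repro of the new CLI; not the project code.
import argparse

parser = argparse.ArgumentParser(
    description="Reads a log from subway validator and prepares HTML files."
)
parser.add_argument("validation_log")
parser.add_argument("target_directory", nargs="?", default=".")
parser.add_argument("--cities-info-url", default="https://example.com/cities")

print(parser.parse_args(["validation.json", "html"]))
print(parser.parse_args(["validation.json"]))  # target_directory falls back to "."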