Check CSV numeric values on reading network information

This commit is contained in:
Alexey Zakharenkov 2023-03-27 11:32:19 +03:00 committed by Alexey Zakharenkov
parent c7ae6ece61
commit 483429129c
6 changed files with 126 additions and 46 deletions

View file

@ -19,10 +19,10 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.8
- name: Set up Python 3.11
uses: actions/setup-python@v3
with:
python-version: "3.8"
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip

View file

@ -53,7 +53,7 @@ a city's bbox has been extended.
A single city or a country with few metro networks can be validated much faster
if you allow the `process_subway.py` to fetch data from Overpass API. Here are the steps:
1. Python3 interpreter required (3.8+)
1. Python3 interpreter required (3.11+)
2. Clone the repo
```
git clone https://github.com/alexey-zakharenkov/subways.git subways_validator

View file

@ -11,7 +11,6 @@ import time
import urllib.parse
import urllib.request
from functools import partial
from typing import Dict, List, Optional, Tuple
import processors
from subway_io import (
@ -30,17 +29,18 @@ from subway_structure import (
MODES_RAPID,
)
DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k"
DEFAULT_CITIES_INFO_URL = (
"https://docs.google.com/spreadsheets/d/"
f"{DEFAULT_SPREADSHEET_ID}/export?format=csv"
)
Point = Tuple[float, float]
Point = tuple[float, float]
def overpass_request(overground, overpass_api, bboxes):
def overpass_request(
overground: bool, overpass_api: str, bboxes: list[list[float]]
) -> list[dict]:
query = "[out:json][timeout:1000];("
modes = MODES_OVERGROUND if overground else MODES_RAPID
for bbox in bboxes:
@ -65,7 +65,9 @@ def overpass_request(overground, overpass_api, bboxes):
return json.load(response)["elements"]
def multi_overpass(overground, overpass_api, bboxes):
def multi_overpass(
overground: bool, overpass_api: str, bboxes: list[list[float]]
) -> list[dict]:
SLICE_SIZE = 10
INTERREQUEST_WAIT = 5 # in seconds
result = []
@ -77,13 +79,13 @@ def multi_overpass(overground, overpass_api, bboxes):
return result
def slugify(name):
def slugify(name: str) -> str:
return re.sub(r"[^a-z0-9_-]+", "", name.lower().replace(" ", "_"))
def get_way_center(
element: dict, node_centers: Dict[int, Point]
) -> Optional[Point]:
element: dict, node_centers: dict[int, Point]
) -> Point | None:
"""
:param element: dict describing OSM element
:param node_centers: osm_id => (lat, lon)
@ -123,11 +125,11 @@ def get_way_center(
def get_relation_center(
element: dict,
node_centers: Dict[int, Point],
way_centers: Dict[int, Point],
relation_centers: Dict[int, Point],
node_centers: dict[int, Point],
way_centers: dict[int, Point],
relation_centers: dict[int, Point],
ignore_unlocalized_child_relations: bool = False,
) -> Optional[Point]:
) -> Point | None:
"""
:param element: dict describing OSM element
:param node_centers: osm_id => (lat, lon)
@ -176,14 +178,14 @@ def get_relation_center(
return element["center"]["lat"], element["center"]["lon"]
def calculate_centers(elements):
def calculate_centers(elements: list[dict]) -> None:
"""Adds 'center' key to each way/relation in elements,
except for empty ways or relations.
Relies on nodes-ways-relations order in the elements list.
"""
nodes: Dict[int, Point] = {} # id => (lat, lon)
ways: Dict[int, Point] = {} # id => (lat, lon)
relations: Dict[int, Point] = {} # id => (lat, lon)
nodes: dict[int, Point] = {} # id => (lat, lon)
ways: dict[int, Point] = {} # id => (lat, lon)
relations: dict[int, Point] = {} # id => (lat, lon)
unlocalized_relations = [] # 'unlocalized' means the center of the
# relation has not been calculated yet
@ -202,7 +204,7 @@ def calculate_centers(elements):
def iterate_relation_centers_calculation(
ignore_unlocalized_child_relations: bool,
) -> List[int]:
) -> list[dict]:
unlocalized_relations_upd = []
for rel in unlocalized_relations:
if center := get_relation_center(
@ -229,14 +231,16 @@ def calculate_centers(elements):
unlocalized_relations = unlocalized_relations_upd
def add_osm_elements_to_cities(osm_elements, cities):
def add_osm_elements_to_cities(
osm_elements: list[dict], cities: list[City]
) -> None:
for el in osm_elements:
for c in cities:
if c.contains(el):
c.add(el)
def validate_cities(cities):
def validate_cities(cities: list[City]) -> list[City]:
"""Validate cities. Return list of good cities."""
good_cities = []
for c in cities:
@ -266,7 +270,7 @@ def validate_cities(cities):
def get_cities_info(
cities_info_url: str = DEFAULT_CITIES_INFO_URL,
) -> List[dict]:
) -> list[dict]:
response = urllib.request.urlopen(cities_info_url)
if (
not cities_info_url.startswith("file://")
@ -310,14 +314,14 @@ def get_cities_info(
def prepare_cities(
cities_info_url: str = DEFAULT_CITIES_INFO_URL, overground: bool = False
) -> List[City]:
) -> list[City]:
if overground:
raise NotImplementedError("Overground transit not implemented yet")
cities_info = get_cities_info(cities_info_url)
return list(map(partial(City, overground=overground), cities_info))
def main():
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument(
"--cities-info-url",

View file

@ -1517,25 +1517,23 @@ class City:
self.errors = []
self.warnings = []
self.notices = []
self.id = int(city_data["id"])
self.try_fill_int_attribute(city_data, "id")
self.name = city_data["name"]
self.country = city_data["country"]
self.continent = city_data["continent"]
self.overground = overground
if not overground:
self.num_stations = int(city_data["num_stations"])
self.num_lines = int(city_data["num_lines"] or "0")
self.num_light_lines = int(city_data["num_light_lines"] or "0")
self.num_interchanges = int(city_data["num_interchanges"] or "0")
self.try_fill_int_attribute(city_data, "num_stations")
self.try_fill_int_attribute(city_data, "num_lines", "0")
self.try_fill_int_attribute(city_data, "num_light_lines", "0")
self.try_fill_int_attribute(city_data, "num_interchanges", "0")
else:
self.num_tram_lines = int(city_data["num_tram_lines"] or "0")
self.num_trolleybus_lines = int(
city_data["num_trolleybus_lines"] or "0"
)
self.num_bus_lines = int(city_data["num_bus_lines"] or "0")
self.num_other_lines = int(city_data["num_other_lines"] or "0")
self.try_fill_int_attribute(city_data, "num_tram_lines", "0")
self.try_fill_int_attribute(city_data, "num_trolleybus_lines", "0")
self.try_fill_int_attribute(city_data, "num_bus_lines", "0")
self.try_fill_int_attribute(city_data, "num_other_lines", "0")
# Aquiring list of networks and modes
# Acquiring list of networks and modes
networks = (
None
if not city_data["networks"]
@ -1574,6 +1572,33 @@ class City:
self.stops_and_platforms = set() # Set of stops and platforms el_id
self.recovery_data = None
def try_fill_int_attribute(
self, city_data: dict, attr: str, default: str | None = None
) -> None:
"""Try to convert string value to int. Conversion is considered
to fail if one of the following is true:
* attr is not empty and data type casting fails;
* attr is empty and no default value is given.
In such cases the city is marked as bad by adding an error
to the city validation log.
"""
attr_value = city_data[attr]
if not attr_value and default is not None:
attr_value = default
try:
attr_int = int(attr_value)
except ValueError:
print_value = (
f"{city_data[attr]}" if city_data[attr] else "<empty>"
)
self.error(
f"Configuration error: wrong value for {attr}: {print_value}"
)
setattr(self, attr, 0)
else:
setattr(self, attr, attr_int)
@staticmethod
def log_message(message, el):
if el:
@ -1814,12 +1839,12 @@ class City:
if not self.overground:
result.update(
{
"subwayl_expected": self.num_lines,
"lightrl_expected": self.num_light_lines,
"subwayl_expected": getattr(self, "num_lines", 0),
"lightrl_expected": getattr(self, "num_light_lines", 0),
"subwayl_found": getattr(self, "found_lines", 0),
"lightrl_found": getattr(self, "found_light_lines", 0),
"stations_expected": self.num_stations,
"transfers_expected": self.num_interchanges,
"stations_expected": getattr(self, "num_stations", 0),
"transfers_expected": getattr(self, "num_interchanges", 0),
}
)
else:
@ -1827,10 +1852,12 @@ class City:
{
"stations_expected": 0,
"transfers_expected": 0,
"busl_expected": self.num_bus_lines,
"trolleybusl_expected": self.num_trolleybus_lines,
"traml_expected": self.num_tram_lines,
"otherl_expected": self.num_other_lines,
"busl_expected": getattr(self, "num_bus_lines", 0),
"trolleybusl_expected": getattr(
self, "num_trolleybus_lines", 0
),
"traml_expected": getattr(self, "num_tram_lines", 0),
"otherl_expected": getattr(self, "num_other_lines", 0),
"busl_found": getattr(self, "found_bus_lines", 0),
"trolleybusl_found": getattr(
self, "found_trolleybus_lines", 0
@ -2006,7 +2033,8 @@ class City:
)
log_function = (
self.error
if not (
if self.num_stations > 0
and not (
0
<= (self.num_stations - self.found_stations)
/ self.num_stations

View file

@ -0,0 +1,12 @@
#,City,Country,Region,Stations,Subway Lines,Light Rail +Monorail,Interchanges,"BBox (lon, lat)",Networks (opt.),Comment,Source,
291,Moscow,Russia,Europe,335,14,3,66,,"subway,train:Московский метрополитен;МЦК;МЦД","No bbox - skip the city",
,Moscow - Aeroexpress,Russia,Europe,22,0,3,0,"37.170,55.385,38.028,56.022",train:Аэроэкспресс,"No id - skip the city",https://aeroexpress.ru
292,Nizhny Novgorod,Russia,Europe,16,2,0,,"43.759918,56.1662,44.13208,56.410862",,"No configuration errors",,,
NBS,Novosibirsk,Russia,Europe,13,2,0,1,"82.774773,54.926747,83.059044,55.127864",,"Non-numeric ID",,,
294,Saint Petersburg,Russia,Europe,72,,,,"30.0648,59.7509,30.5976,60.1292",,"Empty line counts - no problem at CSV parsing stage",,
,,,,,,,,,,,,
295,Samara,Russia,Europe,10x,1,0,0,"50.011826,53.094024,50.411453,53.384147",,"Non-numeric station count",,
296,Volgograd,Russia,Europe,40,zero,2zero,0,"44.366704,48.636024,44.62302,48.81765",,"Non-numbers in subway and light_rail line counts",,
297,Yekaterinburg,Russia,Europe,,1,0,0,"60.460854,56.730505,60.727272,56.920997",,"Empty station count",,
,,,,,,,,,,,,
Can't render this file because it has a wrong number of fields in line 2.

View file

@ -0,0 +1,36 @@
import inspect
from pathlib import Path
from unittest import TestCase
from process_subways import prepare_cities
class TestPrepareCities(TestCase):
def test_prepare_cities(self) -> None:
csv_path = (
Path(inspect.getfile(self.__class__)).parent
/ "assets"
/ "networks_with_bad_values.csv"
)
cities = prepare_cities(cities_info_url=f"file://{csv_path}")
city_errors = {city.name: sorted(city.errors) for city in cities}
expected_errors = {
"Nizhny Novgorod": [],
"Novosibirsk": ["Configuration error: wrong value for id: NBS"],
"Saint Petersburg": [],
"Samara": [
"Configuration error: wrong value for num_stations: 10x"
],
"Volgograd": [
"Configuration error: wrong value for num_light_lines: 2zero",
"Configuration error: wrong value for num_lines: zero",
],
"Yekaterinburg": [
"Configuration error: wrong value for num_stations: <empty>"
],
}
self.assertDictEqual(city_errors, expected_errors)