Check CSV numeric values on reading network information
This commit is contained in:
parent
c7ae6ece61
commit
483429129c
6 changed files with 126 additions and 46 deletions
4
.github/workflows/python-app.yml
vendored
4
.github/workflows/python-app.yml
vendored
|
@ -19,10 +19,10 @@ jobs:
|
|||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python 3.8
|
||||
- name: Set up Python 3.11
|
||||
uses: actions/setup-python@v3
|
||||
with:
|
||||
python-version: "3.8"
|
||||
python-version: "3.11"
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
|
|
|
@ -53,7 +53,7 @@ a city's bbox has been extended.
|
|||
A single city or a country with few metro networks can be validated much faster
|
||||
if you allow the `process_subway.py` to fetch data from Overpass API. Here are the steps:
|
||||
|
||||
1. Python3 interpreter required (3.8+)
|
||||
1. Python3 interpreter required (3.11+)
|
||||
2. Clone the repo
|
||||
```
|
||||
git clone https://github.com/alexey-zakharenkov/subways.git subways_validator
|
||||
|
|
|
@ -11,7 +11,6 @@ import time
|
|||
import urllib.parse
|
||||
import urllib.request
|
||||
from functools import partial
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import processors
|
||||
from subway_io import (
|
||||
|
@ -30,17 +29,18 @@ from subway_structure import (
|
|||
MODES_RAPID,
|
||||
)
|
||||
|
||||
|
||||
DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k"
|
||||
DEFAULT_CITIES_INFO_URL = (
|
||||
"https://docs.google.com/spreadsheets/d/"
|
||||
f"{DEFAULT_SPREADSHEET_ID}/export?format=csv"
|
||||
)
|
||||
|
||||
Point = Tuple[float, float]
|
||||
Point = tuple[float, float]
|
||||
|
||||
|
||||
def overpass_request(overground, overpass_api, bboxes):
|
||||
def overpass_request(
|
||||
overground: bool, overpass_api: str, bboxes: list[list[float]]
|
||||
) -> list[dict]:
|
||||
query = "[out:json][timeout:1000];("
|
||||
modes = MODES_OVERGROUND if overground else MODES_RAPID
|
||||
for bbox in bboxes:
|
||||
|
@ -65,7 +65,9 @@ def overpass_request(overground, overpass_api, bboxes):
|
|||
return json.load(response)["elements"]
|
||||
|
||||
|
||||
def multi_overpass(overground, overpass_api, bboxes):
|
||||
def multi_overpass(
|
||||
overground: bool, overpass_api: str, bboxes: list[list[float]]
|
||||
) -> list[dict]:
|
||||
SLICE_SIZE = 10
|
||||
INTERREQUEST_WAIT = 5 # in seconds
|
||||
result = []
|
||||
|
@ -77,13 +79,13 @@ def multi_overpass(overground, overpass_api, bboxes):
|
|||
return result
|
||||
|
||||
|
||||
def slugify(name):
|
||||
def slugify(name: str) -> str:
|
||||
return re.sub(r"[^a-z0-9_-]+", "", name.lower().replace(" ", "_"))
|
||||
|
||||
|
||||
def get_way_center(
|
||||
element: dict, node_centers: Dict[int, Point]
|
||||
) -> Optional[Point]:
|
||||
element: dict, node_centers: dict[int, Point]
|
||||
) -> Point | None:
|
||||
"""
|
||||
:param element: dict describing OSM element
|
||||
:param node_centers: osm_id => (lat, lon)
|
||||
|
@ -123,11 +125,11 @@ def get_way_center(
|
|||
|
||||
def get_relation_center(
|
||||
element: dict,
|
||||
node_centers: Dict[int, Point],
|
||||
way_centers: Dict[int, Point],
|
||||
relation_centers: Dict[int, Point],
|
||||
node_centers: dict[int, Point],
|
||||
way_centers: dict[int, Point],
|
||||
relation_centers: dict[int, Point],
|
||||
ignore_unlocalized_child_relations: bool = False,
|
||||
) -> Optional[Point]:
|
||||
) -> Point | None:
|
||||
"""
|
||||
:param element: dict describing OSM element
|
||||
:param node_centers: osm_id => (lat, lon)
|
||||
|
@ -176,14 +178,14 @@ def get_relation_center(
|
|||
return element["center"]["lat"], element["center"]["lon"]
|
||||
|
||||
|
||||
def calculate_centers(elements):
|
||||
def calculate_centers(elements: list[dict]) -> None:
|
||||
"""Adds 'center' key to each way/relation in elements,
|
||||
except for empty ways or relations.
|
||||
Relies on nodes-ways-relations order in the elements list.
|
||||
"""
|
||||
nodes: Dict[int, Point] = {} # id => (lat, lon)
|
||||
ways: Dict[int, Point] = {} # id => (lat, lon)
|
||||
relations: Dict[int, Point] = {} # id => (lat, lon)
|
||||
nodes: dict[int, Point] = {} # id => (lat, lon)
|
||||
ways: dict[int, Point] = {} # id => (lat, lon)
|
||||
relations: dict[int, Point] = {} # id => (lat, lon)
|
||||
|
||||
unlocalized_relations = [] # 'unlocalized' means the center of the
|
||||
# relation has not been calculated yet
|
||||
|
@ -202,7 +204,7 @@ def calculate_centers(elements):
|
|||
|
||||
def iterate_relation_centers_calculation(
|
||||
ignore_unlocalized_child_relations: bool,
|
||||
) -> List[int]:
|
||||
) -> list[dict]:
|
||||
unlocalized_relations_upd = []
|
||||
for rel in unlocalized_relations:
|
||||
if center := get_relation_center(
|
||||
|
@ -229,14 +231,16 @@ def calculate_centers(elements):
|
|||
unlocalized_relations = unlocalized_relations_upd
|
||||
|
||||
|
||||
def add_osm_elements_to_cities(osm_elements, cities):
|
||||
def add_osm_elements_to_cities(
|
||||
osm_elements: list[dict], cities: list[City]
|
||||
) -> None:
|
||||
for el in osm_elements:
|
||||
for c in cities:
|
||||
if c.contains(el):
|
||||
c.add(el)
|
||||
|
||||
|
||||
def validate_cities(cities):
|
||||
def validate_cities(cities: list[City]) -> list[City]:
|
||||
"""Validate cities. Return list of good cities."""
|
||||
good_cities = []
|
||||
for c in cities:
|
||||
|
@ -266,7 +270,7 @@ def validate_cities(cities):
|
|||
|
||||
def get_cities_info(
|
||||
cities_info_url: str = DEFAULT_CITIES_INFO_URL,
|
||||
) -> List[dict]:
|
||||
) -> list[dict]:
|
||||
response = urllib.request.urlopen(cities_info_url)
|
||||
if (
|
||||
not cities_info_url.startswith("file://")
|
||||
|
@ -310,14 +314,14 @@ def get_cities_info(
|
|||
|
||||
def prepare_cities(
|
||||
cities_info_url: str = DEFAULT_CITIES_INFO_URL, overground: bool = False
|
||||
) -> List[City]:
|
||||
) -> list[City]:
|
||||
if overground:
|
||||
raise NotImplementedError("Overground transit not implemented yet")
|
||||
cities_info = get_cities_info(cities_info_url)
|
||||
return list(map(partial(City, overground=overground), cities_info))
|
||||
|
||||
|
||||
def main():
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--cities-info-url",
|
||||
|
|
|
@ -1517,25 +1517,23 @@ class City:
|
|||
self.errors = []
|
||||
self.warnings = []
|
||||
self.notices = []
|
||||
self.id = int(city_data["id"])
|
||||
self.try_fill_int_attribute(city_data, "id")
|
||||
self.name = city_data["name"]
|
||||
self.country = city_data["country"]
|
||||
self.continent = city_data["continent"]
|
||||
self.overground = overground
|
||||
if not overground:
|
||||
self.num_stations = int(city_data["num_stations"])
|
||||
self.num_lines = int(city_data["num_lines"] or "0")
|
||||
self.num_light_lines = int(city_data["num_light_lines"] or "0")
|
||||
self.num_interchanges = int(city_data["num_interchanges"] or "0")
|
||||
self.try_fill_int_attribute(city_data, "num_stations")
|
||||
self.try_fill_int_attribute(city_data, "num_lines", "0")
|
||||
self.try_fill_int_attribute(city_data, "num_light_lines", "0")
|
||||
self.try_fill_int_attribute(city_data, "num_interchanges", "0")
|
||||
else:
|
||||
self.num_tram_lines = int(city_data["num_tram_lines"] or "0")
|
||||
self.num_trolleybus_lines = int(
|
||||
city_data["num_trolleybus_lines"] or "0"
|
||||
)
|
||||
self.num_bus_lines = int(city_data["num_bus_lines"] or "0")
|
||||
self.num_other_lines = int(city_data["num_other_lines"] or "0")
|
||||
self.try_fill_int_attribute(city_data, "num_tram_lines", "0")
|
||||
self.try_fill_int_attribute(city_data, "num_trolleybus_lines", "0")
|
||||
self.try_fill_int_attribute(city_data, "num_bus_lines", "0")
|
||||
self.try_fill_int_attribute(city_data, "num_other_lines", "0")
|
||||
|
||||
# Aquiring list of networks and modes
|
||||
# Acquiring list of networks and modes
|
||||
networks = (
|
||||
None
|
||||
if not city_data["networks"]
|
||||
|
@ -1574,6 +1572,33 @@ class City:
|
|||
self.stops_and_platforms = set() # Set of stops and platforms el_id
|
||||
self.recovery_data = None
|
||||
|
||||
def try_fill_int_attribute(
|
||||
self, city_data: dict, attr: str, default: str | None = None
|
||||
) -> None:
|
||||
"""Try to convert string value to int. Conversion is considered
|
||||
to fail if one of the following is true:
|
||||
* attr is not empty and data type casting fails;
|
||||
* attr is empty and no default value is given.
|
||||
In such cases the city is marked as bad by adding an error
|
||||
to the city validation log.
|
||||
"""
|
||||
attr_value = city_data[attr]
|
||||
if not attr_value and default is not None:
|
||||
attr_value = default
|
||||
|
||||
try:
|
||||
attr_int = int(attr_value)
|
||||
except ValueError:
|
||||
print_value = (
|
||||
f"{city_data[attr]}" if city_data[attr] else "<empty>"
|
||||
)
|
||||
self.error(
|
||||
f"Configuration error: wrong value for {attr}: {print_value}"
|
||||
)
|
||||
setattr(self, attr, 0)
|
||||
else:
|
||||
setattr(self, attr, attr_int)
|
||||
|
||||
@staticmethod
|
||||
def log_message(message, el):
|
||||
if el:
|
||||
|
@ -1814,12 +1839,12 @@ class City:
|
|||
if not self.overground:
|
||||
result.update(
|
||||
{
|
||||
"subwayl_expected": self.num_lines,
|
||||
"lightrl_expected": self.num_light_lines,
|
||||
"subwayl_expected": getattr(self, "num_lines", 0),
|
||||
"lightrl_expected": getattr(self, "num_light_lines", 0),
|
||||
"subwayl_found": getattr(self, "found_lines", 0),
|
||||
"lightrl_found": getattr(self, "found_light_lines", 0),
|
||||
"stations_expected": self.num_stations,
|
||||
"transfers_expected": self.num_interchanges,
|
||||
"stations_expected": getattr(self, "num_stations", 0),
|
||||
"transfers_expected": getattr(self, "num_interchanges", 0),
|
||||
}
|
||||
)
|
||||
else:
|
||||
|
@ -1827,10 +1852,12 @@ class City:
|
|||
{
|
||||
"stations_expected": 0,
|
||||
"transfers_expected": 0,
|
||||
"busl_expected": self.num_bus_lines,
|
||||
"trolleybusl_expected": self.num_trolleybus_lines,
|
||||
"traml_expected": self.num_tram_lines,
|
||||
"otherl_expected": self.num_other_lines,
|
||||
"busl_expected": getattr(self, "num_bus_lines", 0),
|
||||
"trolleybusl_expected": getattr(
|
||||
self, "num_trolleybus_lines", 0
|
||||
),
|
||||
"traml_expected": getattr(self, "num_tram_lines", 0),
|
||||
"otherl_expected": getattr(self, "num_other_lines", 0),
|
||||
"busl_found": getattr(self, "found_bus_lines", 0),
|
||||
"trolleybusl_found": getattr(
|
||||
self, "found_trolleybus_lines", 0
|
||||
|
@ -2006,7 +2033,8 @@ class City:
|
|||
)
|
||||
log_function = (
|
||||
self.error
|
||||
if not (
|
||||
if self.num_stations > 0
|
||||
and not (
|
||||
0
|
||||
<= (self.num_stations - self.found_stations)
|
||||
/ self.num_stations
|
||||
|
|
12
tests/assets/networks_with_bad_values.csv
Normal file
12
tests/assets/networks_with_bad_values.csv
Normal file
|
@ -0,0 +1,12 @@
|
|||
#,City,Country,Region,Stations,Subway Lines,Light Rail +Monorail,Interchanges,"BBox (lon, lat)",Networks (opt.),Comment,Source,
|
||||
291,Moscow,Russia,Europe,335,14,3,66,,"subway,train:Московский метрополитен;МЦК;МЦД","No bbox - skip the city",
|
||||
,Moscow - Aeroexpress,Russia,Europe,22,0,3,0,"37.170,55.385,38.028,56.022",train:Аэроэкспресс,"No id - skip the city",https://aeroexpress.ru
|
||||
292,Nizhny Novgorod,Russia,Europe,16,2,0,,"43.759918,56.1662,44.13208,56.410862",,"No configuration errors",,,
|
||||
NBS,Novosibirsk,Russia,Europe,13,2,0,1,"82.774773,54.926747,83.059044,55.127864",,"Non-numeric ID",,,
|
||||
294,Saint Petersburg,Russia,Europe,72,,,,"30.0648,59.7509,30.5976,60.1292",,"Empty line counts - no problem at CSV parsing stage",,
|
||||
,,,,,,,,,,,,
|
||||
295,Samara,Russia,Europe,10x,1,0,0,"50.011826,53.094024,50.411453,53.384147",,"Non-numeric station count",,
|
||||
296,Volgograd,Russia,Europe,40,zero,2zero,0,"44.366704,48.636024,44.62302,48.81765",,"Non-numbers in subway and light_rail line counts",,
|
||||
297,Yekaterinburg,Russia,Europe,,1,0,0,"60.460854,56.730505,60.727272,56.920997",,"Empty station count",,
|
||||
,,,,,,,,,,,,
|
||||
|
Can't render this file because it has a wrong number of fields in line 2.
|
36
tests/test_prepare_cities.py
Normal file
36
tests/test_prepare_cities.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
import inspect
|
||||
from pathlib import Path
|
||||
from unittest import TestCase
|
||||
|
||||
from process_subways import prepare_cities
|
||||
|
||||
|
||||
class TestPrepareCities(TestCase):
|
||||
def test_prepare_cities(self) -> None:
|
||||
csv_path = (
|
||||
Path(inspect.getfile(self.__class__)).parent
|
||||
/ "assets"
|
||||
/ "networks_with_bad_values.csv"
|
||||
)
|
||||
|
||||
cities = prepare_cities(cities_info_url=f"file://{csv_path}")
|
||||
|
||||
city_errors = {city.name: sorted(city.errors) for city in cities}
|
||||
|
||||
expected_errors = {
|
||||
"Nizhny Novgorod": [],
|
||||
"Novosibirsk": ["Configuration error: wrong value for id: NBS"],
|
||||
"Saint Petersburg": [],
|
||||
"Samara": [
|
||||
"Configuration error: wrong value for num_stations: 10x"
|
||||
],
|
||||
"Volgograd": [
|
||||
"Configuration error: wrong value for num_light_lines: 2zero",
|
||||
"Configuration error: wrong value for num_lines: zero",
|
||||
],
|
||||
"Yekaterinburg": [
|
||||
"Configuration error: wrong value for num_stations: <empty>"
|
||||
],
|
||||
}
|
||||
|
||||
self.assertDictEqual(city_errors, expected_errors)
|
Loading…
Add table
Reference in a new issue