Use stoparea ids instead of instances in transfers; save transfers only for good cities

This commit is contained in:
Alexey Zakharenkov 2024-02-02 10:14:47 +03:00 committed by Alexey Zakharenkov
parent e449c98a7f
commit f7087a0c25
8 changed files with 520 additions and 68 deletions

View file

@ -490,7 +490,7 @@ def main() -> None:
good_cities = validate_cities(cities)
logging.info("Finding transfer stations")
transfers = find_transfers(osm, cities)
transfers = find_transfers(osm, good_cities)
good_city_names = set(c.name for c in good_cities)
logging.info(

View file

@ -91,18 +91,17 @@ def transit_to_dict(
# transfers
pairwise_transfers = set()
for stoparea_set in transfers:
stoparea_list = list(stoparea_set)
for first_i in range(len(stoparea_list) - 1):
for second_i in range(first_i + 1, len(stoparea_list)):
stoparea1_id = stoparea_list[first_i].id
stoparea2_id = stoparea_list[second_i].id
for stoparea_id_set in transfers:
stoparea_ids = sorted(stoparea_id_set)
for first_i in range(len(stoparea_ids) - 1):
for second_i in range(first_i + 1, len(stoparea_ids)):
stoparea1_id = stoparea_ids[first_i]
stoparea2_id = stoparea_ids[second_i]
if all(
st_id in data["stopareas"]
for st_id in (stoparea1_id, stoparea2_id)
):
id1, id2 = sorted([stoparea1_id, stoparea2_id])
pairwise_transfers.add((id1, id2))
pairwise_transfers.add((stoparea1_id, stoparea2_id))
data["transfers"] = pairwise_transfers
return data

View file

@ -4,10 +4,12 @@ import os
from collections import defaultdict
from subway_structure import (
City,
DISPLACEMENT_TOLERANCE,
distance,
el_center,
Station,
TransfersT,
)
from ._common import (
DEFAULT_INTERVAL,
@ -180,11 +182,12 @@ class MapsmeCache:
logging.warning("Failed to save cache: %s", str(e))
def process(cities, transfers, filename, cache_path):
def transit_data_to_mapsme(
cities: list[City], transfers: TransfersT, cache_path: str | None
) -> dict:
"""Generate all output and save to file.
:param cities: List of City instances
:param transfers: List of sets of StopArea.id
:param cache_path: Path to json-file with good cities cache or None.
:return: Dict with the generated transit data (the caller saves it to a file).
"""
@ -362,18 +365,21 @@ def process(cities, transfers, filename, cache_path):
pairwise_transfers = (
{}
) # (stoparea1_uid, stoparea2_uid) -> time; uid1 < uid2
for t_set in transfers:
t = list(t_set)
for t_first in range(len(t) - 1):
for t_second in range(t_first + 1, len(t)):
stoparea1 = t[t_first]
stoparea2 = t[t_second]
if stoparea1.id in stops and stoparea2.id in stops:
uid1 = uid(stoparea1.id)
uid2 = uid(stoparea2.id)
for stoparea_id_set in transfers:
stoparea_ids = list(stoparea_id_set)
for i_first in range(len(stoparea_ids) - 1):
for i_second in range(i_first + 1, len(stoparea_ids)):
stoparea1_id = stoparea_ids[i_first]
stoparea2_id = stoparea_ids[i_second]
if stoparea1_id in stops and stoparea2_id in stops:
uid1 = uid(stoparea1_id)
uid2 = uid(stoparea2_id)
uid1, uid2 = sorted([uid1, uid2])
transfer_time = TRANSFER_PENALTY + round(
distance(stoparea1.center, stoparea2.center)
distance(
stop_areas[stoparea1_id].center,
stop_areas[stoparea2_id].center,
)
/ SPEED_ON_TRANSFER
)
pairwise_transfers[(uid1, uid2)] = transfer_time
@ -392,13 +398,29 @@ def process(cities, transfers, filename, cache_path):
"transfers": pairwise_transfers,
"networks": networks,
}
return result
def process(
cities: list[City],
transfers: TransfersT,
filename: str,
cache_path: str | None,
):
"""Generate all output and save to file.
:param cities: List of City instances
:param transfers: List of sets of StopArea.id
:param filename: Path to file to save the result
:param cache_path: Path to json-file with good cities cache or None.
"""
if not filename.lower().endswith("json"):
filename = f"{filename}.json"
mapsme_transit = transit_data_to_mapsme(cities, transfers, cache_path)
with open(filename, "w", encoding="utf-8") as f:
json.dump(
result,
mapsme_transit,
f,
indent=1,
ensure_ascii=False,

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import math
import re
from collections import Counter, defaultdict
from collections.abc import Collection, Iterator
from itertools import chain, islice
from css_colours import normalize_colour
@ -45,6 +46,10 @@ used_entrances = set()
START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*")
IdT = str # Type of feature ids
TransferT = set[IdT] # A transfer is a set of StopArea IDs
TransfersT = Collection[TransferT]
def get_start_end_times(opening_hours):
"""Very simplified method to parse OSM opening_hours tag.
@ -664,6 +669,14 @@ class Route:
return None
return osm_interval_to_seconds(v)
def stopareas(self) -> Iterator[StopArea]:
    """Yield every distinct stoparea of this route exactly once.

    Iterates over the items of ``self`` and yields their ``stoparea``
    attribute in first-seen order, skipping stopareas already yielded.
    """
    seen = set()
    for stop in self:
        candidate = stop.stoparea
        if candidate in seen:
            continue
        seen.add(candidate)
        yield candidate
def __init__(self, relation, city, master=None):
assert Route.is_route(
relation, city.modes
@ -1465,6 +1478,14 @@ class RouteMaster:
self.name = None
self.interval = None
def stopareas(self) -> Iterator[StopArea]:
    """Yield every distinct stoparea of the contained routes exactly once.

    Walks the routes obtained by iterating ``self`` and chains their
    ``stopareas()`` results, keeping only the first occurrence of each
    stoparea (first-seen order).
    """
    seen = set()
    all_route_stopareas = chain.from_iterable(
        route.stopareas() for route in self
    )
    for candidate in all_route_stopareas:
        if candidate in seen:
            continue
        seen.add(candidate)
        yield candidate
def add(self, route, city):
if not self.network:
self.network = route.network
@ -1682,7 +1703,7 @@ class City:
self.stop_areas = defaultdict(
list
) # El_id → list of stop_area elements it belongs to
self.transfers = [] # List of lists of stop areas
self.transfers: TransfersT = [] # List of sets of stop areas
self.station_ids = set() # Set of stations' uid
self.stops_and_platforms = set() # Set of stops and platforms el_id
self.recovery_data = None
@ -1787,18 +1808,19 @@ class City:
else:
stop_areas.append(el)
def make_transfer(self, sag):
def make_transfer(self, stoparea_group: dict) -> None:
transfer = set()
for m in sag["members"]:
for m in stoparea_group["members"]:
k = el_id(m)
el = self.elements.get(k)
if not el:
# A sag member may validly not belong to the city while
# the sag does - near the city bbox boundary
# A stoparea_group member may validly not belong to the city
# while the stoparea_group does - near the city bbox boundary
continue
if "tags" not in el:
self.warn(
"An untagged object {} in a stop_area_group".format(k), sag
"An untagged object {} in a stop_area_group".format(k),
stoparea_group,
)
continue
if (
@ -1825,7 +1847,7 @@ class City:
k
)
)
stoparea.transfer = el_id(sag)
stoparea.transfer = el_id(stoparea_group)
if len(transfer) > 1:
self.transfers.append(transfer)
@ -1918,20 +1940,28 @@ class City:
self.make_transfer(el)
# Filter transfers, leaving only stations that belong to routes
used_stop_areas = set()
for rmaster in self.routes.values():
for route in rmaster:
used_stop_areas.update([s.stoparea for s in route.stops])
new_transfers = []
for transfer in self.transfers:
new_tr = [s for s in transfer if s in used_stop_areas]
if len(new_tr) > 1:
new_transfers.append(new_tr)
self.transfers = new_transfers
own_stopareas = set(self.stopareas())
self.transfers = [
inner_transfer
for inner_transfer in (
own_stopareas.intersection(transfer)
for transfer in self.transfers
)
if len(inner_transfer) > 1
]
def __iter__(self):
return iter(self.routes.values())
def stopareas(self) -> Iterator[StopArea]:
    """Yield every distinct stoparea of the city's route masters once.

    Each route master obtained by iterating ``self`` is asked for its
    ``stopareas()``; a stoparea already yielded for an earlier route
    master is not yielded again.
    """
    emitted = set()
    for route_master in self:
        # The filter is evaluated lazily, so it sees stopareas added to
        # `emitted` earlier within the same route master as well.
        fresh = (
            sa for sa in route_master.stopareas() if sa not in emitted
        )
        for sa in fresh:
            emitted.add(sa)
            yield sa
@property
def is_good(self):
if not (self.errors or self.validate_called):
@ -2306,36 +2336,38 @@ class City:
route.calculate_distances()
def find_transfers(
    elements: list[dict], cities: Collection[City]
) -> TransfersT:
    """Build transfers from stop_area_group relations.

    As for now, two Cities may contain the same stoparea, but those
    StopArea instances would have different python id. So we don't store
    references to StopAreas, but only their ids. This is important at
    inter-city interchanges.

    :param elements: OSM elements as dicts; only relations that have
        "members" and are tagged public_transport=stop_area_group are used
    :param cities: Cities whose stopareas may take part in transfers;
        cities that are not ``is_good`` are ignored
    :return: List of sets of StopArea ids; every set has at least
        two members
    """
    stop_area_groups = [
        el
        for el in elements
        if el["type"] == "relation"
        and "members" in el
        and el.get("tags", {}).get("public_transport") == "stop_area_group"
    ]

    # Ids of stopareas that are used by routes of good cities.
    stopareas_in_cities_ids = {
        stoparea.id
        for city in cities
        if city.is_good
        for stoparea in city.stopareas()
    }

    transfers = []
    for stop_area_group in stop_area_groups:
        member_ids = (el_id(member) for member in stop_area_group["members"])
        transfer: TransferT = {
            member_id
            for member_id in member_ids
            if member_id in stopareas_in_cities_ids
        }
        # A transfer needs at least two known stopareas to be meaningful.
        if len(transfer) > 1:
            transfers.append(transfer)
    return transfers

View file

@ -21,6 +21,7 @@ metro_samples = [
},
],
"gtfs_dir": "assets/tiny_world_gtfs",
"transfers": [{"r1", "r2"}, {"r3", "r4"}],
"json_dump": """
{
"stopareas": {
@ -366,5 +367,320 @@ metro_samples = [
]
}
""",
"mapsme_output": {
"stops": [
{
"name": "Station 1",
"int_name": None,
"lat": 0.0,
"lon": 0.0,
"osm_type": "node",
"osm_id": 1,
"id": 8,
"entrances": [
{
"osm_type": "node",
"osm_id": 1,
"lon": 0.0,
"lat": 0.0,
"distance": 60,
}
],
"exits": [
{
"osm_type": "node",
"osm_id": 1,
"lon": 0.0,
"lat": 0.0,
"distance": 60,
}
],
},
{
"name": "Station 2",
"int_name": None,
"lat": 0.0047037307,
"lon": 0.00470373068,
"osm_type": "node",
"osm_id": 2,
"id": 14,
"entrances": [
{
"osm_type": "node",
"osm_id": 2,
"lon": 0.0047209447,
"lat": 0.004686516680000001,
"distance": 60,
}
],
"exits": [
{
"osm_type": "node",
"osm_id": 2,
"lon": 0.0047209447,
"lat": 0.004686516680000001,
"distance": 60,
}
],
},
{
"name": "Station 3",
"int_name": None,
"lat": 0.0097589171,
"lon": 0.01012040581,
"osm_type": "node",
"osm_id": 3,
"id": 30,
"entrances": [
{
"osm_type": "node",
"osm_id": 201,
"lon": 0.01007169217,
"lat": 0.00967473055,
"distance": 68,
},
{
"osm_type": "node",
"osm_id": 202,
"lon": 0.01018702716,
"lat": 0.00966936613,
"distance": 69,
},
],
"exits": [
{
"osm_type": "node",
"osm_id": 201,
"lon": 0.01007169217,
"lat": 0.00967473055,
"distance": 68,
},
{
"osm_type": "node",
"osm_id": 202,
"lon": 0.01018702716,
"lat": 0.00966936613,
"distance": 69,
},
],
},
{
"name": "Station 4",
"int_name": None,
"lat": 0.01,
"lon": 0.0,
"osm_type": "node",
"osm_id": 4,
"id": 32,
"entrances": [
{
"osm_type": "node",
"osm_id": 205,
"lon": 0.000201163,
"lat": 0.01015484596,
"distance": 80,
}
],
"exits": [
{
"osm_type": "node",
"osm_id": 205,
"lon": 0.000201163,
"lat": 0.01015484596,
"distance": 80,
}
],
},
{
"name": "Station 5",
"int_name": None,
"lat": 0.00514739839,
"lon": 0.0047718624,
"osm_type": "node",
"osm_id": 5,
"id": 22,
"entrances": [
{
"osm_type": "node",
"osm_id": 5,
"lon": 0.0047718624,
"lat": 0.00514739839,
"distance": 60,
}
],
"exits": [
{
"osm_type": "node",
"osm_id": 5,
"lon": 0.0047718624,
"lat": 0.00514739839,
"distance": 60,
}
],
},
{
"name": "Station 6",
"int_name": None,
"lat": 0.0,
"lon": 0.01,
"osm_type": "node",
"osm_id": 6,
"id": 48,
"entrances": [
{
"osm_type": "node",
"osm_id": 6,
"lon": 0.01,
"lat": 0.0,
"distance": 60,
}
],
"exits": [
{
"osm_type": "node",
"osm_id": 6,
"lon": 0.01,
"lat": 0.0,
"distance": 60,
}
],
},
{
"name": "Station 7",
"int_name": None,
"lat": 0.010286367745,
"lon": 0.009716854315,
"osm_type": "node",
"osm_id": 7,
"id": 38,
"entrances": [
{
"osm_type": "node",
"osm_id": 203,
"lon": 0.00959962338,
"lat": 0.01042574907,
"distance": 75,
},
{
"osm_type": "node",
"osm_id": 204,
"lon": 0.00952183932,
"lat": 0.01034796501,
"distance": 76,
},
],
"exits": [
{
"osm_type": "node",
"osm_id": 203,
"lon": 0.00959962338,
"lat": 0.01042574907,
"distance": 75,
},
{
"osm_type": "node",
"osm_id": 204,
"lon": 0.00952183932,
"lat": 0.01034796501,
"distance": 76,
},
],
},
{
"name": "Station 8",
"int_name": None,
"lat": 0.014377764559999999,
"lon": 0.012405493905,
"osm_type": "node",
"osm_id": 8,
"id": 134,
"entrances": [
{
"osm_type": "node",
"osm_id": 8,
"lon": 0.012391026016666667,
"lat": 0.01436273297,
"distance": 60,
}
],
"exits": [
{
"osm_type": "node",
"osm_id": 8,
"lon": 0.012391026016666667,
"lat": 0.01436273297,
"distance": 60,
}
],
},
],
"transfers": [(14, 22, 81), (30, 38, 106)],
"networks": [
{
"network": "Intersecting 2 metro lines",
"routes": [
{
"type": "subway",
"ref": "1",
"name": "Blue Line",
"colour": "0000ff",
"route_id": 30,
"itineraries": [
{
"stops": [[8, 0], [14, 67], [30, 141]],
"interval": 150,
},
{
"stops": [[30, 0], [14, 74], [8, 141]],
"interval": 150,
},
],
},
{
"type": "subway",
"ref": "2",
"name": "Red Line",
"colour": "ff0000",
"route_id": 28,
"itineraries": [
{
"stops": [[32, 0], [22, 68], [48, 142]],
"interval": 150,
},
{
"stops": [[48, 0], [22, 74], [32, 142]],
"interval": 150,
},
],
},
],
"agency_id": 1,
},
{
"network": "One light rail line",
"routes": [
{
"type": "light_rail",
"ref": "LR",
"name": "LR Line",
"colour": "ffffff",
"route_id": 22,
"itineraries": [
{
"stops": [[38, 0], [134, 49]],
"interval": 150,
},
{
"stops": [[134, 0], [38, 48]],
"interval": 150,
},
],
"casing": "a52a2a",
}
],
"agency_id": 2,
},
],
},
},
]

View file

@ -0,0 +1,30 @@
from copy import deepcopy
from tests.sample_data_for_outputs import metro_samples
from tests.util import TestCase, JsonLikeComparisonMixin
class TestTransfers(JsonLikeComparisonMixin, TestCase):
    """Check that the validator calculates the expected transfers."""

    def _test__find_transfers__for_sample(self, metro_sample: dict) -> None:
        """Compare calculated transfers with the sample's "transfers"."""
        _, calculated_transfers = self.prepare_cities(metro_sample)
        self.assertSequenceAlmostEqualIgnoreOrder(
            metro_sample["transfers"],
            calculated_transfers,
            # A transfer is a set, so it is compared as a sorted list.
            cmp=sorted,
        )

    def test__find_transfers(self) -> None:
        """Run the check for the intact sample and for a spoiled copy."""
        intact_sample = metro_samples[0]

        # Make the second city invalid and thus exclude the inter-city transfer
        spoiled_sample = deepcopy(metro_samples[0])
        spoiled_sample["cities_info"][1]["num_stations"] += 1
        spoiled_sample["transfers"] = [{"r1", "r2"}]

        for sample in (intact_sample, spoiled_sample):
            with self.subTest(msg=sample["name"]):
                self._test__find_transfers__for_sample(sample)

View file

@ -0,0 +1,53 @@
from operator import itemgetter
from processors.mapsme import transit_data_to_mapsme
from tests.sample_data_for_outputs import metro_samples
from tests.util import JsonLikeComparisonMixin, TestCase
class TestMapsme(JsonLikeComparisonMixin, TestCase):
    """Test processors/mapsme.py"""

    def test__transit_data_to_mapsme(self) -> None:
        """Run the comparison for every metro sample as a subtest."""
        for sample in metro_samples:
            with self.subTest(msg=sample["name"]):
                self._test__transit_data_to_mapsme__for_sample(sample)

    def _test__transit_data_to_mapsme__for_sample(
        self, metro_sample: dict
    ) -> None:
        """Compare generated data with the sample's "mapsme_output"."""
        cities, transfers = self.prepare_cities(metro_sample)
        actual = transit_data_to_mapsme(cities, transfers, cache_path=None)
        expected = metro_sample["mapsme_output"]

        # Both dicts must have exactly the same top-level keys.
        self.assertSetEqual(set(expected.keys()), set(actual.keys()))

        # Entrances and exits are identified by their OSM type and id.
        osm_object_key = itemgetter("osm_type", "osm_id")
        self.assertSequenceAlmostEqualIgnoreOrder(
            expected["stops"],
            actual["stops"],
            cmp=itemgetter("id"),
            unordered_lists={
                "entrances": osm_object_key,
                "exits": osm_object_key,
            },
        )

        self.assertSequenceAlmostEqualIgnoreOrder(
            expected["transfers"],
            actual["transfers"],
        )

        self.assertSequenceAlmostEqualIgnoreOrder(
            expected["networks"],
            actual["networks"],
            cmp=itemgetter("network"),
            unordered_lists={
                "routes": itemgetter("route_id"),
                "itineraries": itemgetter("stops", "interval"),
            },
        )

View file

@ -173,7 +173,7 @@ class JsonLikeComparisonMixin:
self: TestCaseMixin,
seq1: Sequence,
seq2: Sequence,
cmp: Callable | None,
cmp: Callable | None = None,
places: int = 10,
*,
unordered_lists: dict[str, Callable] | None = None,