Process route duration (average vehicle speed)

This commit is contained in:
Alexey Zakharenkov 2024-04-04 12:48:29 +03:00
parent 6a4c2a255f
commit aff6a9f129
11 changed files with 303 additions and 90 deletions

View file

@ -17,3 +17,10 @@ def el_center(el: OsmElementT) -> LonLat | None:
elif "center" in el:
return el["center"]["lon"], el["center"]["lat"]
return None
def get_network(relation: OsmElementT) -> str | None:
    """Return the network name stored in the tags of an OSM relation.

    The tags are probed in this order of precedence: "network:metro",
    "network", "operator". The value of the first tag that is present
    is returned; None is returned when none of them is set.
    """
    tags = relation["tags"]
    network_keys = ("network:metro", "network", "operator")
    return next((tags[key] for key in network_keys if key in tags), None)

View file

@ -10,6 +10,7 @@ if typing.TYPE_CHECKING:
DEFAULT_INTERVAL = 2.5 * 60 # seconds
KMPH_TO_MPS = 1 / 3.6 # km/h to m/s conversion multiplier
DEFAULT_AVE_VEHICLE_SPEED = 40 * KMPH_TO_MPS # m/s
SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS # m/s
TRANSFER_PENALTY = 30 # seconds
@ -52,6 +53,7 @@ def transit_to_dict(cities: list[City], transfers: TransfersT) -> dict:
"start_time": route.start_time,
"end_time": route.end_time,
"interval": route.interval,
"duration": route.duration,
"stops": [
{
"stoparea_id": route_stop.stoparea.id,

View file

@ -9,8 +9,10 @@ from tarfile import TarFile, TarInfo
from zipfile import ZipFile
from ._common import (
DEFAULT_AVE_VEHICLE_SPEED,
DEFAULT_INTERVAL,
format_colour,
KMPH_TO_MPS,
SPEED_ON_TRANSFER,
TRANSFER_PENALTY,
transit_to_dict,
@ -63,6 +65,7 @@ GTFS_COLUMNS = {
"trip_route_type",
"route_pattern_id",
"bikes_allowed",
"average_speed", # extension field (km/h)
],
"stops": [
"stop_id",
@ -242,11 +245,22 @@ def transit_data_to_gtfs(data: dict) -> dict:
for itinerary in route_master["itineraries"]:
shape_id = itinerary["id"][1:] # truncate leading 'r'
average_speed = round(
(
DEFAULT_AVE_VEHICLE_SPEED
if not itinerary["duration"]
else itinerary["stops"][-1]["distance"]
/ itinerary["duration"]
)
/ KMPH_TO_MPS,
1,
) # km/h
trip = {
"trip_id": itinerary["id"],
"route_id": route_master["id"],
"service_id": "always",
"shape_id": shape_id,
"average_speed": average_speed,
}
gtfs_data["trips"].append(trip)

View file

@ -14,6 +14,7 @@ from subways.osm_element import el_center
from subways.structure.station import Station
from subways.types import IdT, LonLat, OsmElementT, TransfersT
from ._common import (
DEFAULT_AVE_VEHICLE_SPEED,
DEFAULT_INTERVAL,
format_colour,
KMPH_TO_MPS,
@ -29,7 +30,6 @@ if typing.TYPE_CHECKING:
OSM_TYPES = {"n": (0, "node"), "w": (2, "way"), "r": (3, "relation")}
ENTRANCE_PENALTY = 60 # seconds
SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS # m/s
SPEED_ON_LINE = 40 * KMPH_TO_MPS # m/s
# (stoparea1_uid, stoparea2_uid) -> seconds; stoparea1_uid < stoparea2_uid
TransferTimesT: TypeAlias = dict[tuple[int, int], int]
@ -258,7 +258,7 @@ def transit_data_to_mapsme(
itin.append(
[
uid(stop.stoparea.id),
round(stop.distance / SPEED_ON_LINE),
round(stop.distance / DEFAULT_AVE_VEHICLE_SPEED),
]
)
# Make exits from platform nodes,

View file

@ -8,7 +8,7 @@ from subways.consts import (
DEFAULT_MODES_OVERGROUND,
DEFAULT_MODES_RAPID,
)
from subways.osm_element import el_center, el_id
from subways.osm_element import el_center, el_id, get_network
from subways.structure.route import Route
from subways.structure.route_master import RouteMaster
from subways.structure.station import Station
@ -287,11 +287,11 @@ class City:
if el["tags"].get("access") in ("no", "private"):
continue
route_id = el_id(el)
master = self.masters.get(route_id, None)
master_element = self.masters.get(route_id, None)
if self.networks:
network = Route.get_network(el)
if master:
master_network = Route.get_network(master)
network = get_network(el)
if master_element:
master_network = get_network(master_element)
else:
master_network = None
if (
@ -300,7 +300,7 @@ class City:
):
continue
route = self.route_class(el, self, master)
route = self.route_class(el, self, master_element)
if not route.stops:
self.warn("Route has no stops", el)
continue
@ -308,15 +308,11 @@ class City:
self.warn("Route has only one stop", el)
continue
k = el_id(master) if master else route.ref
if k not in self.routes:
self.routes[k] = RouteMaster(self, master)
self.routes[k].add(route)
# Sometimes adding a route to a newly initialized RouteMaster
# can fail
if len(self.routes[k]) == 0:
del self.routes[k]
master_id = el_id(master_element) or route.ref
route_master = self.routes.setdefault(
master_id, RouteMaster(self, master_element)
)
route_master.add(route)
# And while we're iterating over relations, find interchanges
if (

View file

@ -2,7 +2,7 @@ from __future__ import annotations
import re
import typing
from collections.abc import Callable, Iterator
from collections.abc import Callable, Collection, Iterator
from itertools import islice
from subways.consts import (
@ -18,7 +18,7 @@ from subways.geom_utils import (
find_segment,
project_on_line,
)
from subways.osm_element import el_id, el_center
from subways.osm_element import el_id, el_center, get_network
from subways.structure.route_stop import RouteStop
from subways.structure.station import Station
from subways.structure.stop_area import StopArea
@ -33,24 +33,29 @@ ALLOWED_ANGLE_BETWEEN_STOPS = 45 # in degrees
DISALLOWED_ANGLE_BETWEEN_STOPS = 20 # in degrees
def get_start_end_times(
def parse_time_range(
opening_hours: str,
) -> tuple[tuple[int, int], tuple[int, int]] | tuple[None, None]:
) -> tuple[tuple[int, int], tuple[int, int]] | None:
"""Very simplified method to parse OSM opening_hours tag.
We simply take the first HH:MM-HH:MM substring which is the most probable
opening hours interval for most of the weekdays.
"""
start_time, end_time = None, None
if opening_hours == "24/7":
return (0, 0), (24, 0)
m = START_END_TIMES_RE.match(opening_hours)
if m:
ints = tuple(map(int, m.groups()))
start_time = (ints[0], ints[1])
end_time = (ints[2], ints[3])
if not m:
return None
ints = tuple(map(int, m.groups()))
if ints[1] > 59 or ints[3] > 59:
return None
start_time = (ints[0], ints[1])
end_time = (ints[2], ints[3])
return start_time, end_time
def osm_interval_to_seconds(interval_str: str) -> int | None:
"""Convert to int an OSM value for 'interval'/'headway' tag
"""Convert to int an OSM value for 'interval'/'headway'/'duration' tag
which may be in these formats:
HH:MM:SS,
HH:MM,
@ -71,7 +76,54 @@ def osm_interval_to_seconds(interval_str: str) -> int | None:
return None
except ValueError:
return None
return seconds + 60 * minutes + 60 * 60 * hours
if seconds < 0 or minutes < 0 or hours < 0:
return None
if semicolon_count > 0 and (seconds >= 60 or minutes >= 60):
return None
interval = seconds + 60 * minutes + 60 * 60 * hours
if interval == 0:
return None
return interval
def get_interval_in_seconds_from_tags(
    tags: dict, keys: str | Collection[str]
) -> int | None:
    """Find a time interval among "tags" and convert it to seconds.

    "keys" is a single tag name or a collection of synonymous tag names
    (e.g., "interval" and "headway" mean the same in OSM), listed in
    order of precedence. An exact key match is preferred; if no key
    is present as is, suffixed tags of the form "<key>:<suffix>" are
    considered, again following the order of "keys".

    Examples:
        interval=5 => 300
        headway:peak=00:01:30 => 90

    Returns None if no suitable tag is found, if the found value is
    empty, or if the value cannot be converted.
    """
    candidate_keys = (keys,) if isinstance(keys, str) else keys

    # First pass: a tag named exactly as one of the keys.
    raw_value = next(
        (tags[key] for key in candidate_keys if key in tags), None
    )

    if raw_value is None:
        # Second pass: a "<key>:<suffix>" tag. For each key the first
        # matching tag (in the iteration order of "tags") is taken;
        # the search goes on to the next key only while the value
        # found so far is empty.
        for key in candidate_keys:
            prefix = key + ":"
            raw_value = next(
                (tags[name] for name in tags if name.startswith(prefix)),
                raw_value,
            )
            if raw_value:
                break

    if not raw_value:
        return None
    return osm_interval_to_seconds(raw_value)
def get_route_interval(tags: dict) -> int | None:
    """Return the route interval in seconds taken from "tags".

    The "interval" tag is looked up first, then its synonym "headway";
    the result is whatever get_interval_in_seconds_from_tags() yields
    for these keys (None when nothing usable is found).
    """
    interval_keys = ("interval", "headway")
    return get_interval_in_seconds_from_tags(tags, interval_keys)
def get_route_duration(tags: dict) -> int | None:
    """Return the route duration in seconds taken from "tags".

    The value is looked up by the "duration" key; the result is whatever
    get_interval_in_seconds_from_tags() yields for it (None when nothing
    usable is found).
    """
    return get_interval_in_seconds_from_tags(tags, keys="duration")
class Route:
@ -95,29 +147,6 @@ class Route:
return False
return True
@staticmethod
def get_network(relation: OsmElementT) -> str | None:
for k in ("network:metro", "network", "operator"):
if k in relation["tags"]:
return relation["tags"][k]
return None
@staticmethod
def get_interval(tags: dict) -> int | None:
v = None
for k in ("interval", "headway"):
if k in tags:
v = tags[k]
break
else:
for kk in tags:
if kk.startswith(k + ":"):
v = tags[kk]
break
if not v:
return None
return osm_interval_to_seconds(v)
def stopareas(self) -> Iterator[StopArea]:
yielded_stopareas = set()
for route_stop in self:
@ -146,6 +175,7 @@ class Route:
self.infill = None
self.network = None
self.interval = None
self.duration = None
self.start_time = None
self.end_time = None
self.is_circular = False
@ -319,46 +349,51 @@ class Route:
def process_tags(self, master: OsmElementT) -> None:
relation = self.element
tags = relation["tags"]
master_tags = {} if not master else master["tags"]
if "ref" not in relation["tags"] and "ref" not in master_tags:
if "ref" not in tags and "ref" not in master_tags:
self.city.notice("Missing ref on a route", relation)
self.ref = relation["tags"].get(
"ref", master_tags.get("ref", relation["tags"].get("name", None))
self.ref = tags.get(
"ref", master_tags.get("ref", tags.get("name", None))
)
self.name = relation["tags"].get("name", None)
self.mode = relation["tags"]["route"]
self.name = tags.get("name", None)
self.mode = tags["route"]
if (
"colour" not in relation["tags"]
"colour" not in tags
and "colour" not in master_tags
and self.mode != "tram"
):
self.city.notice("Missing colour on a route", relation)
try:
self.colour = normalize_colour(
relation["tags"].get("colour", master_tags.get("colour", None))
tags.get("colour", master_tags.get("colour", None))
)
except ValueError as e:
self.colour = None
self.city.warn(str(e), relation)
try:
self.infill = normalize_colour(
relation["tags"].get(
tags.get(
"colour:infill", master_tags.get("colour:infill", None)
)
)
except ValueError as e:
self.infill = None
self.city.warn(str(e), relation)
self.network = Route.get_network(relation)
self.interval = Route.get_interval(
relation["tags"]
) or Route.get_interval(master_tags)
self.start_time, self.end_time = get_start_end_times(
relation["tags"].get(
"opening_hours", master_tags.get("opening_hours", "")
)
self.network = get_network(relation)
self.interval = get_route_interval(tags) or get_route_interval(
master_tags
)
if relation["tags"].get("public_transport:version") == "1":
self.duration = get_route_duration(tags) or get_route_duration(
master_tags
)
parsed_time_range = parse_time_range(
tags.get("opening_hours", master_tags.get("opening_hours", ""))
)
if parsed_time_range:
self.start_time, self.end_time = parsed_time_range
if tags.get("public_transport:version") == "1":
self.city.warn(
"Public transport version is 1, which means the route "
"is an unsorted pile of objects",

View file

@ -7,8 +7,8 @@ from typing import TypeVar
from subways.consts import MAX_DISTANCE_STOP_TO_LINE
from subways.css_colours import normalize_colour
from subways.geom_utils import distance, project_on_line
from subways.osm_element import el_id
from subways.structure.route import Route
from subways.osm_element import el_id, get_network
from subways.structure.route import get_route_duration, get_route_interval
from subways.structure.stop_area import StopArea
from subways.types import IdT, OsmElementT
@ -26,7 +26,7 @@ class RouteMaster:
def __init__(self, city: City, master: OsmElementT = None) -> None:
self.city = city
self.routes = []
self.best: Route = None
self.best: Route = None # noqa: F821
self.id: IdT = el_id(master)
self.has_master = master is not None
self.interval_from_master = False
@ -46,13 +46,14 @@ class RouteMaster:
)
except ValueError:
self.infill = None
self.network = Route.get_network(master)
self.network = get_network(master)
self.mode = master["tags"].get(
"route_master", None
) # This tag is required, but okay
self.name = master["tags"].get("name", None)
self.interval = Route.get_interval(master["tags"])
self.interval = get_route_interval(master["tags"])
self.interval_from_master = self.interval is not None
self.duration = get_route_duration(master["tags"])
else:
self.ref = None
self.colour = None
@ -61,6 +62,7 @@ class RouteMaster:
self.mode = None
self.name = None
self.interval = None
self.duration = None
def stopareas(self) -> Iterator[StopArea]:
yielded_stopareas = set()
@ -70,7 +72,7 @@ class RouteMaster:
yield stoparea
yielded_stopareas.add(stoparea)
def add(self, route: Route) -> None:
def add(self, route: Route) -> None: # noqa: F821
if not self.network:
self.network = route.network
elif route.network and route.network != self.network:
@ -148,10 +150,10 @@ class RouteMaster:
):
self.best = route
def get_meaningful_routes(self) -> list[Route]:
def get_meaningful_routes(self) -> list[Route]: # noqa: F821
return [route for route in self if len(route) >= 2]
def find_twin_routes(self) -> dict[Route, Route]:
def find_twin_routes(self) -> dict[Route, Route]: # noqa: F821
"""Two non-circular routes are twins if they have the same end
stations and opposite directions, and the number of stations is
the same or almost the same. We'll then find stops that are present
@ -325,7 +327,11 @@ class RouteMaster:
break
return common_subsequence
def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None:
def alert_twin_routes_differ(
self,
route1: Route, # noqa: F821
route2: Route, # noqa: F821
) -> None:
"""Arguments are that route1.id < route2.id"""
(
stops_missing_from_route1,
@ -382,7 +388,10 @@ class RouteMaster:
)
@staticmethod
def calculate_twin_routes_diff(route1: Route, route2: Route) -> tuple:
def calculate_twin_routes_diff(
route1: Route, # noqa: F821
route2: Route, # noqa: F821
) -> tuple:
"""WagnerFischer algorithm for stops diff in two twin routes."""
stops1 = route1.stops
@ -450,10 +459,10 @@ class RouteMaster:
def __len__(self) -> int:
return len(self.routes)
def __getitem__(self, i) -> Route:
def __getitem__(self, i) -> Route: # noqa: F821
return self.routes[i]
def __iter__(self) -> Iterator[Route]:
def __iter__(self) -> Iterator[Route]: # noqa: F821
return iter(self.routes)
def __repr__(self) -> str:

View file

@ -187,9 +187,10 @@
<member type='relation' ref='9' role='' />
<tag k='colour' v='brown' />
<tag k='colour:infill' v='white' />
<tag k='duration' v='5' />
<tag k='name' v='LR Line' />
<tag k='network' v='network-2' />
<tag k='ref' v='LR' />
<tag k='name' v='LR Line' />
<tag k='route_master' v='light_rail' />
<tag k='type' v='route_master' />
</relation>
@ -198,6 +199,7 @@
<member type='node' ref='5' role='' />
<member type='node' ref='6' role='' />
<member type='way' ref='2' role='' />
<tag k='duration' v='10' />
<tag k='name' v='2 forward' />
<tag k='ref' v='2' />
<tag k='route' v='subway' />
@ -208,6 +210,7 @@
<member type='node' ref='5' role='' />
<member type='node' ref='4' role='' />
<member type='way' ref='2' role='' />
<tag k='duration:peak' v='8' />
<tag k='name' v='2 backward' />
<tag k='ref' v='2' />
<tag k='route' v='subway' />
@ -217,18 +220,18 @@
<member type='relation' ref='13' role='' />
<member type='relation' ref='12' role='' />
<tag k='colour' v='red' />
<tag k='name' v='Red Line' />
<tag k='network' v='network-1' />
<tag k='ref' v='2' />
<tag k='name' v='Red Line' />
<tag k='route_master' v='subway' />
<tag k='type' v='route_master' />
</relation>
<relation id='15' visible='true' version='1'>
<member type='relation' ref='8' role='' />
<member type='relation' ref='7' role='' />
<tag k='name' v='Blue Line' />
<tag k='network' v='network-1' />
<tag k='ref' v='1' />
<tag k='name' v='Blue Line' />
<tag k='route_master' v='subway' />
<tag k='type' v='route_master' />
</relation>

View file

@ -1,7 +1,7 @@
route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,trip_route_type,route_pattern_id,bikes_allowed
r15,always,r7,,,,,7,,,,
r15,always,r8,,,,,8,,,,
r14,always,r12,,,,,12,,,,
r14,always,r13,,,,,13,,,,
r11,always,r9,,,,,9,,,,
r11,always,r10,,,,,10,,,,
route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,trip_route_type,route_pattern_id,bikes_allowed,average_speed
r15,always,r7,,,,,7,,,,,40.0
r15,always,r8,,,,,8,,,,,40.0
r14,always,r12,,,,,12,,,,,9.4
r14,always,r13,,,,,13,,,,,11.8
r11,always,r9,,,,,9,,,,,6.5
r11,always,r10,,,,,10,,,,,6.5

View file

@ -163,6 +163,7 @@ metro_samples = [
"start_time": null,
"end_time": null,
"interval": null,
"duration": null,
"stops": [
{
"stoparea_id": "n1",
@ -197,6 +198,7 @@ metro_samples = [
"start_time": null,
"end_time": null,
"interval": null,
"duration": null,
"stops": [
{
"stoparea_id": "r3",
@ -237,6 +239,7 @@ metro_samples = [
"start_time": null,
"end_time": null,
"interval": null,
"duration": 600,
"stops": [
{
"stoparea_id": "n4",
@ -267,6 +270,7 @@ metro_samples = [
"start_time": null,
"end_time": null,
"interval": null,
"duration": 480,
"stops": [
{
"stoparea_id": "n6",
@ -313,6 +317,7 @@ metro_samples = [
"start_time": null,
"end_time": null,
"interval": null,
"duration": 300,
"stops": [
{
"stoparea_id": "r4",
@ -339,6 +344,7 @@ metro_samples = [
"start_time": null,
"end_time": null,
"interval": null,
"duration": 300,
"stops": [
{
"stoparea_id": "r16",

141
subways/tests/test_route.py Normal file
View file

@ -0,0 +1,141 @@
from unittest import TestCase
from subways.structure.route import (
get_interval_in_seconds_from_tags,
osm_interval_to_seconds,
parse_time_range,
)
class TestTimeIntervalsParsing(TestCase):
    """Tests for the parsers of OSM interval-like and opening_hours
    tag values.
    """

    def test__osm_interval_to_seconds__invalid_value(self) -> None:
        """Every value listed here must be rejected with None."""
        bad_values = [
            # empty or non-numeric
            "", "abc", "x30", "30x", "3x0",
            # leading or trailing colon
            "5:", ":5", "01:05:", ":01:05", "01:01:00:", ":01:01:00",
            # a non-numeric component
            "01x:05", "01:x5", "x5:01:00", "01:0x:00", "01:01:x",
            # a negative component
            "-5", "01:-05", "-01:05", "-01:00:00", "01:-01:00", "01:01:-01",
            # zero interval
            "0", "00:00", "00:00:00",
            # minutes or seconds out of range
            "00:60", "01:00:60", "01:60:00",
            "01:60:61", "01:61:60", "01:61:61",
        ]
        for value in bad_values:
            with self.subTest(msg=f"value='{value}'"):
                self.assertIsNone(osm_interval_to_seconds(value))

    def test__osm_interval_to_seconds__valid_value(self) -> None:
        """Each value must be converted to the paired number of seconds."""
        expectations = (
            ("5", 300),
            ("65", 3900),
            ("10:55", 39300),
            ("02:02:02", 7322),
            ("2:2:2", 7322),
            ("00:59", 3540),
            ("01:00", 3600),
            ("00:00:50", 50),
            ("00:10:00", 600),
            ("01:00:00", 3600),
        )
        for value, expected_seconds in expectations:
            with self.subTest(msg=f"value='{value}'"):
                self.assertEqual(
                    expected_seconds, osm_interval_to_seconds(value)
                )

    def test__parse_time_range__invalid_values(self) -> None:
        """Every value listed here must be rejected with None."""
        bad_ranges = [
            "", "a", "ab:cd-ab:cd", "1", "1-2", "01-02",
            "24/8", "24/7/365",
            "1:00-02:00", "01:0-02:00", "01:00-2:00", "01:00-02:0",
            "1x:00-02:00", "01:0x-02:00", "01:00-1x:00", "01:00-02:ab",
            "-1:00-02:00", "01:-1-02:00", "01:00--2:00", "01:00-02:-1",
            "01;00-02:00", "01:00-02;00", "01:00=02:00",
            "01:00-#02:00", "01:00 - 02:00",
            "01:60-02:05", "01:00-01:61",
        ]
        for value in bad_ranges:
            with self.subTest(msg=f"value='{value}'"):
                self.assertIsNone(parse_time_range(value))

    def test__parse_time_range__valid_values(self) -> None:
        """Each value must be parsed into the paired
        ((start_h, start_m), (end_h, end_m)) tuple.
        """
        expectations = [
            ("24/7", ((0, 0), (24, 0))),
            ("00:00-00:00", ((0, 0), (0, 0))),
            ("00:01-00:02", ((0, 1), (0, 2))),
            ("01:00-02:00", ((1, 0), (2, 0))),
            ("02:01-01:02", ((2, 1), (1, 2))),
            ("02:00-26:59", ((2, 0), (26, 59))),
            ("12:01-13:59", ((12, 1), (13, 59))),
            ("Mo-Fr 06:00-21:30", ((6, 0), (21, 30))),
            ("06:00-21:30 (weekdays)", ((6, 0), (21, 30))),
            ("Mo-Fr 06:00-21:00; Sa-Su 07:00-20:00", ((6, 0), (21, 0))),
        ]
        for value, expected_range in expectations:
            with self.subTest(msg=f"value='{value}'"):
                self.assertTupleEqual(
                    expected_range, parse_time_range(value)
                )
class TestRouteIntervals(TestCase):
    """Tests for get_interval_in_seconds_from_tags(): which tag is picked
    among route tags and how its value is converted to seconds.

    All "tags" fixtures are dicts, as the tested function expects; keys
    that must not match still carry a valid value ("1"), so that a None
    answer can only be caused by the key being ignored.
    """

    def test__get_interval_in_seconds_from_tags__one_key(self) -> None:
        """Lookup by a single key passed as a plain string."""
        cases = [
            {"tags": {}, "answer": None},
            {"tags": {"a": "1"}, "answer": None},
            {"tags": {"duration": "1"}, "answer": 60},
            {"tags": {"durationxxx": "1"}, "answer": None},
            {"tags": {"xxxduration": "1"}, "answer": None},
            # prefixes not considered
            {"tags": {"ru:duration": "1"}, "answer": None},
            # suffixes considered
            {"tags": {"duration:peak": "1"}, "answer": 60},
            # bare tag has precedence over suffixed version
            {"tags": {"duration:peak": "1", "duration": "2"}, "answer": 120},
            # first suffixed version applies
            {"tags": {"duration:y": "1", "duration:x": "2"}, "answer": 60},
            # other tags present
            {"tags": {"a": "x", "duration": "1", "b": "y"}, "answer": 60},
        ]

        for case in cases:
            with self.subTest(msg=f"{case['tags']}"):
                self.assertEqual(
                    case["answer"],
                    get_interval_in_seconds_from_tags(
                        case["tags"], "duration"
                    ),
                )

    def test__get_interval_in_seconds_from_tags__several_keys(self) -> None:
        """Lookup by several synonymous keys given in order of precedence."""
        keys = ("interval", "headway")
        cases = [
            {"tags": {}, "answer": None},
            # prefixes not considered
            {"tags": {"ru:interval": "1"}, "answer": None},
            {"tags": {"interval": "1"}, "answer": 60},
            {"tags": {"headway": "1"}, "answer": 60},
            {"tags": {"interval": "1", "headway": "2"}, "answer": 60},
            # interval has precedence due to its position in 'keys'
            {"tags": {"headway": "2", "interval": "1"}, "answer": 60},
            # non-suffixed keys have precedence
            {"tags": {"interval:peak": "1", "headway": "2"}, "answer": 120},
            # among suffixed versions, first key in 'keys' is used first
            {
                "tags": {"headway:peak": "2", "interval:peak": "1"},
                "answer": 60,
            },
        ]

        for case in cases:
            with self.subTest(msg=f"{case['tags']}"):
                self.assertEqual(
                    case["answer"],
                    get_interval_in_seconds_from_tags(case["tags"], keys),
                )