From 05b9478decc8ef6f5e5f8f4ebee2fec9ec196ce5 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Wed, 17 Aug 2022 13:16:06 +0300 Subject: [PATCH] Fixes to GTFS generation --- .gitignore | 1 + processors/gtfs.py | 159 +++++++++++++++++++++++++++++--------------- subway_structure.py | 11 +-- 3 files changed, 111 insertions(+), 60 deletions(-) diff --git a/.gitignore b/.gitignore index 29ef132..f2fb32f 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ html/ *.yaml *.pyc *.txt +*.zip diff --git a/processors/gtfs.py b/processors/gtfs.py index 3021c8f..a6ac776 100644 --- a/processors/gtfs.py +++ b/processors/gtfs.py @@ -10,11 +10,12 @@ from ._common import ( ) from subway_structure import ( distance, + el_center, ) -DEFAULT_TRIP_START_TIME = "05:00:00" -DEFAULT_TRIP_END_TIME = "01:00:00" +DEFAULT_TRIP_START_TIME = (5, 0) # 05:00 +DEFAULT_TRIP_END_TIME = (1, 0) # 01:00 COORDINATE_PRECISION = 7 # fractional digits. It's OSM precision, ~ 5 cm GTFS_COLUMNS = { @@ -139,6 +140,12 @@ def dict_to_row(dict_data, record_type): return row +def round_coords(coords_tuple): + return tuple( + map(lambda coord: round(coord, COORDINATE_PRECISION), coords_tuple) + ) + + def process(cities, transfers, filename, cache_path): """Generate all output and save to file. :param cities: List of City instances @@ -173,45 +180,83 @@ def process(cities, transfers, filename, cache_path): all_stops = {} # stop (stop area center or station) el_id -> stop data good_cities = [c for c in cities if c.is_good] - def add_stop_gtfs(route_stop): + def add_stop_gtfs(route_stop, city): """Add stop to all_stops. If it's not a station, also add parent station if it has not been added yet. Return gtfs stop_id. """ - is_real_stop_area = ( - route_stop.stoparea.element["tags"].get("public_transport") - == "stop_area" - ) - el_id_ = route_stop.stoparea.id - if el_id_ not in all_stops: + # For the case a StopArea is derived solely from railway=station + # object, we generate GTFS platform (stop), station and sometimes + # an entrance from the same object, so use suffixes + station_id = f"{route_stop.stoparea.id}_st" + platform_id = f"{route_stop.stoparea.id}_plt" + + if station_id not in all_stops: station_name = route_stop.stoparea.station.name - center = route_stop.stoparea.center - location_type = 1 if is_real_stop_area else 0 - stop_gtfs = { - "stop_id": el_id_, - "stop_code": el_id_, + station_center = round_coords(route_stop.stoparea.center) + + station_gtfs = { + "stop_id": station_id, + "stop_code": station_id, "stop_name": station_name, - "stop_lat": round(center[1], COORDINATE_PRECISION), - "stop_lon": round(center[0], COORDINATE_PRECISION), - "location_type": location_type, + "stop_lat": station_center[1], + "stop_lon": station_center[0], + "location_type": 1, # station in GTFS terms } - if is_real_stop_area: - station_id = route_stop.stoparea.station.id - stop_gtfs["parent_station"] = station_id - if station_id not in all_stops: - center = route_stop.stoparea.station.center - station_gtfs = { - "stop_id": station_id, - "stop_code": station_id, - "stop_name": station_name, - "stop_lat": round(center[1], COORDINATE_PRECISION), - "stop_lon": round(center[0], COORDINATE_PRECISION), - "location_type": 1, + all_stops[station_id] = station_gtfs + + platform_id = f"{route_stop.stoparea.id}_plt" + platform_gtfs = { + "stop_id": platform_id, + "stop_code": platform_id, + "stop_name": station_name, + "stop_lat": station_center[1], + "stop_lon": station_center[0], + "location_type": 0, # stop/platform in GTFS terms + "parent_station": station_id, + } + all_stops[platform_id] = platform_gtfs + + osm_entrance_ids = ( + route_stop.stoparea.entrances | route_stop.stoparea.exits + ) + if not osm_entrance_ids: + entrance_id = f"{route_stop.stoparea.id}_egress" + entrance_gtfs = { + "stop_id": entrance_id, + "stop_code": entrance_id, + "stop_name": station_name, + "stop_lat": station_center[1], + "stop_lon": station_center[0], + "location_type": 2, + "parent_station": station_id, + } + all_stops[entrance_id] = entrance_gtfs + else: + for osm_entrance_id in osm_entrance_ids: + entrance = city.elements[osm_entrance_id] + entrance_id = f"{osm_entrance_id}_{route_stop.stoparea.id}" + entrance_name = entrance["tags"].get("name") + if not entrance_name: + entrance_name = station_name + ref = entrance["tags"].get("ref") + if ref: + entrance_name += f" {ref}" + center = el_center(entrance) + center = round_coords(center) + entrance_gtfs = { + "stop_id": entrance_id, + "stop_code": entrance_id, + "stop_name": entrance_name, + "stop_lat": center[1], + "stop_lon": center[0], + "location_type": 2, + "parent_station": station_id, } - all_stops[station_id] = station_gtfs - all_stops[el_id_] = stop_gtfs - return el_id_ + all_stops[entrance_id] = entrance_gtfs + + return platform_id # agency, routes, trips, stop_times, frequencies, shapes for city in good_cities: @@ -230,11 +275,12 @@ def process(cities, transfers, filename, cache_path): gtfs_data["routes"].append(dict_to_row(route, "routes")) for variant in city_route: + shape_id = variant.id[1:] # truncate leading 'r' trip = { "trip_id": variant.id, "route_id": route["route_id"], "service_id": "always", - "shape_id": None, + "shape_id": shape_id, } gtfs_data["trips"].append(dict_to_row(trip, "trips")) @@ -242,31 +288,33 @@ def process(cities, transfers, filename, cache_path): tracks = variant.get_truncated_tracks(tracks) for i, (lon, lat) in enumerate(tracks): + lon, lat = round_coords((lon, lat)) gtfs_data["shapes"].append( dict_to_row( { - "shape_id": variant.id, + "shape_id": shape_id, "trip_id": variant.id, - "shape_pt_lat": round( - lat, COORDINATE_PRECISION - ), - "shape_pt_lon": round( - lon, COORDINATE_PRECISION - ), + "shape_pt_lat": lat, + "shape_pt_lon": lon, "shape_pt_sequence": i, }, "shapes", ) ) + start_time = variant.start_time or DEFAULT_TRIP_START_TIME + end_time = variant.end_time or DEFAULT_TRIP_END_TIME + if end_time <= start_time: + end_time = (end_time[0] + 24, end_time[1]) + start_time = f"{start_time[0]:02d}:{start_time[1]:02d}:00" + end_time = f"{end_time[0]:02d}:{end_time[1]:02d}:00" + gtfs_data["frequencies"].append( dict_to_row( { "trip_id": variant.id, - "start_time": variant.start_time - or DEFAULT_TRIP_START_TIME, - "end_time": variant.end_time - or DEFAULT_TRIP_END_TIME, + "start_time": start_time, + "end_time": end_time, "headway_secs": variant.interval or DEFAULT_INTERVAL, }, @@ -275,17 +323,18 @@ def process(cities, transfers, filename, cache_path): ) for stop_sequence, route_stop in enumerate(variant): - gtfs_stop_id = add_stop_gtfs(route_stop) - - stop_time = { - "trip_id": variant.id, - "stop_sequence": stop_sequence, - "shape_dist_traveled": route_stop.distance, - "stop_id": gtfs_stop_id, - } + gtfs_platform_id = add_stop_gtfs(route_stop, city) gtfs_data["stop_times"].append( - dict_to_row(stop_time, "stop_times") + dict_to_row( + { + "trip_id": variant.id, + "stop_sequence": stop_sequence, + "shape_dist_traveled": route_stop.distance, + "stop_id": gtfs_platform_id, + }, + "stop_times", + ) ) # stops @@ -309,8 +358,8 @@ def process(cities, transfers, filename, cache_path): gtfs_data["transfers"].append( dict_to_row( { - "from_stop_id": id1, - "to_stop_id": id2, + "from_stop_id": f"{id1}_st", + "to_stop_id": f"{id2}_st", "transfer_type": 0, "min_transfer_time": transfer_time, }, diff --git a/subway_structure.py b/subway_structure.py index 1a99f5e..b16d411 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -46,7 +46,7 @@ CONSTRUCTION_KEYS = ( used_entrances = set() -START_END_TIMES_RE = re.compile(r'.*?(\d{2}:\d{2})-(\d{2}:\d{2}).*') +START_END_TIMES_RE = re.compile(r'.*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*') def get_start_end_times(opening_hours): @@ -54,12 +54,13 @@ def get_start_end_times(opening_hours): We simply take the first HH:MM-HH:MM substring which is the most probable opening hours interval for the most of weekdays. """ + start_time, end_time = None, None m = START_END_TIMES_RE.match(opening_hours) if m: - # Each group is HH:MM. We need HH:MM:SS. - return tuple(map(lambda t: f"{t}:00", m.groups())) - else: - return None, None + ints = tuple(map(int, m.groups())) + start_time = (ints[0], ints[1]) + end_time = (ints[2], ints[3]) + return start_time, end_time def osm_interval_to_seconds(interval_str):