Upstream fixes from Alexey #4

Merged
biodranik merged 30 commits from upstream into master 2023-03-14 21:31:33 +00:00
7 changed files with 448 additions and 36 deletions
Showing only changes of commit 277f5b0e91

process_subways.py

@@ -1,5 +1,6 @@
#!/usr/bin/env python3
import argparse
import inspect
import json
import logging
import os
@@ -8,7 +9,9 @@ import sys
import time
import urllib.parse
import urllib.request
from processors import processor
import processors
from subway_io import (
    dump_yaml,
    load_xml,
@@ -214,12 +217,19 @@ if __name__ == '__main__':
        type=argparse.FileType('w', encoding='utf-8'),
        help='Validation JSON file name',
    )
    parser.add_argument(
        '-o',
        '--output',
        type=argparse.FileType('w', encoding='utf-8'),
        help='Processed metro systems output',
    )
    for processor_name, processor in inspect.getmembers(
        processors, inspect.ismodule
    ):
        if not processor_name.startswith("_"):
            parser.add_argument(
                f'--output-{processor_name}',
                help=(
                    'Processed metro systems output filename '
                    f'in {processor_name.upper()} format'
                ),
            )
    parser.add_argument('--cache', help='Cache file name for processed data')
    parser.add_argument(
        '-r', '--recovery-path', help='Cache file name for error recovery'
@@ -393,10 +403,13 @@ if __name__ == '__main__':
            res.append(v)
        json.dump(res, options.log, indent=2, ensure_ascii=False)
    if options.output:
        json.dump(
            processor.process(cities, transfers, options.cache),
            options.output,
            indent=1,
            ensure_ascii=False,
        )
    for processor_name, processor in inspect.getmembers(
        processors, inspect.ismodule
    ):
        option_name = f"output_{processor_name}"
        filename = getattr(options, option_name, None)
        if not filename:
            continue
        processor.process(cities, transfers, filename, options.cache)
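
For reference, a minimal sketch (not part of the commit) of how this dynamic processor discovery behaves, assuming a processors package that imports mapsme and gtfs as shown below in processors/__init__.py; the "moscow" value is made up:

# Illustrative sketch only. argparse turns '--output-<name>' into the
# attribute 'output_<name>', which the dispatch loop above reads back.
import argparse
import inspect

import processors  # its __init__.py does: from . import mapsme, gtfs

parser = argparse.ArgumentParser()
for name, module in inspect.getmembers(processors, inspect.ismodule):
    if not name.startswith("_"):
        parser.add_argument(f"--output-{name}")

options = parser.parse_args(["--output-gtfs", "moscow"])
print(options.output_gtfs)    # 'moscow'
print(options.output_mapsme)  # None -> that processor is skipped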

processors/__init__.py

@@ -1,2 +1,2 @@
# Here you can change the processor
from . import mapsme as processor
# Import only those processors (modules) you want to use
from . import mapsme, gtfs

processors/_common.py (new file, 9 lines)

@@ -0,0 +1,9 @@
DEFAULT_INTERVAL = 2.5 * 60  # seconds
KMPH_TO_MPS = 1 / 3.6  # km/h to m/s conversion multiplier
SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS  # m/s
TRANSFER_PENALTY = 30  # seconds


def format_colour(colour):
    """Truncate leading # sign."""
    return colour[1:] if colour else None
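
A quick usage sketch of the shared helpers (illustrative, not part of the commit):

# Illustrative usage only.
from processors._common import SPEED_ON_TRANSFER, format_colour

format_colour("#ff0000")   # -> 'ff0000'
format_colour(None)        # -> None
SPEED_ON_TRANSFER          # -> ~0.97 m/s (3.5 km/h assumed walking speed on transfers)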

processors/gtfs.py (new file, 331 lines)

@@ -0,0 +1,331 @@
import csv
import io
import zipfile

from ._common import (
    DEFAULT_INTERVAL,
    format_colour,
    SPEED_ON_TRANSFER,
    TRANSFER_PENALTY,
)
from subway_structure import (
    distance,
)

DEFAULT_TRIP_START_TIME = "05:00:00"
DEFAULT_TRIP_END_TIME = "01:00:00"
COORDINATE_PRECISION = 7  # fractional digits. It's OSM precision, ~ 5 cm

GTFS_COLUMNS = {
    "agency": [
        "agency_id",
        "agency_name",
        "agency_url",
        "agency_timezone",
        "agency_lang",
        "agency_phone",
    ],
    "routes": [
        "route_id",
        "agency_id",
        "route_short_name",
        "route_long_name",
        "route_desc",
        "route_type",
        "route_url",
        "route_color",
        "route_text_color",
        "route_sort_order",
        "route_fare_class",
        "line_id",
        "listed_route",
    ],
    "trips": [
        "route_id",
        "service_id",
        "trip_id",
        "trip_headsign",
        "trip_short_name",
        "direction_id",
        "block_id",
        "shape_id",
        "wheelchair_accessible",
        "trip_route_type",
        "route_pattern_id",
        "bikes_allowed",
    ],
    "stops": [
        "stop_id",
        "stop_code",
        "stop_name",
        "stop_desc",
        "platform_code",
        "platform_name",
        "stop_lat",
        "stop_lon",
        "zone_id",
        "stop_address",
        "stop_url",
        "level_id",
        "location_type",
        "parent_station",
        "wheelchair_boarding",
        "municipality",
        "on_street",
        "at_street",
        "vehicle_type",
    ],
    "calendar": [
        "service_id",
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
        "start_date",
        "end_date",
    ],
    "stop_times": [
        "trip_id",
        "arrival_time",
        "departure_time",
        "stop_id",
        "stop_sequence",
        "stop_headsign",
        "pickup_type",
        "drop_off_type",
        "shape_dist_traveled",
        "timepoint",
        "checkpoint_id",
        "continuous_pickup",
        "continuous_drop_off",
    ],
    "frequencies": [
        "trip_id",
        "start_time",
        "end_time",
        "headway_secs",
        "exact_times",
    ],
    "shapes": [
        "shape_id",
        "shape_pt_lat",
        "shape_pt_lon",
        "shape_pt_sequence",
        "shape_dist_traveled",
    ],
    "transfers": [
        "from_stop_id",
        "to_stop_id",
        "transfer_type",
        "min_transfer_time",
    ],
}


def dict_to_row(dict_data, record_type):
    """Given a dict of data and a record type (a GTFS_COLUMNS key),
    return a CSV row with values in the proper column order;
    missing values become empty strings.
    """
    row = []
    for column in GTFS_COLUMNS[record_type]:
        value = dict_data.get(column)
        if value is None:
            value = ""
        row.append(value)
    return row
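
As an illustration (not part of the file), dict_to_row keeps the GTFS_COLUMNS order and fills missing columns with empty strings; the values below are made up:

# Illustrative example only.
dict_to_row({"trip_id": "r7699", "headway_secs": 150}, "frequencies")
# -> ['r7699', '', '', 150, '']
#    (trip_id, start_time, end_time, headway_secs, exact_times)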


def process(cities, transfers, filename, cache_path):
    """Generate all output and save to file.

    :param cities: List of City instances
    :param transfers: List of sets of StopArea.id
    :param filename: Path to file to save the result
    :param cache_path: Path to json-file with good cities cache or None.
    """
    # TODO: make universal cache for all processors, and apply the cache to GTFS

    # Keys correspond to GTFS file names
    gtfs_data = {key: [] for key in GTFS_COLUMNS.keys()}

    gtfs_data["calendar"].append(
        dict_to_row(
            {
                "service_id": "always",
                "monday": 1,
                "tuesday": 1,
                "wednesday": 1,
                "thursday": 1,
                "friday": 1,
                "saturday": 1,
                "sunday": 1,
                "start_date": "19700101",
                "end_date": "30000101",
            },
            "calendar",
        )
    )

    all_stops = {}  # stop (stop area center or station) el_id -> stop data
    good_cities = [c for c in cities if c.is_good()]

    def add_stop_gtfs(route_stop):
        """Add stop to all_stops.
        If it's not a station, also add parent station
        if it has not been added yet. Return gtfs stop_id.
        """
        is_real_stop_area = (
            route_stop.stoparea.element["tags"].get("public_transport")
            == "stop_area"
        )
        el_id_ = route_stop.stoparea.id

        if el_id_ not in all_stops:
            station_name = route_stop.stoparea.station.name
            center = route_stop.stoparea.center
            location_type = 1 if is_real_stop_area else 0
            stop_gtfs = {
                "stop_id": el_id_,
                "stop_code": el_id_,
                "stop_name": station_name,
                "stop_lat": round(center[1], COORDINATE_PRECISION),
                "stop_lon": round(center[0], COORDINATE_PRECISION),
                "location_type": location_type,
            }
            if is_real_stop_area:
                station_id = route_stop.stoparea.station.id
                stop_gtfs["parent_station"] = station_id
                if station_id not in all_stops:
                    center = route_stop.stoparea.station.center
                    station_gtfs = {
                        "stop_id": station_id,
                        "stop_code": station_id,
                        "stop_name": station_name,
                        "stop_lat": round(center[1], COORDINATE_PRECISION),
                        "stop_lon": round(center[0], COORDINATE_PRECISION),
                        "location_type": 1,
                    }
                    all_stops[station_id] = station_gtfs
            all_stops[el_id_] = stop_gtfs
        return el_id_

    # agency, routes, trips, stop_times, frequencies, shapes
    for city in good_cities:
        agency = {"agency_id": city.id, "agency_name": city.name}
        gtfs_data["agency"].append(dict_to_row(agency, "agency"))

        for city_route in city:
            route = {
                "route_id": city_route.id,
                "agency_id": agency["agency_id"],
                "route_type": 12 if city_route.mode == "monorail" else 1,
                "route_short_name": city_route.ref,
                "route_long_name": city_route.name,
                "route_color": format_colour(city_route.colour),
            }
            gtfs_data["routes"].append(dict_to_row(route, "routes"))

            for variant in city_route:
                trip = {
                    "trip_id": variant.id,
                    "route_id": route["route_id"],
                    "service_id": "always",
                    "shape_id": None,
                }
                gtfs_data["trips"].append(dict_to_row(trip, "trips"))

                for i, (lon, lat) in enumerate(variant.tracks):
                    gtfs_data["shapes"].append(
                        dict_to_row(
                            {
                                "shape_id": variant.id,
                                "trip_id": variant.id,
                                "shape_pt_lat": round(
                                    lat, COORDINATE_PRECISION
                                ),
                                "shape_pt_lon": round(
                                    lon, COORDINATE_PRECISION
                                ),
                                "shape_pt_sequence": i,
                            },
                            "shapes",
                        )
                    )

                gtfs_data["frequencies"].append(
                    dict_to_row(
                        {
                            "trip_id": variant.id,
                            "start_time": variant.start_time
                            or DEFAULT_TRIP_START_TIME,
                            "end_time": variant.end_time
                            or DEFAULT_TRIP_END_TIME,
                            "headway_secs": variant.interval
                            or DEFAULT_INTERVAL,
                        },
                        "frequencies",
                    )
                )

                for stop_sequence, route_stop in enumerate(variant):
                    gtfs_stop_id = add_stop_gtfs(route_stop)
                    stop_time = {
                        "trip_id": variant.id,
                        "stop_sequence": stop_sequence,
                        "shape_dist_traveled": route_stop.distance,
                        "stop_id": gtfs_stop_id,
                    }
                    gtfs_data["stop_times"].append(
                        dict_to_row(stop_time, "stop_times")
                    )

    # stops
    gtfs_data["stops"].extend(
        map(lambda row: dict_to_row(row, "stops"), all_stops.values())
    )

    # transfers
    for stoparea_set in transfers:
        for stoparea1 in stoparea_set:
            for stoparea2 in stoparea_set:
                if stoparea1.id < stoparea2.id:
                    transfer_time = TRANSFER_PENALTY + round(
                        distance(stoparea1.center, stoparea2.center)
                        / SPEED_ON_TRANSFER
                    )
                    for id1, id2 in (
                        (stoparea1.id, stoparea2.id),
                        (stoparea2.id, stoparea1.id),
                    ):
                        gtfs_data["transfers"].append(
                            dict_to_row(
                                {
                                    "from_stop_id": id1,
                                    "to_stop_id": id2,
                                    "transfer_type": 0,
                                    "min_transfer_time": transfer_time,
                                },
                                "transfers",
                            )
                        )

    make_gtfs(filename, gtfs_data)


def make_gtfs(filename, gtfs_data):
    if not filename.lower().endswith("zip"):
        filename = f"{filename}.zip"

    with zipfile.ZipFile(filename, "w") as zf:
        for gtfs_feature, columns in GTFS_COLUMNS.items():
            with io.StringIO(newline="") as string_io:
                writer = csv.writer(string_io, delimiter=",")
                writer.writerow(columns)
                writer.writerows(gtfs_data[gtfs_feature])
                zf.writestr(f"{gtfs_feature}.txt", string_io.getvalue())
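
A hypothetical invocation of this processor (the output path is made up; cities and transfers come from process_subways.py as above); make_gtfs then writes one CSV per GTFS_COLUMNS key into a zip archive:

# Illustrative only.
from processors import gtfs

gtfs.process(cities, transfers, "out/metro", cache_path=None)
# -> out/metro.zip containing agency.txt, routes.txt, trips.txt, stops.txt,
#    calendar.txt, stop_times.txt, frequencies.txt, shapes.txt, transfers.txt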

processors/mapsme.py

@@ -1,7 +1,16 @@
import json
import os
import logging
from collections import defaultdict
from ._common import (
    DEFAULT_INTERVAL,
    format_colour,
    KMPH_TO_MPS,
    SPEED_ON_TRANSFER,
    TRANSFER_PENALTY,
)
from subway_structure import (
    distance,
    el_center,
@@ -12,12 +21,9 @@ from subway_structure import (
OSM_TYPES = {'n': (0, 'node'), 'w': (2, 'way'), 'r': (3, 'relation')}
ENTRANCE_PENALTY = 60 # seconds
TRANSFER_PENALTY = 30 # seconds
KMPH_TO_MPS = 1 / 3.6 # km/h to m/s conversion multiplier
SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS # m/s
SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS # m/s
SPEED_ON_LINE = 40 * KMPH_TO_MPS # m/s
DEFAULT_INTERVAL = 2.5 # minutes
DEFAULT_INTERVAL = 2.5 * 60 # seconds
def uid(elid, typ=None):
@@ -174,15 +180,14 @@ class MapsmeCache:
            logging.warning("Failed to save cache: %s", str(e))
def process(cities, transfers, cache_path):
    """cities - list of City instances;
    transfers - list of sets of StopArea.id;
    cache_path - path to json-file with good cities cache or None.
def process(cities, transfers, filename, cache_path):
    """Generate all output and save to file.

    :param cities: List of City instances
    :param transfers: List of sets of StopArea.id
    :param filename: Path to file to save the result
    :param cache_path: Path to json-file with good cities cache or None.
    """

    def format_colour(c):
        return c[1:] if c else None

    def find_exits_for_platform(center, nodes):
        exits = []
        min_distance = None
@@ -282,9 +287,7 @@ def process(cities, transfers, cache_path):
            routes['itineraries'].append(
                {
                    'stops': itin,
                    'interval': round(
                        (variant.interval or DEFAULT_INTERVAL) * 60
                    ),
                    'interval': round(variant.interval or DEFAULT_INTERVAL),
                }
            )
        network['routes'].append(routes)
@@ -386,4 +389,14 @@ def process(cities, transfers, cache_path):
        'transfers': pairwise_transfers,
        'networks': networks,
    }
    return result
    if not filename.lower().endswith("json"):
        filename = f"{filename}.json"
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(
            result,
            f,
            indent=1,
            ensure_ascii=False,
        )
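
One consequence of moving DEFAULT_INTERVAL to seconds, sketched here for clarity with assumed values (not part of the diff):

# Illustrative only.
DEFAULT_INTERVAL = 2.5 * 60           # 150 seconds, now shared via _common.py
interval = None                       # route has no interval/headway tag
round(interval or DEFAULT_INTERVAL)   # -> 150; the old '* 60' is no longer needed,
# because intervals already arrive in seconds (see osm_interval_to_seconds below).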


@@ -40,6 +40,7 @@ Environment variable reference:
- SKIP_FILTERING: skip filtering railway data. Any non-empty string is True
- FILTERED_DATA: path to filtered data. Defaults to \$TMPDIR/subways.osm
- MAPSME: file name for maps.me json output
- GTFS: file name for GTFS output
- DUMP: directory/file name to dump YAML city data. Do not set to omit dump
- GEOJSON: directory/file name to dump GeoJSON data. Do not set to omit dump
- ELEMENTS_CACHE: file name to elements cache. Allows OSM xml processing phase
@@ -234,7 +235,8 @@ fi
VALIDATION="$TMPDIR/validation.json"
"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q \
    -x "$FILTERED_DATA" -l "$VALIDATION" \
    ${MAPSME:+-o "$MAPSME"} \
    ${MAPSME:+--output-mapsme "$MAPSME"} \
    ${GTFS:+--output-gtfs "$GTFS"} \
    ${CITY:+-c "$CITY"} ${DUMP:+-d "$DUMP"} ${GEOJSON:+-j "$GEOJSON"} \
    ${ELEMENTS_CACHE:+-i "$ELEMENTS_CACHE"} \
    ${CITY_CACHE:+--cache "$CITY_CACHE"} \

subway_structure.py

@@ -1,6 +1,7 @@
import csv
import logging
import math
import re
import urllib.parse
import urllib.request
from css_colours import normalize_colour
@@ -45,6 +46,47 @@ CONSTRUCTION_KEYS = (
used_entrances = set()
START_END_TIMES_RE = re.compile(r'.*?(\d{2}:\d{2})-(\d{2}:\d{2}).*')


def get_start_end_times(opening_hours):
    """Very simplified parser for the OSM opening_hours tag.
    We simply take the first HH:MM-HH:MM substring, which is the most
    probable opening hours interval for most of the weekdays.
    """
    m = START_END_TIMES_RE.match(opening_hours)
    if m:
        # Each group is HH:MM. We need HH:MM:SS.
        return tuple(map(lambda t: f"{t}:00", m.groups()))
    else:
        return None, None
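
For illustration, what this simplified parser returns for a couple of made-up opening_hours values (not part of the file):

# Illustrative examples only.
get_start_end_times("Mo-Su 05:30-00:30")  # -> ("05:30:00", "00:30:00")
get_start_end_times("24/7")               # -> (None, None): no HH:MM-HH:MM substring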


def osm_interval_to_seconds(interval_str):
    """Convert an OSM 'interval'/'headway' tag value to seconds (int).
    The value may come in any of these formats:
        HH:MM:SS,
        HH:MM,
        MM,
        M
    (https://wiki.openstreetmap.org/wiki/Key:interval#Format)
    """
    hours, minutes, seconds = 0, 0, 0
    colon_count = interval_str.count(':')
    try:
        if colon_count == 0:
            minutes = int(interval_str)
        elif colon_count == 1:
            hours, minutes = map(int, interval_str.split(':'))
        elif colon_count == 2:
            hours, minutes, seconds = map(int, interval_str.split(':'))
        else:
            return None
    except ValueError:
        return None
    return seconds + 60 * minutes + 60 * 60 * hours
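
Worked conversions for the formats listed in the docstring (inputs made up, not part of the file):

# Illustrative examples only.
osm_interval_to_seconds("5")         # -> 300    (5 minutes)
osm_interval_to_seconds("00:07:30")  # -> 450    (HH:MM:SS)
osm_interval_to_seconds("01:30")     # -> 5400   (HH:MM, i.e. 1.5 hours)
osm_interval_to_seconds("about 5")   # -> None   (unparsable)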


class CriticalValidationError(Exception):
    """Is thrown if an error occurs
    that prevents further validation of a city."""
@@ -606,10 +648,7 @@ class Route:
                    break
        if not v:
            return None
        try:
            return float(v)
        except ValueError:
            return None
        return osm_interval_to_seconds(v)

    def build_longest_line(self, relation):
        line_nodes = set()
@@ -786,6 +825,11 @@ class Route:
        self.interval = Route.get_interval(
            relation['tags']
        ) or Route.get_interval(master_tags)
        self.start_time, self.end_time = get_start_end_times(
            relation['tags'].get(
                'opening_hours', master_tags.get('opening_hours', '')
            )
        )
        if relation['tags'].get('public_transport:version') == '1':
            city.warn(
                'Public transport version is 1, which means the route '