Upstream fixes from Alexey #4
7 changed files with 448 additions and 36 deletions
process_subways.py

@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 import argparse
+import inspect
 import json
 import logging
 import os

@@ -8,7 +9,9 @@ import sys
 import time
 import urllib.parse
 import urllib.request
-from processors import processor
+
+import processors
+
 from subway_io import (
     dump_yaml,
     load_xml,
@@ -214,12 +217,19 @@ if __name__ == '__main__':
         type=argparse.FileType('w', encoding='utf-8'),
         help='Validation JSON file name',
     )
-    parser.add_argument(
-        '-o',
-        '--output',
-        type=argparse.FileType('w', encoding='utf-8'),
-        help='Processed metro systems output',
-    )
+
+    for processor_name, processor in inspect.getmembers(
+        processors, inspect.ismodule
+    ):
+        if not processor_name.startswith("_"):
+            parser.add_argument(
+                f'--output-{processor_name}',
+                help=(
+                    'Processed metro systems output filename '
+                    f'in {processor_name.upper()} format'
+                ),
+            )
+
     parser.add_argument('--cache', help='Cache file name for processed data')
     parser.add_argument(
         '-r', '--recovery-path', help='Cache file name for error recovery'
@@ -393,10 +403,13 @@ if __name__ == '__main__':
             res.append(v)
         json.dump(res, options.log, indent=2, ensure_ascii=False)
 
-    if options.output:
-        json.dump(
-            processor.process(cities, transfers, options.cache),
-            options.output,
-            indent=1,
-            ensure_ascii=False,
-        )
+    for processor_name, processor in inspect.getmembers(
+        processors, inspect.ismodule
+    ):
+        option_name = f"output_{processor_name}"
+
+        if not hasattr(options, option_name):
+            continue
+
+        filename = getattr(options, option_name)
+        processor.process(cities, transfers, filename, options.cache)
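With the -o/--output option removed, output formats are now discovered dynamically: every non-underscore module in the processors package gets its own --output-<name> option, and each requested module is invoked through the same process(cities, transfers, filename, cache_path) entry point. A sketch of the contract a hypothetical third processor would have to satisfy (the module name csvdump and its body are illustrative, not part of this PR):

import csv


def process(cities, transfers, filename, cache_path):
    # Entry point expected from every module in the processors package.
    # This hypothetical processor just dumps a city summary as CSV.
    with open(filename, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["city", "good"])
        for city in cities:
            writer.writerow([city.name, city.is_good()])

Dropping such a module into processors/ and importing it in processors/__init__.py (next hunk) would make the script accept --output-csvdump FILE automatically.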
processors/__init__.py

@@ -1,2 +1,2 @@
-# Here you can change the processor
-from . import mapsme as processor
+# Import only those processors (modules) you want to use
+from . import mapsme, gtfs
processors/_common.py (new file, 9 lines)

@@ -0,0 +1,9 @@
DEFAULT_INTERVAL = 2.5 * 60  # seconds
KMPH_TO_MPS = 1 / 3.6  # km/h to m/s conversion multiplier
SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS  # m/s
TRANSFER_PENALTY = 30  # seconds


def format_colour(colour):
    """Truncate leading # sign."""
    return colour[1:] if colour else None
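These shared constants drive the transfer-time formula used by both processors: a flat penalty plus the walking time between stop area centres at SPEED_ON_TRANSFER. A worked example of the arithmetic (the 150 m distance is assumed; the real code computes it with distance() from subway_structure):

KMPH_TO_MPS = 1 / 3.6
SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS  # ~0.97 m/s walking speed
TRANSFER_PENALTY = 30  # seconds

walk_metres = 150  # assumed distance between two stop area centres
transfer_time = TRANSFER_PENALTY + round(walk_metres / SPEED_ON_TRANSFER)
# 30 + round(154.3) -> 184 seconds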
processors/gtfs.py (new file, 331 lines)

@@ -0,0 +1,331 @@
import csv
import io
import zipfile

from ._common import (
    DEFAULT_INTERVAL,
    format_colour,
    SPEED_ON_TRANSFER,
    TRANSFER_PENALTY,
)
from subway_structure import (
    distance,
)


DEFAULT_TRIP_START_TIME = "05:00:00"
DEFAULT_TRIP_END_TIME = "01:00:00"
COORDINATE_PRECISION = 7  # fractional digits. It's OSM precision, ~ 5 cm

GTFS_COLUMNS = {
    "agency": [
        "agency_id",
        "agency_name",
        "agency_url",
        "agency_timezone",
        "agency_lang",
        "agency_phone",
    ],
    "routes": [
        "route_id",
        "agency_id",
        "route_short_name",
        "route_long_name",
        "route_desc",
        "route_type",
        "route_url",
        "route_color",
        "route_text_color",
        "route_sort_order",
        "route_fare_class",
        "line_id",
        "listed_route",
    ],
    "trips": [
        "route_id",
        "service_id",
        "trip_id",
        "trip_headsign",
        "trip_short_name",
        "direction_id",
        "block_id",
        "shape_id",
        "wheelchair_accessible",
        "trip_route_type",
        "route_pattern_id",
        "bikes_allowed",
    ],
    "stops": [
        "stop_id",
        "stop_code",
        "stop_name",
        "stop_desc",
        "platform_code",
        "platform_name",
        "stop_lat",
        "stop_lon",
        "zone_id",
        "stop_address",
        "stop_url",
        "level_id",
        "location_type",
        "parent_station",
        "wheelchair_boarding",
        "municipality",
        "on_street",
        "at_street",
        "vehicle_type",
    ],
    "calendar": [
        "service_id",
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
        "start_date",
        "end_date",
    ],
    "stop_times": [
        "trip_id",
        "arrival_time",
        "departure_time",
        "stop_id",
        "stop_sequence",
        "stop_headsign",
        "pickup_type",
        "drop_off_type",
        "shape_dist_traveled",
        "timepoint",
        "checkpoint_id",
        "continuous_pickup",
        "continuous_drop_off",
    ],
    "frequencies": [
        "trip_id",
        "start_time",
        "end_time",
        "headway_secs",
        "exact_times",
    ],
    "shapes": [
        "shape_id",
        "shape_pt_lat",
        "shape_pt_lon",
        "shape_pt_sequence",
        "shape_dist_traveled",
    ],
    "transfers": [
        "from_stop_id",
        "to_stop_id",
        "transfer_type",
        "min_transfer_time",
    ],
}


def dict_to_row(dict_data, record_type):
    """Given an object stored in a dict and an array of columns,
    return a row to use in CSV.
    """
    row = []
    for column in GTFS_COLUMNS[record_type]:
        value = dict_data.get(column)
        if value is None:
            value = ""
        row.append(value)
    return row
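Note: dict_to_row turns missing keys and None values into empty CSV cells, and the column order always follows GTFS_COLUMNS. For instance (values are illustrative):

dict_to_row(
    {"trip_id": "r12345", "headway_secs": 150, "exact_times": None},
    "frequencies",
)
# -> ["r12345", "", "", 150, ""]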
def process(cities, transfers, filename, cache_path):
    """Generate all output and save to file.
    :param cities: List of City instances
    :param transfers: List of sets of StopArea.id
    :param filename: Path to file to save the result
    :param cache_path: Path to json-file with good cities cache or None.
    """

    # TODO: make universal cache for all processors, and apply the cache to GTFS

    # Keys correspond to GTFS file names
    gtfs_data = {key: [] for key in GTFS_COLUMNS.keys()}

    gtfs_data["calendar"].append(
        dict_to_row(
            {
                "service_id": "always",
                "monday": 1,
                "tuesday": 1,
                "wednesday": 1,
                "thursday": 1,
                "friday": 1,
                "saturday": 1,
                "sunday": 1,
                "start_date": "19700101",
                "end_date": "30000101",
            },
            "calendar",
        )
    )

    all_stops = {}  # stop (stop area center or station) el_id -> stop data
    good_cities = [c for c in cities if c.is_good()]

    def add_stop_gtfs(route_stop):
        """Add stop to all_stops.
        If it's not a station, also add the parent station
        if it has not been added yet. Return the gtfs stop_id.
        """
        is_real_stop_area = (
            route_stop.stoparea.element["tags"].get("public_transport")
            == "stop_area"
        )
        el_id_ = route_stop.stoparea.id

        if el_id_ not in all_stops:
            station_name = route_stop.stoparea.station.name
            center = route_stop.stoparea.center
            location_type = 1 if is_real_stop_area else 0
            stop_gtfs = {
                "stop_id": el_id_,
                "stop_code": el_id_,
                "stop_name": station_name,
                "stop_lat": round(center[1], COORDINATE_PRECISION),
                "stop_lon": round(center[0], COORDINATE_PRECISION),
                "location_type": location_type,
            }
            if is_real_stop_area:
                station_id = route_stop.stoparea.station.id
                stop_gtfs["parent_station"] = station_id
                if station_id not in all_stops:
                    center = route_stop.stoparea.station.center
                    station_gtfs = {
                        "stop_id": station_id,
                        "stop_code": station_id,
                        "stop_name": station_name,
                        "stop_lat": round(center[1], COORDINATE_PRECISION),
                        "stop_lon": round(center[0], COORDINATE_PRECISION),
                        "location_type": 1,
                    }
                    all_stops[station_id] = station_gtfs
            all_stops[el_id_] = stop_gtfs
        return el_id_

    # agency, routes, trips, stop_times, frequencies, shapes
    for city in good_cities:
        agency = {"agency_id": city.id, "agency_name": city.name}
        gtfs_data["agency"].append(dict_to_row(agency, "agency"))

        for city_route in city:
            route = {
                "route_id": city_route.id,
                "agency_id": agency["agency_id"],
                "route_type": 12 if city_route.mode == "monorail" else 1,
                "route_short_name": city_route.ref,
                "route_long_name": city_route.name,
                "route_color": format_colour(city_route.colour),
            }
            gtfs_data["routes"].append(dict_to_row(route, "routes"))

            for variant in city_route:
                trip = {
                    "trip_id": variant.id,
                    "route_id": route["route_id"],
                    "service_id": "always",
                    "shape_id": None,
                }
                gtfs_data["trips"].append(dict_to_row(trip, "trips"))

                for i, (lon, lat) in enumerate(variant.tracks):
                    gtfs_data["shapes"].append(
                        dict_to_row(
                            {
                                "shape_id": variant.id,
                                "trip_id": variant.id,
                                "shape_pt_lat": round(
                                    lat, COORDINATE_PRECISION
                                ),
                                "shape_pt_lon": round(
                                    lon, COORDINATE_PRECISION
                                ),
                                "shape_pt_sequence": i,
                            },
                            "shapes",
                        )
                    )

                gtfs_data["frequencies"].append(
                    dict_to_row(
                        {
                            "trip_id": variant.id,
                            "start_time": variant.start_time
                            or DEFAULT_TRIP_START_TIME,
                            "end_time": variant.end_time
                            or DEFAULT_TRIP_END_TIME,
                            "headway_secs": variant.interval
                            or DEFAULT_INTERVAL,
                        },
                        "frequencies",
                    )
                )

                for stop_sequence, route_stop in enumerate(variant):
                    gtfs_stop_id = add_stop_gtfs(route_stop)

                    stop_time = {
                        "trip_id": variant.id,
                        "stop_sequence": stop_sequence,
                        "shape_dist_traveled": route_stop.distance,
                        "stop_id": gtfs_stop_id,
                    }

                    gtfs_data["stop_times"].append(
                        dict_to_row(stop_time, "stop_times")
                    )

    # stops
    gtfs_data["stops"].extend(
        map(lambda row: dict_to_row(row, "stops"), all_stops.values())
    )

    # transfers
    for stoparea_set in transfers:
        for stoparea1 in stoparea_set:
            for stoparea2 in stoparea_set:
                if stoparea1.id < stoparea2.id:
                    transfer_time = TRANSFER_PENALTY + round(
                        distance(stoparea1.center, stoparea2.center)
                        / SPEED_ON_TRANSFER
                    )
                    for id1, id2 in (
                        (stoparea1.id, stoparea2.id),
                        (stoparea2.id, stoparea1.id),
                    ):
                        gtfs_data["transfers"].append(
                            dict_to_row(
                                {
                                    "from_stop_id": id1,
                                    "to_stop_id": id2,
                                    "transfer_type": 0,
                                    "min_transfer_time": transfer_time,
                                },
                                "transfers",
                            )
                        )

    make_gtfs(filename, gtfs_data)


def make_gtfs(filename, gtfs_data):
    if not filename.lower().endswith("zip"):
        filename = f"{filename}.zip"

    with zipfile.ZipFile(filename, "w") as zf:
        for gtfs_feature, columns in GTFS_COLUMNS.items():
            with io.StringIO(newline="") as string_io:
                writer = csv.writer(string_io, delimiter=",")
                writer.writerow(columns)
                writer.writerows(gtfs_data[gtfs_feature])
                zf.writestr(f"{gtfs_feature}.txt", string_io.getvalue())
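The resulting archive is plain zipped CSV, so it can be sanity-checked with the standard library alone. A small read-back sketch (out.zip stands for whatever filename was passed to make_gtfs):

import csv
import io
import zipfile

with zipfile.ZipFile("out.zip") as zf:  # assumed output of make_gtfs
    for name in zf.namelist():
        with zf.open(name) as f:
            reader = csv.reader(io.TextIOWrapper(f, encoding="utf-8"))
            print(name, next(reader))  # feed file and its header row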
processors/mapsme.py

@@ -1,7 +1,16 @@
 import json
 import os
 import logging
+
 from collections import defaultdict
+
+from ._common import (
+    DEFAULT_INTERVAL,
+    format_colour,
+    KMPH_TO_MPS,
+    SPEED_ON_TRANSFER,
+    TRANSFER_PENALTY,
+)
 from subway_structure import (
     distance,
     el_center,
@@ -12,12 +21,9 @@ from subway_structure import (
 
 OSM_TYPES = {'n': (0, 'node'), 'w': (2, 'way'), 'r': (3, 'relation')}
 ENTRANCE_PENALTY = 60  # seconds
-TRANSFER_PENALTY = 30  # seconds
-KMPH_TO_MPS = 1 / 3.6  # km/h to m/s conversion multiplier
 SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS  # m/s
-SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS  # m/s
 SPEED_ON_LINE = 40 * KMPH_TO_MPS  # m/s
-DEFAULT_INTERVAL = 2.5  # minutes
+DEFAULT_INTERVAL = 2.5 * 60  # seconds
 
 
 def uid(elid, typ=None):
@@ -174,15 +180,14 @@ class MapsmeCache:
             logging.warning("Failed to save cache: %s", str(e))
 
 
-def process(cities, transfers, cache_path):
-    """cities - list of City instances;
-    transfers - list of sets of StopArea.id;
-    cache_path - path to json-file with good cities cache or None.
+def process(cities, transfers, filename, cache_path):
+    """Generate all output and save to file.
+    :param cities: List of City instances
+    :param transfers: List of sets of StopArea.id
+    :param filename: Path to file to save the result
+    :param cache_path: Path to json-file with good cities cache or None.
     """
 
-    def format_colour(c):
-        return c[1:] if c else None
-
     def find_exits_for_platform(center, nodes):
         exits = []
         min_distance = None
@@ -282,9 +287,7 @@ def process(cities, transfers, cache_path):
                 routes['itineraries'].append(
                     {
                         'stops': itin,
-                        'interval': round(
-                            (variant.interval or DEFAULT_INTERVAL) * 60
-                        ),
+                        'interval': round(variant.interval or DEFAULT_INTERVAL),
                     }
                 )
             network['routes'].append(routes)
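Note: the * 60 multiplier disappears because intervals are now carried in seconds throughout: Route.get_interval returns seconds via the new osm_interval_to_seconds helper (see subway_structure.py below), and DEFAULT_INTERVAL was likewise changed from 2.5 (minutes) to 2.5 * 60 (seconds).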
@@ -386,4 +389,14 @@ def process(cities, transfers, cache_path):
         'transfers': pairwise_transfers,
         'networks': networks,
     }
-    return result
+
+    if not filename.lower().endswith("json"):
+        filename = f"{filename}.json"
+
+    with open(filename, "w", encoding="utf-8") as f:
+        json.dump(
+            result,
+            f,
+            indent=1,
+            ensure_ascii=False,
+        )
scripts/process_subways.sh

@@ -40,6 +40,7 @@ Environment variable reference:
 - SKIP_FILTERING: skip filtering railway data. Any non-empty string is True
 - FILTERED_DATA: path to filtered data. Defaults to \$TMPDIR/subways.osm
 - MAPSME: file name for maps.me json output
+- GTFS: file name for GTFS output
 - DUMP: directory/file name to dump YAML city data. Do not set to omit dump
 - GEOJSON: directory/file name to dump GeoJSON data. Do not set to omit dump
 - ELEMENTS_CACHE: file name to elements cache. Allows OSM xml processing phase

@@ -234,7 +235,8 @@ fi
 VALIDATION="$TMPDIR/validation.json"
 "$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q \
     -x "$FILTERED_DATA" -l "$VALIDATION" \
-    ${MAPSME:+-o "$MAPSME"} \
+    ${MAPSME:+--output-mapsme "$MAPSME"} \
+    ${GTFS:+--output-gtfs "$GTFS"} \
     ${CITY:+-c "$CITY"} ${DUMP:+-d "$DUMP"} ${GEOJSON:+-j "$GEOJSON"} \
     ${ELEMENTS_CACHE:+-i "$ELEMENTS_CACHE"} \
     ${CITY_CACHE:+--cache "$CITY_CACHE"} \
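With this wiring, both outputs can be produced in a single run by setting the corresponding environment variables, e.g. MAPSME=metro.json GTFS=metro_gtfs.zip scripts/process_subways.sh (paths are illustrative, and the other required variables are assumed to be set as before).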
subway_structure.py

@@ -1,6 +1,7 @@
 import csv
 import logging
 import math
+import re
 import urllib.parse
 import urllib.request
 from css_colours import normalize_colour
@@ -45,6 +46,47 @@ CONSTRUCTION_KEYS = (
 used_entrances = set()
 
 
+START_END_TIMES_RE = re.compile(r'.*?(\d{2}:\d{2})-(\d{2}:\d{2}).*')
+
+
+def get_start_end_times(opening_hours):
+    """Very simplified method to parse the OSM opening_hours tag.
+    We simply take the first HH:MM-HH:MM substring, which is the most
+    probable opening hours interval for most of the weekdays.
+    """
+    m = START_END_TIMES_RE.match(opening_hours)
+    if m:
+        # Each group is HH:MM. We need HH:MM:SS.
+        return tuple(map(lambda t: f"{t}:00", m.groups()))
+    else:
+        return None, None
+
+
+def osm_interval_to_seconds(interval_str):
+    """Convert an OSM 'interval'/'headway' tag value to seconds.
+    The value may be in any of these formats:
+    HH:MM:SS,
+    HH:MM,
+    MM,
+    M
+    (https://wiki.openstreetmap.org/wiki/Key:interval#Format)
+    """
+    hours, minutes, seconds = 0, 0, 0
+    colon_count = interval_str.count(':')
+    try:
+        if colon_count == 0:
+            minutes = int(interval_str)
+        elif colon_count == 1:
+            hours, minutes = map(int, interval_str.split(':'))
+        elif colon_count == 2:
+            hours, minutes, seconds = map(int, interval_str.split(':'))
+        else:
+            return None
+    except ValueError:
+        return None
+    return seconds + 60 * minutes + 60 * 60 * hours
+
+
 class CriticalValidationError(Exception):
     """Is thrown if an error occurs
     that prevents further validation of a city."""
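Expected behaviour of the two new helpers on typical tag values (results worked out from the code above):

get_start_end_times("05:30-00:30")                 # ("05:30:00", "00:30:00")
get_start_end_times("Mo-Su 06:00-23:00; Jan off")  # ("06:00:00", "23:00:00")
get_start_end_times("24/7")                        # (None, None)

osm_interval_to_seconds("5")         # 300: bare minutes
osm_interval_to_seconds("02:30")     # 9000: HH:MM
osm_interval_to_seconds("00:02:30")  # 150: HH:MM:SS
osm_interval_to_seconds("every 5")   # None: unparseable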
@@ -606,10 +648,7 @@ class Route:
                 break
         if not v:
             return None
-        try:
-            return float(v)
-        except ValueError:
-            return None
+        return osm_interval_to_seconds(v)
 
     def build_longest_line(self, relation):
         line_nodes = set()
@@ -786,6 +825,11 @@ class Route:
         self.interval = Route.get_interval(
             relation['tags']
         ) or Route.get_interval(master_tags)
+        self.start_time, self.end_time = get_start_end_times(
+            relation['tags'].get(
+                'opening_hours', master_tags.get('opening_hours', '')
+            )
+        )
         if relation['tags'].get('public_transport:version') == '1':
             city.warn(
                 'Public transport version is 1, which means the route '