Move io functions into subway_io.py module; a bug fixed

This commit is contained in:
Alexey Zakharenkov 2019-04-25 09:42:48 +03:00
parent 0f83a69f8e
commit e38db5f5be
3 changed files with 250 additions and 248 deletions

View file

@ -9,7 +9,6 @@ import time
import urllib.parse
import urllib.request
from processors import processor
from collections import OrderedDict
from subway_structure import (
download_cities,
@ -17,8 +16,11 @@ from subway_structure import (
get_unused_entrances_geojson,
)
from subway_io import (
dump_yaml,
load_xml,
make_geojson,
read_recovery_data,
write_recovery_data
write_recovery_data,
)
@ -57,230 +59,6 @@ def multi_overpass(bboxes):
return result
def load_xml(f):
try:
from lxml import etree
except ImportError:
import xml.etree.ElementTree as etree
elements = []
nodes = {}
for event, element in etree.iterparse(f):
if element.tag in ('node', 'way', 'relation'):
el = {'type': element.tag, 'id': int(element.get('id'))}
if element.tag == 'node':
for n in ('lat', 'lon'):
el[n] = float(element.get(n))
nodes[el['id']] = (el['lat'], el['lon'])
tags = {}
nd = []
members = []
for sub in element:
if sub.tag == 'tag':
tags[sub.get('k')] = sub.get('v')
elif sub.tag == 'nd':
nd.append(int(sub.get('ref')))
elif sub.tag == 'member':
members.append({'type': sub.get('type'),
'ref': int(sub.get('ref')),
'role': sub.get('role', '')})
if tags:
el['tags'] = tags
if nd:
el['nodes'] = nd
if members:
el['members'] = members
elements.append(el)
element.clear()
# Now make centers, assuming relations go after ways
ways = {}
relations = {}
for el in elements:
if el['type'] == 'way' and 'nodes' in el:
center = [0, 0]
count = 0
for nd in el['nodes']:
if nd in nodes:
center[0] += nodes[nd][0]
center[1] += nodes[nd][1]
count += 1
if count > 0:
el['center'] = {'lat': center[0]/count, 'lon': center[1]/count}
ways[el['id']] = (el['center']['lat'], el['center']['lon'])
elif el['type'] == 'relation' and 'members' in el:
center = [0, 0]
count = 0
for m in el['members']:
if m['type'] == 'node' and m['ref'] in nodes:
center[0] += nodes[m['ref']][0]
center[1] += nodes[m['ref']][1]
count += 1
elif m['type'] == 'way' and m['ref'] in ways:
center[0] += ways[m['ref']][0]
center[1] += ways[m['ref']][1]
count += 1
if count > 0:
el['center'] = {'lat': center[0]/count, 'lon': center[1]/count}
relations[el['id']] = (el['center']['lat'], el['center']['lon'])
# Iterating again, now filling relations that contain only relations
for el in elements:
if el['type'] == 'relation' and 'members' in el:
center = [0, 0]
count = 0
for m in el['members']:
if m['type'] == 'node' and m['ref'] in nodes:
center[0] += nodes[m['ref']][0]
center[1] += nodes[m['ref']][1]
count += 1
elif m['type'] == 'way' and m['ref'] in ways:
center[0] += ways[m['ref']][0]
center[1] += ways[m['ref']][1]
count += 1
elif m['type'] == 'relation' and m['ref'] in relations:
center[0] += relations[m['ref']][0]
center[1] += relations[m['ref']][1]
count += 1
if count > 0:
el['center'] = {'lat': center[0]/count, 'lon': center[1]/count}
relations[el['id']] = (el['center']['lat'], el['center']['lon'])
return elements
def dump_data(city, f):
def write_yaml(data, f, indent=''):
if isinstance(data, (set, list)):
f.write('\n')
for i in data:
f.write(indent)
f.write('- ')
write_yaml(i, f, indent + ' ')
elif isinstance(data, dict):
f.write('\n')
for k, v in data.items():
if v is None:
continue
f.write(indent + str(k) + ': ')
write_yaml(v, f, indent + ' ')
if isinstance(v, (list, set, dict)):
f.write('\n')
else:
f.write(str(data))
f.write('\n')
INCLUDE_STOP_AREAS = False
stops = set()
routes = []
for route in city:
stations = OrderedDict([(sa.transfer or sa.id, sa.name) for sa in route.stop_areas()])
rte = {
'type': route.mode,
'ref': route.ref,
'name': route.name,
'colour': route.colour,
'infill': route.infill,
'station_count': len(stations),
'stations': list(stations.values()),
'itineraries': {}
}
for variant in route:
if INCLUDE_STOP_AREAS:
v_stops = []
for st in variant:
s = st.stoparea
if s.id == s.station.id:
v_stops.append('{} ({})'.format(s.station.name, s.station.id))
else:
v_stops.append('{} ({}) in {} ({})'.format(s.station.name, s.station.id,
s.name, s.id))
else:
v_stops = ['{} ({})'.format(
s.stoparea.station.name,
s.stoparea.station.id) for s in variant]
rte['itineraries'][variant.id] = v_stops
stops.update(v_stops)
routes.append(rte)
transfers = []
for t in city.transfers:
v_stops = ['{} ({})'.format(s.name, s.id) for s in t]
transfers.append(v_stops)
result = {
'stations': sorted(stops),
'transfers': sorted(transfers, key=lambda t: t[0]),
'routes': sorted(routes, key=lambda r: r['ref']),
}
write_yaml(result, f)
def make_geojson(city, tracks=True):
transfers = set()
for t in city.transfers:
transfers.update(t)
features = []
stopareas = set()
stops = set()
for rmaster in city:
for variant in rmaster:
if not tracks:
features.append({
'type': 'Feature',
'geometry': {
'type': 'LineString',
'coordinates': [s.stop for s in variant],
},
'properties': {
'ref': variant.ref,
'name': variant.name,
'stroke': variant.colour
}
})
elif variant.tracks:
features.append({
'type': 'Feature',
'geometry': {
'type': 'LineString',
'coordinates': variant.tracks,
},
'properties': {
'ref': variant.ref,
'name': variant.name,
'stroke': variant.colour
}
})
for st in variant:
stops.add(st.stop)
stopareas.add(st.stoparea)
for stop in stops:
features.append({
'type': 'Feature',
'geometry': {
'type': 'Point',
'coordinates': stop,
},
'properties': {
'marker-size': 'small',
'marker-symbol': 'circle'
}
})
for stoparea in stopareas:
features.append({
'type': 'Feature',
'geometry': {
'type': 'Point',
'coordinates': stoparea.center,
},
'properties': {
'name': stoparea.name,
'marker-size': 'small',
'marker-color': '#ff2600' if stoparea in transfers else '#797979'
}
})
return {'type': 'FeatureCollection', 'features': features}
def slugify(name):
return re.sub(r'[^a-z0-9_-]+', '', name.lower().replace(' ', '_'))
@ -381,7 +159,8 @@ if __name__ == '__main__':
logging.info('Finding transfer stations')
transfers = find_transfers(osm, cities)
logging.info('%s good cities: %s', len(good_cities), ', '.join([c.name for c in good_cities]))
logging.info('%s good cities: %s', len(good_cities),
', '.join(sorted([c.name for c in good_cities])))
if options.recovery_path:
write_recovery_data(options.recovery_path, recovery_data, cities)
@ -394,10 +173,10 @@ if __name__ == '__main__':
for c in cities:
with open(os.path.join(options.dump, slugify(c.name) + '.yaml'),
'w', encoding='utf-8') as f:
dump_data(c, f)
dump_yaml(c, f)
elif len(cities) == 1:
with open(options.dump, 'w', encoding='utf-8') as f:
dump_data(cities[0], f)
dump_yaml(cities[0], f)
else:
logging.error('Cannot dump %s cities at once', len(cities))

View file

@ -1,18 +1,244 @@
import json
import logging
from collections import OrderedDict
def load_xml(f):
try:
from lxml import etree
except ImportError:
import xml.etree.ElementTree as etree
elements = []
nodes = {}
for event, element in etree.iterparse(f):
if element.tag in ('node', 'way', 'relation'):
el = {'type': element.tag, 'id': int(element.get('id'))}
if element.tag == 'node':
for n in ('lat', 'lon'):
el[n] = float(element.get(n))
nodes[el['id']] = (el['lat'], el['lon'])
tags = {}
nd = []
members = []
for sub in element:
if sub.tag == 'tag':
tags[sub.get('k')] = sub.get('v')
elif sub.tag == 'nd':
nd.append(int(sub.get('ref')))
elif sub.tag == 'member':
members.append({'type': sub.get('type'),
'ref': int(sub.get('ref')),
'role': sub.get('role', '')})
if tags:
el['tags'] = tags
if nd:
el['nodes'] = nd
if members:
el['members'] = members
elements.append(el)
element.clear()
# Now make centers, assuming relations go after ways
ways = {}
relations = {}
for el in elements:
if el['type'] == 'way' and 'nodes' in el:
center = [0, 0]
count = 0
for nd in el['nodes']:
if nd in nodes:
center[0] += nodes[nd][0]
center[1] += nodes[nd][1]
count += 1
if count > 0:
el['center'] = {'lat': center[0]/count, 'lon': center[1]/count}
ways[el['id']] = (el['center']['lat'], el['center']['lon'])
elif el['type'] == 'relation' and 'members' in el:
center = [0, 0]
count = 0
for m in el['members']:
if m['type'] == 'node' and m['ref'] in nodes:
center[0] += nodes[m['ref']][0]
center[1] += nodes[m['ref']][1]
count += 1
elif m['type'] == 'way' and m['ref'] in ways:
center[0] += ways[m['ref']][0]
center[1] += ways[m['ref']][1]
count += 1
if count > 0:
el['center'] = {'lat': center[0]/count, 'lon': center[1]/count}
relations[el['id']] = (el['center']['lat'], el['center']['lon'])
# Iterating again, now filling relations that contain only relations
for el in elements:
if el['type'] == 'relation' and 'members' in el:
center = [0, 0]
count = 0
for m in el['members']:
if m['type'] == 'node' and m['ref'] in nodes:
center[0] += nodes[m['ref']][0]
center[1] += nodes[m['ref']][1]
count += 1
elif m['type'] == 'way' and m['ref'] in ways:
center[0] += ways[m['ref']][0]
center[1] += ways[m['ref']][1]
count += 1
elif m['type'] == 'relation' and m['ref'] in relations:
center[0] += relations[m['ref']][0]
center[1] += relations[m['ref']][1]
count += 1
if count > 0:
el['center'] = {'lat': center[0]/count, 'lon': center[1]/count}
relations[el['id']] = (el['center']['lat'], el['center']['lon'])
return elements
def dump_yaml(city, f):
def write_yaml(data, f, indent=''):
if isinstance(data, (set, list)):
f.write('\n')
for i in data:
f.write(indent)
f.write('- ')
write_yaml(i, f, indent + ' ')
elif isinstance(data, dict):
f.write('\n')
for k, v in data.items():
if v is None:
continue
f.write(indent + str(k) + ': ')
write_yaml(v, f, indent + ' ')
if isinstance(v, (list, set, dict)):
f.write('\n')
else:
f.write(str(data))
f.write('\n')
INCLUDE_STOP_AREAS = False
stops = set()
routes = []
for route in city:
stations = OrderedDict([(sa.transfer or sa.id, sa.name) for sa in route.stop_areas()])
rte = {
'type': route.mode,
'ref': route.ref,
'name': route.name,
'colour': route.colour,
'infill': route.infill,
'station_count': len(stations),
'stations': list(stations.values()),
'itineraries': {}
}
for variant in route:
if INCLUDE_STOP_AREAS:
v_stops = []
for st in variant:
s = st.stoparea
if s.id == s.station.id:
v_stops.append('{} ({})'.format(s.station.name, s.station.id))
else:
v_stops.append('{} ({}) in {} ({})'.format(s.station.name, s.station.id,
s.name, s.id))
else:
v_stops = ['{} ({})'.format(
s.stoparea.station.name,
s.stoparea.station.id) for s in variant]
rte['itineraries'][variant.id] = v_stops
stops.update(v_stops)
routes.append(rte)
transfers = []
for t in city.transfers:
v_stops = ['{} ({})'.format(s.name, s.id) for s in t]
transfers.append(v_stops)
result = {
'stations': sorted(stops),
'transfers': sorted(transfers, key=lambda t: t[0]),
'routes': sorted(routes, key=lambda r: r['ref']),
}
write_yaml(result, f)
def make_geojson(city, tracks=True):
transfers = set()
for t in city.transfers:
transfers.update(t)
features = []
stopareas = set()
stops = set()
for rmaster in city:
for variant in rmaster:
if not tracks:
features.append({
'type': 'Feature',
'geometry': {
'type': 'LineString',
'coordinates': [s.stop for s in variant],
},
'properties': {
'ref': variant.ref,
'name': variant.name,
'stroke': variant.colour
}
})
elif variant.tracks:
features.append({
'type': 'Feature',
'geometry': {
'type': 'LineString',
'coordinates': variant.tracks,
},
'properties': {
'ref': variant.ref,
'name': variant.name,
'stroke': variant.colour
}
})
for st in variant:
stops.add(st.stop)
stopareas.add(st.stoparea)
for stop in stops:
features.append({
'type': 'Feature',
'geometry': {
'type': 'Point',
'coordinates': stop,
},
'properties': {
'marker-size': 'small',
'marker-symbol': 'circle'
}
})
for stoparea in stopareas:
features.append({
'type': 'Feature',
'geometry': {
'type': 'Point',
'coordinates': stoparea.center,
},
'properties': {
'name': stoparea.name,
'marker-size': 'small',
'marker-color': '#ff2600' if stoparea in transfers else '#797979'
}
})
return {'type': 'FeatureCollection', 'features': features}
def _dumps_route_id(route_id):
"""Argument is a route_id that depends on route colour, ref. Name
can be taken from route_master or be route's own, don't take it.
(some of route attributes can be None). Functions makes it json-compatible -
dumps to a string."""
"""Argument is a route_id that depends on route colour and ref. Name
can be taken from route_master or can be route's own, we don't take it
into consideration. Some of route attributes can be None. The function makes
route_id json-compatible - dumps it to a string."""
return json.dumps(route_id, ensure_ascii=False)
def _loads_route_id(route_id_dump):
"""Argument is a json-encoded identifier of a route.
FunReturn a tuple of"""
Return a tuple (colour, ref)."""
return tuple(json.loads(route_id_dump))
@ -49,25 +275,19 @@ def write_recovery_data(path, current_data, cities):
routes = {}
for route in city:
# Recovery is based primarily on route/station names/refs.
# If route's name/ref/colour changes, the route won't be used.
# If route's ref/colour changes, the route won't be used.
route_id = (route.colour, route.ref)
itineraries = []
for variant in route:
itin = {'stops': [],
'is_circular': variant.is_circular,
'name': variant.name,
'from': variant.element['tags'].get('from'),
'to': variant.element['tags'].get('to')}
for stop in variant:
station = stop.stoparea.station
station_name = station.name
if station_name == '?':
if station_name == '?' and station.int_name:
station_name = station.int_name
# If a station has no name, the itinerary won't be used.
# But! If variant contains only one unnamed station, we can cope with it.
# if station_name is None:
# itin = None
# break
itin['stops'].append({
'oms_id': station.id,
'name': station_name,
@ -92,6 +312,5 @@ def write_recovery_data(path, current_data, cities):
with open(path, 'w', encoding='utf-8') as f:
json.dump(data, f, ensure_ascii=False, indent=2)
except Exception as e:
logging.warning("Cannot write recovery data '%s'", path)
logging.warning(str(e))
logging.warning("Cannot write recovery data '%s': %s", path, str(e))

View file

@ -771,12 +771,16 @@ class Route:
self.city.error(msg, self.element)
def try_resort_stops(self):
"""Precondition: self.city.recovery_data is not None"""
"""Precondition: self.city.recovery_data is not None.
Return success of station order recovering."""
self_stops = {} # station name => RouteStop
for stop in self.stops:
stop_name = stop.stoparea.station.name
station = stop.stoparea.station
stop_name = station.name
if stop_name == '?' and station.int_name:
stop_name = station.int_name
# We won't programmatically recover routes with repeating stations:
# the end doesn't justify the means
# such cases are rare and deserves manual verification
if stop_name in self_stops:
return False
self_stops[stop_name] = stop
@ -813,7 +817,6 @@ class Route:
return False
matching_itinerary = matching_itineraries[0]
self.stops = [self_stops[stop['name']] for stop in matching_itinerary['stops']]
print("Recovered order!")
return True
def __len__(self):
@ -987,6 +990,7 @@ class City:
self.stops_and_platforms = set() # Set of stops and platforms el_id
self.errors = []
self.warnings = []
self.recovery_data = None
def log_message(self, message, el):
if el: