Caching transfers in good cities

This commit is contained in:
Alexey Zakharenkov 2019-02-12 13:05:04 +03:00
parent c168529300
commit 5ee07c9b81
4 changed files with 212 additions and 29 deletions

View file

@ -0,0 +1,159 @@
"""This utility allows one to check equivalency of generated city caches
(defined by --cache command line parameter) of process_subways.py.
Due to the unordered nature of sets/dicts, two runs of process_subways.py
even on the same input may generate equivalent but textually different jsons,
which cannot be compared with the 'diff' command. The compare_jsons() function
compares two city_cache.json taking into account possible shuffling of
dict items and items of some lists, as well as system-specific subtleties.
This utility is useful to ensure that code improvements which must not
affect the process_subways.py output really don't change it.
"""
import sys
import json
from itertools import chain
def compare_jsons(cache0, cache1):
    """Compare two city-cache dicts for semantic equivalence.

    Each cache maps city name -> result dict with 'network', 'stops' and
    'transfers' keys (the city_cache.json layout produced by
    process_subways.py).  Lists whose order is unstable between runs
    (routes, itineraries, entrances/exits, transfers) are sorted before
    comparison, and coordinates are compared with a small tolerance.

    Prints a short description of the first difference found.
    Returns True if the caches are equivalent, False otherwise.
    """
    def floats_eq(a, b):
        # Tolerance covers system-specific float formatting subtleties.
        return abs(b - a) < 1e-13

    def coords_eq(lon1, lat1, lon2, lat2):
        return floats_eq(lon1, lon2) and floats_eq(lat1, lat2)

    def osm_id_comparator(el):
        # Stable sort key for entrance/exit dicts.
        return (el['osm_type'], el['osm_id'])

    city_names0 = sorted(cache0.keys())
    city_names1 = sorted(cache1.keys())
    if city_names0 != city_names1:
        print("Different list of city names!")
        return False
    for name in city_names0:
        result0 = cache0[name]
        result1 = cache1[name]
        network0 = result0['network']
        network1 = result1['network']
        if network0['agency_id'] != network1['agency_id']:
            print("Different agency_id:",
                  network0['network'], network1['network'])
            return False
        # Keys to compare routes. 'name' key is omitted since RouteMaster
        # can get its name from one of its Routes unpredictably.
        route_keys = ('type', 'ref', 'colour', 'route_id')
        route_ids0 = sorted(x['route_id'] for x in network0['routes'])
        route_ids1 = sorted(x['route_id'] for x in network1['routes'])
        if route_ids0 != route_ids1:
            print("Different route_ids", route_ids0, route_ids1)
            return False
        # Equal sorted id lists imply equal length, so zip() below is safe.
        routes0 = sorted(network0['routes'], key=lambda x: x['route_id'])
        routes1 = sorted(network1['routes'], key=lambda x: x['route_id'])
        for route0, route1 in zip(routes0, routes1):
            route0_props = tuple(route0[k] for k in route_keys)
            route1_props = tuple(route1[k] for k in route_keys)
            if route0_props != route1_props:
                print("Route props of ", route0['route_id'], route1['route_id'],
                      "are different:", route0_props, route1_props)
                return False
            itineraries0 = sorted(route0['itineraries'],
                                  key=lambda x: tuple(chain(*x['stops'])))
            itineraries1 = sorted(route1['itineraries'],
                                  key=lambda x: tuple(chain(*x['stops'])))
            # Fix: zip() silently truncates to the shorter list, so an
            # extra or missing itinerary would go unnoticed without this
            # explicit length check.
            if len(itineraries0) != len(itineraries1):
                print("Different number of itineraries at route",
                      route0['name'], route0['route_id'])
                return False
            for itin0, itin1 in zip(itineraries0, itineraries1):
                if itin0['interval'] != itin1['interval']:
                    print("Different interval:",
                          itin0['interval'], "!=", itin1['interval'],
                          "at route", route0['name'], route0['route_id'])
                    return False
                if itin0['stops'] != itin1['stops']:
                    print("Different stops at route",
                          route0['name'], route0['route_id'])
                    return False
        stop_ids0 = sorted(result0['stops'].keys())
        stop_ids1 = sorted(result1['stops'].keys())
        if stop_ids0 != stop_ids1:
            print("Different stop_ids")
            return False
        # Equal sorted key lists imply equal length, so zip() below is safe.
        stops0 = [v for k, v in sorted(result0['stops'].items())]
        stops1 = [v for k, v in sorted(result1['stops'].items())]
        for stop0, stop1 in zip(stops0, stops1):
            stop0_props = tuple(stop0[k] for k in ('name', 'osm_id', 'osm_type'))
            stop1_props = tuple(stop1[k] for k in ('name', 'osm_id', 'osm_type'))
            if stop0_props != stop1_props:
                print("Different stops properties:", stop0_props, stop1_props)
                return False
            if not coords_eq(stop0['lon'], stop0['lat'],
                             stop1['lon'], stop1['lat']):
                print("Different stops coordinates:",
                      stop0_props, stop0['lon'], stop0['lat'],
                      stop1_props, stop1['lon'], stop1['lat'])
                return False
            entrances0 = sorted(stop0['entrances'], key=osm_id_comparator)
            entrances1 = sorted(stop1['entrances'], key=osm_id_comparator)
            if entrances0 != entrances1:
                print("Different stop entrances")
                return False
            exits0 = sorted(stop0['exits'], key=osm_id_comparator)
            exits1 = sorted(stop1['exits'], key=osm_id_comparator)
            if exits0 != exits1:
                print("Different stop exits")
                return False
        if len(result0['transfers']) != len(result1['transfers']):
            print("Different len(transfers):",
                  len(result0['transfers']), len(result1['transfers']))
            return False
        # Normalize each transfer so the smaller stoparea uid comes first,
        # then sort: transfer order and endpoint order are both unstable.
        transfers0 = [tuple(t) if t[0] < t[1] else tuple([t[1], t[0], t[2]])
                      for t in result0['transfers']]
        transfers1 = [tuple(t) if t[0] < t[1] else tuple([t[1], t[0], t[2]])
                      for t in result1['transfers']]
        transfers0.sort()
        transfers1.sort()
        diff_cnt = 0
        for i, (tr0, tr1) in enumerate(zip(transfers0, transfers1)):
            if tr0 != tr1:
                if i == 0:
                    print("First pair of different transfers", tr0, tr1)
                diff_cnt += 1
        if diff_cnt:
            print("Different transfers number = ", diff_cnt)
            return False
    return True
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: {} <cache1.json> <cache2.json>".format(sys.argv[0]))
        sys.exit()
    path0, path1 = sys.argv[1:3]
    # Context managers close the files deterministically; the original
    # json.load(open(...)) form leaked both file handles.
    with open(path0, encoding='utf-8') as f0:
        j0 = json.load(f0)
    with open(path1, encoding='utf-8') as f1:
        j1 = json.load(f1)
    equal = compare_jsons(j0, j1)
    print("The results are {}equal".format("" if equal else "NOT "))

View file

@ -72,12 +72,12 @@ def compare_jsons(result0, result1):
for itin0, itin1 in zip(itineraries0, itineraries1):
if itin0['interval'] != itin1['interval']:
print("Different interval:",
f"{itin0['interval']} != {itin1['interval']}"
f" at route {route0['name']} {route0['route_id']}")
itin0['interval'], "!=", itin1['interval'],
"at route", route0['name'], route0['route_id'])
return False
if itin0['stops'] != itin1['stops']:
print(f"Different stops at route",
f"{route0['name']} {route0['route_id']}")
print("Different stops at route",
route0['name'], route0['route_id'])
return False
stop_ids0 = sorted(x['id'] for x in result0['stops'])

View file

@ -48,19 +48,19 @@ def process(cities, transfers, cache_name):
def is_cached_city_usable(city, city_cache_data):
"""Checks if cached stops and entrances still exist in osm data"""
for stop_area_id, cached_stop_area in city_cache_data['stops'].items():
station_id = cached_stop_area['osm_type'][0] + str(cached_stop_area['osm_id'])
for stoparea_id, cached_stoparea in city_cache_data['stops'].items():
station_id = cached_stoparea['osm_type'][0] + str(cached_stoparea['osm_id'])
city_station = city.elements.get(station_id)
if (not city_station or
not Station.is_station(city_station, city.modes) or
distance(el_center(city_station),
tuple(cached_stop_area[coord] for coord in ('lon', 'lat'))
tuple(cached_stoparea[coord] for coord in ('lon', 'lat'))
) > CLOSENESS_TO_CACHED_ELEMENT_THRESHOLD
):
return False
for cached_entrance in itertools.chain(cached_stop_area['entrances'],
cached_stop_area['exits']):
for cached_entrance in itertools.chain(cached_stoparea['entrances'],
cached_stoparea['exits']):
entrance_id = cached_entrance['osm_type'][0] + str(cached_entrance['osm_id'])
city_entrance = city.elements.get(entrance_id)
if (not city_entrance or
@ -69,7 +69,8 @@ def process(cities, transfers, cache_name):
) > CLOSENESS_TO_CACHED_ELEMENT_THRESHOLD
):
pass # TODO:
# return False?
# really pass (take cached entrances as they are)?
# Or return False?
# Or count broken entrances and leave only good?
# Or ignore all old entrances and use station point as entrance and exit?
@ -81,33 +82,36 @@ def process(cities, transfers, cache_name):
with open(cache_name, 'r', encoding='utf-8') as f:
cache = json.load(f)
route_stops = {} # stop_area el_id -> RouteStop instance
stops = {} # stop_area el_id -> stop jsonified data
stop_areas = {} # stoparea el_id -> StopArea instance
stops = {} # stoparea el_id -> stop jsonified data
networks = []
good_cities = [c for c in cities if c.is_good()]
good_city_names = set(c.name for c in good_cities)
recovered_city_names = set()
for city_name, city_cached_data in cache.items():
if city_name in good_city_names:
continue
# TODO: get a network, stops [[and transfers (?)]] from cache
# TODO: get a network, stops and transfers from cache
city = [c for c in cities if c.name == city_name][0]
if is_cached_city_usable(city, city_cached_data):
stops.update(city_cached_data['stops'])
networks.append(city_cached_data['network'])
print("Taking {} from cache".format(city_name))
recovered_city_names.add(city.name)
platform_nodes = {}
# One stop_area may participate in routes of different cities
stop_cities = defaultdict(set) # stop_area id -> city names
# One stoparea may participate in routes of different cities
stop_cities = defaultdict(set) # stoparea id -> city names
for city in good_cities:
network = {'network': city.name, 'routes': [], 'agency_id': city.id}
cache[city.name] = {
'network': network,
'stops': {} # stop_area el_id -> jsonified stop data
'stops': {}, # stoparea el_id -> jsonified stop data
'transfers': [] # list of tuples (stoparea1_uid, stoparea2_uid, time); uid1 < uid2
}
for route in city:
routes = {
@ -124,7 +128,7 @@ def process(cities, transfers, cache_name):
for i, variant in enumerate(route):
itin = []
for stop in variant:
route_stops[stop.stoparea.id] = stop.stoparea
stop_areas[stop.stoparea.id] = stop.stoparea
stop_cities[stop.stoparea.id].add(city.name)
itin.append([uid(stop.stoparea.id), round(stop.distance*3.6/SPEED_ON_LINE)])
# Make exits from platform nodes, if we don't have proper exits
@ -156,7 +160,7 @@ def process(cities, transfers, cache_name):
network['routes'].append(routes)
networks.append(network)
for stop_id, stop in route_stops.items():
for stop_id, stop in stop_areas.items():
st = {
'name': stop.name,
'int_name': stop.int_name,
@ -208,26 +212,44 @@ def process(cities, transfers, cache_name):
m_stops = list(stops.values())
c_transfers = []
c_transfers = {} # (stoparea1_uid, stoparea2_uid) -> time; uid1 < uid2
for t_set in transfers:
t = list(t_set)
for t_first in range(len(t) - 1):
for t_second in range(t_first + 1, len(t)):
if t[t_first].id in stops and t[t_second].id in stops:
c_transfers.append([
uid(t[t_first].id),
uid(t[t_second].id),
30 + round(distance(t[t_first].center,
t[t_second].center)*3.6/SPEED_ON_TRANSFER)
])
stoparea1 = t[t_first]
stoparea2 = t[t_second]
if stoparea1.id in stops and stoparea2.id in stops:
uid1 = uid(stoparea1.id)
uid2 = uid(stoparea2.id)
uid1, uid2 = sorted([uid1, uid2])
transfer_time = (30 + round(distance(stoparea1.center,
stoparea2.center
) * 3.6/SPEED_ON_TRANSFER))
c_transfers[(uid1, uid2)] = transfer_time
# If a transfer is inside a good city, add it to the city's cache.
for city_name in (good_city_names &
stop_cities[stoparea1.id] &
stop_cities[stoparea2.id]):
cache[city_name]['transfers'].append((uid1, uid2, transfer_time))
# Some transfers may be corrupted in not good cities.
# Take them from recovered cities.
for city_name in recovered_city_names:
for stop1_uid, stop2_uid, transfer_time in cache[city_name]['transfers']:
if (stop1_uid, stop2_uid) not in c_transfers:
c_transfers[(stop1_uid, stop2_uid)] = transfer_time
if cache_name:
with open(cache_name, 'w', encoding='utf-8') as f:
json.dump(cache, f, ensure_ascii=False)
json.dump(cache, f, indent=2, ensure_ascii=False)
m_transfers = [(stop1_uid, stop2_uid, transfer_time)
for (stop1_uid, stop2_uid), transfer_time in c_transfers.items()]
result = {
'stops': m_stops,
'transfers': c_transfers,
'transfers': m_transfers,
'networks': networks
}
return result

View file

@ -71,7 +71,9 @@ QNODES="railway=station station=subway =light_rail =monorail railway=subway_entr
# Running the validation
VALIDATION="$TMPDIR/validation.json"
"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q -x "$FILTERED_DATA" -l "$VALIDATION" ${MAPSME+-o "$MAPSME"} ${CITY+-c "$CITY"} ${DUMP+-d "$DUMP"} ${JSON+-j "$JSON"}
"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q -x "$FILTERED_DATA" -l "$VALIDATION" ${MAPSME+-o "$MAPSME"}\
${CITY+-c "$CITY"} ${DUMP+-d "$DUMP"} ${JSON+-j "$JSON"}\
${ELEMENTS_CACHE+-i "$ELEMENTS_CACHE"} ${CITY_CACHE+--cache "$CITY_CACHE"}
rm "$FILTERED_DATA"
# Preparing HTML files