Merge pull request #65 from alexey-zakharenkov/good-cities-cache

Cache of good cities
This commit is contained in:
Victor Popov 2019-03-22 18:28:12 +03:00 committed by GitHub
commit 40a23d4e65
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 411 additions and 152 deletions

138
checkers/common.py Normal file
View file

@ -0,0 +1,138 @@
import logging
import math
import functools
from itertools import chain
"""A coordinate of a station precision of which we must take into account
is calculated as an average of somewhat 10 elements.
Taking machine epsilon 1e-15, averaging 10 numbers with close magnitudes
ensures relative precision of 1e-14."""
coord_isclose = functools.partial(math.isclose, rel_tol=1e-14)
def coords_eq(lon1, lat1, lon2, lat2):
    """Return True when both coordinate pairs match within the
    relative tolerance used for station coordinates (1e-14)."""
    return (math.isclose(lon1, lon2, rel_tol=1e-14)
            and math.isclose(lat1, lat2, rel_tol=1e-14))
def osm_id_comparator(el):
    """This function is used as key for sorting lists of
    OSM-originated objects
    """
    return tuple(el[key] for key in ('osm_type', 'osm_id'))
def compare_stops(stop0, stop1):
    """Compares json of two stops in route"""
    key_fields = ('name', 'int_name', 'id', 'osm_id', 'osm_type')
    props0 = tuple(stop0[k] for k in key_fields)
    props1 = tuple(stop1[k] for k in key_fields)
    if props0 != props1:
        logging.debug("Different stops properties: %s, %s",
                      props0, props1)
        return False

    # Same tolerance as the module-level coord_isclose helper:
    # station coordinates are averages of ~10 close-magnitude floats.
    coords_match = (
        math.isclose(stop0['lon'], stop1['lon'], rel_tol=1e-14) and
        math.isclose(stop0['lat'], stop1['lat'], rel_tol=1e-14)
    )
    if not coords_match:
        logging.debug("Different stops coordinates: %s (%f, %f), %s (%f, %f)",
                      props0, stop0['lon'], stop0['lat'],
                      props1, stop1['lon'], stop1['lat'])
        return False

    # Entrances/exits come in arbitrary order; normalize before comparing.
    by_osm_id = lambda el: (el['osm_type'], el['osm_id'])
    if (sorted(stop0['entrances'], key=by_osm_id)
            != sorted(stop1['entrances'], key=by_osm_id)):
        logging.debug("Different stop entrances")
        return False
    if (sorted(stop0['exits'], key=by_osm_id)
            != sorted(stop1['exits'], key=by_osm_id)):
        logging.debug("Different stop exits")
        return False
    return True
def compare_transfers(transfers0, transfers1):
    """Compares two arrays of transfers of the form
    [(stop1_uid, stop2_uid, time), ...]
    """
    if len(transfers0) != len(transfers1):
        logging.debug("Different len(transfers): %d != %d",
                      len(transfers0), len(transfers1))
        return False

    def normalize(transfers):
        # Order each pair so the smaller uid comes first, then sort the
        # whole list so both arrays can be compared element-wise.
        return sorted(
            (t[0], t[1], t[2]) if t[0] < t[1] else (t[1], t[0], t[2])
            for t in transfers
        )

    diff_cnt = 0
    for tr0, tr1 in zip(normalize(transfers0), normalize(transfers1)):
        if tr0 != tr1:
            if diff_cnt == 0:
                logging.debug("First pair of different transfers: %s, %s",
                              tr0, tr1)
            diff_cnt += 1
    if diff_cnt:
        logging.debug("Different transfers number = %d", diff_cnt)
        return False
    return True
def compare_networks(network0, network1):
    """Compares two networks: agency id, route ids and the contents
    of every route (props, itineraries, stops).

    Returns True if the networks are equivalent; the first detected
    difference is reported via logging.debug.
    """
    if network0['agency_id'] != network1['agency_id']:
        # Fixed wording: the placeholder is a network name, not a route.
        logging.debug("Different agency_id at network '%s'",
                      network0['network'])
        return False
    route_ids0 = sorted(x['route_id'] for x in network0['routes'])
    route_ids1 = sorted(x['route_id'] for x in network1['routes'])
    if route_ids0 != route_ids1:
        logging.debug("Different route_ids: %s != %s",
                      route_ids0, route_ids1)
        return False
    # Equal sorted route_ids imply equal route counts, so zip() below
    # cannot silently drop routes.
    routes0 = sorted(network0['routes'], key=lambda x: x['route_id'])
    routes1 = sorted(network1['routes'], key=lambda x: x['route_id'])

    # Keys to compare routes. 'name' key is omitted since RouteMaster
    # can get its name from one of its Routes unpredictably.
    route_keys = ('type', 'ref', 'colour', 'route_id')

    for route0, route1 in zip(routes0, routes1):
        route0_props = tuple(route0[k] for k in route_keys)
        route1_props = tuple(route1[k] for k in route_keys)
        if route0_props != route1_props:
            logging.debug("Route props of '%s' are different: %s, %s",
                          route0['route_id'], route0_props, route1_props)
            return False
        itineraries0 = sorted(route0['itineraries'],
                              key=lambda x: tuple(chain(*x['stops'])))
        itineraries1 = sorted(route1['itineraries'],
                              key=lambda x: tuple(chain(*x['stops'])))
        # Bug fix: zip() silently ignores surplus elements, so a route
        # with extra itineraries used to compare as equal.
        if len(itineraries0) != len(itineraries1):
            logging.debug("Different number of itineraries (%d != %d)"
                          " at route %s '%s'",
                          len(itineraries0), len(itineraries1),
                          route0['route_id'], route0['name'])
            return False
        for itin0, itin1 in zip(itineraries0, itineraries1):
            if itin0['interval'] != itin1['interval']:
                logging.debug("Different interval: %d != %d at route %s '%s'",
                              itin0['interval'], itin1['interval'],
                              route0['route_id'], route0['name'])
                return False
            if itin0['stops'] != itin1['stops']:
                logging.debug("Different stops at route %s '%s'",
                              route0['route_id'], route0['name'])
                return False
    return True

View file

@ -0,0 +1,65 @@
"""This utility allows one to check equivalency of generated city caches
(defined by --cache command line parameter) of process_subways.py.
Due to unordered nature of sets/dicts, two runs of process_subways.py
even on the same input generate equivalent jsons,
which cannot be compared with 'diff' command. The compare_jsons() function
compares two city_cache.json taking into account possible shuffling of
dict items and items of some lists, as well as system-specific subtleties.
This utility is useful to ensure that code improvements which must not
affect the process_subways.py output really don't change it.
"""
import sys
import json
import logging
from common import compare_stops, compare_transfers, compare_networks
def compare_jsons(cache0, cache1):
    """Compares two city caches"""
    if sorted(cache0.keys()) != sorted(cache1.keys()):
        logging.debug("Different list of city names!")
        return False
    for name in sorted(cache0.keys()):
        city0, city1 = cache0[name], cache1[name]
        if not compare_networks(city0['network'], city1['network']):
            return False
        stop_ids0 = sorted(city0['stops'].keys())
        stop_ids1 = sorted(city1['stops'].keys())
        if stop_ids0 != stop_ids1:
            logging.debug("Different stop_ids")
            return False
        # stop id lists are equal, so pair the stops by sorted key
        for stop_id in stop_ids0:
            if not compare_stops(city0['stops'][stop_id],
                                 city1['stops'][stop_id]):
                return False
        if not compare_transfers(city0['transfers'], city1['transfers']):
            return False
    return True
if __name__ == "__main__":
    # CLI entry point: compare two city cache files produced by
    # `process_subways.py --cache` and report whether they are equivalent.
    if len(sys.argv) != 3:
        print("Usage: {} <cache1.json> <cache2.json>".format(sys.argv[0]))
        sys.exit()
    # DEBUG level so the compare_* helpers can report the first difference.
    logging.basicConfig(level=logging.DEBUG)
    path0, path1 = sys.argv[1:3]
    j0 = json.load(open(path0, encoding='utf-8'))
    j1 = json.load(open(path1, encoding='utf-8'))
    equal = compare_jsons(j0, j1)
    print("The city caches are {}equal".format("" if equal else "NOT "))

View file

@ -12,129 +12,36 @@
import sys
import json
from itertools import chain
import logging
from common import compare_stops, compare_transfers, compare_networks
def compare_jsons(result0, result1):
def floats_eq(a, b):
return abs(b - a) < 1e-14
def coords_eq(lon1, lat1, lon2, lat2):
return floats_eq(lon1, lon2) and floats_eq(lat1, lat2)
def osm_id_comparator(el):
return (el['osm_type'], el['osm_id'])
"""Compares two objects which are results of subway generation"""
network_names0 = sorted([x['network'] for x in result0['networks']])
network_names1 = sorted([x['network'] for x in result1['networks']])
if network_names0 != network_names1:
print("Different list of network names!")
logging.debug("Different list of network names!")
return False
networks0 = sorted(result0['networks'], key=lambda x: x['network'])
networks1 = sorted(result1['networks'], key=lambda x: x['network'])
# Keys to compare routes. 'name' key is omitted since RouteMaster
# can get its name from one of its Routes unpredictably.
route_keys = ('type', 'ref', 'colour', 'route_id')
for network0, network1 in zip(networks0, networks1):
if network0['agency_id'] != network1['agency_id']:
print("Different agency_id:",
network0['network'], network1['network'])
if not compare_networks(network0, network1):
return False
route_ids0 = sorted(x['route_id'] for x in network0['routes'])
route_ids1 = sorted(x['route_id'] for x in network1['routes'])
if route_ids0 != route_ids1:
print("Different route_ids", route_ids0, route_ids1)
return False
routes0 = sorted(network0['routes'], key=lambda x: x['route_id'])
routes1 = sorted(network1['routes'], key=lambda x: x['route_id'])
for route0, route1 in zip(routes0, routes1):
route0_props = tuple(route0[k] for k in route_keys)
route1_props = tuple(route1[k] for k in route_keys)
if route0_props != route1_props:
print("Route props of ", route0['route_id'], route1['route_id'],
"are different:", route0_props, route1_props)
return False
itineraries0 = sorted(route0['itineraries'],
key=lambda x: tuple(chain(*x['stops'])))
itineraries1 = sorted(route1['itineraries'],
key=lambda x: tuple(chain(*x['stops'])))
for itin0, itin1 in zip(itineraries0, itineraries1):
if itin0['interval'] != itin1['interval']:
print("Different interval:",
f"{itin0['interval']} != {itin1['interval']}"
f" at route {route0['name']} {route0['route_id']}")
return False
if itin0['stops'] != itin1['stops']:
print(f"Different stops at route",
f"{route0['name']} {route0['route_id']}")
return False
stop_ids0 = sorted(x['id'] for x in result0['stops'])
stop_ids1 = sorted(x['id'] for x in result1['stops'])
if stop_ids0 != stop_ids1:
print("Different stop_ids")
logging.debug("Different stop_ids")
return False
stops0 = sorted(result0['stops'], key=lambda x: x['id'])
stops1 = sorted(result1['stops'], key=lambda x: x['id'])
for stop0, stop1 in zip(stops0, stops1):
stop0_props = tuple(stop0[k] for k in ('name', 'osm_id', 'osm_type'))
stop1_props = tuple(stop1[k] for k in ('name', 'osm_id', 'osm_type'))
if stop0_props != stop1_props:
print("Different stops properties:", stop0_props, stop1_props)
return False
if not coords_eq(stop0['lon'], stop0['lat'],
stop1['lon'], stop1['lat']):
print("Different stops coordinates:",
stop0_props, stop0['lon'], stop0['lat'],
stop1_props, stop1['lon'], stop1['lat'])
if not compare_stops(stop0, stop1):
return False
entrances0 = sorted(stop0['entrances'], key=osm_id_comparator)
entrances1 = sorted(stop1['entrances'], key=osm_id_comparator)
if entrances0 != entrances1:
print("Different stop entrances")
return False
exits0 = sorted(stop0['exits'], key=osm_id_comparator)
exits1 = sorted(stop1['exits'], key=osm_id_comparator)
if exits0 != exits1:
print("Different stop exits")
return False
if len(result0['transfers']) != len(result1['transfers']):
print("Different len(transfers):",
len(result0['transfers']), len(result1['transfers']))
return False
transfers0 = [tuple(t) if t[0] < t[1] else tuple([t[1], t[0], t[2]])
for t in result0['transfers']]
transfers1 = [tuple(t) if t[0] < t[1] else tuple([t[1], t[0], t[2]])
for t in result1['transfers']]
transfers0.sort(key=lambda x: tuple(x))
transfers1.sort(key=lambda x: tuple(x))
diff_cnt = 0
for i, (tr0, tr1) in enumerate(zip(transfers0, transfers1)):
if tr0 != tr1:
if i == 0:
print("First pair of different transfers", tr0, tr1)
diff_cnt += 1
if diff_cnt:
print("Different transfers number = ", diff_cnt)
if not compare_transfers(result0['transfers'], result1['transfers']):
return False
return True
@ -145,6 +52,8 @@ if __name__ == "__main__":
print("Usage: {} <file1.json> <file2.json>".format(sys.argv[0]))
sys.exit()
logging.basicConfig(level=logging.DEBUG)
path0, path1 = sys.argv[1:3]
j0 = json.load(open(path0, encoding='utf-8'))
@ -152,4 +61,4 @@ if __name__ == "__main__":
equal = compare_jsons(j0, j1)
print("The results are {}equal".format("" if equal else "NOT "))
print("The results are {}equal".format("" if equal else "NOT "))

View file

@ -406,5 +406,5 @@ if __name__ == '__main__':
json.dump(res, options.log)
if options.output:
json.dump(processor.process(good_cities, transfers, options.cache),
json.dump(processor.process(cities, transfers, options.cache),
options.output, indent=1, ensure_ascii=False)

View file

@ -1,24 +1,166 @@
import json
from subway_structure import distance
import os
import logging
from collections import defaultdict
from subway_structure import distance, el_center, Station
OSM_TYPES = {'n': (0, 'node'), 'w': (2, 'way'), 'r': (3, 'relation')}
ENTRANCE_PENALTY = 60 # seconds
SPEED_TO_ENTRANCE = 5 # km/h
SPEED_ON_TRANSFER = 3.5
SPEED_ON_LINE = 40
TRANSFER_PENALTY = 30 # seconds
KMPH_TO_MPS = 1/3.6 # km/h to m/s conversion multiplier
SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS # m/s
SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS # m/s
SPEED_ON_LINE = 40 * KMPH_TO_MPS # m/s
DEFAULT_INTERVAL = 2.5 # minutes
DISPLACEMENT_TOLERANCE = 300 # meters
def process(cities, transfers, cache_name):
def uid(elid, typ=None):
t = elid[0]
osm_id = int(elid[1:])
if not typ:
osm_id = osm_id << 2 + OSM_TYPES[t][0]
elif typ != t:
raise Exception('Got {}, expected {}'.format(elid, typ))
return osm_id << 1
def uid(elid, typ=None):
    """Convert an OSM element id like 'n12345' into a single integer.

    Without typ, the OSM type code (from OSM_TYPES) is packed into the
    two low bits before the final left shift; with typ, the element is
    required to be of that type and no type code is packed.
    """
    elem_type = elid[0]
    osm_id = int(elid[1:])
    if not typ:
        osm_id = (osm_id << 2) + OSM_TYPES[elem_type][0]
    elif typ != elem_type:
        raise Exception('Got {}, expected {}'.format(elid, typ))
    return osm_id << 1
class DummyCache:
    """This class may be used when you need to omit all cache processing"""

    def __init__(self, cache_path, cities):
        pass

    def __getattr__(self, name):
        """Any attribute access yields a no-op callable, so arbitrary
        method calls on the instance silently do nothing and never raise."""
        def no_op(*args, **kwargs):
            return None
        return no_op
def if_object_is_used(method):
    """Decorator to skip method execution under certain condition.
    Relies on "is_used" object property."""
    def inner(self, *args, **kwargs):
        if self.is_used:
            return method(self, *args, **kwargs)
        return None
    return inner
class MapsmeCache:
    """File-backed cache of already-validated ("good") cities.

    Stores, per city, its network description, jsonified stops and
    transfers, so that a city that turned bad in the current OSM data
    can still be served from the last good snapshot.  When constructed
    without a cache path, every public method is a silent no-op
    (enforced by the if_object_is_used decorator).
    """

    def __init__(self, cache_path, cities):
        # cache_path: path to the json cache file, or None/'' to disable.
        # cities: iterable of City objects (project type) — assumed to
        # expose .name and .is_good(); TODO confirm against caller.
        if not cache_path:
            # cache is not used, all actions with cache must be silently skipped
            self.is_used = False
            return
        self.cache_path = cache_path
        self.is_used = True
        self.cache = {}  # city name -> {'network', 'stops', 'transfers'}
        if os.path.exists(cache_path):
            try:
                with open(cache_path, 'r', encoding='utf-8') as f:
                    self.cache = json.load(f)
            except json.decoder.JSONDecodeError:
                # A corrupt cache file is not fatal: rebuild from scratch.
                logging.warning("City cache '%s' is not a valid json file. "
                                "Building cache from scratch.", cache_path)
        self.recovered_city_names = set()
        # One stoparea may participate in routes of different cities
        self.stop_cities = defaultdict(set)  # stoparea id -> city names
        self.city_dict = {c.name: c for c in cities}
        self.good_city_names = {c.name for c in cities if c.is_good()}

    def _is_cached_city_usable(self, city):
        """Check if cached stations still exist in osm data and
        not moved far away.
        """
        city_cache_data = self.cache[city.name]
        for stoparea_id, cached_stoparea in city_cache_data['stops'].items():
            # Rebuild the element id: type letter ('n'/'w'/'r') + numeric id.
            station_id = cached_stoparea['osm_type'][0] + str(cached_stoparea['osm_id'])
            city_station = city.elements.get(station_id)
            if (not city_station or
                    not Station.is_station(city_station, city.modes)):
                # Station disappeared or is no longer tagged as a station.
                return False
            station_coords = el_center(city_station)
            cached_station_coords = tuple(cached_stoparea[coord] for coord in ('lon', 'lat'))
            displacement = distance(station_coords, cached_station_coords)
            if displacement > DISPLACEMENT_TOLERANCE:
                # Station moved too far since the cache was built.
                return False
        return True

    @if_object_is_used
    def provide_stops_and_networks(self, stops, networks):
        """Put stops and networks for bad cities into containers
        passed as arguments."""
        for city in self.city_dict.values():
            if not city.is_good() and city.name in self.cache:
                city_cached_data = self.cache[city.name]
                if self._is_cached_city_usable(city):
                    stops.update(city_cached_data['stops'])
                    networks.append(city_cached_data['network'])
                    logging.info("Taking %s from cache", city.name)
                    self.recovered_city_names.add(city.name)

    @if_object_is_used
    def provide_transfers(self, transfers):
        """Add transfers from usable cached cities to 'transfers' dict
        passed as argument."""
        for city_name in self.recovered_city_names:
            city_cached_transfers = self.cache[city_name]['transfers']
            for stop1_uid, stop2_uid, transfer_time in city_cached_transfers:
                # Do not overwrite transfers computed from live data.
                if (stop1_uid, stop2_uid) not in transfers:
                    transfers[(stop1_uid, stop2_uid)] = transfer_time

    @if_object_is_used
    def initialize_good_city(self, city_name, network):
        """Create/replace one cache element with new data container.
        This should be done for each good city."""
        self.cache[city_name] = {
            'network': network,
            'stops': {},  # stoparea el_id -> jsonified stop data
            'transfers': []  # list of tuples (stoparea1_uid, stoparea2_uid, time); uid1 < uid2
        }

    @if_object_is_used
    def link_stop_with_city(self, stoparea_id, city_name):
        """Remember that some stop_area is used in a city."""
        stoparea_uid = uid(stoparea_id)
        self.stop_cities[stoparea_uid].add(city_name)

    @if_object_is_used
    def add_stop(self, stoparea_id, st):
        """Add stoparea to the cache of each city the stoparea is in."""
        stoparea_uid = uid(stoparea_id)
        for city_name in self.stop_cities[stoparea_uid]:
            self.cache[city_name]['stops'][stoparea_id] = st

    @if_object_is_used
    def add_transfer(self, stoparea1_uid, stoparea2_uid, transfer_time):
        """If a transfer is inside a good city, add it to the city's cache."""
        # Set intersection: the transfer is cached only for cities that are
        # good AND contain both stop areas.
        for city_name in (self.good_city_names &
                          self.stop_cities[stoparea1_uid] &
                          self.stop_cities[stoparea2_uid]):
            self.cache[city_name]['transfers'].append(
                (stoparea1_uid, stoparea2_uid, transfer_time)
            )

    @if_object_is_used
    def save(self):
        """Write the cache back to disk; failure to save is non-fatal."""
        try:
            with open(self.cache_path, 'w', encoding='utf-8') as f:
                json.dump(self.cache, f, ensure_ascii=False)
        except Exception as e:
            logging.warning("Failed to save cache: %s", str(e))
def process(cities, transfers, cache_path):
"""cities - list of City instances;
transfers - list of sets of StopArea.id;
cache_path - path to json-file with good cities cache or None.
"""
def format_colour(c):
return c[1:] if c else None
@ -42,23 +184,19 @@ def process(cities, transfers, cache_name):
exits.append(n)
return exits
cache = {}
if cache_name:
with open(cache_name, 'r', encoding='utf-8') as f:
cache = json.load(f)
stops = {} # el_id -> station data
cache = MapsmeCache(cache_path, cities)
stop_areas = {} # stoparea el_id -> StopArea instance
stops = {} # stoparea el_id -> stop jsonified data
networks = []
good_cities = set([c.name for c in cities])
for city_name, data in cache.items():
if city_name in good_cities:
continue
# TODO: get a network, stops and transfers from cache
good_cities = [c for c in cities if c.is_good()]
platform_nodes = {}
for city in cities:
cache.provide_stops_and_networks(stops, networks)
for city in good_cities:
network = {'network': city.name, 'routes': [], 'agency_id': city.id}
cache.initialize_good_city(city.name, network)
for route in city:
routes = {
'type': route.mode,
@ -74,8 +212,9 @@ def process(cities, transfers, cache_name):
for i, variant in enumerate(route):
itin = []
for stop in variant:
stops[stop.stoparea.id] = stop.stoparea
itin.append([uid(stop.stoparea.id), round(stop.distance*3.6/SPEED_ON_LINE)])
stop_areas[stop.stoparea.id] = stop.stoparea
cache.link_stop_with_city(stop.stoparea.id, city.name)
itin.append([uid(stop.stoparea.id), round(stop.distance/SPEED_ON_LINE)])
# Make exits from platform nodes, if we don't have proper exits
if len(stop.stoparea.entrances) + len(stop.stoparea.exits) == 0:
for pl in stop.stoparea.platforms:
@ -105,8 +244,7 @@ def process(cities, transfers, cache_name):
network['routes'].append(routes)
networks.append(network)
m_stops = []
for stop in stops.values():
for stop_id, stop in stop_areas.items():
st = {
'name': stop.name,
'int_name': stop.int_name,
@ -127,7 +265,7 @@ def process(cities, transfers, cache_name):
'lon': stop.centers[e][0],
'lat': stop.centers[e][1],
'distance': ENTRANCE_PENALTY + round(distance(
stop.centers[e], stop.center)*3.6/SPEED_TO_ENTRANCE)
stop.centers[e], stop.center)/SPEED_TO_ENTRANCE)
})
if len(stop.entrances) + len(stop.exits) == 0:
if stop.platforms:
@ -140,7 +278,7 @@ def process(cities, transfers, cache_name):
'lon': n['lon'],
'lat': n['lat'],
'distance': ENTRANCE_PENALTY + round(distance(
(n['lon'], n['lat']), stop.center)*3.6/SPEED_TO_ENTRANCE)
(n['lon'], n['lat']), stop.center)/SPEED_TO_ENTRANCE)
})
else:
for k in ('entrances', 'exits'):
@ -152,28 +290,37 @@ def process(cities, transfers, cache_name):
'distance': 60
})
m_stops.append(st)
stops[stop_id] = st
cache.add_stop(stop_id, st)
c_transfers = []
pairwise_transfers = {} # (stoparea1_uid, stoparea2_uid) -> time; uid1 < uid2
for t_set in transfers:
t = list(t_set)
for t_first in range(len(t) - 1):
for t_second in range(t_first + 1, len(t)):
if t[t_first].id in stops and t[t_second].id in stops:
c_transfers.append([
uid(t[t_first].id),
uid(t[t_second].id),
30 + round(distance(t[t_first].center,
t[t_second].center)*3.6/SPEED_ON_TRANSFER)
])
stoparea1 = t[t_first]
stoparea2 = t[t_second]
if stoparea1.id in stops and stoparea2.id in stops:
uid1 = uid(stoparea1.id)
uid2 = uid(stoparea2.id)
uid1, uid2 = sorted([uid1, uid2])
transfer_time = (TRANSFER_PENALTY
+ round(distance(stoparea1.center,
stoparea2.center)
/ SPEED_ON_TRANSFER))
pairwise_transfers[(uid1, uid2)] = transfer_time
cache.add_transfer(uid1, uid2, transfer_time)
if cache_name:
with open(cache_name, 'w', encoding='utf-8') as f:
json.dump(cache, f)
cache.provide_transfers(pairwise_transfers)
cache.save()
pairwise_transfers = [(stop1_uid, stop2_uid, transfer_time)
for (stop1_uid, stop2_uid), transfer_time
in pairwise_transfers.items()]
result = {
'stops': m_stops,
'transfers': c_transfers,
'stops': list(stops.values()),
'transfers': pairwise_transfers,
'networks': networks
}
return result

View file

@ -71,7 +71,9 @@ QNODES="railway=station station=subway =light_rail =monorail railway=subway_entr
# Running the validation
VALIDATION="$TMPDIR/validation.json"
"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q -x "$FILTERED_DATA" -l "$VALIDATION" ${MAPSME+-o "$MAPSME"} ${CITY+-c "$CITY"} ${DUMP+-d "$DUMP"} ${JSON+-j "$JSON"}
"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q -x "$FILTERED_DATA" -l "$VALIDATION" ${MAPSME+-o "$MAPSME"}\
${CITY+-c "$CITY"} ${DUMP+-d "$DUMP"} ${JSON+-j "$JSON"}\
${ELEMENTS_CACHE+-i "$ELEMENTS_CACHE"} ${CITY_CACHE+--cache "$CITY_CACHE"}
rm "$FILTERED_DATA"
# Preparing HTML files

View file

@ -23,7 +23,6 @@ RAILWAY_TYPES = set(('rail', 'light_rail', 'subway', 'narrow_gauge',
CONSTRUCTION_KEYS = ('construction', 'proposed', 'construction:railway', 'proposed:railway')
NOWHERE_STOP = (0, 0) # too far away from any metro system
transfers = []
used_entrances = set()
@ -1213,7 +1212,6 @@ class City:
def find_transfers(elements, cities):
global transfers
transfers = []
stop_area_groups = []
for el in elements: