diff --git a/checkers/compare_city_caches.py b/checkers/compare_city_caches.py
new file mode 100644
index 0000000..8fad676
--- /dev/null
+++ b/checkers/compare_city_caches.py
@@ -0,0 +1,159 @@
+"""This utility allows one to check the equivalency of city caches
+    (defined by the --cache command line parameter) of process_subways.py.
+
+    Due to the unordered nature of sets/dicts, two runs of process_subways.py,
+    even on the same input, may produce different but equivalent jsons,
+    which cannot be compared with the 'diff' command. The compare_jsons()
+    function compares two city caches, taking into account possible shuffling
+    of dict items and of items of some lists, as well as system-specific
+    subtleties. This utility is useful to ensure that code improvements
+    which must not affect the process_subways.py output really don't change it.
+"""
+
+import sys
+import json
+from itertools import chain
+
+
+def compare_jsons(cache0, cache1):
+
+    def floats_eq(a, b):
+        return abs(b - a) < 1e-13
+
+    def coords_eq(lon1, lat1, lon2, lat2):
+        return floats_eq(lon1, lon2) and floats_eq(lat1, lat2)
+
+    def osm_id_comparator(el):
+        return (el['osm_type'], el['osm_id'])
+
+    city_names0 = sorted(cache0.keys())
+    city_names1 = sorted(cache1.keys())
+
+    if city_names0 != city_names1:
+        print("Different list of city names!")
+        return False
+
+    for name in city_names0:
+        result0 = cache0[name]
+        result1 = cache1[name]
+
+        network0 = result0['network']
+        network1 = result1['network']
+
+        if network0['agency_id'] != network1['agency_id']:
+            print("Different agency_id:",
+                  network0['network'], network1['network'])
+            return False
+
+        # Keys to compare routes. 'name' key is omitted since RouteMaster
+        # can get its name from one of its Routes unpredictably.
+        route_keys = ('type', 'ref', 'colour', 'route_id')
+
+        route_ids0 = sorted(x['route_id'] for x in network0['routes'])
+        route_ids1 = sorted(x['route_id'] for x in network1['routes'])
+
+        if route_ids0 != route_ids1:
+            print("Different route_ids", route_ids0, route_ids1)
+            return False
+
+        routes0 = sorted(network0['routes'], key=lambda x: x['route_id'])
+        routes1 = sorted(network1['routes'], key=lambda x: x['route_id'])
+
+        for route0, route1 in zip(routes0, routes1):
+            route0_props = tuple(route0[k] for k in route_keys)
+            route1_props = tuple(route1[k] for k in route_keys)
+            if route0_props != route1_props:
+                print("Route props of", route0['route_id'], route1['route_id'],
+                      "are different:", route0_props, route1_props)
+                return False
+
+            itineraries0 = sorted(route0['itineraries'],
+                                  key=lambda x: tuple(chain(*x['stops'])))
+            itineraries1 = sorted(route1['itineraries'],
+                                  key=lambda x: tuple(chain(*x['stops'])))
+
+            for itin0, itin1 in zip(itineraries0, itineraries1):
+                if itin0['interval'] != itin1['interval']:
+                    print("Different interval:",
+                          itin0['interval'], "!=", itin1['interval'],
+                          "at route", route0['name'], route0['route_id'])
+                    return False
+                if itin0['stops'] != itin1['stops']:
+                    print("Different stops at route",
+                          route0['name'], route0['route_id'])
+                    return False
+
+        stop_ids0 = sorted(result0['stops'].keys())
+        stop_ids1 = sorted(result1['stops'].keys())
+        if stop_ids0 != stop_ids1:
+            print("Different stop_ids")
+            return False
+
+        stops0 = [v for k, v in sorted(result0['stops'].items())]
+        stops1 = [v for k, v in sorted(result1['stops'].items())]
+
+        for stop0, stop1 in zip(stops0, stops1):
+            stop0_props = tuple(stop0[k] for k in ('name', 'osm_id', 'osm_type'))
+            stop1_props = tuple(stop1[k] for k in ('name', 'osm_id', 'osm_type'))
+            if stop0_props != stop1_props:
+                print("Different stop properties:", stop0_props, stop1_props)
+                return False
+            if not coords_eq(stop0['lon'], stop0['lat'],
+                             stop1['lon'], stop1['lat']):
+                print("Different stop coordinates:",
+                      stop0_props, stop0['lon'], stop0['lat'],
+                      stop1_props, stop1['lon'], stop1['lat'])
+                return False
+
+            entrances0 = sorted(stop0['entrances'], key=osm_id_comparator)
+            entrances1 = sorted(stop1['entrances'], key=osm_id_comparator)
+            if entrances0 != entrances1:
+                print("Different stop entrances")
+                return False
+
+            exits0 = sorted(stop0['exits'], key=osm_id_comparator)
+            exits1 = sorted(stop1['exits'], key=osm_id_comparator)
+            if exits0 != exits1:
+                print("Different stop exits")
+                return False
+
+
+        if len(result0['transfers']) != len(result1['transfers']):
+            print("Different len(transfers):",
+                  len(result0['transfers']), len(result1['transfers']))
+            return False
+
+        transfers0 = [tuple(t) if t[0] < t[1] else (t[1], t[0], t[2])
+                      for t in result0['transfers']]
+        transfers1 = [tuple(t) if t[0] < t[1] else (t[1], t[0], t[2])
+                      for t in result1['transfers']]
+
+        transfers0.sort()
+        transfers1.sort()
+
+        diff_cnt = 0
+        for i, (tr0, tr1) in enumerate(zip(transfers0, transfers1)):
+            if tr0 != tr1:
+                if i == 0:
+                    print("First pair of different transfers", tr0, tr1)
+                diff_cnt += 1
+        if diff_cnt:
+            print("Number of different transfers:", diff_cnt)
+            return False
+
+    return True
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: {} <cache1.json> <cache2.json>".format(sys.argv[0]))
+        sys.exit(1)
+
+    path0, path1 = sys.argv[1:3]
+
+    j0 = json.load(open(path0, encoding='utf-8'))
+    j1 = json.load(open(path1, encoding='utf-8'))
+
+    equal = compare_jsons(j0, j1)
+
+    print("The results are {}equal".format("" if equal else "NOT "))
\ No newline at end of file
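For illustration, here is a minimal sketch of the equivalence the docstring describes: two hand-made caches that differ only in the order of transfer endpoints still compare as equal. Every name and value below is invented for the example and is far smaller than a real cache produced with --cache; the import assumes the checkers directory is on sys.path.

    import json
    from compare_city_caches import compare_jsons  # assumes checkers/ is on sys.path

    city = {
        'network': {'agency_id': 1, 'network': 'Demo', 'routes': [
            {'route_id': 1, 'type': 'subway', 'ref': '1', 'colour': '#ff0000',
             'name': 'Line 1',
             'itineraries': [{'interval': 150, 'stops': [[10, 0], [20, 90]]}]},
        ]},
        'stops': {'s1': {'name': 'A', 'osm_id': 10, 'osm_type': 'node',
                         'lon': 37.0, 'lat': 55.0,
                         'entrances': [], 'exits': []}},
        'transfers': [[10, 20, 60]],
    }
    other = json.loads(json.dumps(city))  # deep copy via (de)serialization
    other['transfers'] = [[20, 10, 60]]   # endpoints swapped, still equivalent
    assert compare_jsons({'Demo': city}, {'Demo': other})

On real data the checker is run from the command line on two whole cache files, e.g. python3 checkers/compare_city_caches.py cache0.json cache1.json.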
+ print("Different stops properties:", stop0_props, stop1_props) + return False + if not coords_eq(stop0['lon'], stop0['lat'], + stop1['lon'], stop1['lat']): + print("Different stops coordinates:", + stop0_props, stop0['lon'], stop0['lat'], + stop1_props, stop1['lon'], stop1['lat']) + return False + + entrances0 = sorted(stop0['entrances'], key=osm_id_comparator) + entrances1 = sorted(stop1['entrances'], key=osm_id_comparator) + if entrances0 != entrances1: + print("Different stop entrances") + return False + + exits0 = sorted(stop0['exits'], key=osm_id_comparator) + exits1 = sorted(stop1['exits'], key=osm_id_comparator) + if exits0 != exits1: + print("Different stop exits") + return False + + + if len(result0['transfers']) != len(result1['transfers']): + print("Different len(transfers):", + len(result0['transfers']), len(result1['transfers'])) + return False + + transfers0 = [tuple(t) if t[0] < t[1] else tuple([t[1], t[0], t[2]]) + for t in result0['transfers']] + transfers1 = [tuple(t) if t[0] < t[1] else tuple([t[1], t[0], t[2]]) + for t in result1['transfers']] + + transfers0.sort(key=lambda x: tuple(x)) + transfers1.sort(key=lambda x: tuple(x)) + + diff_cnt = 0 + for i, (tr0, tr1) in enumerate(zip(transfers0, transfers1)): + if tr0 != tr1: + if i == 0: + print("First pair of different transfers", tr0, tr1) + diff_cnt += 1 + if diff_cnt: + print("Different transfers number = ", diff_cnt) + return False + + return True + + +if __name__ == "__main__": + if len(sys.argv) != 3: + print("Usage: {} ".format(sys.argv[0])) + sys.exit() + + path0, path1 = sys.argv[1:3] + + j0 = json.load(open(path0, encoding='utf-8')) + j1 = json.load(open(path1, encoding='utf-8')) + + equal = compare_jsons(j0, j1) + + print("The results are {}equal".format("" if equal else "NOT ")) \ No newline at end of file diff --git a/checkers/compare_json_outputs.py b/checkers/compare_json_outputs.py index ccbd9b6..b79d141 100644 --- a/checkers/compare_json_outputs.py +++ b/checkers/compare_json_outputs.py @@ -72,12 +72,12 @@ def compare_jsons(result0, result1): for itin0, itin1 in zip(itineraries0, itineraries1): if itin0['interval'] != itin1['interval']: print("Different interval:", - f"{itin0['interval']} != {itin1['interval']}" - f" at route {route0['name']} {route0['route_id']}") + itin0['interval'], "!=", itin1['interval'], + "at route", route0['name'], route0['route_id']) return False if itin0['stops'] != itin1['stops']: - print(f"Different stops at route", - f"{route0['name']} {route0['route_id']}") + print("Different stops at route", + route0['name'], route0['route_id']) return False stop_ids0 = sorted(x['id'] for x in result0['stops']) diff --git a/processors/mapsme.py b/processors/mapsme.py index 19b3a06..c8c889a 100755 --- a/processors/mapsme.py +++ b/processors/mapsme.py @@ -48,19 +48,19 @@ def process(cities, transfers, cache_name): def is_cached_city_usable(city, city_cache_data): """Checks if cached stops and entrances still exist in osm data""" - for stop_area_id, cached_stop_area in city_cache_data['stops'].items(): - station_id = cached_stop_area['osm_type'][0] + str(cached_stop_area['osm_id']) + for stoparea_id, cached_stoparea in city_cache_data['stops'].items(): + station_id = cached_stoparea['osm_type'][0] + str(cached_stoparea['osm_id']) city_station = city.elements.get(station_id) if (not city_station or not Station.is_station(city_station, city.modes) or distance(el_center(city_station), - tuple(cached_stop_area[coord] for coord in ('lon', 'lat')) + tuple(cached_stoparea[coord] for coord in 
@@ -208,26 +212,44 @@ def process(cities, transfers, cache_name):
 
     m_stops = list(stops.values())
 
-    c_transfers = []
+    c_transfers = {}  # (stoparea1_uid, stoparea2_uid) -> time; uid1 < uid2
     for t_set in transfers:
         t = list(t_set)
         for t_first in range(len(t) - 1):
             for t_second in range(t_first + 1, len(t)):
-                if t[t_first].id in stops and t[t_second].id in stops:
-                    c_transfers.append([
-                        uid(t[t_first].id),
-                        uid(t[t_second].id),
-                        30 + round(distance(t[t_first].center,
-                                            t[t_second].center)*3.6/SPEED_ON_TRANSFER)
-                    ])
+                stoparea1 = t[t_first]
+                stoparea2 = t[t_second]
+                if stoparea1.id in stops and stoparea2.id in stops:
+                    uid1 = uid(stoparea1.id)
+                    uid2 = uid(stoparea2.id)
+                    uid1, uid2 = sorted([uid1, uid2])
+                    transfer_time = (30 + round(distance(stoparea1.center,
+                                                         stoparea2.center
+                                                         ) * 3.6/SPEED_ON_TRANSFER))
+                    c_transfers[(uid1, uid2)] = transfer_time
+                    # If a transfer is inside a good city, add it to the city's cache.
+                    for city_name in (good_city_names &
+                                      stop_cities[stoparea1.id] &
+                                      stop_cities[stoparea2.id]):
+                        cache[city_name]['transfers'].append((uid1, uid2, transfer_time))
+
+    # Some transfers may be corrupted in cities that are not good.
+    # Take them from recovered cities.
+    for city_name in recovered_city_names:
+        for stop1_uid, stop2_uid, transfer_time in cache[city_name]['transfers']:
+            if (stop1_uid, stop2_uid) not in c_transfers:
+                c_transfers[(stop1_uid, stop2_uid)] = transfer_time
 
     if cache_name:
         with open(cache_name, 'w', encoding='utf-8') as f:
-            json.dump(cache, f, ensure_ascii=False)
+            json.dump(cache, f, indent=2, ensure_ascii=False)
+
+    m_transfers = [(stop1_uid, stop2_uid, transfer_time)
+                   for (stop1_uid, stop2_uid), transfer_time in c_transfers.items()]
 
     result = {
         'stops': m_stops,
-        'transfers': c_transfers,
+        'transfers': m_transfers,
         'networks': networks
     }
     return result
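The c_transfers bookkeeping above rests on one idea: a transfer is stored under the pair of stop area uids in ascending order, so both directions of the same transfer share one key and repeated insertions deduplicate themselves. A standalone toy sketch of just that keying (the uid values 100 and 200 are made up; in mapsme.py they come from uid() applied to stoparea ids):

    # Toy model of the canonical (uid1, uid2) keying used for c_transfers.
    c_transfers = {}

    def add_transfer(uid_a, uid_b, transfer_time):
        # Smaller uid first, so A-B and B-A map to the same record.
        key = tuple(sorted((uid_a, uid_b)))
        c_transfers[key] = transfer_time

    add_transfer(200, 100, 45)  # recorded under the key (100, 200)
    add_transfer(100, 200, 45)  # same key: overwritten, no duplicate appears
    assert c_transfers == {(100, 200): 45}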
diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh
index 43ca02e..63530a4 100755
--- a/scripts/process_subways.sh
+++ b/scripts/process_subways.sh
@@ -71,7 +71,9 @@ QNODES="railway=station station=subway =light_rail =monorail railway=subway_entr
 
 # Running the validation
 VALIDATION="$TMPDIR/validation.json"
-"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q -x "$FILTERED_DATA" -l "$VALIDATION" ${MAPSME+-o "$MAPSME"} ${CITY+-c "$CITY"} ${DUMP+-d "$DUMP"} ${JSON+-j "$JSON"}
+"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q -x "$FILTERED_DATA" -l "$VALIDATION" ${MAPSME+-o "$MAPSME"} \
+  ${CITY+-c "$CITY"} ${DUMP+-d "$DUMP"} ${JSON+-j "$JSON"} \
+  ${ELEMENTS_CACHE+-i "$ELEMENTS_CACHE"} ${CITY_CACHE+--cache "$CITY_CACHE"}
 rm "$FILTERED_DATA"
 
 # Preparing HTML files
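For reference, ${CITY_CACHE+--cache "$CITY_CACHE"} relies on the POSIX ${parameter+word} expansion, which yields word only when the parameter is set. The new -i and --cache options therefore reach process_subways.py only when ELEMENTS_CACHE and CITY_CACHE are defined, which is the same mechanism the existing MAPSME, CITY, DUMP and JSON variables already use.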