Caching transfers in good cities

This commit is contained in:
Alexey Zakharenkov 2019-02-12 13:05:04 +03:00
parent c168529300
commit 5ee07c9b81
4 changed files with 212 additions and 29 deletions

View file

@ -0,0 +1,159 @@
"""This utility allows one to check equivalency of generated city caches
(defined by --cache command line parameter) of process_subways.py.
Due to the unordered nature of sets/dicts, two runs of process_subways.py
even on the same input may generate equivalent but textually different jsons,
which cannot be compared with the 'diff' command. The compare_jsons() function
compares two city_cache.json taking into account possible shuffling of
dict items and items of some lists, as well as system-specific subtleties.
This utility is useful to ensure that code improvements which must not
affect the process_subways.py output really don't change it.
"""
import sys
import json
from itertools import chain
def compare_jsons(cache0, cache1):
    """Compare two city-cache dicts for semantic equivalence.

    Each cache maps city name -> result dict with 'network', 'stops' and
    'transfers' keys (the city_cache.json layout produced by
    process_subways.py).  Lists whose order is unstable between runs
    (routes, itineraries, entrances/exits, transfers) are sorted before
    comparison, and coordinates are compared with a small tolerance.

    Prints a short description of the first difference found.
    Returns True if the caches are equivalent, False otherwise.
    """
    def floats_eq(a, b):
        # Tolerance covers system-specific float formatting subtleties.
        return abs(b - a) < 1e-13

    def coords_eq(lon1, lat1, lon2, lat2):
        return floats_eq(lon1, lon2) and floats_eq(lat1, lat2)

    def osm_id_comparator(el):
        # Stable sort key for entrance/exit dicts.
        return (el['osm_type'], el['osm_id'])

    city_names0 = sorted(cache0.keys())
    city_names1 = sorted(cache1.keys())
    if city_names0 != city_names1:
        print("Different list of city names!")
        return False
    for name in city_names0:
        result0 = cache0[name]
        result1 = cache1[name]
        network0 = result0['network']
        network1 = result1['network']
        if network0['agency_id'] != network1['agency_id']:
            print("Different agency_id:",
                  network0['network'], network1['network'])
            return False
        # Keys to compare routes. 'name' key is omitted since RouteMaster
        # can get its name from one of its Routes unpredictably.
        route_keys = ('type', 'ref', 'colour', 'route_id')
        route_ids0 = sorted(x['route_id'] for x in network0['routes'])
        route_ids1 = sorted(x['route_id'] for x in network1['routes'])
        if route_ids0 != route_ids1:
            print("Different route_ids", route_ids0, route_ids1)
            return False
        # Equal sorted id lists imply equal length, so zip() below is safe.
        routes0 = sorted(network0['routes'], key=lambda x: x['route_id'])
        routes1 = sorted(network1['routes'], key=lambda x: x['route_id'])
        for route0, route1 in zip(routes0, routes1):
            route0_props = tuple(route0[k] for k in route_keys)
            route1_props = tuple(route1[k] for k in route_keys)
            if route0_props != route1_props:
                print("Route props of ", route0['route_id'], route1['route_id'],
                      "are different:", route0_props, route1_props)
                return False
            itineraries0 = sorted(route0['itineraries'],
                                  key=lambda x: tuple(chain(*x['stops'])))
            itineraries1 = sorted(route1['itineraries'],
                                  key=lambda x: tuple(chain(*x['stops'])))
            # Fix: zip() silently truncates to the shorter list, so an
            # extra or missing itinerary would go unnoticed without this
            # explicit length check.
            if len(itineraries0) != len(itineraries1):
                print("Different number of itineraries at route",
                      route0['name'], route0['route_id'])
                return False
            for itin0, itin1 in zip(itineraries0, itineraries1):
                if itin0['interval'] != itin1['interval']:
                    print("Different interval:",
                          itin0['interval'], "!=", itin1['interval'],
                          "at route", route0['name'], route0['route_id'])
                    return False
                if itin0['stops'] != itin1['stops']:
                    print("Different stops at route",
                          route0['name'], route0['route_id'])
                    return False
        stop_ids0 = sorted(result0['stops'].keys())
        stop_ids1 = sorted(result1['stops'].keys())
        if stop_ids0 != stop_ids1:
            print("Different stop_ids")
            return False
        # Equal sorted key lists imply equal length, so zip() below is safe.
        stops0 = [v for k, v in sorted(result0['stops'].items())]
        stops1 = [v for k, v in sorted(result1['stops'].items())]
        for stop0, stop1 in zip(stops0, stops1):
            stop0_props = tuple(stop0[k] for k in ('name', 'osm_id', 'osm_type'))
            stop1_props = tuple(stop1[k] for k in ('name', 'osm_id', 'osm_type'))
            if stop0_props != stop1_props:
                print("Different stops properties:", stop0_props, stop1_props)
                return False
            if not coords_eq(stop0['lon'], stop0['lat'],
                             stop1['lon'], stop1['lat']):
                print("Different stops coordinates:",
                      stop0_props, stop0['lon'], stop0['lat'],
                      stop1_props, stop1['lon'], stop1['lat'])
                return False
            entrances0 = sorted(stop0['entrances'], key=osm_id_comparator)
            entrances1 = sorted(stop1['entrances'], key=osm_id_comparator)
            if entrances0 != entrances1:
                print("Different stop entrances")
                return False
            exits0 = sorted(stop0['exits'], key=osm_id_comparator)
            exits1 = sorted(stop1['exits'], key=osm_id_comparator)
            if exits0 != exits1:
                print("Different stop exits")
                return False
        if len(result0['transfers']) != len(result1['transfers']):
            print("Different len(transfers):",
                  len(result0['transfers']), len(result1['transfers']))
            return False
        # Normalize each transfer so the smaller stoparea uid comes first,
        # then sort: transfer order and endpoint order are both unstable.
        transfers0 = [tuple(t) if t[0] < t[1] else tuple([t[1], t[0], t[2]])
                      for t in result0['transfers']]
        transfers1 = [tuple(t) if t[0] < t[1] else tuple([t[1], t[0], t[2]])
                      for t in result1['transfers']]
        transfers0.sort()
        transfers1.sort()
        diff_cnt = 0
        for i, (tr0, tr1) in enumerate(zip(transfers0, transfers1)):
            if tr0 != tr1:
                if i == 0:
                    print("First pair of different transfers", tr0, tr1)
                diff_cnt += 1
        if diff_cnt:
            print("Different transfers number = ", diff_cnt)
            return False
    return True
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: {} <cache1.json> <cache2.json>".format(sys.argv[0]))
        sys.exit()
    path0, path1 = sys.argv[1:3]
    # Context managers close the files deterministically; the original
    # json.load(open(...)) form leaked both file handles.
    with open(path0, encoding='utf-8') as f0:
        j0 = json.load(f0)
    with open(path1, encoding='utf-8') as f1:
        j1 = json.load(f1)
    equal = compare_jsons(j0, j1)
    print("The results are {}equal".format("" if equal else "NOT "))

View file

@ -72,12 +72,12 @@ def compare_jsons(result0, result1):
for itin0, itin1 in zip(itineraries0, itineraries1):
if itin0['interval'] != itin1['interval']:
print("Different interval:",
f"{itin0['interval']} != {itin1['interval']}"
f" at route {route0['name']} {route0['route_id']}")
itin0['interval'], "!=", itin1['interval'],
"at route", route0['name'], route0['route_id'])
return False
if itin0['stops'] != itin1['stops']:
print(f"Different stops at route",
f"{route0['name']} {route0['route_id']}")
print("Different stops at route",
route0['name'], route0['route_id'])
return False
stop_ids0 = sorted(x['id'] for x in result0['stops'])

View file

@ -48,19 +48,19 @@ def process(cities, transfers, cache_name):
def is_cached_city_usable(city, city_cache_data):
"""Checks if cached stops and entrances still exist in osm data"""
for stop_area_id, cached_stop_area in city_cache_data['stops'].items():
station_id = cached_stop_area['osm_type'][0] + str(cached_stop_area['osm_id'])
for stoparea_id, cached_stoparea in city_cache_data['stops'].items():
station_id = cached_stoparea['osm_type'][0] + str(cached_stoparea['osm_id'])
city_station = city.elements.get(station_id)
if (not city_station or
not Station.is_station(city_station, city.modes) or
distance(el_center(city_station),
tuple(cached_stop_area[coord] for coord in ('lon', 'lat'))
tuple(cached_stoparea[coord] for coord in ('lon', 'lat'))
) > CLOSENESS_TO_CACHED_ELEMENT_THRESHOLD
):
return False
for cached_entrance in itertools.chain(cached_stop_area['entrances'],
cached_stop_area['exits']):
for cached_entrance in itertools.chain(cached_stoparea['entrances'],
cached_stoparea['exits']):
entrance_id = cached_entrance['osm_type'][0] + str(cached_entrance['osm_id'])
city_entrance = city.elements.get(entrance_id)
if (not city_entrance or
@ -69,7 +69,8 @@ def process(cities, transfers, cache_name):
) > CLOSENESS_TO_CACHED_ELEMENT_THRESHOLD
):
pass # TODO:
# return False?
# really pass (take cached entrances as they are)?
# Or return False?
# Or count broken entrances and leave only good?
# Or ignore all old entrances and use station point as entrance and exit?
@ -81,33 +82,36 @@ def process(cities, transfers, cache_name):
with open(cache_name, 'r', encoding='utf-8') as f:
cache = json.load(f)
route_stops = {} # stop_area el_id -> RouteStop instance
stops = {} # stop_area el_id -> stop jsonified data
stop_areas = {} # stoparea el_id -> StopArea instance
stops = {} # stoparea el_id -> stop jsonified data
networks = []
good_cities = [c for c in cities if c.is_good()]
good_city_names = set(c.name for c in good_cities)
recovered_city_names = set()
for city_name, city_cached_data in cache.items():
if city_name in good_city_names:
continue
# TODO: get a network, stops [[and transfers (?)]] from cache
# TODO: get a network, stops and transfers from cache
city = [c for c in cities if c.name == city_name][0]
if is_cached_city_usable(city, city_cached_data):
stops.update(city_cached_data['stops'])
networks.append(city_cached_data['network'])
print("Taking {} from cache".format(city_name))
recovered_city_names.add(city.name)
platform_nodes = {}
# One stop_area may participate in routes of different cities
stop_cities = defaultdict(set) # stop_area id -> city names
# One stoparea may participate in routes of different cities
stop_cities = defaultdict(set) # stoparea id -> city names
for city in good_cities:
network = {'network': city.name, 'routes': [], 'agency_id': city.id}
cache[city.name] = {
'network': network,
'stops': {} # stop_area el_id -> jsonified stop data
'stops': {}, # stoparea el_id -> jsonified stop data
'transfers': [] # list of tuples (stoparea1_uid, stoparea2_uid, time); uid1 < uid2
}
for route in city:
routes = {
@ -124,7 +128,7 @@ def process(cities, transfers, cache_name):
for i, variant in enumerate(route):
itin = []
for stop in variant:
route_stops[stop.stoparea.id] = stop.stoparea
stop_areas[stop.stoparea.id] = stop.stoparea
stop_cities[stop.stoparea.id].add(city.name)
itin.append([uid(stop.stoparea.id), round(stop.distance*3.6/SPEED_ON_LINE)])
# Make exits from platform nodes, if we don't have proper exits
@ -156,7 +160,7 @@ def process(cities, transfers, cache_name):
network['routes'].append(routes)
networks.append(network)
for stop_id, stop in route_stops.items():
for stop_id, stop in stop_areas.items():
st = {
'name': stop.name,
'int_name': stop.int_name,
@ -208,26 +212,44 @@ def process(cities, transfers, cache_name):
m_stops = list(stops.values())
c_transfers = []
c_transfers = {} # (stoparea1_uid, stoparea2_uid) -> time; uid1 < uid2
for t_set in transfers:
t = list(t_set)
for t_first in range(len(t) - 1):
for t_second in range(t_first + 1, len(t)):
if t[t_first].id in stops and t[t_second].id in stops:
c_transfers.append([
uid(t[t_first].id),
uid(t[t_second].id),
30 + round(distance(t[t_first].center,
t[t_second].center)*3.6/SPEED_ON_TRANSFER)
])
stoparea1 = t[t_first]
stoparea2 = t[t_second]
if stoparea1.id in stops and stoparea2.id in stops:
uid1 = uid(stoparea1.id)
uid2 = uid(stoparea2.id)
uid1, uid2 = sorted([uid1, uid2])
transfer_time = (30 + round(distance(stoparea1.center,
stoparea2.center
) * 3.6/SPEED_ON_TRANSFER))
c_transfers[(uid1, uid2)] = transfer_time
# If a transfer is inside a good city, add it to the city's cache.
for city_name in (good_city_names &
stop_cities[stoparea1.id] &
stop_cities[stoparea2.id]):
cache[city_name]['transfers'].append((uid1, uid2, transfer_time))
# Some transfers may be corrupted in not good cities.
# Take them from recovered cities.
for city_name in recovered_city_names:
for stop1_uid, stop2_uid, transfer_time in cache[city_name]['transfers']:
if (stop1_uid, stop2_uid) not in c_transfers:
c_transfers[(stop1_uid, stop2_uid)] = transfer_time
if cache_name:
with open(cache_name, 'w', encoding='utf-8') as f:
json.dump(cache, f, ensure_ascii=False)
json.dump(cache, f, indent=2, ensure_ascii=False)
m_transfers = [(stop1_uid, stop2_uid, transfer_time)
for (stop1_uid, stop2_uid), transfer_time in c_transfers.items()]
result = {
'stops': m_stops,
'transfers': c_transfers,
'transfers': m_transfers,
'networks': networks
}
return result

View file

@ -71,7 +71,9 @@ QNODES="railway=station station=subway =light_rail =monorail railway=subway_entr
# Running the validation
VALIDATION="$TMPDIR/validation.json"
"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q -x "$FILTERED_DATA" -l "$VALIDATION" ${MAPSME+-o "$MAPSME"} ${CITY+-c "$CITY"} ${DUMP+-d "$DUMP"} ${JSON+-j "$JSON"}
"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q -x "$FILTERED_DATA" -l "$VALIDATION" ${MAPSME+-o "$MAPSME"}\
${CITY+-c "$CITY"} ${DUMP+-d "$DUMP"} ${JSON+-j "$JSON"}\
${ELEMENTS_CACHE+-i "$ELEMENTS_CACHE"} ${CITY_CACHE+--cache "$CITY_CACHE"}
rm "$FILTERED_DATA"
# Preparing HTML files