From ed95c94aac1a13b634dbe210f568301ceb546082 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Fri, 2 Jun 2023 14:26:57 +0300 Subject: [PATCH 01/15] Unify tests --- subway_structure.py | 2 +- tests/README.md | 13 + tests/assets/kuntsevskaya_centers.json | 28 -- tests/assets/tiny_world.osm | 217 +++++++++++ tests/assets/tiny_world_gtfs.zip | Bin 0 -> 4775 bytes tests/sample_data_for_build_tracks.py | 154 ++++++-- ... => sample_data_for_center_calculation.py} | 18 +- tests/sample_data_for_error_messages.py | 49 ++- tests/sample_data_for_outputs.py | 345 ++++++++++++++++++ tests/test_build_tracks.py | 71 ++-- tests/test_center_calculation.py | 29 +- tests/test_error_messages.py | 17 +- tests/test_gtfs_processor.py | 73 +++- tests/test_storage.py | 26 ++ tests/util.py | 213 +++++++++-- 15 files changed, 1085 insertions(+), 170 deletions(-) create mode 100644 tests/README.md delete mode 100644 tests/assets/kuntsevskaya_centers.json create mode 100644 tests/assets/tiny_world.osm create mode 100644 tests/assets/tiny_world_gtfs.zip rename tests/{assets/kuntsevskaya_transfer.osm => sample_data_for_center_calculation.py} (84%) create mode 100644 tests/sample_data_for_outputs.py create mode 100644 tests/test_storage.py diff --git a/subway_structure.py b/subway_structure.py index 823aea6..bb38f85 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -673,7 +673,7 @@ class Route: self.stops = [] # List of RouteStop # Would be a list of (lon, lat) for the longest stretch. Can be empty. self.tracks = None - # Index of the fist stop that is located on/near the self.tracks + # Index of the first stop that is located on/near the self.tracks self.first_stop_on_rails_index = None # Index of the last stop that is located on/near the self.tracks self.last_stop_on_rails_index = None diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..d6da466 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,13 @@ +To perform tests manually, run this command from the top directory +of the repository: + +```bash +python -m unittest discover tests +``` + +or simply + +```bash +python -m unittest +``` + diff --git a/tests/assets/kuntsevskaya_centers.json b/tests/assets/kuntsevskaya_centers.json deleted file mode 100644 index 36317ec..0000000 --- a/tests/assets/kuntsevskaya_centers.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "w38836456": { - "lat": 55.73064775, - "lon": 37.446065950000005 - }, - "w489951237": { - "lat": 55.730760724999996, - "lon": 37.44602055 - }, - "r7588527": { - "lat": 55.73066371666667, - "lon": 37.44604881666667 - }, - "r7588528": { - "lat": 55.73075192499999, - "lon": 37.44609837 - }, - "r7588561": { - "lat": 55.73070782083333, - "lon": 37.44607359333334 - }, - "r13426423": { - "lat": 55.730760724999996, - "lon": 37.44602055 - }, - "r100": null, - "r101": null -} diff --git a/tests/assets/tiny_world.osm b/tests/assets/tiny_world.osm new file mode 100644 index 0000000..6ee2096 --- /dev/null +++ b/tests/assets/tiny_world.osm @@ -0,0 +1,217 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/assets/tiny_world_gtfs.zip b/tests/assets/tiny_world_gtfs.zip new file mode 100644 
index 0000000000000000000000000000000000000000..ef7a66a7a36f903560f0d85b419ab67255481f21 GIT binary patch literal 4775 zcma)AOOG2x5Oy9Q#ljGYoREuyKi?|z=&zkX>jpho)I zd|#S^^ku50E)Y`AzLDDEqiqgbOmNpt%f1-T+eU$Fa(H$&N7$^^))rz(Zb67vbT%him zxLL7U(VBV%@$cNjf3reQ0v%|#qHDJozrQ$pA2y?Q4R$MSvg?|yF!m&gSYKk2-rR4D zj*DcS$*5(_II;9X^Nf~0O!MfNh+?C6EaY@!Od4%;VwXCKOi?5cQzJsetSwvbbP4|u zLCLWxmO4%IQxl7^bb6}K3T0=!Gk$g%b7t^UY*!)tfH~6jSYu&vM=W1es0DNQ*=6`* zed$KSu7wvr+3u`m&A8S0}vHi~E1A(>+T+oif(<#sD8>QS=_d;!9J z6&<)*rjBC^`wExbHZ>3sTM&afp48-p^q_#! zlGM#vKQ6w1@HKK3wKvcrotiAxmg=ciJ+RF&d8W#}DX2$cM&ZUYo4sXXWfK|~y4+hP z_KYY>jp``YrBRT}#AxW!ER{g5zs1a>K&Hl2*E`Rjz5C$uU_k9nmtL1tdvX<1 zRV>=t znT6a{9Es_i$^6Nb-3=`ACl~cQ!gyZt8B3W59Dg0ceSKdTS4;?#~Mqal}R6kg$qZuCm|=GUCXV!V*TPN)DJ z4$fYmL(+uPF9t~(bNE|H0@=E%SO_O$nWNQkR#uSL)MKe|i0=xuU^3=86mL@;4w59C za2&!@=_`V8G7WIHAd_ooWS5C-$PdN=&<)2M`_)#bQwQq4p69oI`Qiy)3$+m%LRVI- za22Nrzq0ujrGwl~%Df@nA_;ukB%_GOFm_tFrIe+dAU)-KQ%p&YXGS$89&Er#Er3?eEZ0L_G^$o;neXv @@ -37,7 +38,11 @@ sample_networks = { """, - "num_stations": 2, + "cities_info": [ + { + "num_stations": 2, + }, + ], "tracks": [], "extended_tracks": [ (0.0, 0.0), @@ -55,7 +60,8 @@ sample_networks = { "positions_on_rails": [], }, }, - "Only 2 stations connected with rails": { + { + "name": "Only 2 stations connected with rails", "xml": """ @@ -100,7 +106,11 @@ sample_networks = { """, - "num_stations": 2, + "cities_info": [ + { + "num_stations": 2, + }, + ], "tracks": [ (0.0, 0.0), (1.0, 0.0), @@ -124,7 +134,8 @@ sample_networks = { "positions_on_rails": [[0], [1]], }, }, - "Only 6 stations, no rails": { + { + "name": "Only 6 stations, no rails", "xml": """ @@ -190,7 +201,11 @@ sample_networks = { """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [], "extended_tracks": [ (0.0, 0.0), @@ -212,7 +227,8 @@ sample_networks = { "positions_on_rails": [], }, }, - "One rail line connecting all stations": { + { + "name": "One rail line connecting all stations", "xml": """ @@ -289,7 +305,11 @@ sample_networks = { """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (0.0, 0.0), (1.0, 0.0), @@ -325,7 +345,8 @@ sample_networks = { "positions_on_rails": [[0], [1], [2], [3], [4], [5]], }, }, - "One rail line connecting all stations except the last": { + { + "name": "One rail line connecting all stations except the last", "xml": """ @@ -401,7 +422,11 @@ sample_networks = { """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (0.0, 0.0), (1.0, 0.0), @@ -435,7 +460,8 @@ sample_networks = { "positions_on_rails": [[0], [1], [2], [3], [4]], }, }, - "One rail line connecting all stations except the fist": { + { + "name": "One rail line connecting all stations except the first", "xml": """ @@ -511,7 +537,11 @@ sample_networks = { """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (1.0, 0.0), (2.0, 0.0), @@ -545,7 +575,11 @@ sample_networks = { "positions_on_rails": [[0], [1], [2], [3], [4]], }, }, - "One rail line connecting all stations except the fist and the last": { + { + "name": ( + "One rail line connecting all stations " + "except the first and the last", + ), "xml": """ @@ -620,7 +654,11 @@ sample_networks = { """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (1.0, 0.0), (2.0, 0.0), @@ -652,7 +690,8 @@ sample_networks = { "positions_on_rails": [[0], [1], [2], [3]], }, }, - "One rail line connecting only 2 first stations": { + { + "name": "One rail line connecting 
only 2 first stations", "xml": """ @@ -725,7 +764,11 @@ sample_networks = { """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (0.0, 0.0), (1.0, 0.0), @@ -753,7 +796,8 @@ sample_networks = { "positions_on_rails": [[0], [1]], }, }, - "One rail line connecting only 2 last stations": { + { + "name": "One rail line connecting only 2 last stations", "xml": """ @@ -826,7 +870,11 @@ sample_networks = { """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (4.0, 0.0), (5.0, 0.0), @@ -854,7 +902,8 @@ sample_networks = { "positions_on_rails": [[0], [1]], }, }, - "One rail connecting all stations and protruding at both ends": { + { + "name": "One rail connecting all stations and protruding at both ends", "xml": """ @@ -937,7 +986,11 @@ sample_networks = { """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (-1.0, 0.0), (0.0, 0.0), @@ -977,10 +1030,11 @@ sample_networks = { "positions_on_rails": [[1], [2], [3], [4], [5], [6]], }, }, - ( - "Several rails with reversed order for backward route, " - "connecting all stations and protruding at both ends" - ): { + { + "name": ( + "Several rails with reversed order for backward route, " + "connecting all stations and protruding at both ends" + ), "xml": """ @@ -1069,7 +1123,11 @@ sample_networks = { """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (-1.0, 0.0), (0.0, 0.0), @@ -1109,10 +1167,11 @@ sample_networks = { "positions_on_rails": [[1], [2], [3], [4], [5], [6]], }, }, - ( - "One rail laying near all stations requiring station projecting, " - "protruding at both ends" - ): { + { + "name": ( + "One rail laying near all stations requiring station projecting, " + "protruding at both ends" + ), "xml": """ @@ -1189,7 +1248,11 @@ sample_networks = { """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (-1.0, 0.0), (6.0, 0.0), @@ -1227,7 +1290,8 @@ sample_networks = { ], }, }, - "One rail laying near all stations except the first and last": { + { + "name": "One rail laying near all stations except the first and last", "xml": """ @@ -1304,7 +1368,11 @@ sample_networks = { """, - "num_stations": 6, + "cities_info": [ + { + "num_stations": 6, + }, + ], "tracks": [ (1.0, 0.0), (4.0, 0.0), @@ -1330,7 +1398,8 @@ sample_networks = { "positions_on_rails": [[0], [1 / 3], [2 / 3], [1]], }, }, - "Circle route without rails": { + { + "name": "Circle route without rails", "xml": """ @@ -1377,7 +1446,11 @@ sample_networks = { """, - "num_stations": 4, + "cities_info": [ + { + "num_stations": 4, + }, + ], "tracks": [], "extended_tracks": [ (0.0, 0.0), @@ -1398,7 +1471,8 @@ sample_networks = { "positions_on_rails": [], }, }, - "Circle route with closed rail line connecting all stations": { + { + "name": "Circle route with closed rail line connecting all stations", "xml": """ @@ -1455,7 +1529,11 @@ sample_networks = { """, - "num_stations": 4, + "cities_info": [ + { + "num_stations": 4, + }, + ], "tracks": [ (0.0, 0.0), (0.0, 1.0), @@ -1488,4 +1566,4 @@ sample_networks = { "positions_on_rails": [[0, 4], [1], [2], [3], [0, 4]], }, }, -} +] diff --git a/tests/assets/kuntsevskaya_transfer.osm b/tests/sample_data_for_center_calculation.py similarity index 84% rename from tests/assets/kuntsevskaya_transfer.osm rename to tests/sample_data_for_center_calculation.py index 48bf044..49ab368 100644 --- a/tests/assets/kuntsevskaya_transfer.osm +++ 
b/tests/sample_data_for_center_calculation.py @@ -1,4 +1,7 @@ - +metro_samples = [ + { + "name": "Transfer at Kuntsevskaya", + "xml": """ @@ -80,3 +83,16 @@ +""", # noqa: E501 + "expected_centers": { + "w38836456": {"lat": 55.73064775, "lon": 37.446065950000005}, + "w489951237": {"lat": 55.730760724999996, "lon": 37.44602055}, + "r7588527": {"lat": 55.73066371666667, "lon": 37.44604881666667}, + "r7588528": {"lat": 55.73075192499999, "lon": 37.44609837}, + "r7588561": {"lat": 55.73070782083333, "lon": 37.44607359333334}, + "r13426423": {"lat": 55.730760724999996, "lon": 37.44602055}, + "r100": None, + "r101": None, + }, + }, +] diff --git a/tests/sample_data_for_error_messages.py b/tests/sample_data_for_error_messages.py index 9d5c5fc..9bea1c7 100644 --- a/tests/sample_data_for_error_messages.py +++ b/tests/sample_data_for_error_messages.py @@ -1,5 +1,6 @@ -sample_networks = { - "No errors": { +metro_samples = [ + { + "name": "No errors", "xml": """ @@ -38,7 +39,11 @@ sample_networks = { """, - "num_stations": 2, + "cities_info": [ + { + "num_stations": 2, + }, + ], "num_lines": 1, "num_light_lines": 0, "num_interchanges": 0, @@ -46,7 +51,8 @@ sample_networks = { "warnings": [], "notices": [], }, - "Bad station order": { + { + "name": "Bad station order", "xml": """ @@ -99,7 +105,11 @@ sample_networks = { """, - "num_stations": 4, + "cities_info": [ + { + "num_stations": 4, + }, + ], "num_lines": 1, "num_light_lines": 0, "num_interchanges": 0, @@ -112,7 +122,8 @@ sample_networks = { "warnings": [], "notices": [], }, - "Angle < 20 degrees": { + { + "name": "Angle < 20 degrees", "xml": """ @@ -159,7 +170,11 @@ sample_networks = { """, - "num_stations": 3, + "cities_info": [ + { + "num_stations": 3, + }, + ], "num_lines": 1, "num_light_lines": 0, "num_interchanges": 0, @@ -172,7 +187,8 @@ sample_networks = { "warnings": [], "notices": [], }, - "Angle between 20 and 45 degrees": { + { + "name": "Angle between 20 and 45 degrees", "xml": """ @@ -219,7 +235,11 @@ sample_networks = { """, - "num_stations": 3, + "cities_info": [ + { + "num_stations": 3, + }, + ], "num_lines": 1, "num_light_lines": 0, "num_interchanges": 0, @@ -232,7 +252,8 @@ sample_networks = { 'is too narrow, 27 degrees (relation 2, "Backward")', ], }, - "Stops unordered along tracks provided each angle > 45 degrees": { + { + "name": "Unordered stops provided each angle > 45 degrees", "xml": """ @@ -300,7 +321,11 @@ sample_networks = { """, - "num_stations": 4, + "cities_info": [ + { + "num_stations": 4, + }, + ], "num_lines": 1, "num_light_lines": 0, "num_interchanges": 0, @@ -313,4 +338,4 @@ sample_networks = { "warnings": [], "notices": [], }, -} +] diff --git a/tests/sample_data_for_outputs.py b/tests/sample_data_for_outputs.py new file mode 100644 index 0000000..3c2a590 --- /dev/null +++ b/tests/sample_data_for_outputs.py @@ -0,0 +1,345 @@ +metro_samples = [ + { + "name": "tiny_world", + "xml_file": """assets/tiny_world.osm""", + "cities_info": [ + { + "id": 1, + "name": "Intersecting 2 metro lines", + "country": "World", + "continent": "Africa", + "num_stations": 6, + "num_lines": 2, + "num_light_lines": 0, + "num_interchanges": 1, + "bbox": "-179, -89, 179, 89", + "networks": "network-1", + }, + { + "id": 2, + "name": "One light rail line", + "country": "World", + "continent": "Africa", + "num_stations": 2, + "num_lines": 0, + "num_light_lines": 1, + "num_interchanges": 0, + "bbox": "-179, -89, 179, 89", + "networks": "network-2", + }, + ], + "gtfs_file": "assets/tiny_world_gtfs.zip", + "json_dump": """ +{ + "stopareas": { 
+ "n1": { + "id": "n1", + "center": [ + 0, + 0 + ], + "name": "Station 1", + "entrances": [] + }, + "r1": { + "id": "r1", + "center": [ + 0.00470373068, + 0.0047037307 + ], + "name": "Station 2", + "entrances": [] + }, + "r3": { + "id": "r3", + "center": [ + 0.01012040581, + 0.0097589171 + ], + "name": "Station 3", + "entrances": [] + }, + "n4": { + "id": "n4", + "center": [ + 0, + 0.01 + ], + "name": "Station 4", + "entrances": [] + }, + "r2": { + "id": "r2", + "center": [ + 0.0047718624, + 0.00514739839 + ], + "name": "Station 5", + "entrances": [] + }, + "n6": { + "id": "n6", + "center": [ + 0.01, + 0 + ], + "name": "Station 6", + "entrances": [] + }, + "r4": { + "id": "r4", + "center": [ + 0.009716854315, + 0.010286367745 + ], + "name": "Station 7", + "entrances": [] + }, + "r16": { + "id": "r16", + "center": [ + 0.012405493905, + 0.014377764559999999 + ], + "name": "Station 8", + "entrances": [] + } + }, + "networks": { + "Intersecting 2 metro lines": { + "id": 1, + "name": "Intersecting 2 metro lines", + "routes": [ + { + "id": "r15", + "mode": "subway", + "ref": "1", + "name": "Blue Line", + "colour": "#0000ff", + "infill": null, + "itineraries": [ + { + "id": "r7", + "tracks": [ + [ + 0, + 0 + ], + [ + 0.00470373068, + 0.0047037307 + ], + [ + 0.009939661455227341, + 0.009939661455455193 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "stops": [ + { + "stoparea_id": "n1", + "distance": 0 + }, + { + "stoparea_id": "r1", + "distance": 741 + }, + { + "stoparea_id": "r3", + "distance": 1565 + } + ] + }, + { + "id": "r8", + "tracks": [ + [ + 0.009939661455227341, + 0.009939661455455193 + ], + [ + 0.00470373068, + 0.0047037307 + ], + [ + 0, + 0 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "stops": [ + { + "stoparea_id": "r3", + "distance": 0 + }, + { + "stoparea_id": "r1", + "distance": 824 + }, + { + "stoparea_id": "n1", + "distance": 1565 + } + ] + } + ] + }, + { + "id": "r14", + "mode": "subway", + "ref": "2", + "name": "Red Line", + "colour": "#ff0000", + "infill": null, + "itineraries": [ + { + "id": "r12", + "tracks": [ + [ + 0, + 0.01 + ], + [ + 0.01, + 0 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "stops": [ + { + "stoparea_id": "n4", + "distance": 0 + }, + { + "stoparea_id": "r2", + "distance": 758 + }, + { + "stoparea_id": "n6", + "distance": 1575 + } + ] + }, + { + "id": "r13", + "tracks": [ + [ + 0.01, + 0 + ], + [ + 0, + 0.01 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "stops": [ + { + "stoparea_id": "n6", + "distance": 0 + }, + { + "stoparea_id": "r2", + "distance": 817 + }, + { + "stoparea_id": "n4", + "distance": 1575 + } + ] + } + ] + } + ] + }, + "One light rail line": { + "id": 2, + "name": "One light rail line", + "routes": [ + { + "id": "r11", + "mode": "light_rail", + "ref": "LR", + "name": "LR Line", + "colour": "#a52a2a", + "infill": "#ffffff", + "itineraries": [ + { + "id": "r9", + "tracks": [ + [ + 0.00976752835, + 0.01025306758 + ], + [ + 0.01245616794, + 0.01434446439 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "stops": [ + { + "stoparea_id": "r4", + "distance": 0 + }, + { + "stoparea_id": "r16", + "distance": 545 + } + ] + }, + { + "id": "r10", + "tracks": [ + [ + 0.012321033122529725, + 0.014359650255679167 + ], + [ + 0.00966618028, + 0.01031966791 + ] + ], + "start_time": null, + "end_time": null, + "interval": null, + "stops": [ + { + "stoparea_id": "r16", + "distance": 0 + }, + { + "stoparea_id": "r4", + "distance": 
538 + } + ] + } + ] + } + ] + } + }, + "transfers": [ + [ + "r1", + "r2" + ], + [ + "r3", + "r4" + ] + ] +} +""", + }, +] diff --git a/tests/test_build_tracks.py b/tests/test_build_tracks.py index 14ea86b..a1b6a6c 100644 --- a/tests/test_build_tracks.py +++ b/tests/test_build_tracks.py @@ -1,24 +1,13 @@ -""" -To perform tests manually, run this command from the top directory -of the repository: - -> python -m unittest discover tests - -or simply - -> python -m unittest -""" - - -from tests.sample_data_for_build_tracks import sample_networks +from tests.sample_data_for_build_tracks import metro_samples from tests.util import TestCase class TestOneRouteTracks(TestCase): """Test tracks extending and truncating on one-route networks""" - def prepare_city_routes(self, network) -> tuple: - city = self.validate_city(network) + def prepare_city_routes(self, metro_sample: dict) -> tuple: + cities, transfers = self.prepare_cities(metro_sample) + city = cities[0] self.assertTrue(city.is_good) @@ -30,56 +19,56 @@ class TestOneRouteTracks(TestCase): return fwd_route, bwd_route - def _test_tracks_extending_for_network(self, network_data): - fwd_route, bwd_route = self.prepare_city_routes(network_data) + def _test_tracks_extending_for_network(self, metro_sample: dict) -> None: + fwd_route, bwd_route = self.prepare_city_routes(metro_sample) self.assertEqual( fwd_route.tracks, - network_data["tracks"], + metro_sample["tracks"], "Wrong tracks", ) extended_tracks = fwd_route.get_extended_tracks() self.assertEqual( extended_tracks, - network_data["extended_tracks"], + metro_sample["extended_tracks"], "Wrong tracks after extending", ) self.assertEqual( bwd_route.tracks, - network_data["tracks"][::-1], + metro_sample["tracks"][::-1], "Wrong backward tracks", ) extended_tracks = bwd_route.get_extended_tracks() self.assertEqual( extended_tracks, - network_data["extended_tracks"][::-1], + metro_sample["extended_tracks"][::-1], "Wrong backward tracks after extending", ) - def _test_tracks_truncating_for_network(self, network_data): - fwd_route, bwd_route = self.prepare_city_routes(network_data) + def _test_tracks_truncating_for_network(self, metro_sample: dict) -> None: + fwd_route, bwd_route = self.prepare_city_routes(metro_sample) truncated_tracks = fwd_route.get_truncated_tracks(fwd_route.tracks) self.assertEqual( truncated_tracks, - network_data["truncated_tracks"], + metro_sample["truncated_tracks"], "Wrong tracks after truncating", ) truncated_tracks = bwd_route.get_truncated_tracks(bwd_route.tracks) self.assertEqual( truncated_tracks, - network_data["truncated_tracks"][::-1], + metro_sample["truncated_tracks"][::-1], "Wrong backward tracks after truncating", ) - def _test_stop_positions_on_rails_for_network(self, network_data): - fwd_route, bwd_route = self.prepare_city_routes(network_data) + def _test_stop_positions_on_rails_for_network(self, sample: dict) -> None: + fwd_route, bwd_route = self.prepare_city_routes(sample) for route, route_label in zip( (fwd_route, bwd_route), ("forward", "backward") ): - route_data = network_data[route_label] + route_data = sample[route_label] for attr in ( "first_stop_on_rails_index", @@ -97,21 +86,27 @@ class TestOneRouteTracks(TestCase): rs.positions_on_rails for rs in route.stops[first_ind : last_ind + 1] # noqa E203 ] - self.assertListAlmostEqual( + self.assertSequenceAlmostEqual( positions_on_rails, route_data["positions_on_rails"] ) def test_tracks_extending(self) -> None: - for network_name, network_data in sample_networks.items(): - with 
self.subTest(msg=network_name): - self._test_tracks_extending_for_network(network_data) + for sample in metro_samples: + sample_name = sample["name"] + sample["cities_info"][0]["name"] = sample_name + with self.subTest(msg=sample_name): + self._test_tracks_extending_for_network(sample) def test_tracks_truncating(self) -> None: - for network_name, network_data in sample_networks.items(): - with self.subTest(msg=network_name): - self._test_tracks_truncating_for_network(network_data) + for sample in metro_samples: + sample_name = sample["name"] + sample["cities_info"][0]["name"] = sample_name + with self.subTest(msg=sample_name): + self._test_tracks_truncating_for_network(sample) def test_stop_position_on_rails(self) -> None: - for network_name, network_data in sample_networks.items(): - with self.subTest(msg=network_name): - self._test_stop_positions_on_rails_for_network(network_data) + for sample in metro_samples: + sample_name = sample["name"] + sample["cities_info"][0]["name"] = sample_name + with self.subTest(msg=sample_name): + self._test_stop_positions_on_rails_for_network(sample) diff --git a/tests/test_center_calculation.py b/tests/test_center_calculation.py index 4f01a3c..0e42360 100644 --- a/tests/test_center_calculation.py +++ b/tests/test_center_calculation.py @@ -1,28 +1,28 @@ -import json -from pathlib import Path +import io from unittest import TestCase from process_subways import calculate_centers from subway_io import load_xml +from tests.sample_data_for_center_calculation import metro_samples class TestCenterCalculation(TestCase): """Test center calculation. Test data [should] contain among others the following edge cases: - - an empty relation. It's element should not obtain "center" key. - - relation as member of relation, the child relation following the parent - in the OSM XML file. + - an empty relation. Its element should not obtain "center" key. + - relation as member of another relation, the child relation following + the parent in the OSM XML. - relation with incomplete members (broken references). - relations with cyclic references. 
""" - ASSETS_PATH = Path(__file__).resolve().parent / "assets" - OSM_DATA = str(ASSETS_PATH / "kuntsevskaya_transfer.osm") - CORRECT_CENTERS = str(ASSETS_PATH / "kuntsevskaya_centers.json") - - def test__calculate_centers(self) -> None: - elements = load_xml(self.OSM_DATA) + def test_calculate_centers(self) -> None: + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test_calculate_centers_for_sample(sample) + def _test_calculate_centers_for_sample(self, metro_sample: dict) -> None: + elements = load_xml(io.BytesIO(metro_sample["xml"].encode())) calculate_centers(elements) elements_dict = { @@ -36,12 +36,11 @@ class TestCenterCalculation(TestCase): if "center" in el } - with open(self.CORRECT_CENTERS) as f: - correct_centers = json.load(f) + expected_centers = metro_sample["expected_centers"] - self.assertTrue(set(calculated_centers).issubset(correct_centers)) + self.assertTrue(set(calculated_centers).issubset(expected_centers)) - for k, correct_center in correct_centers.items(): + for k, correct_center in expected_centers.items(): if correct_center is None: self.assertNotIn("center", elements_dict[k]) else: diff --git a/tests/test_error_messages.py b/tests/test_error_messages.py index 12a5583..aee6f48 100644 --- a/tests/test_error_messages.py +++ b/tests/test_error_messages.py @@ -1,4 +1,4 @@ -from tests.sample_data_for_error_messages import sample_networks +from tests.sample_data_for_error_messages import metro_samples from tests.util import TestCase @@ -7,16 +7,19 @@ class TestValidationMessages(TestCase): on different types of errors in input OSM data. """ - def _test_validation_messages_for_network(self, network_data): - city = self.validate_city(network_data) + def _test_validation_messages_for_network( + self, metro_sample: dict + ) -> None: + cities, transfers = self.prepare_cities(metro_sample) + city = cities[0] for err_level in ("errors", "warnings", "notices"): self.assertListEqual( sorted(getattr(city, err_level)), - sorted(network_data[err_level]), + sorted(metro_sample[err_level]), ) def test_validation_messages(self) -> None: - for network_name, network_data in sample_networks.items(): - with self.subTest(msg=network_name): - self._test_validation_messages_for_network(network_data) + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test_validation_messages_for_network(sample) diff --git a/tests/test_gtfs_processor.py b/tests/test_gtfs_processor.py index 5a234e8..86d1cac 100644 --- a/tests/test_gtfs_processor.py +++ b/tests/test_gtfs_processor.py @@ -1,9 +1,13 @@ -from unittest import TestCase +import codecs +import csv +from functools import partial +from pathlib import Path +from zipfile import ZipFile -from processors.gtfs import ( - dict_to_row, - GTFS_COLUMNS, -) +from processors._common import transit_to_dict +from processors.gtfs import dict_to_row, GTFS_COLUMNS, transit_data_to_gtfs +from tests.util import TestCase +from tests.sample_data_for_outputs import metro_samples class TestGTFS(TestCase): @@ -94,3 +98,62 @@ class TestGTFS(TestCase): self.assertListEqual( dict_to_row(shape["shape_data"], "shapes"), shape["answer"] ) + + def test__transit_data_to_gtfs(self) -> None: + for metro_sample in metro_samples: + cities, transfers = self.prepare_cities(metro_sample) + calculated_transit_data = transit_to_dict(cities, transfers) + calculated_gtfs_data = transit_data_to_gtfs( + calculated_transit_data + ) + + control_gtfs_data = self._readGtfs( + Path(__file__).resolve().parent / metro_sample["gtfs_file"] + ) + 
self._compareGtfs(calculated_gtfs_data, control_gtfs_data) + + @staticmethod + def _readGtfs(filepath: str) -> dict: + gtfs_data = dict() + with ZipFile(filepath) as zf: + for gtfs_feature in GTFS_COLUMNS: + with zf.open(f"{gtfs_feature}.txt") as f: + reader = csv.reader(codecs.iterdecode(f, "utf-8")) + next(reader) # read header + rows = list(reader) + gtfs_data[gtfs_feature] = rows + return gtfs_data + + def _compareGtfs( + self, calculated_gtfs_data: dict, control_gtfs_data: dict + ) -> None: + for gtfs_feature in GTFS_COLUMNS: + calculated_rows = sorted( + map( + partial(dict_to_row, record_type=gtfs_feature), + calculated_gtfs_data[gtfs_feature], + ) + ) + control_rows = sorted(control_gtfs_data[gtfs_feature]) + + self.assertEqual(len(calculated_rows), len(control_rows)) + + for i, (calculated_row, control_row) in enumerate( + zip(calculated_rows, control_rows) + ): + self.assertEqual( + len(calculated_row), + len(control_row), + f"Different length of {i}-th row of {gtfs_feature}", + ) + for calculated_value, control_value in zip( + calculated_row, control_row + ): + if calculated_value is None: + self.assertEqual(control_value, "", f"in {i}-th row") + else: # convert str to float/int/str + self.assertAlmostEqual( + calculated_value, + type(calculated_value)(control_value), + places=10, + ) diff --git a/tests/test_storage.py b/tests/test_storage.py new file mode 100644 index 0000000..978529f --- /dev/null +++ b/tests/test_storage.py @@ -0,0 +1,26 @@ +import json + +from processors._common import transit_to_dict +from tests.sample_data_for_outputs import metro_samples +from tests.util import TestCase, TestTransitDataMixin + + +class TestStorage(TestCase, TestTransitDataMixin): + def test_storage(self) -> None: + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test_storage_for_sample(sample) + + def _test_storage_for_sample(self, metro_sample: dict) -> None: + cities, transfers = self.prepare_cities(metro_sample) + + calculated_transit_data = transit_to_dict(cities, transfers) + + control_transit_data = json.loads(metro_sample["json_dump"]) + control_transit_data["transfers"] = set( + map(tuple, control_transit_data["transfers"]) + ) + + self.compare_transit_data( + calculated_transit_data, control_transit_data + ) diff --git a/tests/util.py b/tests/util.py index efab8c2..56b1962 100644 --- a/tests/util.py +++ b/tests/util.py @@ -1,15 +1,23 @@ import io +from collections.abc import Sequence, Mapping +from operator import itemgetter +from pathlib import Path +from typing import Any from unittest import TestCase as unittestTestCase +from process_subways import ( + add_osm_elements_to_cities, + validate_cities, + calculate_centers, +) from subway_io import load_xml -from subway_structure import City +from subway_structure import City, find_transfers class TestCase(unittestTestCase): """TestCase class for testing the Subway Validator""" CITY_TEMPLATE = { - "id": 1, "name": "Null Island", "country": "World", "continent": "Africa", @@ -21,29 +29,184 @@ class TestCase(unittestTestCase): "num_interchanges": 0, } - def validate_city(self, network) -> City: - city_data = self.CITY_TEMPLATE.copy() - for attr in self.CITY_TEMPLATE.keys(): - if attr in network: - city_data[attr] = network[attr] + @classmethod + def setUpClass(cls) -> None: + cls.city_class = City - city = City(city_data) - elements = load_xml(io.BytesIO(network["xml"].encode("utf-8"))) - for el in elements: - city.add(el) - city.extract_routes() - city.validate() - return city + def prepare_cities(self, 
metro_sample: dict) -> tuple: + """Load cities from file/string, validate them and return cities + and transfers. + """ - def assertListAlmostEqual(self, list1, list2, places=10) -> None: - if not (isinstance(list1, list) and isinstance(list2, list)): - raise RuntimeError( - f"Not lists passed to the '{self.__class__.__name__}." - "assertListAlmostEqual' method" - ) - self.assertEqual(len(list1), len(list2)) - for a, b in zip(list1, list2): - if isinstance(a, list) and isinstance(b, list): - self.assertListAlmostEqual(a, b, places) + def assign_unique_id(city_info: dict, cities_info: list[dict]) -> None: + """city_info - newly added city, cities_info - already added + cities. Check city id uniqueness / assign unique id to the city. + """ + occupied_ids = set(c["id"] for c in cities_info) + if "id" in city_info: + if city_info["id"] in occupied_ids: + raise RuntimeError("Not unique city ids in test data") else: - self.assertAlmostEqual(a, b, places) + city_info["id"] = max(occupied_ids, default=1) + 1 + + cities_given_info = metro_sample["cities_info"] + cities_info = list() + for city_given_info in cities_given_info: + city_info = self.CITY_TEMPLATE.copy() + for attr in city_given_info.keys(): + city_info[attr] = city_given_info[attr] + assign_unique_id(city_info, cities_info) + cities_info.append(city_info) + + if len(set(ci["name"] for ci in cities_info)) < len(cities_info): + raise RuntimeError("Not unique city names in test data") + + cities = list(map(self.city_class, cities_info)) + if "xml" in metro_sample: + xml_file = io.BytesIO(metro_sample["xml"].encode()) + else: + xml_file = ( + Path(__file__).resolve().parent / metro_sample["xml_file"] + ) + elements = load_xml(xml_file) + calculate_centers(elements) + add_osm_elements_to_cities(elements, cities) + validate_cities(cities) + transfers = find_transfers(elements, cities) + return cities, transfers + + def _assertAnyAlmostEqual( + self, + first: Any, + second: Any, + places: int = 10, + ignore_keys: set = None, + ) -> None: + """Dispatcher method to other "...AlmostEqual" methods + depending on argument types. + """ + if isinstance(first, Mapping): + self.assertMappingAlmostEqual(first, second, places, ignore_keys) + elif isinstance(first, Sequence) and not isinstance( + first, (str, bytes) + ): + self.assertSequenceAlmostEqual(first, second, places, ignore_keys) + else: + self.assertAlmostEqual(first, second, places) + + def assertSequenceAlmostEqual( + self, + seq1: Sequence, + seq2: Sequence, + places: int = 10, + ignore_keys: set = None, + ) -> None: + """Compare two sequences, items of numeric types being compared + approximately, containers being approx-compared recursively. + + :param: seq1 a sequence of values of any types, including collections + :param: seq2 a sequence of values of any types, including collections + :param: places number of fractional digits (passed to + assertAlmostEqual() method of parent class) + :param: ignore_keys a set of strs with keys in dictionaries + that should be ignored during recursive comparison + :return: None + """ + if not (isinstance(seq1, Sequence) and isinstance(seq2, Sequence)): + raise RuntimeError( + f"Not a sequence passed to the '{self.__class__.__name__}." 
+ "assertSequenceAlmostEqual' method" + ) + self.assertEqual(len(seq1), len(seq2)) + for a, b in zip(seq1, seq2): + self._assertAnyAlmostEqual(a, b, places, ignore_keys) + + def assertMappingAlmostEqual( + self, + d1: Mapping, + d2: Mapping, + places: int = 10, + ignore_keys: set = None, + ) -> None: + """Compare dictionaries recursively, numeric values being compared + approximately. + + :param: d1 a mapping of arbitrary key/value types, + including collections + :param: d1 a mapping of arbitrary key/value types, + including collections + :param: places number of fractional digits (passed to + assertAlmostEqual() method of parent class) + :param: ignore_keys a set of strs with keys in dictionaries + that should be ignored during recursive comparison + :return: None + """ + if not (isinstance(d1, Mapping) and isinstance(d2, Mapping)): + raise RuntimeError( + f"Not a dictionary passed to the '{self.__class__.__name__}." + "assertMappingAlmostEqual' method" + ) + + d1_keys = set(d1.keys()) + d2_keys = set(d2.keys()) + if ignore_keys: + d1_keys -= ignore_keys + d2_keys -= ignore_keys + self.assertSetEqual(d1_keys, d2_keys) + for k in d1_keys: + v1 = d1[k] + v2 = d2[k] + self._assertAnyAlmostEqual(v1, v2, places, ignore_keys) + + +class TestTransitDataMixin: + def compare_transit_data(self, td1: dict, td2: dict) -> None: + """Compare transit data td1 and td2 remembering that: + - arrays that represent sets ("routes", "itineraries", "entrances") + should be compared without order; + - all floating-point values (coordinates) should be compared + approximately. + """ + self.assertMappingAlmostEqual( + td1, + td2, + ignore_keys={"stopareas", "routes", "itineraries"}, + ) + + networks1 = td1["networks"] + networks2 = td2["networks"] + + id_cmp = itemgetter("id") + + for network_name, network_data1 in networks1.items(): + network_data2 = networks2[network_name] + routes1 = sorted(network_data1["routes"], key=id_cmp) + routes2 = sorted(network_data2["routes"], key=id_cmp) + self.assertEqual(len(routes1), len(routes2)) + for r1, r2 in zip(routes1, routes2): + self.assertMappingAlmostEqual( + r1, r2, ignore_keys={"itineraries"} + ) + its1 = sorted(r1["itineraries"], key=id_cmp) + its2 = sorted(r2["itineraries"], key=id_cmp) + self.assertEqual(len(its1), len(its2)) + for it1, it2 in zip(its1, its2): + self.assertMappingAlmostEqual(it1, it2) + + transfers1 = td1["transfers"] + transfers2 = td2["transfers"] + self.assertSetEqual(transfers1, transfers2) + + stopareas1 = td1["stopareas"] + stopareas2 = td2["stopareas"] + self.assertMappingAlmostEqual( + stopareas1, stopareas2, ignore_keys={"entrances"} + ) + + for sa_id, sa1_data in stopareas1.items(): + sa2_data = stopareas2[sa_id] + entrances1 = sorted(sa1_data["entrances"], key=id_cmp) + entrances2 = sorted(sa2_data["entrances"], key=id_cmp) + self.assertEqual(len(entrances1), len(entrances2)) + for e1, e2 in zip(entrances1, entrances2): + self.assertMappingAlmostEqual(e1, e2) From 6c796ac8c18794b80bb9d16b6784eb6fe1efdd1f Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Thu, 23 Nov 2023 10:07:54 +0300 Subject: [PATCH 02/15] Check if stations are missing/differ in backward direction of some route --- subway_structure.py | 320 +++++++-- tests/assets/route_masters.osm | 527 ++++++++++++++ tests/assets/twin_routes.osm | 578 ++++++++++++++++ tests/assets/twin_routes_with_divergence.osm | 680 +++++++++++++++++++ tests/sample_data_for_error_messages.py | 76 ++- tests/sample_data_for_outputs.py | 10 +- tests/sample_data_for_twin_routes.py | 78 +++ 
tests/test_error_messages.py | 15 +- tests/test_route_master.py | 26 + 9 files changed, 2215 insertions(+), 95 deletions(-) create mode 100644 tests/assets/route_masters.osm create mode 100644 tests/assets/twin_routes.osm create mode 100644 tests/assets/twin_routes_with_divergence.osm create mode 100644 tests/sample_data_for_twin_routes.py create mode 100644 tests/test_route_master.py diff --git a/subway_structure.py b/subway_structure.py index bb38f85..e79d213 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -12,27 +12,26 @@ ALLOWED_STATIONS_MISMATCH = 0.02 # part of total station count ALLOWED_TRANSFERS_MISMATCH = 0.07 # part of total interchanges count ALLOWED_ANGLE_BETWEEN_STOPS = 45 # in degrees DISALLOWED_ANGLE_BETWEEN_STOPS = 20 # in degrees +SUGGEST_TRANSFER_MIN_DISTANCE = 100 # in meters # If an object was moved not too far compared to previous script run, # it is likely the same object DISPLACEMENT_TOLERANCE = 300 # in meters -MODES_RAPID = set(("subway", "light_rail", "monorail", "train")) -MODES_OVERGROUND = set(("tram", "bus", "trolleybus", "aerialway", "ferry")) -DEFAULT_MODES_RAPID = set(("subway", "light_rail")) -DEFAULT_MODES_OVERGROUND = set(("tram",)) # TODO: bus and trolleybus? +MODES_RAPID = {"subway", "light_rail", "monorail", "train"} +MODES_OVERGROUND = {"tram", "bus", "trolleybus", "aerialway", "ferry"} +DEFAULT_MODES_RAPID = {"subway", "light_rail"} +DEFAULT_MODES_OVERGROUND = {"tram"} # TODO: bus and trolleybus? ALL_MODES = MODES_RAPID | MODES_OVERGROUND -RAILWAY_TYPES = set( - ( - "rail", - "light_rail", - "subway", - "narrow_gauge", - "funicular", - "monorail", - "tram", - ) -) +RAILWAY_TYPES = { + "rail", + "light_rail", + "subway", + "narrow_gauge", + "funicular", + "monorail", + "tram", +} CONSTRUCTION_KEYS = ( "construction", "proposed", @@ -49,7 +48,7 @@ START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*") def get_start_end_times(opening_hours): """Very simplified method to parse OSM opening_hours tag. We simply take the first HH:MM-HH:MM substring which is the most probable - opening hours interval for the most of weekdays. + opening hours interval for the most of the weekdays. """ start_time, end_time = None, None m = START_END_TIMES_RE.match(opening_hours) @@ -102,9 +101,9 @@ def el_center(el): if not el: return None if "lat" in el: - return (el["lon"], el["lat"]) + return el["lon"], el["lat"] elif "center" in el: - return (el["center"]["lon"], el["center"]["lat"]) + return el["center"]["lon"], el["center"]["lat"] return None @@ -485,7 +484,7 @@ class StopArea: self.center[i] /= len(self.stops) + len(self.platforms) def get_elements(self): - result = set([self.id, self.station.id]) + result = {self.id, self.station.id} result.update(self.entrances) result.update(self.exits) result.update(self.stops) @@ -1156,6 +1155,12 @@ class Route: return tracks + def are_tracks_complete(self) -> bool: + return ( + self.first_stop_on_rails_index == 0 + and self.last_stop_on_rails_index == len(self) - 1 + ) + def get_tracks_geometry(self): tracks = self.get_extended_tracks() tracks = self.get_truncated_tracks(tracks) @@ -1350,6 +1355,36 @@ class Route: ] return True + def get_end_transfers(self) -> tuple[str, str]: + """Using transfer ids because a train can arrive at different + stations within a transfer. But disregard transfer that may give + an impression of a circular route (for example, + Simonis / Elisabeth station and route 2 in Brussels). 
+ """ + return ( + (self[0].stoparea.id, self[-1].stoparea.id) + if ( + self[0].stoparea.transfer is not None + and self[0].stoparea.transfer == self[-1].stoparea.transfer + ) + else ( + self[0].stoparea.transfer or self[0].stoparea.id, + self[-1].stoparea.transfer or self[-1].stoparea.id, + ) + ) + + def get_transfers_sequence(self) -> list[str]: + """Return a list of stoparea or transfer (if not None) ids.""" + transfer_seq = [ + stop.stoparea.transfer or stop.stoparea.id for stop in self + ] + if ( + self[0].stoparea.transfer is not None + and self[0].stoparea.transfer == self[-1].stoparea.transfer + ): + transfer_seq[0], transfer_seq[-1] = self.get_end_transfers() + return transfer_seq + def __len__(self): return len(self.stops) @@ -1479,13 +1514,75 @@ class RouteMaster: else: self.interval = min(self.interval, route.interval) + # Choose minimal id for determinancy if not self.has_master and (not self.id or self.id > route.id): self.id = route.id self.routes.append(route) - if not self.best or len(route.stops) > len(self.best.stops): + if ( + not self.best + or len(route.stops) > len(self.best.stops) + or ( + # Choose route with minimal id for determinancy + len(route.stops) == len(self.best.stops) + and route.element["id"] < self.best.element["id"] + ) + ): self.best = route + def get_meaningful_routes(self) -> list[Route]: + return [route for route in self if len(route) >= 2] + + def find_twin_routes(self) -> dict[Route, Route]: + """Two routes are twins if they have the same end stations + and opposite directions, and the number of stations is + the same or almost the same. We'll then find stops that are present + in one direction and is missing in another direction - to warn. + """ + + twin_routes = {} # route => "twin" route + + for route in self.get_meaningful_routes(): + if route.is_circular: + continue # Difficult to calculate. TODO(?) in the future + if route in twin_routes: + continue + if len(route) < 2: + continue + + route_transfer_ids = set(route.get_transfers_sequence()) + ends = route.get_end_transfers() + ends_reversed = ends[::-1] + + twin_candidates = [ + r + for r in self + if not r.is_circular + and r not in twin_routes + and r.get_end_transfers() == ends_reversed + # If absolute or relative difference in station count is large, + # possibly it's an express version of a route - skip it. 
+ and ( + abs(len(r) - len(route)) <= 2 + or abs(len(r) - len(route)) / max(len(r), len(route)) + <= 0.2 + ) + ] + + if not twin_candidates: + continue + + twin_route = min( + twin_candidates, + key=lambda r: len( + route_transfer_ids ^ set(r.get_transfers_sequence()) + ), + ) + twin_routes[route] = twin_route + twin_routes[twin_route] = route + + return twin_routes + def stop_areas(self): """Returns a list of all stations on all route variants.""" seen_ids = set() @@ -1521,6 +1618,7 @@ class City: self.errors = [] self.warnings = [] self.notices = [] + self.id = None self.try_fill_int_attribute(city_data, "id") self.name = city_data["name"] self.country = city_data["country"] @@ -1555,7 +1653,7 @@ class City: else: self.modes = DEFAULT_MODES_RAPID else: - self.modes = set([x.strip() for x in networks[0].split(",")]) + self.modes = {x.strip() for x in networks[0].split(",")} # Reversing bbox so it is (xmin, ymin, xmax, ymax) bbox = city_data["bbox"].split(",") @@ -1627,7 +1725,7 @@ class City: self.warnings.append(msg) def error(self, message, el=None): - """Error if a critical problem that invalidates the city""" + """Error is a critical problem that invalidates the city.""" msg = City.log_message(message, el) self.errors.append(msg) @@ -1914,37 +2012,18 @@ class City: f"relations: {format_elid_list(not_in_sa)}" ) - def check_return_routes(self, rmaster): - variants = {} - have_return = set() - for variant in rmaster: - if len(variant) < 2: - continue - # Using transfer ids because a train can arrive at different - # stations within a transfer. But disregard transfer that may give - # an impression of a circular route (for example, - # Simonis / Elisabeth station and route 2 in Brussels) - if variant[0].stoparea.transfer == variant[-1].stoparea.transfer: - t = (variant[0].stoparea.id, variant[-1].stoparea.id) - else: - t = ( - variant[0].stoparea.transfer or variant[0].stoparea.id, - variant[-1].stoparea.transfer or variant[-1].stoparea.id, - ) - if t in variants: - continue - variants[t] = variant.element - tr = (t[1], t[0]) - if tr in variants: - have_return.add(t) - have_return.add(tr) + def check_return_routes(self, rmaster: RouteMaster) -> None: + """Check if a route has return direction, and if twin routes + miss stations. + """ + meaningful_routes = rmaster.get_meaningful_routes() - if len(variants) == 0: + if len(meaningful_routes) == 0: self.error( - "An empty route master {}. Please set construction:route " - "if it is under construction".format(rmaster.id) + f"An empty route master {rmaster.id}. 
" + "Please set construction:route if it is under construction" ) - elif len(variants) == 1: + elif len(meaningful_routes) == 1: log_function = ( self.error if not rmaster.best.is_circular else self.notice ) @@ -1954,9 +2033,144 @@ class City: rmaster.best.element, ) else: - for t, rel in variants.items(): - if t not in have_return: - self.notice("Route does not have a return direction", rel) + all_ends = { + route.get_end_transfers(): route for route in meaningful_routes + } + for route in meaningful_routes: + ends = route.get_end_transfers() + if ends[::-1] not in all_ends: + self.notice( + "Route does not have a return direction", route.element + ) + + twin_routes = rmaster.find_twin_routes() + for route1, route2 in twin_routes.items(): + if route1.id > route2.id: + continue # to process a pair of routes only once + # and to ensure the order of routes in the pair + self.alert_twin_routes_differ(route1, route2) + + def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: + """Arguments are that route1.id < route2.id""" + ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) = self.calculate_twin_routes_diff(route1, route2) + + for st in stops_missing_from_route1: + if ( + not route1.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route1.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"into the {route2.id} but not included into {route1.id}", + route1.element, + ) + + for st in stops_missing_from_route2: + if ( + not route2.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route2.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"into the {route1.id} but not included into {route2.id}", + route2.element, + ) + + for st1, st2 in stops_that_dont_match: + if ( + st1.stoparea.station == st2.stoparea.station + or distance(st1.stop, st2.stop) < SUGGEST_TRANSFER_MIN_DISTANCE + ): + self.notice( + "Should there be one stoparea or a transfer between " + f"{st1.stoparea.station.name} {st1.stop} and " + f"{st2.stoparea.station.name} {st2.stop}?", + route1.element, + ) + + @staticmethod + def calculate_twin_routes_diff(route1: Route, route2: Route) -> tuple: + """Wagner–Fischer algorithm for stops diff in two twin routes.""" + + stops1 = route1.stops + stops2 = route2.stops[::-1] + + def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: + return ( + stop1.stoparea == stop2.stoparea + or stop1.stoparea.transfer is not None + and stop1.stoparea.transfer == stop2.stoparea.transfer + ) + + d = [[0] * (len(stops2) + 1) for _ in range(len(stops1) + 1)] + d[0] = list(range(len(stops2) + 1)) + for i in range(len(stops1) + 1): + d[i][0] = i + + for i in range(1, len(stops1) + 1): + for j in range(1, len(stops2) + 1): + d[i][j] = ( + d[i - 1][j - 1] + if stops_match(stops1[i - 1], stops2[j - 1]) + else min((d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])) + 1 + ) + + stops_missing_from_route1: list[RouteStop] = [] + stops_missing_from_route2: list[RouteStop] = [] + stops_that_dont_match: list[tuple[RouteStop, RouteStop]] = [] + + i = len(stops1) + j = len(stops2) + while not (i == 0 and j == 0): + action = None + if i > 0 and j > 0: + match = 
stops_match(stops1[i - 1], stops2[j - 1]) + if match and d[i - 1][j - 1] == d[i][j]: + action = "no" + elif not match and d[i - 1][j - 1] + 1 == d[i][j]: + action = "change" + if not action and i > 0 and d[i - 1][j] + 1 == d[i][j]: + action = "add_2" + if not action and j > 0 and d[i][j - 1] + 1 == d[i][j]: + action = "add_1" + + match action: + case "add_1": + stops_missing_from_route1.append(stops2[j - 1]) + j -= 1 + case "add_2": + stops_missing_from_route2.append(stops1[i - 1]) + i -= 1 + case _: + if action == "change": + stops_that_dont_match.append( + (stops1[i - 1], stops2[j - 1]) + ) + i -= 1 + j -= 1 + return ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) def validate_lines(self): self.found_light_lines = len( diff --git a/tests/assets/route_masters.osm b/tests/assets/route_masters.osm new file mode 100644 index 0000000..0635a2b --- /dev/null +++ b/tests/assets/route_masters.osm @@ -0,0 +1,527 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/assets/twin_routes.osm b/tests/assets/twin_routes.osm new file mode 100644 index 0000000..e2e7f42 --- /dev/null +++ b/tests/assets/twin_routes.osm @@ -0,0 +1,578 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/assets/twin_routes_with_divergence.osm b/tests/assets/twin_routes_with_divergence.osm new file mode 100644 index 0000000..057cca3 --- /dev/null +++ 
b/tests/assets/twin_routes_with_divergence.osm @@ -0,0 +1,680 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/sample_data_for_error_messages.py b/tests/sample_data_for_error_messages.py index 9bea1c7..245cfbb 100644 --- a/tests/sample_data_for_error_messages.py +++ b/tests/sample_data_for_error_messages.py @@ -42,11 +42,11 @@ metro_samples = [ "cities_info": [ { "num_stations": 2, + "num_lines": 1, + "num_light_lines": 0, + "num_interchanges": 0, }, ], - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, "errors": [], "warnings": [], "notices": [], @@ -110,14 +110,9 @@ metro_samples = [ "num_stations": 4, }, ], - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, "errors": [ - 'Angle between stops around "Station 3" (2.0, 0.0) ' - 'is too narrow, 0 degrees (relation 1, "Forward")', - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 0 degrees (relation 1, "Forward")', + 'Angle between stops around "Station 3" (2.0, 0.0) is too narrow, 0 degrees (relation 1, "Forward")', # noqa: E501 + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 0 degrees (relation 1, "Forward")', # noqa: E501 ], "warnings": [], "notices": [], @@ -175,14 +170,9 @@ metro_samples = [ "num_stations": 3, }, ], - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, "errors": [ - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 11 degrees (relation 1, "Forward")', - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 11 degrees (relation 2, "Backward")', + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 11 degrees (relation 1, "Forward")', # noqa: E501 + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 11 degrees (relation 2, "Backward")', # noqa: E501 ], "warnings": [], "notices": [], @@ -240,16 +230,11 @@ metro_samples = [ "num_stations": 3, }, ], - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, "errors": [], "warnings": [], "notices": [ - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 27 degrees (relation 1, "Forward")', - 'Angle between stops around "Station 2" (1.0, 0.0) ' - 'is too narrow, 27 degrees (relation 2, 
"Backward")', + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 27 degrees (relation 1, "Forward")', # noqa: E501 + 'Angle between stops around "Station 2" (1.0, 0.0) is too narrow, 27 degrees (relation 2, "Backward")', # noqa: E501 ], }, { @@ -326,16 +311,45 @@ metro_samples = [ "num_stations": 4, }, ], - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, "errors": [ - 'Stops on tracks are unordered near "Station 2" (1.0, 0.0) ' - '(relation 1, "Forward")', - 'Stops on tracks are unordered near "Station 3" (0.0, 0.5) ' - '(relation 2, "Backward")', + 'Stops on tracks are unordered near "Station 2" (1.0, 0.0) (relation 1, "Forward")', # noqa: E501 + 'Stops on tracks are unordered near "Station 3" (0.0, 0.5) (relation 2, "Backward")', # noqa: E501 ], "warnings": [], "notices": [], }, + { + "name": ( + "Many different route masters, both on naked stations and " + "stop_positions/stop_areas/transfers, both linear and circular" + ), + "xml_file": "assets/route_masters.osm", + "cities_info": [ + { + "num_stations": (3 + 3 + 3 + 5 + 3 + 3 + 4) + + (3 + 3 + 3 + 3 + 3 + 3 + 4), + "num_lines": 7 + 7, + "num_interchanges": 0 + 1, + }, + ], + "errors": [ + 'Only one route in route_master. Please check if it needs a return route (relation 162, "03: 1-2-3")' # noqa: E501 + ], + "warnings": [], + "notices": [ + 'Route does not have a return direction (relation 155, "02: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 158, "02: 1-3 (2)")', # noqa: E501 + 'Only one route in route_master. Please check if it needs a return route (relation 159, "C: 1-3-5-1")', # noqa: E501 + 'Route does not have a return direction (relation 163, "04: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 164, "04: 2-1")', # noqa: E501 + 'Stop Station 2 (1.0, 0.0) is included into the r203 but not included into r204 (relation 204, "2: 3-1")', # noqa: E501 + 'Route does not have a return direction (relation 205, "3: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 206, "3: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 207, "4: 4-3-2-1")', # noqa: E501 + 'Route does not have a return direction (relation 208, "4: 1-2-3-4")', # noqa: E501 + 'Route does not have a return direction (relation 209, "5: 1-2-3")', # noqa: E501 + 'Route does not have a return direction (relation 210, "5: 2-1")', # noqa: E501 + 'Only one route in route_master. 
Please check if it needs a return route (relation 213, "C3: 1-2-3-8-1")', # noqa: E501 + ], + }, ] diff --git a/tests/sample_data_for_outputs.py b/tests/sample_data_for_outputs.py index 3c2a590..5419353 100644 --- a/tests/sample_data_for_outputs.py +++ b/tests/sample_data_for_outputs.py @@ -6,25 +6,17 @@ metro_samples = [ { "id": 1, "name": "Intersecting 2 metro lines", - "country": "World", - "continent": "Africa", - "num_stations": 6, + "num_stations": 4 + 2, "num_lines": 2, - "num_light_lines": 0, "num_interchanges": 1, - "bbox": "-179, -89, 179, 89", "networks": "network-1", }, { "id": 2, "name": "One light rail line", - "country": "World", - "continent": "Africa", "num_stations": 2, "num_lines": 0, "num_light_lines": 1, - "num_interchanges": 0, - "bbox": "-179, -89, 179, 89", "networks": "network-2", }, ], diff --git a/tests/sample_data_for_twin_routes.py b/tests/sample_data_for_twin_routes.py new file mode 100644 index 0000000..58b9e17 --- /dev/null +++ b/tests/sample_data_for_twin_routes.py @@ -0,0 +1,78 @@ +metro_samples = [ + { + "name": ( + "Many different routes, both on naked stations and stop_positions/stop_areas/transfers, both linear and circular" # noqa: E501 + ), + "xml_file": "assets/twin_routes.osm", + "cities_info": [ + { + "num_stations": (3 + 4 + 5 + 5) + (3 + 6 + 7 + 5 + 6 + 7 + 7), + "num_lines": 4 + 7, + "num_interchanges": 0 + 2, + }, + ], + "twin_routes": { # route master => twin routes + "r10021": {"r151": "r153", "r153": "r151"}, + "r10022": {}, + "r10023": {}, + "C": {}, + "r10001": {"r201": "r202", "r202": "r201"}, + "r10002": {}, + "r10003": {"r205": "r206", "r206": "r205"}, + "r10004": {}, + "r10005": {}, + "r10006": {}, + "C3": {}, + }, + "errors": [], + "warnings": [], + "notices": [ + 'Route does not have a return direction (relation 154, "02: 4-3")', + 'Route does not have a return direction (relation 155, "02: 1-3")', + 'Route does not have a return direction (relation 156, "02: 2-4")', + 'Route does not have a return direction (relation 157, "02: 4-1")', + 'Route does not have a return direction (relation 158, "02: 1-3 (2)")', # noqa: E501 + 'Only one route in route_master. Please check if it needs a return route (relation 159, "C: 1-2-3-4-5-1")', # noqa: E501 + 'Stop Station 4 (3.0, 0.0) is included into the r205 but not included into r206 (relation 206, "3: 7-6-5-3-2-1")', # noqa: E501 + 'Route does not have a return direction (relation 207, "4: 4-3-2-1")', # noqa: E501 + 'Route does not have a return direction (relation 208, "4: 1-2-3-4")', # noqa: E501 + 'Route does not have a return direction (relation 209, "5: 1-2-3-5-6-7")', # noqa: E501 + 'Route does not have a return direction (relation 210, "5: 6-5-3-2-1")', # noqa: E501 + 'Only one route in route_master. 
Please check if it needs a return route (relation 213, "C3: 1-2-3-5-6-7-8-1")', # noqa: E501 + ], + }, + { + "name": "Twin routes diverging for some extent", + "xml_file": "assets/twin_routes_with_divergence.osm", + "cities_info": [ + { + "num_stations": (22 + 22 + 21 + 21) * 2, + "num_lines": 4 * 2, + "num_interchanges": 0, + }, + ], + "twin_routes": { # route master => twin routes + "r1101": {"r101": "r102", "r102": "r101"}, + "r1102": {"r103": "r104", "r104": "r103"}, + "r1103": {"r105": "r106", "r106": "r105"}, + "r1104": {"r107": "r108", "r108": "r107"}, + "r1201": {"r201": "r202", "r202": "r201"}, + "r1202": {"r203": "r204", "r204": "r203"}, + "r1203": {"r205": "r206", "r206": "r205"}, + "r1204": {"r207": "r208", "r208": "r207"}, + }, + "errors": [], + "warnings": [], + "notices": [ + 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 101, "1: 1-...-9-10-11-...-20")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 10 (0.09, 0.0) and Station 10(1) (0.09, 0.0003)? (relation 101, "1: 1-...-9-10-11-...-20")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included into the r105 but not included into r106 (relation 106, "3: 20-...-12-11(1)-9-...-1")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 105, "3: 1-...-9-10-11-...-20")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included into the r107 but not included into r108 (relation 108, "4: 20-...12-11(2)-9-...-1")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 201, "11: 1-...-9-10-11-...-20")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 10 (0.09, 0.0) and Station 10(1) (0.09, 0.0003)? (relation 201, "11: 1-...-9-10-11-...-20")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included into the r205 but not included into r206 (relation 206, "13: 20-...-12-11(1)-9-...-1")', # noqa: E501 + 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? 
(relation 205, "13: 1-...-9-10-11-...-20")', # noqa: E501 + ], + }, +] diff --git a/tests/test_error_messages.py b/tests/test_error_messages.py index aee6f48..c833001 100644 --- a/tests/test_error_messages.py +++ b/tests/test_error_messages.py @@ -1,4 +1,11 @@ -from tests.sample_data_for_error_messages import metro_samples +import itertools + +from tests.sample_data_for_error_messages import ( + metro_samples as metro_samples_error, +) +from tests.sample_data_for_twin_routes import ( + metro_samples as metro_samples_route_masters, +) from tests.util import TestCase @@ -20,6 +27,10 @@ class TestValidationMessages(TestCase): ) def test_validation_messages(self) -> None: - for sample in metro_samples: + for sample in itertools.chain( + metro_samples_error, metro_samples_route_masters + ): + if "errors" not in sample: + continue with self.subTest(msg=sample["name"]): self._test_validation_messages_for_network(sample) diff --git a/tests/test_route_master.py b/tests/test_route_master.py new file mode 100644 index 0000000..1bab617 --- /dev/null +++ b/tests/test_route_master.py @@ -0,0 +1,26 @@ +from tests.util import TestCase + +from tests.sample_data_for_twin_routes import metro_samples + + +class TestRouteMaster(TestCase): + def _test_find_twin_routes_for_network(self, metro_sample: dict) -> None: + cities, transfers = self.prepare_cities(metro_sample) + city = cities[0] + + self.assertTrue(city.is_good) + + for route_master_id, expected_twin_ids in metro_sample[ + "twin_routes" + ].items(): + route_master = city.routes[route_master_id] + calculated_twins = route_master.find_twin_routes() + calculated_twin_ids = { + r1.id: r2.id for r1, r2 in calculated_twins.items() + } + self.assertDictEqual(expected_twin_ids, calculated_twin_ids) + + def test_find_twin_routes(self) -> None: + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test_find_twin_routes_for_network(sample) From 970b4a51ee25a6a03e70f3fbcadd974205597c79 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Wed, 29 Nov 2023 12:35:37 +0300 Subject: [PATCH 03/15] process train_station_entrance similar to subway_entrance --- process_subways.py | 31 ++-- scripts/download_all_subways.sh | 6 - scripts/filter_all_subways.sh | 6 - scripts/process_subways.sh | 2 +- subway_structure.py | 157 ++++++++++-------- tests/assets/tiny_world.osm | 25 +++ tests/assets/tiny_world_gtfs.zip | Bin 4775 -> 0 bytes tests/assets/tiny_world_gtfs/agency.txt | 3 + tests/assets/tiny_world_gtfs/calendar.txt | 2 + tests/assets/tiny_world_gtfs/frequencies.txt | 7 + tests/assets/tiny_world_gtfs/routes.txt | 4 + tests/assets/tiny_world_gtfs/shapes.txt | 15 ++ tests/assets/tiny_world_gtfs/stop_times.txt | 17 ++ tests/assets/tiny_world_gtfs/stops.txt | 27 +++ tests/assets/tiny_world_gtfs/transfers.txt | 5 + tests/assets/tiny_world_gtfs/trips.txt | 7 + tests/sample_data_for_outputs.py | 41 ++++- tests/test_gtfs_processor.py | 21 +-- tests/test_overpass.py | 163 +++++++++++++++++++ tests/test_station.py | 46 ++++++ 20 files changed, 474 insertions(+), 111 deletions(-) delete mode 100755 scripts/download_all_subways.sh delete mode 100755 scripts/filter_all_subways.sh delete mode 100644 tests/assets/tiny_world_gtfs.zip create mode 100644 tests/assets/tiny_world_gtfs/agency.txt create mode 100644 tests/assets/tiny_world_gtfs/calendar.txt create mode 100644 tests/assets/tiny_world_gtfs/frequencies.txt create mode 100644 tests/assets/tiny_world_gtfs/routes.txt create mode 100644 tests/assets/tiny_world_gtfs/shapes.txt create mode 100644 
tests/assets/tiny_world_gtfs/stop_times.txt create mode 100644 tests/assets/tiny_world_gtfs/stops.txt create mode 100644 tests/assets/tiny_world_gtfs/transfers.txt create mode 100644 tests/assets/tiny_world_gtfs/trips.txt create mode 100644 tests/test_overpass.py create mode 100644 tests/test_station.py diff --git a/process_subways.py b/process_subways.py index 6f7e846..1fd2262 100755 --- a/process_subways.py +++ b/process_subways.py @@ -24,7 +24,7 @@ from subway_structure import ( City, CriticalValidationError, find_transfers, - get_unused_entrances_geojson, + get_unused_subway_entrances_geojson, MODES_OVERGROUND, MODES_RAPID, ) @@ -38,26 +38,37 @@ DEFAULT_CITIES_INFO_URL = ( Point = tuple[float, float] -def overpass_request( - overground: bool, overpass_api: str, bboxes: list[list[float]] -) -> list[dict]: +def compose_overpass_request( + overground: bool, bboxes: list[list[float]] +) -> str: + if not bboxes: + raise RuntimeError("No bboxes given for overpass request") + query = "[out:json][timeout:1000];(" modes = MODES_OVERGROUND if overground else MODES_RAPID for bbox in bboxes: - bbox_part = "({})".format(",".join(str(coord) for coord in bbox)) + bbox_part = f"({','.join(str(coord) for coord in bbox)})" query += "(" - for mode in modes: - query += 'rel[route="{}"]{};'.format(mode, bbox_part) + for mode in sorted(modes): + query += f'rel[route="{mode}"]{bbox_part};' query += ");" query += "rel(br)[type=route_master];" if not overground: - query += "node[railway=subway_entrance]{};".format(bbox_part) - query += "rel[public_transport=stop_area]{};".format(bbox_part) + query += f"node[railway=subway_entrance]{bbox_part};" + query += f"node[railway=train_station_entrance]{bbox_part};" + query += f"rel[public_transport=stop_area]{bbox_part};" query += ( "rel(br)[type=public_transport][public_transport=stop_area_group];" ) query += ");(._;>>;);out body center qt;" logging.debug("Query: %s", query) + return query + + +def overpass_request( + overground: bool, overpass_api: str, bboxes: list[list[float]] +) -> list[dict]: + query = compose_overpass_request(overground, bboxes) url = "{}?data={}".format(overpass_api, urllib.parse.quote(query)) response = urllib.request.urlopen(url, timeout=1000) if (r_code := response.getcode()) != 200: @@ -489,7 +500,7 @@ def main() -> None: write_recovery_data(options.recovery_path, recovery_data, cities) if options.entrances: - json.dump(get_unused_entrances_geojson(osm), options.entrances) + json.dump(get_unused_subway_entrances_geojson(osm), options.entrances) if options.dump: if os.path.isdir(options.dump): diff --git a/scripts/download_all_subways.sh b/scripts/download_all_subways.sh deleted file mode 100755 index 2797520..0000000 --- a/scripts/download_all_subways.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -# Still times out, do not use unless you want to be blocked for some hours on Overpass API -TIMEOUT=2000 -QUERY='[out:json][timeout:'$TIMEOUT'];(rel["route"="subway"];rel["route"="light_rail"];rel["public_transport"="stop_area"];rel["public_transport"="stop_area_group"];node["station"="subway"];node["station"="light_rail"];node["railway"="subway_entrance"];);(._;>;);out body center qt;' -http http://overpass-api.de/api/interpreter "data==$QUERY" --timeout $TIMEOUT > subways-$(date +%y%m%d).json -http https://overpass-api.de/api/status | grep available diff --git a/scripts/filter_all_subways.sh b/scripts/filter_all_subways.sh deleted file mode 100755 index 5627f10..0000000 --- a/scripts/filter_all_subways.sh +++ /dev/null @@ -1,6 +0,0 @@ 
-#!/bin/bash -[ $# -lt 1 ] && echo 'Usage: $0 [] []' && exit 1 -OSMFILTER=${2-./osmfilter} -QRELATIONS="route=subway =light_rail =monorail route_master=subway =light_rail =monorail public_transport=stop_area =stop_area_group" -QNODES="station=subway =light_rail =monorail railway=subway_entrance subway=yes light_rail=yes monorail=yes" -"$OSMFILTER" "$1" --keep= --keep-relations="$QRELATIONS" --keep-nodes="$QNODES" --drop-author -o="${3:-subways-$(date +%y%m%d).osm}" diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index a27f283..241b3c1 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -217,7 +217,7 @@ if [ -n "${NEED_FILTER-}" ]; then check_osmctools mkdir -p $TMPDIR/osmfilter_temp/ QRELATIONS="route=subway =light_rail =monorail =train route_master=subway =light_rail =monorail =train public_transport=stop_area =stop_area_group" - QNODES="railway=station station=subway =light_rail =monorail railway=subway_entrance subway=yes light_rail=yes monorail=yes train=yes" + QNODES="railway=station =subway_entrance =train_station_entrance station=subway =light_rail =monorail subway=yes light_rail=yes monorail=yes train=yes" "$OSMCTOOLS/osmfilter" "$PLANET_METRO" \ --keep= \ --keep-relations="$QRELATIONS" \ diff --git a/subway_structure.py b/subway_structure.py index e79d213..e55bf71 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -1,11 +1,12 @@ +from __future__ import annotations + import math import re from collections import Counter, defaultdict -from itertools import islice +from itertools import chain, islice from css_colours import normalize_colour - MAX_DISTANCE_TO_ENTRANCES = 300 # in meters MAX_DISTANCE_STOP_TO_LINE = 50 # in meters ALLOWED_STATIONS_MISMATCH = 0.02 # part of total station count @@ -283,13 +284,11 @@ def format_elid_list(ids): class Station: @staticmethod - def get_modes(el): - mode = el["tags"].get("station") - modes = [] if not mode else [mode] - for m in ALL_MODES: - if el["tags"].get(m) == "yes": - modes.append(m) - return set(modes) + def get_modes(el: dict) -> set[str]: + modes = {m for m in ALL_MODES if el["tags"].get(m) == "yes"} + if mode := el["tags"].get("station"): + modes.add(mode) + return modes @staticmethod def is_station(el, modes): @@ -367,7 +366,9 @@ class StopArea: return False return el["tags"].get("railway") in RAILWAY_TYPES - def __init__(self, station, city, stop_area=None): + def __init__( + self, station: Station, city: City, stop_area: StopArea | None = None + ) -> None: """Call this with a Station object.""" self.element = stop_area or station.element @@ -375,9 +376,10 @@ class StopArea: self.station = station self.stops = set() # set of el_ids of stop_positions self.platforms = set() # set of el_ids of platforms - self.exits = set() # el_id of subway_entrance for leaving the platform - self.entrances = set() # el_id of subway_entrance for entering - # the platform + self.exits = set() # el_id of subway_entrance/train_station_entrance + # for leaving the platform + self.entrances = set() # el_id of subway/train_station entrance + # for entering the platform self.center = None # lon, lat of the station centre point self.centers = {} # el_id -> (lon, lat) for all elements self.transfer = None # el_id of a transfer relation @@ -400,62 +402,9 @@ class StopArea: except ValueError as e: city.warn(str(e), stop_area) - # If we have a stop area, add all elements from it - warned_about_tracks = False - for m in stop_area["members"]: - k = el_id(m) - m_el = city.elements.get(k) - if m_el and "tags" in 
m_el: - if Station.is_station(m_el, city.modes): - if k != station.id: - city.error( - "Stop area has multiple stations", stop_area - ) - elif StopArea.is_stop(m_el): - self.stops.add(k) - elif StopArea.is_platform(m_el): - self.platforms.add(k) - elif m_el["tags"].get("railway") == "subway_entrance": - if m_el["type"] != "node": - city.warn("Subway entrance is not a node", m_el) - if ( - m_el["tags"].get("entrance") != "exit" - and m["role"] != "exit_only" - ): - self.entrances.add(k) - if ( - m_el["tags"].get("entrance") != "entrance" - and m["role"] != "entry_only" - ): - self.exits.add(k) - elif StopArea.is_track(m_el): - if not warned_about_tracks: - city.warn( - "Tracks in a stop_area relation", stop_area - ) - warned_about_tracks = True + self._process_members(station, city, stop_area) else: - # Otherwise add nearby entrances - center = station.center - for c_el in city.elements.values(): - if c_el.get("tags", {}).get("railway") == "subway_entrance": - c_id = el_id(c_el) - if c_id not in city.stop_areas: - c_center = el_center(c_el) - if ( - c_center - and distance(center, c_center) - <= MAX_DISTANCE_TO_ENTRANCES - ): - if c_el["type"] != "node": - city.warn( - "Subway entrance is not a node", c_el - ) - etag = c_el["tags"].get("entrance") - if etag != "exit": - self.entrances.add(c_id) - if etag != "entrance": - self.exits.add(c_id) + self._add_nearby_entrances(station, city) if self.exits and not self.entrances: city.warn( @@ -476,13 +425,77 @@ class StopArea: self.center = station.center else: self.center = [0, 0] - for sp in self.stops | self.platforms: + for sp in chain(self.stops, self.platforms): spc = self.centers[sp] for i in range(2): self.center[i] += spc[i] for i in range(2): self.center[i] /= len(self.stops) + len(self.platforms) + def _process_members( + self, station: Station, city: City, stop_area: dict + ) -> None: + # If we have a stop area, add all elements from it + tracks_detected = False + for m in stop_area["members"]: + k = el_id(m) + m_el = city.elements.get(k) + if not m_el or "tags" not in m_el: + continue + if Station.is_station(m_el, city.modes): + if k != station.id: + city.error("Stop area has multiple stations", stop_area) + elif StopArea.is_stop(m_el): + self.stops.add(k) + elif StopArea.is_platform(m_el): + self.platforms.add(k) + elif (entrance_type := m_el["tags"].get("railway")) in ( + "subway_entrance", + "train_station_entrance", + ): + if m_el["type"] != "node": + city.warn(f"{entrance_type} is not a node", m_el) + if ( + m_el["tags"].get("entrance") != "exit" + and m["role"] != "exit_only" + ): + self.entrances.add(k) + if ( + m_el["tags"].get("entrance") != "entrance" + and m["role"] != "entry_only" + ): + self.exits.add(k) + elif StopArea.is_track(m_el): + tracks_detected = True + + if tracks_detected: + city.warn("Tracks in a stop_area relation", stop_area) + + def _add_nearby_entrances(self, station: Station, city: City) -> None: + center = station.center + for entrance_el in ( + el + for el in city.elements.values() + if "tags" in el + and (entrance_type := el["tags"].get("railway")) + in ("subway_entrance", "train_station_entrance") + ): + entrance_id = el_id(entrance_el) + if entrance_id in city.stop_areas: + continue # This entrance belongs to some stop_area + c_center = el_center(entrance_el) + if ( + c_center + and distance(center, c_center) <= MAX_DISTANCE_TO_ENTRANCES + ): + if entrance_el["type"] != "node": + city.warn(f"{entrance_type} is not a node", entrance_el) + etag = entrance_el["tags"].get("entrance") + if etag != "exit": 
+ self.entrances.add(entrance_id) + if etag != "entrance": + self.exits.add(entrance_id) + def get_elements(self): result = {self.id, self.station.id} result.update(self.entrances) @@ -1816,7 +1829,7 @@ class City: if len(transfer) > 1: self.transfers.append(transfer) - def extract_routes(self): + def extract_routes(self) -> None: # Extract stations processed_stop_areas = set() for el in self.elements.values(): @@ -1850,7 +1863,7 @@ class City: # Check that stops and platforms belong to # a single stop_area - for sp in station.stops | station.platforms: + for sp in chain(station.stops, station.platforms): if sp in self.stops_and_platforms: self.notice( f"A stop or a platform {sp} belongs to " @@ -2328,7 +2341,7 @@ def find_transfers(elements, cities): return transfers -def get_unused_entrances_geojson(elements): +def get_unused_subway_entrances_geojson(elements: list[dict]) -> dict: global used_entrances features = [] for el in elements: diff --git a/tests/assets/tiny_world.osm b/tests/assets/tiny_world.osm index 6ee2096..276fb80 100644 --- a/tests/assets/tiny_world.osm +++ b/tests/assets/tiny_world.osm @@ -56,6 +56,27 @@ + + + + + + + + + + + + + + + + + + + + + @@ -95,6 +116,8 @@ + + @@ -102,6 +125,8 @@ + + diff --git a/tests/assets/tiny_world_gtfs.zip b/tests/assets/tiny_world_gtfs.zip deleted file mode 100644 index ef7a66a7a36f903560f0d85b419ab67255481f21..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4775 zcma)AOOG2x5Oy9Q#ljGYoREuyKi?|z=&zkX>jpho)I zd|#S^^ku50E)Y`AzLDDEqiqgbOmNpt%f1-T+eU$Fa(H$&N7$^^))rz(Zb67vbT%him zxLL7U(VBV%@$cNjf3reQ0v%|#qHDJozrQ$pA2y?Q4R$MSvg?|yF!m&gSYKk2-rR4D zj*DcS$*5(_II;9X^Nf~0O!MfNh+?C6EaY@!Od4%;VwXCKOi?5cQzJsetSwvbbP4|u zLCLWxmO4%IQxl7^bb6}K3T0=!Gk$g%b7t^UY*!)tfH~6jSYu&vM=W1es0DNQ*=6`* zed$KSu7wvr+3u`m&A8S0}vHi~E1A(>+T+oif(<#sD8>QS=_d;!9J z6&<)*rjBC^`wExbHZ>3sTM&afp48-p^q_#! 
zlGM#vKQ6w1@HKK3wKvcrotiAxmg=ciJ+RF&d8W#}DX2$cM&ZUYo4sXXWfK|~y4+hP z_KYY>jp``YrBRT}#AxW!ER{g5zs1a>K&Hl2*E`Rjz5C$uU_k9nmtL1tdvX<1 zRV>=t znT6a{9Es_i$^6Nb-3=`ACl~cQ!gyZt8B3W59Dg0ceSKdTS4;?#~Mqal}R6kg$qZuCm|=GUCXV!V*TPN)DJ z4$fYmL(+uPF9t~(bNE|H0@=E%SO_O$nWNQkR#uSL)MKe|i0=xuU^3=86mL@;4w59C za2&!@=_`V8G7WIHAd_ooWS5C-$PdN=&<)2M`_)#bQwQq4p69oI`Qiy)3$+m%LRVI- za22Nrzq0ujrGwl~%Df@nA_;ukB%_GOFm_tFrIe+dAU)-KQ%p&YXGS$89&Er#Er3?eEZ0L_G^$o;neXv dict: + def _readGtfs(gtfs_dir: Path) -> dict: gtfs_data = dict() - with ZipFile(filepath) as zf: - for gtfs_feature in GTFS_COLUMNS: - with zf.open(f"{gtfs_feature}.txt") as f: - reader = csv.reader(codecs.iterdecode(f, "utf-8")) - next(reader) # read header - rows = list(reader) - gtfs_data[gtfs_feature] = rows + for gtfs_feature in GTFS_COLUMNS: + with open(gtfs_dir / f"{gtfs_feature}.txt") as f: + reader = csv.reader(f) + next(reader) # read header + rows = list(reader) + gtfs_data[gtfs_feature] = rows return gtfs_data def _compareGtfs( diff --git a/tests/test_overpass.py b/tests/test_overpass.py new file mode 100644 index 0000000..2b0afa3 --- /dev/null +++ b/tests/test_overpass.py @@ -0,0 +1,163 @@ +from unittest import TestCase, mock + +from process_subways import compose_overpass_request, overpass_request + + +class TestOverpassQuery(TestCase): + def test__compose_overpass_request__no_bboxes(self) -> None: + bboxes = [] + for overground in (True, False): + with self.subTest(msg=f"{overground=}"): + with self.assertRaises(RuntimeError): + compose_overpass_request(overground, bboxes) + + def test__compose_overpass_request__one_bbox(self) -> None: + bboxes = [[1, 2, 3, 4]] + + expected = { + False: ( + "[out:json][timeout:1000];" + "(" + "(" + 'rel[route="light_rail"](1,2,3,4);' + 'rel[route="monorail"](1,2,3,4);' + 'rel[route="subway"](1,2,3,4);' + 'rel[route="train"](1,2,3,4);' + ");" + "rel(br)[type=route_master];" + "node[railway=subway_entrance](1,2,3,4);" + "node[railway=train_station_entrance](1,2,3,4);" + "rel[public_transport=stop_area](1,2,3,4);" + "rel(br)[type=public_transport]" + "[public_transport=stop_area_group];" + ");" + "(._;>>;);" + "out body center qt;" + ), + True: ( + "[out:json][timeout:1000];" + "(" + "(" + 'rel[route="aerialway"](1,2,3,4);' + 'rel[route="bus"](1,2,3,4);' + 'rel[route="ferry"](1,2,3,4);' + 'rel[route="tram"](1,2,3,4);' + 'rel[route="trolleybus"](1,2,3,4);' + ");" + "rel(br)[type=route_master];" + "rel[public_transport=stop_area](1,2,3,4);" + "rel(br)[type=public_transport]" + "[public_transport=stop_area_group];" + ");" + "(._;>>;);" + "out body center qt;" + ), + } + + for overground, expected_answer in expected.items(): + with self.subTest(msg=f"{overground=}"): + self.assertEqual( + expected_answer, + compose_overpass_request(overground, bboxes), + ) + + def test__compose_overpass_request__several_bboxes(self) -> None: + bboxes = [[1, 2, 3, 4], [5, 6, 7, 8]] + + expected = { + False: ( + "[out:json][timeout:1000];" + "(" + "(" + 'rel[route="light_rail"](1,2,3,4);' + 'rel[route="monorail"](1,2,3,4);' + 'rel[route="subway"](1,2,3,4);' + 'rel[route="train"](1,2,3,4);' + ");" + "rel(br)[type=route_master];" + "node[railway=subway_entrance](1,2,3,4);" + "node[railway=train_station_entrance](1,2,3,4);" + "rel[public_transport=stop_area](1,2,3,4);" + "rel(br)[type=public_transport][public_transport=stop_area_group];" # noqa E501 + "(" + 'rel[route="light_rail"](5,6,7,8);' + 'rel[route="monorail"](5,6,7,8);' + 'rel[route="subway"](5,6,7,8);' + 'rel[route="train"](5,6,7,8);' + ");" + "rel(br)[type=route_master];" + 
"node[railway=subway_entrance](5,6,7,8);" + "node[railway=train_station_entrance](5,6,7,8);" + "rel[public_transport=stop_area](5,6,7,8);" + "rel(br)[type=public_transport][public_transport=stop_area_group];" # noqa E501 + ");" + "(._;>>;);" + "out body center qt;" + ), + True: ( + "[out:json][timeout:1000];" + "(" + "(" + 'rel[route="aerialway"](1,2,3,4);' + 'rel[route="bus"](1,2,3,4);' + 'rel[route="ferry"](1,2,3,4);' + 'rel[route="tram"](1,2,3,4);' + 'rel[route="trolleybus"](1,2,3,4);' + ");" + "rel(br)[type=route_master];" + "rel[public_transport=stop_area](1,2,3,4);" + "rel(br)[type=public_transport][public_transport=stop_area_group];" # noqa E501 + "(" + 'rel[route="aerialway"](5,6,7,8);' + 'rel[route="bus"](5,6,7,8);' + 'rel[route="ferry"](5,6,7,8);' + 'rel[route="tram"](5,6,7,8);' + 'rel[route="trolleybus"](5,6,7,8);' + ");" + "rel(br)[type=route_master];" + "rel[public_transport=stop_area](5,6,7,8);" + "rel(br)[type=public_transport][public_transport=stop_area_group];" # noqa E501 + ");" + "(._;>>;);" + "out body center qt;" + ), + } + + for overground, expected_answer in expected.items(): + with self.subTest(msg=f"{overground=}"): + self.assertEqual( + expected_answer, + compose_overpass_request(overground, bboxes), + ) + + def test__overpass_request(self) -> None: + overpass_api = "http://overpass.example/" + overground = False + bboxes = [[1, 2, 3, 4]] + expected_url = ( + "http://overpass.example/?data=" + "%5Bout%3Ajson%5D%5Btimeout%3A1000%5D%3B%28%28" + "rel%5Broute%3D%22light_rail%22%5D%281%2C2%2C3%2C4" + "%29%3Brel%5Broute%3D%22monorail%22%5D%281%2C2%2C3%2C4%29%3B" + "rel%5Broute%3D%22subway%22%5D%281%2C2%2C3%2C4%29%3B" + "rel%5Broute%3D%22train%22%5D%281%2C2%2C3%2C4%29%3B%29%3B" + "rel%28br%29%5Btype%3Droute_master%5D%3B" + "node%5Brailway%3Dsubway_entrance%5D%281%2C2%2C3%2C4%29%3B" + "node%5Brailway%3Dtrain_station_entrance%5D%281%2C2%2C3%2C4%29%3B" + "rel%5Bpublic_transport%3Dstop_area%5D%281%2C2%2C3%2C4%29%3B" + "rel%28br%29%5Btype%3Dpublic_transport%5D%5Bpublic_transport%3D" + "stop_area_group%5D%3B%29%3B" + "%28._%3B%3E%3E%3B%29%3Bout%20body%20center%20qt%3B" + ) + + with mock.patch("process_subways.json.load") as load_mock: + load_mock.return_value = {"elements": []} + + with mock.patch( + "process_subways.urllib.request.urlopen" + ) as urlopen_mock: + urlopen_mock.return_value.getcode.return_value = 200 + + overpass_request(overground, overpass_api, bboxes) + + urlopen_mock.assert_called_once_with(expected_url, timeout=1000) diff --git a/tests/test_station.py b/tests/test_station.py new file mode 100644 index 0000000..2081aaa --- /dev/null +++ b/tests/test_station.py @@ -0,0 +1,46 @@ +from unittest import TestCase + +from subway_structure import Station + + +class TestStation(TestCase): + def test__get_modes(self) -> None: + cases = [ + {"element": {"tags": {"railway": "station"}}, "modes": set()}, + { + "element": { + "tags": {"railway": "station", "station": "train"} + }, + "modes": {"train"}, + }, + { + "element": {"tags": {"railway": "station", "train": "yes"}}, + "modes": {"train"}, + }, + { + "element": { + "tags": { + "railway": "station", + "station": "subway", + "train": "yes", + } + }, + "modes": {"subway", "train"}, + }, + { + "element": { + "tags": { + "railway": "station", + "subway": "yes", + "train": "yes", + "light_rail": "yes", + "monorail": "yes", + } + }, + "modes": {"subway", "train", "light_rail", "monorail"}, + }, + ] + for case in cases: + element = case["element"] + expected_modes = case["modes"] + self.assertSetEqual(expected_modes, 
Station.get_modes(element))

From 1e4e434d49f2c4023456ea43d0611621a5952600 Mon Sep 17 00:00:00 2001
From: Alexey Zakharenkov
Date: Tue, 19 Dec 2023 13:44:04 +0300
Subject: [PATCH 04/15] Add --dump-city-list option to untie cities.txt formation from mapsme.json

---
 README.md                               | 13 ++++++++++++-
 mapsme_json_to_cities.py                | 12 ++++++++++--
 process_subways.py                      | 21 +++++++++++++++++++--
 scripts/process_subways.sh              |  6 +++++-
 subway_structure.py                     |  6 +++---
 tests/sample_data_for_error_messages.py |  2 +-
 tests/sample_data_for_twin_routes.py    |  8 ++++----
 7 files changed, 54 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index e259087..b987e5f 100644
--- a/README.md
+++ b/README.md
@@ -79,13 +79,24 @@ if you allow the `process_subway.py` to fetch data from Overpass API. Here are t
 python3 ./validation_to_html.py validation.log html
 ```
 
+## Publishing validation reports to the Web
+
+Expose a directory with static content via a web server and put the following into it:
+- HTML files from the directory specified in the 2nd parameter of `validation_to_html.py`
+- To activate the "Y" (YAML), "J" (GeoJSON) and "M" (Map) links beside each city name:
+  - The contents of the `render` directory from the repository
+  - The `cities.txt` file generated with the `--dump-city-list` parameter of `process_subways.py`
+  - YAML files created with the -d option of `process_subways.py`
+  - GeoJSON files created with the -j option of `process_subways.py`
+
+
 ## Related external resources
 
 Summary information about all metro networks that are monitored is gathered
 in the [Google Spreadsheet](https://docs.google.com/spreadsheets/d/1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k).
 
 Regular updates of validation results are available at
-[this website](https://maps.mail.ru/osm/tools/subways/latest/).
+[this website](https://maps.vk.com/osm/tools/subways/latest/).
 You can find more info about this validator instance in
 [OSM Wiki](https://wiki.openstreetmap.org/wiki/Quality_assurance#subway-preprocessor).
diff --git a/mapsme_json_to_cities.py b/mapsme_json_to_cities.py
index 1c69a77..736b74b 100644
--- a/mapsme_json_to_cities.py
+++ b/mapsme_json_to_cities.py
@@ -1,7 +1,15 @@
+"""
+Generate sorted list of all cities, with [bad] mark for bad cities.
+
+!!! Deprecated for use in validation cycle. 
+""" + + import argparse import json -from process_subways import DEFAULT_CITIES_INFO_URL, get_cities_info +from process_subways import BAD_MARK, DEFAULT_CITIES_INFO_URL, get_cities_info if __name__ == "__main__": @@ -56,7 +64,7 @@ if __name__ == "__main__": if ci["name"] in good_cities: lines.append(f"{ci['name']}, {ci['country']}") elif with_bad: - lines.append(f"{ci['name']}, {ci['country']} (Bad)") + lines.append(f"{ci['name']}, {ci['country']} {BAD_MARK}") for line in sorted(lines): print(line) diff --git a/process_subways.py b/process_subways.py index 1fd2262..ca71ed1 100755 --- a/process_subways.py +++ b/process_subways.py @@ -34,6 +34,7 @@ DEFAULT_CITIES_INFO_URL = ( "https://docs.google.com/spreadsheets/d/" f"{DEFAULT_SPREADSHEET_ID}/export?format=csv" ) +BAD_MARK = "[bad]" Point = tuple[float, float] @@ -69,7 +70,7 @@ def overpass_request( overground: bool, overpass_api: str, bboxes: list[list[float]] ) -> list[dict]: query = compose_overpass_request(overground, bboxes) - url = "{}?data={}".format(overpass_api, urllib.parse.quote(query)) + url = f"{overpass_api}?data={urllib.parse.quote(query)}" response = urllib.request.urlopen(url, timeout=1000) if (r_code := response.getcode()) != 200: raise Exception(f"Failed to query Overpass API: HTTP {r_code}") @@ -82,7 +83,7 @@ def multi_overpass( SLICE_SIZE = 10 INTERREQUEST_WAIT = 5 # in seconds result = [] - for i in range(0, len(bboxes) + SLICE_SIZE - 1, SLICE_SIZE): + for i in range(0, len(bboxes), SLICE_SIZE): if i > 0: time.sleep(INTERREQUEST_WAIT) bboxes_i = bboxes[i : i + SLICE_SIZE] # noqa E203 @@ -383,6 +384,14 @@ def main() -> None: type=argparse.FileType("w", encoding="utf-8"), help="Validation JSON file name", ) + parser.add_argument( + "--dump-city-list", + type=argparse.FileType("w", encoding="utf-8"), + help=( + "Dump sorted list of all city names, possibly with " + f"{BAD_MARK} mark" + ), + ) for processor_name, processor in inspect.getmembers( processors, inspect.ismodule @@ -496,6 +505,14 @@ def main() -> None: ", ".join(sorted(bad_city_names)), ) + if options.dump_city_list: + lines = sorted( + f"{city.name}, {city.country}" + f"{' ' + BAD_MARK if city.name in bad_city_names else ''}\n" + for city in cities + ) + options.dump_city_list.writelines(lines) + if options.recovery_path: write_recovery_data(options.recovery_path, recovery_data, cities) diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index 241b3c1..345dd2d 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -53,6 +53,7 @@ Environment variable reference: - GIT_PULL: set to 1 to update the scripts - TMPDIR: path to temporary files - HTML_DIR: target path for generated HTML files + - DUMP_CITY_LIST: file name to save sorted list of cities, with [bad] mark for bad cities - SERVER: server name and path to upload HTML files (e.g. 
ilya@osmz.ru:/var/www/) - SERVER_KEY: rsa key to supply for uploading the files - REMOVE_HTML: set to 1 to remove \$HTML_DIR after uploading @@ -246,7 +247,10 @@ VALIDATION="$TMPDIR/validation.json" ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ ${MAPSME:+--output-mapsme "$MAPSME"} \ ${GTFS:+--output-gtfs "$GTFS"} \ - ${CITY:+-c "$CITY"} ${DUMP:+-d "$DUMP"} ${GEOJSON:+-j "$GEOJSON"} \ + ${CITY:+-c "$CITY"} \ + ${DUMP:+-d "$DUMP"} \ + ${GEOJSON:+-j "$GEOJSON"} \ + ${DUMP_CITY_LIST:+--dump-city-list "$DUMP_CITY_LIST"} \ ${ELEMENTS_CACHE:+-i "$ELEMENTS_CACHE"} \ ${CITY_CACHE:+--cache "$CITY_CACHE"} \ ${RECOVERY_PATH:+-r "$RECOVERY_PATH"} diff --git a/subway_structure.py b/subway_structure.py index e55bf71..7946e47 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -1024,7 +1024,7 @@ class Route: continue if Station.is_station(el, self.city.modes): - # A station may be not included into this route due to previous + # A station may be not included in this route due to previous # 'stop area has multiple stations' error. No other error # message is needed. pass @@ -2085,7 +2085,7 @@ class City: ): self.notice( f"Stop {st.stoparea.station.name} {st.stop} is included " - f"into the {route2.id} but not included into {route1.id}", + f"in the {route2.id} but not included in {route1.id}", route1.element, ) @@ -2103,7 +2103,7 @@ class City: ): self.notice( f"Stop {st.stoparea.station.name} {st.stop} is included " - f"into the {route1.id} but not included into {route2.id}", + f"in the {route1.id} but not included in {route2.id}", route2.element, ) diff --git a/tests/sample_data_for_error_messages.py b/tests/sample_data_for_error_messages.py index 245cfbb..2e20c73 100644 --- a/tests/sample_data_for_error_messages.py +++ b/tests/sample_data_for_error_messages.py @@ -342,7 +342,7 @@ metro_samples = [ 'Only one route in route_master. Please check if it needs a return route (relation 159, "C: 1-3-5-1")', # noqa: E501 'Route does not have a return direction (relation 163, "04: 1-2-3")', # noqa: E501 'Route does not have a return direction (relation 164, "04: 2-1")', # noqa: E501 - 'Stop Station 2 (1.0, 0.0) is included into the r203 but not included into r204 (relation 204, "2: 3-1")', # noqa: E501 + 'Stop Station 2 (1.0, 0.0) is included in the r203 but not included in r204 (relation 204, "2: 3-1")', # noqa: E501 'Route does not have a return direction (relation 205, "3: 1-2-3")', # noqa: E501 'Route does not have a return direction (relation 206, "3: 1-2-3")', # noqa: E501 'Route does not have a return direction (relation 207, "4: 4-3-2-1")', # noqa: E501 diff --git a/tests/sample_data_for_twin_routes.py b/tests/sample_data_for_twin_routes.py index 58b9e17..5847632 100644 --- a/tests/sample_data_for_twin_routes.py +++ b/tests/sample_data_for_twin_routes.py @@ -33,7 +33,7 @@ metro_samples = [ 'Route does not have a return direction (relation 157, "02: 4-1")', 'Route does not have a return direction (relation 158, "02: 1-3 (2)")', # noqa: E501 'Only one route in route_master. 
Please check if it needs a return route (relation 159, "C: 1-2-3-4-5-1")', # noqa: E501 - 'Stop Station 4 (3.0, 0.0) is included into the r205 but not included into r206 (relation 206, "3: 7-6-5-3-2-1")', # noqa: E501 + 'Stop Station 4 (3.0, 0.0) is included in the r205 but not included in r206 (relation 206, "3: 7-6-5-3-2-1")', # noqa: E501 'Route does not have a return direction (relation 207, "4: 4-3-2-1")', # noqa: E501 'Route does not have a return direction (relation 208, "4: 1-2-3-4")', # noqa: E501 'Route does not have a return direction (relation 209, "5: 1-2-3-5-6-7")', # noqa: E501 @@ -66,12 +66,12 @@ metro_samples = [ "notices": [ 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 101, "1: 1-...-9-10-11-...-20")', # noqa: E501 'Should there be one stoparea or a transfer between Station 10 (0.09, 0.0) and Station 10(1) (0.09, 0.0003)? (relation 101, "1: 1-...-9-10-11-...-20")', # noqa: E501 - 'Stop Station 10 (0.09, 0.0) is included into the r105 but not included into r106 (relation 106, "3: 20-...-12-11(1)-9-...-1")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included in the r105 but not included in r106 (relation 106, "3: 20-...-12-11(1)-9-...-1")', # noqa: E501 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 105, "3: 1-...-9-10-11-...-20")', # noqa: E501 - 'Stop Station 10 (0.09, 0.0) is included into the r107 but not included into r108 (relation 108, "4: 20-...12-11(2)-9-...-1")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included in the r107 but not included in r108 (relation 108, "4: 20-...12-11(2)-9-...-1")', # noqa: E501 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? (relation 201, "11: 1-...-9-10-11-...-20")', # noqa: E501 'Should there be one stoparea or a transfer between Station 10 (0.09, 0.0) and Station 10(1) (0.09, 0.0003)? (relation 201, "11: 1-...-9-10-11-...-20")', # noqa: E501 - 'Stop Station 10 (0.09, 0.0) is included into the r205 but not included into r206 (relation 206, "13: 20-...-12-11(1)-9-...-1")', # noqa: E501 + 'Stop Station 10 (0.09, 0.0) is included in the r205 but not included in r206 (relation 206, "13: 20-...-12-11(1)-9-...-1")', # noqa: E501 'Should there be one stoparea or a transfer between Station 11 (0.1, 0.0) and Station 11(1) (0.1, 0.0003)? 
(relation 205, "13: 1-...-9-10-11-...-20")', # noqa: E501 ], }, From e449c98a7f13bc7ad8969d2fb93ebda5c6641ce9 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Tue, 26 Dec 2023 12:18:52 +0300 Subject: [PATCH 05/15] Mixin for comparison of json-like python structures --- tests/test_build_tracks.py | 4 +- tests/test_storage.py | 22 ++- tests/util.py | 265 +++++++++++++++++++++++++------------ 3 files changed, 202 insertions(+), 89 deletions(-) diff --git a/tests/test_build_tracks.py b/tests/test_build_tracks.py index a1b6a6c..b694bbe 100644 --- a/tests/test_build_tracks.py +++ b/tests/test_build_tracks.py @@ -1,8 +1,8 @@ from tests.sample_data_for_build_tracks import metro_samples -from tests.util import TestCase +from tests.util import JsonLikeComparisonMixin, TestCase -class TestOneRouteTracks(TestCase): +class TestOneRouteTracks(JsonLikeComparisonMixin, TestCase): """Test tracks extending and truncating on one-route networks""" def prepare_city_routes(self, metro_sample: dict) -> tuple: diff --git a/tests/test_storage.py b/tests/test_storage.py index 978529f..042f428 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -1,11 +1,12 @@ import json +from operator import itemgetter from processors._common import transit_to_dict from tests.sample_data_for_outputs import metro_samples -from tests.util import TestCase, TestTransitDataMixin +from tests.util import JsonLikeComparisonMixin, TestCase -class TestStorage(TestCase, TestTransitDataMixin): +class TestStorage(JsonLikeComparisonMixin, TestCase): def test_storage(self) -> None: for sample in metro_samples: with self.subTest(msg=sample["name"]): @@ -21,6 +22,21 @@ class TestStorage(TestCase, TestTransitDataMixin): map(tuple, control_transit_data["transfers"]) ) - self.compare_transit_data( + self._compare_transit_data( calculated_transit_data, control_transit_data ) + + def _compare_transit_data( + self, transit_data1: dict, transit_data2: dict + ) -> None: + id_cmp = itemgetter("id") + + self.assertMappingAlmostEqual( + transit_data1, + transit_data2, + unordered_lists={ + "routes": id_cmp, + "itineraries": id_cmp, + "entrances": id_cmp, + }, + ) diff --git a/tests/util.py b/tests/util.py index 56b1962..b8e29a9 100644 --- a/tests/util.py +++ b/tests/util.py @@ -1,8 +1,7 @@ import io -from collections.abc import Sequence, Mapping -from operator import itemgetter +from collections.abc import Callable, Mapping, Sequence from pathlib import Path -from typing import Any +from typing import Any, TypeAlias, Self from unittest import TestCase as unittestTestCase from process_subways import ( @@ -13,6 +12,8 @@ from process_subways import ( from subway_io import load_xml from subway_structure import City, find_transfers +TestCaseMixin: TypeAlias = Self | unittestTestCase + class TestCase(unittestTestCase): """TestCase class for testing the Subway Validator""" @@ -75,41 +76,82 @@ class TestCase(unittestTestCase): transfers = find_transfers(elements, cities) return cities, transfers + +class JsonLikeComparisonMixin: + """Contains auxiliary methods for the TestCase class that allow + to compare json-like structures where some lists do not imply order + and actually represent sets. + Also, all collections compare floats with given precision to any nesting + depth. 
+ """ + def _assertAnyAlmostEqual( - self, + self: TestCaseMixin, first: Any, second: Any, places: int = 10, - ignore_keys: set = None, + *, + unordered_lists: dict[str, Callable] | None = None, + ignore_keys: set[str] | None = None, ) -> None: """Dispatcher method to other "...AlmostEqual" methods depending on argument types. + + Compare dictionaries/lists recursively, numeric values being compared + approximately. + + :param: first a value of arbitrary type, including collections + :param: second a value of arbitrary type, including collections + :param: places number of fractional digits. Is passed to + the self.assertAlmostEqual() method. + :param: unordered_lists a dict whose keys are names of lists + to be compared without order, values - comparators for + the lists to sort them in an unambiguous order. If a comparator + is None, then the lists are compared as sets. + :param: ignore_keys a set of strs with keys that should be ignored + during recursive comparison of dictionaries. May be used to + elaborate a custom comparison mechanism for some substructures. + :return: None """ - if isinstance(first, Mapping): - self.assertMappingAlmostEqual(first, second, places, ignore_keys) - elif isinstance(first, Sequence) and not isinstance( - first, (str, bytes) + if all(isinstance(x, Mapping) for x in (first, second)): + self.assertMappingAlmostEqual( + first, + second, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + elif all( + isinstance(x, Sequence) and not isinstance(x, (str, bytes)) + for x in (first, second) ): - self.assertSequenceAlmostEqual(first, second, places, ignore_keys) - else: + self.assertSequenceAlmostEqual( + first, + second, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + elif isinstance(first, float) and isinstance(second, float): self.assertAlmostEqual(first, second, places) + else: + self.assertEqual(first, second) def assertSequenceAlmostEqual( - self, + self: TestCaseMixin, seq1: Sequence, seq2: Sequence, places: int = 10, - ignore_keys: set = None, + *, + unordered_lists: dict[str, Callable] | None = None, + ignore_keys: set[str] | None = None, ) -> None: """Compare two sequences, items of numeric types being compared approximately, containers being approx-compared recursively. - :param: seq1 a sequence of values of any types, including collections - :param: seq2 a sequence of values of any types, including collections - :param: places number of fractional digits (passed to - assertAlmostEqual() method of parent class) - :param: ignore_keys a set of strs with keys in dictionaries - that should be ignored during recursive comparison + :param: places see _assertAnyAlmostEqual() method + :param: unordered_lists see _assertAnyAlmostEqual() method + :param: ignore_keys see _assertAnyAlmostEqual() method :return: None """ if not (isinstance(seq1, Sequence) and isinstance(seq2, Sequence)): @@ -119,26 +161,99 @@ class TestCase(unittestTestCase): ) self.assertEqual(len(seq1), len(seq2)) for a, b in zip(seq1, seq2): - self._assertAnyAlmostEqual(a, b, places, ignore_keys) + self._assertAnyAlmostEqual( + a, + b, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + + def assertSequenceAlmostEqualIgnoreOrder( + self: TestCaseMixin, + seq1: Sequence, + seq2: Sequence, + cmp: Callable | None, + places: int = 10, + *, + unordered_lists: dict[str, Callable] | None = None, + ignore_keys: set[str] | None = None, + ) -> None: + """Compares two sequences as sets, i.e. ignoring order. 
Nested + lists determined with unordered_lists parameter are also compared + without order. + + :param: cmp if None then compare sequences as sets. If elements are + not hashable then this method is inapplicable and the + sorted (with the comparator) sequences are compared. + :param: places see _assertAnyAlmostEqual() method + :param: unordered_lists see _assertAnyAlmostEqual() method + :param: ignore_keys see _assertAnyAlmostEqual() method + :return: None + """ + if cmp is not None: + v1 = sorted(seq1, key=cmp) + v2 = sorted(seq2, key=cmp) + self.assertSequenceAlmostEqual( + v1, + v2, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + else: + self.assertEqual(len(seq1), len(seq2)) + v1 = set(seq1) + v2 = set(seq2) + self.assertSetEqual(v1, v2) def assertMappingAlmostEqual( - self, + self: TestCaseMixin, d1: Mapping, d2: Mapping, places: int = 10, - ignore_keys: set = None, + *, + unordered_lists: dict[str, Callable] | None = None, + ignore_keys: set[str] | None = None, ) -> None: """Compare dictionaries recursively, numeric values being compared - approximately. + approximately, some lists being compared without order. - :param: d1 a mapping of arbitrary key/value types, - including collections - :param: d1 a mapping of arbitrary key/value types, - including collections - :param: places number of fractional digits (passed to - assertAlmostEqual() method of parent class) - :param: ignore_keys a set of strs with keys in dictionaries - that should be ignored during recursive comparison + :param: places see _assertAnyAlmostEqual() method + :param: unordered_lists see _assertAnyAlmostEqual() method + Example 1: + d1 = { + "name_from_unordered_list": [a1, b1, c1], + "some_other_name": [e1, f1, g1], + } + d2 = { + "name_from_unordered_list": [a2, b2, c2], + "some_other_name": [e2, f2, g2], + } + Lists [a1, b1, c1] and [a2, b2, c2] will be compared + without order, lists [e1, f1, g1] and [e2, f2, g2] - + considering the order. + + Example 2: + d1 = { + "name_from_unordered_list": { + "key1": [a1, b1, c1], + "key2": [e1, f1, g1], + }, + "some_other_name": [h1, i1, k1], + } + d2 = { + "name_from_unordered_list": { + "key1": [a2, b2, c2], + "key2": [e2, f2, g2], + }, + "some_other_name": [h2, i2, k2], + } + Lists [a1, b1, c1] and [a2, b2, c2] will be compared + without order, as well as [e1, f1, g1] and + [e2, f2, g2]; lists [h1, i1, k1] and [h2, i2, k2] - + considering the order. + :param: ignore_keys see _assertAnyAlmostEqual() method :return: None """ if not (isinstance(d1, Mapping) and isinstance(d2, Mapping)): @@ -153,60 +268,42 @@ class TestCase(unittestTestCase): d1_keys -= ignore_keys d2_keys -= ignore_keys self.assertSetEqual(d1_keys, d2_keys) + + if unordered_lists is None: + unordered_lists = {} + for k in d1_keys: v1 = d1[k] v2 = d2[k] - self._assertAnyAlmostEqual(v1, v2, places, ignore_keys) - - -class TestTransitDataMixin: - def compare_transit_data(self, td1: dict, td2: dict) -> None: - """Compare transit data td1 and td2 remembering that: - - arrays that represent sets ("routes", "itineraries", "entrances") - should be compared without order; - - all floating-point values (coordinates) should be compared - approximately. 
- """ - self.assertMappingAlmostEqual( - td1, - td2, - ignore_keys={"stopareas", "routes", "itineraries"}, - ) - - networks1 = td1["networks"] - networks2 = td2["networks"] - - id_cmp = itemgetter("id") - - for network_name, network_data1 in networks1.items(): - network_data2 = networks2[network_name] - routes1 = sorted(network_data1["routes"], key=id_cmp) - routes2 = sorted(network_data2["routes"], key=id_cmp) - self.assertEqual(len(routes1), len(routes2)) - for r1, r2 in zip(routes1, routes2): - self.assertMappingAlmostEqual( - r1, r2, ignore_keys={"itineraries"} + if (cmp := unordered_lists.get(k, "")) == "" or not isinstance( + v1, (Sequence, Mapping) + ): + self._assertAnyAlmostEqual( + v1, + v2, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, ) - its1 = sorted(r1["itineraries"], key=id_cmp) - its2 = sorted(r2["itineraries"], key=id_cmp) - self.assertEqual(len(its1), len(its2)) - for it1, it2 in zip(its1, its2): - self.assertMappingAlmostEqual(it1, it2) - - transfers1 = td1["transfers"] - transfers2 = td2["transfers"] - self.assertSetEqual(transfers1, transfers2) - - stopareas1 = td1["stopareas"] - stopareas2 = td2["stopareas"] - self.assertMappingAlmostEqual( - stopareas1, stopareas2, ignore_keys={"entrances"} - ) - - for sa_id, sa1_data in stopareas1.items(): - sa2_data = stopareas2[sa_id] - entrances1 = sorted(sa1_data["entrances"], key=id_cmp) - entrances2 = sorted(sa2_data["entrances"], key=id_cmp) - self.assertEqual(len(entrances1), len(entrances2)) - for e1, e2 in zip(entrances1, entrances2): - self.assertMappingAlmostEqual(e1, e2) + elif isinstance(v1, Sequence): + self.assertSequenceAlmostEqualIgnoreOrder( + v1, + v2, + cmp, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) + else: + self.assertSetEqual(set(v1.keys()), set(v2.keys())) + for ik in v1.keys(): + iv1 = v1[ik] + iv2 = v2[ik] + self.assertSequenceAlmostEqualIgnoreOrder( + iv1, + iv2, + cmp, + places, + unordered_lists=unordered_lists, + ignore_keys=ignore_keys, + ) From f7087a0c25c44e74f3c57a2f54cfa2cd94afe5c0 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Fri, 2 Feb 2024 10:14:47 +0300 Subject: [PATCH 06/15] Use stoparea ids instead of instances in transfers; save transfers only for good cities --- process_subways.py | 2 +- processors/_common.py | 15 +- processors/mapsme.py | 48 +++-- subway_structure.py | 122 +++++++----- tests/sample_data_for_outputs.py | 316 +++++++++++++++++++++++++++++++ tests/test_find_transfers.py | 30 +++ tests/test_mapsme_processor.py | 53 ++++++ tests/util.py | 2 +- 8 files changed, 520 insertions(+), 68 deletions(-) create mode 100644 tests/test_find_transfers.py create mode 100644 tests/test_mapsme_processor.py diff --git a/process_subways.py b/process_subways.py index ca71ed1..6f18453 100755 --- a/process_subways.py +++ b/process_subways.py @@ -490,7 +490,7 @@ def main() -> None: good_cities = validate_cities(cities) logging.info("Finding transfer stations") - transfers = find_transfers(osm, cities) + transfers = find_transfers(osm, good_cities) good_city_names = set(c.name for c in good_cities) logging.info( diff --git a/processors/_common.py b/processors/_common.py index e933719..edb19f4 100644 --- a/processors/_common.py +++ b/processors/_common.py @@ -91,18 +91,17 @@ def transit_to_dict( # transfers pairwise_transfers = set() - for stoparea_set in transfers: - stoparea_list = list(stoparea_set) - for first_i in range(len(stoparea_list) - 1): - for second_i in range(first_i + 1, len(stoparea_list)): - stoparea1_id = 
stoparea_list[first_i].id - stoparea2_id = stoparea_list[second_i].id + for stoparea_id_set in transfers: + stoparea_ids = sorted(stoparea_id_set) + for first_i in range(len(stoparea_ids) - 1): + for second_i in range(first_i + 1, len(stoparea_ids)): + stoparea1_id = stoparea_ids[first_i] + stoparea2_id = stoparea_ids[second_i] if all( st_id in data["stopareas"] for st_id in (stoparea1_id, stoparea2_id) ): - id1, id2 = sorted([stoparea1_id, stoparea2_id]) - pairwise_transfers.add((id1, id2)) + pairwise_transfers.add((stoparea1_id, stoparea2_id)) data["transfers"] = pairwise_transfers return data diff --git a/processors/mapsme.py b/processors/mapsme.py index b8818ea..2f3ec6f 100755 --- a/processors/mapsme.py +++ b/processors/mapsme.py @@ -4,10 +4,12 @@ import os from collections import defaultdict from subway_structure import ( + City, DISPLACEMENT_TOLERANCE, distance, el_center, Station, + TransfersT, ) from ._common import ( DEFAULT_INTERVAL, @@ -180,11 +182,12 @@ class MapsmeCache: logging.warning("Failed to save cache: %s", str(e)) -def process(cities, transfers, filename, cache_path): +def transit_data_to_mapsme( + cities: list[City], transfers: TransfersT, cache_path: str | None +) -> dict: """Generate all output and save to file. :param cities: List of City instances :param transfers: List of sets of StopArea.id - :param filename: Path to file to save the result :param cache_path: Path to json-file with good cities cache or None. """ @@ -362,18 +365,21 @@ def process(cities, transfers, filename, cache_path): pairwise_transfers = ( {} ) # (stoparea1_uid, stoparea2_uid) -> time; uid1 < uid2 - for t_set in transfers: - t = list(t_set) - for t_first in range(len(t) - 1): - for t_second in range(t_first + 1, len(t)): - stoparea1 = t[t_first] - stoparea2 = t[t_second] - if stoparea1.id in stops and stoparea2.id in stops: - uid1 = uid(stoparea1.id) - uid2 = uid(stoparea2.id) + for stoparea_id_set in transfers: + stoparea_ids = list(stoparea_id_set) + for i_first in range(len(stoparea_ids) - 1): + for i_second in range(i_first + 1, len(stoparea_ids)): + stoparea1_id = stoparea_ids[i_first] + stoparea2_id = stoparea_ids[i_second] + if stoparea1_id in stops and stoparea2_id in stops: + uid1 = uid(stoparea1_id) + uid2 = uid(stoparea2_id) uid1, uid2 = sorted([uid1, uid2]) transfer_time = TRANSFER_PENALTY + round( - distance(stoparea1.center, stoparea2.center) + distance( + stop_areas[stoparea1_id].center, + stop_areas[stoparea2_id].center, + ) / SPEED_ON_TRANSFER ) pairwise_transfers[(uid1, uid2)] = transfer_time @@ -392,13 +398,29 @@ def process(cities, transfers, filename, cache_path): "transfers": pairwise_transfers, "networks": networks, } + return result + +def process( + cities: list[City], + transfers: TransfersT, + filename: str, + cache_path: str | None, +): + """Generate all output and save to file. + :param cities: List of City instances + :param transfers: List of sets of StopArea.id + :param filename: Path to file to save the result + :param cache_path: Path to json-file with good cities cache or None. 
+ """ if not filename.lower().endswith("json"): filename = f"{filename}.json" + mapsme_transit = transit_data_to_mapsme(cities, transfers, cache_path) + with open(filename, "w", encoding="utf-8") as f: json.dump( - result, + mapsme_transit, f, indent=1, ensure_ascii=False, diff --git a/subway_structure.py b/subway_structure.py index 7946e47..c7e7327 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -3,6 +3,7 @@ from __future__ import annotations import math import re from collections import Counter, defaultdict +from collections.abc import Collection, Iterator from itertools import chain, islice from css_colours import normalize_colour @@ -45,6 +46,10 @@ used_entrances = set() START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*") +IdT = str # Type of feature ids +TransferT = set[IdT] # A transfer is a set of StopArea IDs +TransfersT = Collection[TransferT] + def get_start_end_times(opening_hours): """Very simplified method to parse OSM opening_hours tag. @@ -664,6 +669,14 @@ class Route: return None return osm_interval_to_seconds(v) + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route_stop in self: + stoparea = route_stop.stoparea + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + def __init__(self, relation, city, master=None): assert Route.is_route( relation, city.modes @@ -1465,6 +1478,14 @@ class RouteMaster: self.name = None self.interval = None + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route in self: + for stoparea in route.stopareas(): + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + def add(self, route, city): if not self.network: self.network = route.network @@ -1682,7 +1703,7 @@ class City: self.stop_areas = defaultdict( list ) # El_id → list of stop_area elements it belongs to - self.transfers = [] # List of lists of stop areas + self.transfers: TransfersT = [] # List of sets of stop areas self.station_ids = set() # Set of stations' uid self.stops_and_platforms = set() # Set of stops and platforms el_id self.recovery_data = None @@ -1787,18 +1808,19 @@ class City: else: stop_areas.append(el) - def make_transfer(self, sag): + def make_transfer(self, stoparea_group: dict) -> None: transfer = set() - for m in sag["members"]: + for m in stoparea_group["members"]: k = el_id(m) el = self.elements.get(k) if not el: - # A sag member may validly not belong to the city while - # the sag does - near the city bbox boundary + # A stoparea_group member may validly not belong to the city + # while the stoparea_group does - near the city bbox boundary continue if "tags" not in el: self.warn( - "An untagged object {} in a stop_area_group".format(k), sag + "An untagged object {} in a stop_area_group".format(k), + stoparea_group, ) continue if ( @@ -1825,7 +1847,7 @@ class City: k ) ) - stoparea.transfer = el_id(sag) + stoparea.transfer = el_id(stoparea_group) if len(transfer) > 1: self.transfers.append(transfer) @@ -1918,20 +1940,28 @@ class City: self.make_transfer(el) # Filter transfers, leaving only stations that belong to routes - used_stop_areas = set() - for rmaster in self.routes.values(): - for route in rmaster: - used_stop_areas.update([s.stoparea for s in route.stops]) - new_transfers = [] - for transfer in self.transfers: - new_tr = [s for s in transfer if s in used_stop_areas] - if len(new_tr) > 1: - new_transfers.append(new_tr) - self.transfers = new_transfers + own_stopareas = set(self.stopareas()) + 
+ self.transfers = [ + inner_transfer + for inner_transfer in ( + own_stopareas.intersection(transfer) + for transfer in self.transfers + ) + if len(inner_transfer) > 1 + ] def __iter__(self): return iter(self.routes.values()) + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route_master in self: + for stoparea in route_master.stopareas(): + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + @property def is_good(self): if not (self.errors or self.validate_called): @@ -2306,36 +2336,38 @@ class City: route.calculate_distances() -def find_transfers(elements, cities): +def find_transfers( + elements: list[dict], cities: Collection[City] +) -> TransfersT: + """As for now, two Cities may contain the same stoparea, but those + StopArea instances would have different python id. So we don't store + references to StopAreas, but only their ids. This is important at + inter-city interchanges. + """ + stop_area_groups = [ + el + for el in elements + if el["type"] == "relation" + and "members" in el + and el.get("tags", {}).get("public_transport") == "stop_area_group" + ] + + stopareas_in_cities_ids = set( + stoparea.id + for city in cities + if city.is_good + for stoparea in city.stopareas() + ) + transfers = [] - stop_area_groups = [] - for el in elements: - if ( - el["type"] == "relation" - and "members" in el - and el.get("tags", {}).get("public_transport") == "stop_area_group" - ): - stop_area_groups.append(el) - - # StopArea.id uniquely identifies a StopArea. We must ensure StopArea - # uniqueness since one stop_area relation may result in - # several StopArea instances at inter-city interchanges. - stop_area_ids = defaultdict(set) # el_id -> set of StopArea.id - stop_area_objects = dict() # StopArea.id -> one of StopArea instances - for city in cities: - for el, st in city.stations.items(): - stop_area_ids[el].update(sa.id for sa in st) - stop_area_objects.update((sa.id, sa) for sa in st) - - for sag in stop_area_groups: - transfer = set() - for m in sag["members"]: - k = el_id(m) - if k not in stop_area_ids: - continue - transfer.update( - stop_area_objects[sa_id] for sa_id in stop_area_ids[k] + for stop_area_group in stop_area_groups: + transfer: TransferT = set( + member_id + for member_id in ( + el_id(member) for member in stop_area_group["members"] ) + if member_id in stopareas_in_cities_ids + ) if len(transfer) > 1: transfers.append(transfer) return transfers diff --git a/tests/sample_data_for_outputs.py b/tests/sample_data_for_outputs.py index 29012d5..b50ddbe 100644 --- a/tests/sample_data_for_outputs.py +++ b/tests/sample_data_for_outputs.py @@ -21,6 +21,7 @@ metro_samples = [ }, ], "gtfs_dir": "assets/tiny_world_gtfs", + "transfers": [{"r1", "r2"}, {"r3", "r4"}], "json_dump": """ { "stopareas": { @@ -366,5 +367,320 @@ metro_samples = [ ] } """, + "mapsme_output": { + "stops": [ + { + "name": "Station 1", + "int_name": None, + "lat": 0.0, + "lon": 0.0, + "osm_type": "node", + "osm_id": 1, + "id": 8, + "entrances": [ + { + "osm_type": "node", + "osm_id": 1, + "lon": 0.0, + "lat": 0.0, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 1, + "lon": 0.0, + "lat": 0.0, + "distance": 60, + } + ], + }, + { + "name": "Station 2", + "int_name": None, + "lat": 0.0047037307, + "lon": 0.00470373068, + "osm_type": "node", + "osm_id": 2, + "id": 14, + "entrances": [ + { + "osm_type": "node", + "osm_id": 2, + "lon": 0.0047209447, + "lat": 0.004686516680000001, + "distance": 60, + } + ], + "exits": [ + { + 
"osm_type": "node", + "osm_id": 2, + "lon": 0.0047209447, + "lat": 0.004686516680000001, + "distance": 60, + } + ], + }, + { + "name": "Station 3", + "int_name": None, + "lat": 0.0097589171, + "lon": 0.01012040581, + "osm_type": "node", + "osm_id": 3, + "id": 30, + "entrances": [ + { + "osm_type": "node", + "osm_id": 201, + "lon": 0.01007169217, + "lat": 0.00967473055, + "distance": 68, + }, + { + "osm_type": "node", + "osm_id": 202, + "lon": 0.01018702716, + "lat": 0.00966936613, + "distance": 69, + }, + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 201, + "lon": 0.01007169217, + "lat": 0.00967473055, + "distance": 68, + }, + { + "osm_type": "node", + "osm_id": 202, + "lon": 0.01018702716, + "lat": 0.00966936613, + "distance": 69, + }, + ], + }, + { + "name": "Station 4", + "int_name": None, + "lat": 0.01, + "lon": 0.0, + "osm_type": "node", + "osm_id": 4, + "id": 32, + "entrances": [ + { + "osm_type": "node", + "osm_id": 205, + "lon": 0.000201163, + "lat": 0.01015484596, + "distance": 80, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 205, + "lon": 0.000201163, + "lat": 0.01015484596, + "distance": 80, + } + ], + }, + { + "name": "Station 5", + "int_name": None, + "lat": 0.00514739839, + "lon": 0.0047718624, + "osm_type": "node", + "osm_id": 5, + "id": 22, + "entrances": [ + { + "osm_type": "node", + "osm_id": 5, + "lon": 0.0047718624, + "lat": 0.00514739839, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 5, + "lon": 0.0047718624, + "lat": 0.00514739839, + "distance": 60, + } + ], + }, + { + "name": "Station 6", + "int_name": None, + "lat": 0.0, + "lon": 0.01, + "osm_type": "node", + "osm_id": 6, + "id": 48, + "entrances": [ + { + "osm_type": "node", + "osm_id": 6, + "lon": 0.01, + "lat": 0.0, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 6, + "lon": 0.01, + "lat": 0.0, + "distance": 60, + } + ], + }, + { + "name": "Station 7", + "int_name": None, + "lat": 0.010286367745, + "lon": 0.009716854315, + "osm_type": "node", + "osm_id": 7, + "id": 38, + "entrances": [ + { + "osm_type": "node", + "osm_id": 203, + "lon": 0.00959962338, + "lat": 0.01042574907, + "distance": 75, + }, + { + "osm_type": "node", + "osm_id": 204, + "lon": 0.00952183932, + "lat": 0.01034796501, + "distance": 76, + }, + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 203, + "lon": 0.00959962338, + "lat": 0.01042574907, + "distance": 75, + }, + { + "osm_type": "node", + "osm_id": 204, + "lon": 0.00952183932, + "lat": 0.01034796501, + "distance": 76, + }, + ], + }, + { + "name": "Station 8", + "int_name": None, + "lat": 0.014377764559999999, + "lon": 0.012405493905, + "osm_type": "node", + "osm_id": 8, + "id": 134, + "entrances": [ + { + "osm_type": "node", + "osm_id": 8, + "lon": 0.012391026016666667, + "lat": 0.01436273297, + "distance": 60, + } + ], + "exits": [ + { + "osm_type": "node", + "osm_id": 8, + "lon": 0.012391026016666667, + "lat": 0.01436273297, + "distance": 60, + } + ], + }, + ], + "transfers": [(14, 22, 81), (30, 38, 106)], + "networks": [ + { + "network": "Intersecting 2 metro lines", + "routes": [ + { + "type": "subway", + "ref": "1", + "name": "Blue Line", + "colour": "0000ff", + "route_id": 30, + "itineraries": [ + { + "stops": [[8, 0], [14, 67], [30, 141]], + "interval": 150, + }, + { + "stops": [[30, 0], [14, 74], [8, 141]], + "interval": 150, + }, + ], + }, + { + "type": "subway", + "ref": "2", + "name": "Red Line", + "colour": "ff0000", + "route_id": 28, + "itineraries": [ + { + "stops": [[32, 0], [22, 68], 
[48, 142]], + "interval": 150, + }, + { + "stops": [[48, 0], [22, 74], [32, 142]], + "interval": 150, + }, + ], + }, + ], + "agency_id": 1, + }, + { + "network": "One light rail line", + "routes": [ + { + "type": "light_rail", + "ref": "LR", + "name": "LR Line", + "colour": "ffffff", + "route_id": 22, + "itineraries": [ + { + "stops": [[38, 0], [134, 49]], + "interval": 150, + }, + { + "stops": [[134, 0], [38, 48]], + "interval": 150, + }, + ], + "casing": "a52a2a", + } + ], + "agency_id": 2, + }, + ], + }, }, ] diff --git a/tests/test_find_transfers.py b/tests/test_find_transfers.py new file mode 100644 index 0000000..bb46dc3 --- /dev/null +++ b/tests/test_find_transfers.py @@ -0,0 +1,30 @@ +from copy import deepcopy + +from tests.sample_data_for_outputs import metro_samples +from tests.util import TestCase, JsonLikeComparisonMixin + + +class TestTransfers(JsonLikeComparisonMixin, TestCase): + """Test that the validator provides expected set of transfers.""" + + def _test__find_transfers__for_sample(self, metro_sample: dict) -> None: + cities, transfers = self.prepare_cities(metro_sample) + expected_transfers = metro_sample["transfers"] + + self.assertSequenceAlmostEqualIgnoreOrder( + expected_transfers, + transfers, + cmp=lambda transfer_as_set: sorted(transfer_as_set), + ) + + def test__find_transfers(self) -> None: + sample1 = metro_samples[0] + + sample2 = deepcopy(metro_samples[0]) + # Make the second city invalid and thus exclude the inter-city transfer + sample2["cities_info"][1]["num_stations"] += 1 + sample2["transfers"] = [{"r1", "r2"}] + + for sample in sample1, sample2: + with self.subTest(msg=sample["name"]): + self._test__find_transfers__for_sample(sample) diff --git a/tests/test_mapsme_processor.py b/tests/test_mapsme_processor.py new file mode 100644 index 0000000..64eb9cb --- /dev/null +++ b/tests/test_mapsme_processor.py @@ -0,0 +1,53 @@ +from operator import itemgetter + +from processors.mapsme import transit_data_to_mapsme +from tests.sample_data_for_outputs import metro_samples +from tests.util import JsonLikeComparisonMixin, TestCase + + +class TestMapsme(JsonLikeComparisonMixin, TestCase): + """Test processors/mapsme.py""" + + def test__transit_data_to_mapsme(self) -> None: + for sample in metro_samples: + with self.subTest(msg=sample["name"]): + self._test__transit_data_to_mapsme__for_sample(sample) + + def _test__transit_data_to_mapsme__for_sample( + self, metro_sample: dict + ) -> None: + cities, transfers = self.prepare_cities(metro_sample) + calculated_mapsme_data = transit_data_to_mapsme( + cities, transfers, cache_path=None + ) + control_mapsme_data = metro_sample["mapsme_output"] + + self.assertSetEqual( + set(control_mapsme_data.keys()), + set(calculated_mapsme_data.keys()), + ) + + self.assertSequenceAlmostEqualIgnoreOrder( + control_mapsme_data["stops"], + calculated_mapsme_data["stops"], + cmp=itemgetter("id"), + unordered_lists={ + "entrances": lambda e: (e["osm_type"], e["osm_id"]), + "exits": lambda e: (e["osm_type"], e["osm_id"]), + }, + ) + + self.assertSequenceAlmostEqualIgnoreOrder( + control_mapsme_data["transfers"], + calculated_mapsme_data["transfers"], + ) + + self.assertSequenceAlmostEqualIgnoreOrder( + control_mapsme_data["networks"], + calculated_mapsme_data["networks"], + cmp=itemgetter("network"), + unordered_lists={ + "routes": itemgetter("route_id"), + "itineraries": lambda it: (it["stops"], it["interval"]), + }, + ) diff --git a/tests/util.py b/tests/util.py index b8e29a9..bfc3fd8 100644 --- a/tests/util.py +++ b/tests/util.py @@ 
-173,7 +173,7 @@ class JsonLikeComparisonMixin: self: TestCaseMixin, seq1: Sequence, seq2: Sequence, - cmp: Callable | None, + cmp: Callable | None = None, places: int = 10, *, unordered_lists: dict[str, Callable] | None = None, From 28f4c0d139891bb5a4e4a8ae31d6301206222808 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Wed, 21 Feb 2024 23:33:07 +0300 Subject: [PATCH 07/15] Special searching for backward counterparts for circular routes --- .gitignore | 1 + subway_io.py | 2 +- subway_structure.py | 456 ++++++++++++++---------- tests/assets/route_masters.osm | 45 ++- tests/assets/twin_routes.osm | 6 +- tests/sample_data_for_error_messages.py | 6 +- tests/test_route_master.py | 92 ++++- 7 files changed, 407 insertions(+), 201 deletions(-) diff --git a/.gitignore b/.gitignore index f2fb32f..129911a 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ tmp_html/ html/ .idea .DS_Store +.venv *.log *.json *.geojson diff --git a/subway_io.py b/subway_io.py index cbd252a..4b02596 100644 --- a/subway_io.py +++ b/subway_io.py @@ -88,7 +88,7 @@ def dump_yaml(city, f): routes = [] for route in city: stations = OrderedDict( - [(sa.transfer or sa.id, sa.name) for sa in route.stop_areas()] + [(sa.transfer or sa.id, sa.name) for sa in route.stopareas()] ) rte = { "type": route.mode, diff --git a/subway_structure.py b/subway_structure.py index c7e7327..d486d90 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -5,6 +5,7 @@ import re from collections import Counter, defaultdict from collections.abc import Collection, Iterator from itertools import chain, islice +from typing import TypeVar from css_colours import normalize_colour @@ -49,6 +50,7 @@ START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*") IdT = str # Type of feature ids TransferT = set[IdT] # A transfer is a set of StopArea IDs TransfersT = Collection[TransferT] +T = TypeVar("T") def get_start_end_times(opening_hours): @@ -626,7 +628,7 @@ class RouteStop: class Route: - """The longest route for a city with a unique ref.""" + """Corresponds to OSM "type=route" relation""" @staticmethod def is_route(el, modes): @@ -677,7 +679,12 @@ class Route: yield stoparea yielded_stopareas.add(stoparea) - def __init__(self, relation, city, master=None): + def __init__( + self, + relation: dict, + city: City, + master: dict | None = None, + ) -> None: assert Route.is_route( relation, city.modes ), f"The relation does not seem to be a route: {relation}" @@ -1440,7 +1447,8 @@ class Route: class RouteMaster: - def __init__(self, master=None): + def __init__(self, city: City, master: dict = None) -> None: + self.city = city self.routes = [] self.best = None self.id = el_id(master) @@ -1486,11 +1494,11 @@ class RouteMaster: yield stoparea yielded_stopareas.add(stoparea) - def add(self, route, city): + def add(self, route: Route) -> None: if not self.network: self.network = route.network elif route.network and route.network != self.network: - city.error( + self.city.error( 'Route has different network ("{}") from master "{}"'.format( route.network, self.network ), @@ -1500,7 +1508,7 @@ class RouteMaster: if not self.colour: self.colour = route.colour elif route.colour and route.colour != self.colour: - city.notice( + self.city.notice( 'Route "{}" has different colour from master "{}"'.format( route.colour, self.colour ), @@ -1510,7 +1518,7 @@ class RouteMaster: if not self.infill: self.infill = route.infill elif route.infill and route.infill != self.infill: - city.notice( + self.city.notice( ( f'Route "{route.infill}" has 
different infill colour ' f'from master "{self.infill}"' @@ -1521,7 +1529,7 @@ class RouteMaster: if not self.ref: self.ref = route.ref elif route.ref != self.ref: - city.notice( + self.city.notice( 'Route "{}" has different ref from master "{}"'.format( route.ref, self.ref ), @@ -1534,7 +1542,7 @@ class RouteMaster: if not self.mode: self.mode = route.mode elif route.mode != self.mode: - city.error( + self.city.error( "Incompatible PT mode: master has {} and route has {}".format( self.mode, route.mode ), @@ -1568,8 +1576,8 @@ class RouteMaster: return [route for route in self if len(route) >= 2] def find_twin_routes(self) -> dict[Route, Route]: - """Two routes are twins if they have the same end stations - and opposite directions, and the number of stations is + """Two non-circular routes are twins if they have the same end + stations and opposite directions, and the number of stations is the same or almost the same. We'll then find stops that are present in one direction and is missing in another direction - to warn. """ @@ -1581,8 +1589,6 @@ class RouteMaster: continue # Difficult to calculate. TODO(?) in the future if route in twin_routes: continue - if len(route) < 2: - continue route_transfer_ids = set(route.get_transfers_sequence()) ends = route.get_end_transfers() @@ -1617,15 +1623,253 @@ class RouteMaster: return twin_routes - def stop_areas(self): - """Returns a list of all stations on all route variants.""" - seen_ids = set() - for route in self.routes: - for stop in route: - st = stop.stoparea - if st.id not in seen_ids: - seen_ids.add(st.id) - yield st + def check_return_routes(self) -> None: + """Check if a route has return direction, and if twin routes + miss stations. + """ + meaningful_routes = self.get_meaningful_routes() + + if len(meaningful_routes) == 0: + self.city.error( + f"An empty route master {self.id}. " + "Please set construction:route if it is under construction" + ) + elif len(meaningful_routes) == 1: + log_function = ( + self.city.error + if not self.best.is_circular + else self.city.notice + ) + log_function( + "Only one route in route_master. 
" + "Please check if it needs a return route", + self.best.element, + ) + else: + self.check_return_circular_routes() + self.check_return_noncircular_routes() + + def check_return_noncircular_routes(self) -> None: + routes = [ + route + for route in self.get_meaningful_routes() + if not route.is_circular + ] + all_ends = {route.get_end_transfers(): route for route in routes} + for route in routes: + ends = route.get_end_transfers() + if ends[::-1] not in all_ends: + self.city.notice( + "Route does not have a return direction", route.element + ) + + twin_routes = self.find_twin_routes() + for route1, route2 in twin_routes.items(): + if route1.id > route2.id: + continue # to process a pair of routes only once + # and to ensure the order of routes in the pair + self.alert_twin_routes_differ(route1, route2) + + def check_return_circular_routes(self) -> None: + routes = { + route + for route in self.get_meaningful_routes() + if route.is_circular + } + routes_having_backward = set() + + for route in routes: + if route in routes_having_backward: + continue + transfer_sequence1 = [ + stop.stoparea.transfer or stop.stoparea.id for stop in route + ] + transfer_sequence1.pop() + for potential_backward_route in routes - {route}: + transfer_sequence2 = [ + stop.stoparea.transfer or stop.stoparea.id + for stop in potential_backward_route + ][ + -2::-1 + ] # truncate repeated first stop and reverse + common_subsequence = self.find_common_circular_subsequence( + transfer_sequence1, transfer_sequence2 + ) + if len(common_subsequence) >= 0.8 * min( + len(transfer_sequence1), len(transfer_sequence2) + ): + routes_having_backward.add(route) + routes_having_backward.add(potential_backward_route) + break + + for route in routes - routes_having_backward: + self.city.notice( + "Route does not have a return direction", route.element + ) + + @staticmethod + def find_common_circular_subsequence( + seq1: list[T], seq2: list[T] + ) -> list[T]: + """seq1 and seq2 are supposed to be stops of some circular routes. + Prerequisites to rely on the result: + - elements of each sequence are not repeated + - the order of stations is not violated. + Under these conditions we don't need LCS algorithm. Linear scan is + sufficient. 
+ """ + i1, i2 = -1, -1 + for i1, x in enumerate(seq1): + try: + i2 = seq2.index(x) + except ValueError: + continue + else: + # x is found both in seq1 and seq2 + break + + if i2 == -1: + return [] + + # Shift cyclically so that the common element takes the first position + # both in seq1 and seq2 + seq1 = seq1[i1:] + seq1[:i1] + seq2 = seq2[i2:] + seq2[:i2] + + common_subsequence = [] + i2 = 0 + for x in seq1: + try: + i2 = seq2.index(x, i2) + except ValueError: + continue + common_subsequence.append(x) + i2 += 1 + if i2 >= len(seq2): + break + return common_subsequence + + def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: + """Arguments are that route1.id < route2.id""" + ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) = self.calculate_twin_routes_diff(route1, route2) + + for st in stops_missing_from_route1: + if ( + not route1.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route1.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.city.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"in the {route2.id} but not included in {route1.id}", + route1.element, + ) + + for st in stops_missing_from_route2: + if ( + not route2.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route2.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.city.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"in the {route1.id} but not included in {route2.id}", + route2.element, + ) + + for st1, st2 in stops_that_dont_match: + if ( + st1.stoparea.station == st2.stoparea.station + or distance(st1.stop, st2.stop) < SUGGEST_TRANSFER_MIN_DISTANCE + ): + self.city.notice( + "Should there be one stoparea or a transfer between " + f"{st1.stoparea.station.name} {st1.stop} and " + f"{st2.stoparea.station.name} {st2.stop}?", + route1.element, + ) + + @staticmethod + def calculate_twin_routes_diff(route1: Route, route2: Route) -> tuple: + """Wagner–Fischer algorithm for stops diff in two twin routes.""" + + stops1 = route1.stops + stops2 = route2.stops[::-1] + + def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: + return ( + stop1.stoparea == stop2.stoparea + or stop1.stoparea.transfer is not None + and stop1.stoparea.transfer == stop2.stoparea.transfer + ) + + d = [[0] * (len(stops2) + 1) for _ in range(len(stops1) + 1)] + d[0] = list(range(len(stops2) + 1)) + for i in range(len(stops1) + 1): + d[i][0] = i + + for i in range(1, len(stops1) + 1): + for j in range(1, len(stops2) + 1): + d[i][j] = ( + d[i - 1][j - 1] + if stops_match(stops1[i - 1], stops2[j - 1]) + else min((d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])) + 1 + ) + + stops_missing_from_route1: list[RouteStop] = [] + stops_missing_from_route2: list[RouteStop] = [] + stops_that_dont_match: list[tuple[RouteStop, RouteStop]] = [] + + i = len(stops1) + j = len(stops2) + while not (i == 0 and j == 0): + action = None + if i > 0 and j > 0: + match = stops_match(stops1[i - 1], stops2[j - 1]) + if match and d[i - 1][j - 1] == d[i][j]: + action = "no" + elif not match and d[i - 1][j - 1] + 1 == d[i][j]: + action = "change" + if not action and i > 0 and d[i - 1][j] + 1 == d[i][j]: + action = "add_2" + if not action and j > 0 and d[i][j - 1] + 1 == d[i][j]: + action = "add_1" + + match action: + case 
"add_1": + stops_missing_from_route1.append(stops2[j - 1]) + j -= 1 + case "add_2": + stops_missing_from_route2.append(stops1[i - 1]) + i -= 1 + case _: + if action == "change": + stops_that_dont_match.append( + (stops1[i - 1], stops2[j - 1]) + ) + i -= 1 + j -= 1 + return ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) def __len__(self): return len(self.routes) @@ -1923,8 +2167,8 @@ class City: k = el_id(master) if master else route.ref if k not in self.routes: - self.routes[k] = RouteMaster(master) - self.routes[k].add(route, self) + self.routes[k] = RouteMaster(self, master) + self.routes[k].add(route) # Sometimes adding a route to a newly initialized RouteMaster # can fail @@ -2055,166 +2299,6 @@ class City: f"relations: {format_elid_list(not_in_sa)}" ) - def check_return_routes(self, rmaster: RouteMaster) -> None: - """Check if a route has return direction, and if twin routes - miss stations. - """ - meaningful_routes = rmaster.get_meaningful_routes() - - if len(meaningful_routes) == 0: - self.error( - f"An empty route master {rmaster.id}. " - "Please set construction:route if it is under construction" - ) - elif len(meaningful_routes) == 1: - log_function = ( - self.error if not rmaster.best.is_circular else self.notice - ) - log_function( - "Only one route in route_master. " - "Please check if it needs a return route", - rmaster.best.element, - ) - else: - all_ends = { - route.get_end_transfers(): route for route in meaningful_routes - } - for route in meaningful_routes: - ends = route.get_end_transfers() - if ends[::-1] not in all_ends: - self.notice( - "Route does not have a return direction", route.element - ) - - twin_routes = rmaster.find_twin_routes() - for route1, route2 in twin_routes.items(): - if route1.id > route2.id: - continue # to process a pair of routes only once - # and to ensure the order of routes in the pair - self.alert_twin_routes_differ(route1, route2) - - def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: - """Arguments are that route1.id < route2.id""" - ( - stops_missing_from_route1, - stops_missing_from_route2, - stops_that_dont_match, - ) = self.calculate_twin_routes_diff(route1, route2) - - for st in stops_missing_from_route1: - if ( - not route1.are_tracks_complete() - or ( - projected_point := project_on_line( - st.stoparea.center, route1.tracks - )["projected_point"] - ) - is not None - and distance(st.stoparea.center, projected_point) - <= MAX_DISTANCE_STOP_TO_LINE - ): - self.notice( - f"Stop {st.stoparea.station.name} {st.stop} is included " - f"in the {route2.id} but not included in {route1.id}", - route1.element, - ) - - for st in stops_missing_from_route2: - if ( - not route2.are_tracks_complete() - or ( - projected_point := project_on_line( - st.stoparea.center, route2.tracks - )["projected_point"] - ) - is not None - and distance(st.stoparea.center, projected_point) - <= MAX_DISTANCE_STOP_TO_LINE - ): - self.notice( - f"Stop {st.stoparea.station.name} {st.stop} is included " - f"in the {route1.id} but not included in {route2.id}", - route2.element, - ) - - for st1, st2 in stops_that_dont_match: - if ( - st1.stoparea.station == st2.stoparea.station - or distance(st1.stop, st2.stop) < SUGGEST_TRANSFER_MIN_DISTANCE - ): - self.notice( - "Should there be one stoparea or a transfer between " - f"{st1.stoparea.station.name} {st1.stop} and " - f"{st2.stoparea.station.name} {st2.stop}?", - route1.element, - ) - - @staticmethod - def calculate_twin_routes_diff(route1: Route, route2: Route) 
-> tuple: - """Wagner–Fischer algorithm for stops diff in two twin routes.""" - - stops1 = route1.stops - stops2 = route2.stops[::-1] - - def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: - return ( - stop1.stoparea == stop2.stoparea - or stop1.stoparea.transfer is not None - and stop1.stoparea.transfer == stop2.stoparea.transfer - ) - - d = [[0] * (len(stops2) + 1) for _ in range(len(stops1) + 1)] - d[0] = list(range(len(stops2) + 1)) - for i in range(len(stops1) + 1): - d[i][0] = i - - for i in range(1, len(stops1) + 1): - for j in range(1, len(stops2) + 1): - d[i][j] = ( - d[i - 1][j - 1] - if stops_match(stops1[i - 1], stops2[j - 1]) - else min((d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])) + 1 - ) - - stops_missing_from_route1: list[RouteStop] = [] - stops_missing_from_route2: list[RouteStop] = [] - stops_that_dont_match: list[tuple[RouteStop, RouteStop]] = [] - - i = len(stops1) - j = len(stops2) - while not (i == 0 and j == 0): - action = None - if i > 0 and j > 0: - match = stops_match(stops1[i - 1], stops2[j - 1]) - if match and d[i - 1][j - 1] == d[i][j]: - action = "no" - elif not match and d[i - 1][j - 1] + 1 == d[i][j]: - action = "change" - if not action and i > 0 and d[i - 1][j] + 1 == d[i][j]: - action = "add_2" - if not action and j > 0 and d[i][j - 1] + 1 == d[i][j]: - action = "add_1" - - match action: - case "add_1": - stops_missing_from_route1.append(stops2[j - 1]) - j -= 1 - case "add_2": - stops_missing_from_route2.append(stops1[i - 1]) - i -= 1 - case _: - if action == "change": - stops_that_dont_match.append( - (stops1[i - 1], stops2[j - 1]) - ) - i -= 1 - j -= 1 - return ( - stops_missing_from_route1, - stops_missing_from_route2, - stops_that_dont_match, - ) - def validate_lines(self): self.found_light_lines = len( [x for x in self.routes.values() if x.mode != "subway"] @@ -2267,9 +2351,9 @@ class City: for rmaster in self.routes.values(): networks[str(rmaster.network)] += 1 if not self.overground: - self.check_return_routes(rmaster) + rmaster.check_return_routes() route_stations = set() - for sa in rmaster.stop_areas(): + for sa in rmaster.stopareas(): route_stations.add(sa.transfer or sa.id) unused_stations.discard(sa.station.id) self.found_stations += len(route_stations) diff --git a/tests/assets/route_masters.osm b/tests/assets/route_masters.osm index 0635a2b..1d466c8 100644 --- a/tests/assets/route_masters.osm +++ b/tests/assets/route_masters.osm @@ -194,7 +194,7 @@ - + @@ -202,8 +202,8 @@ - + @@ -272,12 +272,27 @@ - - - + + + + + - - + + + + + + + + + + + + + + + @@ -524,4 +539,20 @@ + + + + + + + + + + + + + + + + diff --git a/tests/assets/twin_routes.osm b/tests/assets/twin_routes.osm index e2e7f42..38cbe6c 100644 --- a/tests/assets/twin_routes.osm +++ b/tests/assets/twin_routes.osm @@ -288,10 +288,10 @@ - - - + + + diff --git a/tests/sample_data_for_error_messages.py b/tests/sample_data_for_error_messages.py index 2e20c73..0f5a434 100644 --- a/tests/sample_data_for_error_messages.py +++ b/tests/sample_data_for_error_messages.py @@ -326,9 +326,9 @@ metro_samples = [ "xml_file": "assets/route_masters.osm", "cities_info": [ { - "num_stations": (3 + 3 + 3 + 5 + 3 + 3 + 4) + "num_stations": (3 + 3 + 3 + 5 + 3 + 3 + 4 + 3) + (3 + 3 + 3 + 3 + 3 + 3 + 4), - "num_lines": 7 + 7, + "num_lines": 8 + 7, "num_interchanges": 0 + 1, }, ], @@ -350,6 +350,8 @@ metro_samples = [ 'Route does not have a return direction (relation 209, "5: 1-2-3")', # noqa: E501 'Route does not have a return direction (relation 210, "5: 2-1")', # noqa: E501 'Only one route in 
route_master. Please check if it needs a return route (relation 213, "C3: 1-2-3-8-1")', # noqa: E501 + 'Route does not have a return direction (relation 168, "C5: 1-3-5-1")', # noqa: E501 + 'Route does not have a return direction (relation 169, "C5: 3-5-1-3")', # noqa: E501 ], }, ] diff --git a/tests/test_route_master.py b/tests/test_route_master.py index 1bab617..22d2f8b 100644 --- a/tests/test_route_master.py +++ b/tests/test_route_master.py @@ -1,9 +1,97 @@ -from tests.util import TestCase - +from subway_structure import RouteMaster from tests.sample_data_for_twin_routes import metro_samples +from tests.util import TestCase class TestRouteMaster(TestCase): + def test__find_common_circular_subsequence(self) -> None: + cases = [ + { # the 1st sequence is empty + "sequence1": [], + "sequence2": [1, 2, 3, 4], + "answer": [], + }, + { # the 2nd sequence is empty + "sequence1": [1, 2, 3, 4], + "sequence2": [], + "answer": [], + }, + { # equal sequences + "sequence1": [1, 2, 3, 4], + "sequence2": [1, 2, 3, 4], + "answer": [1, 2, 3, 4], + }, + { # one sequence is a cyclic shift of the other + "sequence1": [1, 2, 3, 4], + "sequence2": [4, 1, 2, 3], + "answer": [1, 2, 3, 4], + }, + { # the 2nd sequence is a subsequence of the 1st; equal ends + "sequence1": [1, 2, 3, 4], + "sequence2": [1, 2, 4], + "answer": [1, 2, 4], + }, + { # the 1st sequence is a subsequence of the 2nd; equal ends + "sequence1": [1, 2, 4], + "sequence2": [1, 2, 3, 4], + "answer": [1, 2, 4], + }, + { # the 2nd sequence is an innter subsequence of the 1st + "sequence1": [1, 2, 3, 4], + "sequence2": [2, 3], + "answer": [2, 3], + }, + { # the 1st sequence is an inner subsequence of the 2nd + "sequence1": [2, 3], + "sequence2": [1, 2, 3, 4], + "answer": [2, 3], + }, + { # the 2nd sequence is a continuation of the 1st + "sequence1": [1, 2, 3, 4], + "sequence2": [4, 5, 6], + "answer": [4], + }, + { # the 1st sequence is a continuation of the 2nd + "sequence1": [4, 5, 6], + "sequence2": [1, 2, 3, 4], + "answer": [4], + }, + { # no common elements + "sequence1": [1, 2, 3, 4], + "sequence2": [5, 6, 7], + "answer": [], + }, + { # one sequence is the reversed other + "sequence1": [1, 2, 3, 4], + "sequence2": [4, 3, 2, 1], + "answer": [1, 2], + }, + { # the 2nd is a subsequence of shifted 1st + "sequence1": [1, 2, 3, 4], + "sequence2": [2, 4, 1], + "answer": [1, 2, 4], + }, + { # the 1st is a subsequence of shifted 2nd + "sequence1": [2, 4, 1], + "sequence2": [1, 2, 3, 4], + "answer": [2, 4, 1], + }, + { # mixed case: few common elements + "sequence1": [1, 2, 4], + "sequence2": [2, 3, 4], + "answer": [2, 4], + }, + ] + + for i, case in enumerate(cases): + with self.subTest(f"case#{i}"): + self.assertListEqual( + case["answer"], + RouteMaster.find_common_circular_subsequence( + case["sequence1"], case["sequence2"] + ), + ) + def _test_find_twin_routes_for_network(self, metro_sample: dict) -> None: cities, transfers = self.prepare_cities(metro_sample) city = cities[0] From c2f2956da1131ac51bf253b0eecb747c3560bba9 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Tue, 27 Feb 2024 14:59:51 +0300 Subject: [PATCH 08/15] Add type aliases, declarations and annotations --- css_colours.py | 2 +- process_subways.py | 58 +++++----- processors/_common.py | 10 +- processors/gtfs.py | 23 ++-- processors/mapsme.py | 71 +++++++----- subway_io.py | 37 +++--- subway_structure.py | 256 +++++++++++++++++++++++------------------- validation_to_html.py | 7 +- 8 files changed, 249 insertions(+), 215 deletions(-) diff --git a/css_colours.py b/css_colours.py 
index 7218054..170d390 100644 --- a/css_colours.py +++ b/css_colours.py @@ -152,7 +152,7 @@ CSS_COLOURS = { } -def normalize_colour(c): +def normalize_colour(c: str | None) -> str | None: if not c: return None c = c.strip().lower() diff --git a/process_subways.py b/process_subways.py index 6f18453..3726f3a 100755 --- a/process_subways.py +++ b/process_subways.py @@ -25,8 +25,10 @@ from subway_structure import ( CriticalValidationError, find_transfers, get_unused_subway_entrances_geojson, + LonLat, MODES_OVERGROUND, MODES_RAPID, + OsmElementT, ) DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k" @@ -36,8 +38,6 @@ DEFAULT_CITIES_INFO_URL = ( ) BAD_MARK = "[bad]" -Point = tuple[float, float] - def compose_overpass_request( overground: bool, bboxes: list[list[float]] @@ -68,7 +68,7 @@ def compose_overpass_request( def overpass_request( overground: bool, overpass_api: str, bboxes: list[list[float]] -) -> list[dict]: +) -> list[OsmElementT]: query = compose_overpass_request(overground, bboxes) url = f"{overpass_api}?data={urllib.parse.quote(query)}" response = urllib.request.urlopen(url, timeout=1000) @@ -79,7 +79,7 @@ def overpass_request( def multi_overpass( overground: bool, overpass_api: str, bboxes: list[list[float]] -) -> list[dict]: +) -> list[OsmElementT]: SLICE_SIZE = 10 INTERREQUEST_WAIT = 5 # in seconds result = [] @@ -96,8 +96,8 @@ def slugify(name: str) -> str: def get_way_center( - element: dict, node_centers: dict[int, Point] -) -> Point | None: + element: OsmElementT, node_centers: dict[int, LonLat] +) -> LonLat | None: """ :param element: dict describing OSM element :param node_centers: osm_id => (lat, lon) @@ -107,7 +107,7 @@ def get_way_center( # If elements have been queried via overpass-api with # 'out center;' clause then ways already have 'center' attribute if "center" in element: - return element["center"]["lat"], element["center"]["lon"] + return element["center"]["lon"], element["center"]["lat"] if "nodes" not in element: return None @@ -131,22 +131,22 @@ def get_way_center( count += 1 if count == 0: return None - element["center"] = {"lat": center[0] / count, "lon": center[1] / count} - return element["center"]["lat"], element["center"]["lon"] + element["center"] = {"lat": center[1] / count, "lon": center[0] / count} + return element["center"]["lon"], element["center"]["lat"] def get_relation_center( - element: dict, - node_centers: dict[int, Point], - way_centers: dict[int, Point], - relation_centers: dict[int, Point], + element: OsmElementT, + node_centers: dict[int, LonLat], + way_centers: dict[int, LonLat], + relation_centers: dict[int, LonLat], ignore_unlocalized_child_relations: bool = False, -) -> Point | None: +) -> LonLat | None: """ :param element: dict describing OSM element - :param node_centers: osm_id => (lat, lon) - :param way_centers: osm_id => (lat, lon) - :param relation_centers: osm_id => (lat, lon) + :param node_centers: osm_id => LonLat + :param way_centers: osm_id => LonLat + :param relation_centers: osm_id => LonLat :param ignore_unlocalized_child_relations: if a member that is a relation has no center, skip it and calculate center based on member nodes, ways and other, "localized" (with known centers), relations @@ -159,7 +159,7 @@ def get_relation_center( # of other relations (e.g., route_master, stop_area_group or # stop_area with only members that are multipolygons) if "center" in element: - return element["center"]["lat"], element["center"]["lon"] + return element["center"]["lon"], element["center"]["lat"] center = [0, 
0] count = 0 @@ -186,25 +186,25 @@ def get_relation_center( count += 1 if count == 0: return None - element["center"] = {"lat": center[0] / count, "lon": center[1] / count} - return element["center"]["lat"], element["center"]["lon"] + element["center"] = {"lat": center[1] / count, "lon": center[0] / count} + return element["center"]["lon"], element["center"]["lat"] -def calculate_centers(elements: list[dict]) -> None: +def calculate_centers(elements: list[OsmElementT]) -> None: """Adds 'center' key to each way/relation in elements, except for empty ways or relations. Relies on nodes-ways-relations order in the elements list. """ - nodes: dict[int, Point] = {} # id => (lat, lon) - ways: dict[int, Point] = {} # id => (lat, lon) - relations: dict[int, Point] = {} # id => (lat, lon) + nodes: dict[int, LonLat] = {} # id => LonLat + ways: dict[int, LonLat] = {} # id => approx center LonLat + relations: dict[int, LonLat] = {} # id => approx center LonLat - unlocalized_relations = [] # 'unlocalized' means the center of the - # relation has not been calculated yet + unlocalized_relations: list[OsmElementT] = [] # 'unlocalized' means + # the center of the relation has not been calculated yet for el in elements: if el["type"] == "node": - nodes[el["id"]] = (el["lat"], el["lon"]) + nodes[el["id"]] = (el["lon"], el["lat"]) elif el["type"] == "way": if center := get_way_center(el, nodes): ways[el["id"]] = center @@ -216,7 +216,7 @@ def calculate_centers(elements: list[dict]) -> None: def iterate_relation_centers_calculation( ignore_unlocalized_child_relations: bool, - ) -> list[dict]: + ) -> list[OsmElementT]: unlocalized_relations_upd = [] for rel in unlocalized_relations: if center := get_relation_center( @@ -244,7 +244,7 @@ def calculate_centers(elements: list[dict]) -> None: def add_osm_elements_to_cities( - osm_elements: list[dict], cities: list[City] + osm_elements: list[OsmElementT], cities: list[City] ) -> None: for el in osm_elements: for c in cities: diff --git a/processors/_common.py b/processors/_common.py index edb19f4..d60ff07 100644 --- a/processors/_common.py +++ b/processors/_common.py @@ -1,6 +1,4 @@ -from typing import List, Set - -from subway_structure import City, el_center, StopArea +from subway_structure import City, el_center, TransfersT DEFAULT_INTERVAL = 2.5 * 60 # seconds KMPH_TO_MPS = 1 / 3.6 # km/h to m/s conversion multiplier @@ -8,14 +6,12 @@ SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS # m/s TRANSFER_PENALTY = 30 # seconds -def format_colour(colour): +def format_colour(colour: str | None) -> str | None: """Truncate leading # sign.""" return colour[1:] if colour else None -def transit_to_dict( - cities: List[City], transfers: List[Set[StopArea]] -) -> dict: +def transit_to_dict(cities: list[City], transfers: TransfersT) -> dict: """Get data for good cities as a dictionary.""" data = { "stopareas": {}, # stoparea id => stoparea data diff --git a/processors/gtfs.py b/processors/gtfs.py index 5dc3952..463443e 100644 --- a/processors/gtfs.py +++ b/processors/gtfs.py @@ -3,7 +3,6 @@ from functools import partial from io import BytesIO, StringIO from itertools import permutations from tarfile import TarFile, TarInfo -from typing import List, Optional, Set from zipfile import ZipFile from ._common import ( @@ -16,7 +15,7 @@ from ._common import ( from subway_structure import ( City, distance, - StopArea, + TransfersT, ) @@ -133,13 +132,13 @@ GTFS_COLUMNS = { } -def round_coords(coords_tuple): +def round_coords(coords_tuple: tuple) -> tuple: return tuple( map(lambda coord: round(coord, 
COORDINATE_PRECISION), coords_tuple) ) -def transit_data_to_gtfs(data): +def transit_data_to_gtfs(data: dict) -> dict: # Keys correspond GTFS file names gtfs_data = {key: [] for key in GTFS_COLUMNS.keys()} @@ -313,14 +312,14 @@ def transit_data_to_gtfs(data): def process( - cities: List[City], - transfers: List[Set[StopArea]], + cities: list[City], + transfers: TransfersT, filename: str, - cache_path: str, -): + cache_path: str | None, +) -> None: """Generate all output and save to file. - :param cities: List of City instances - :param transfers: List of sets of StopArea.id + :param cities: list of City instances + :param transfers: all collected transfers in the world :param filename: Path to file to save the result :param cache_path: Path to json-file with good cities cache or None. """ @@ -344,9 +343,7 @@ def dict_to_row(dict_data: dict, record_type: str) -> list: ] -def make_gtfs( - filename: str, gtfs_data: dict, fmt: Optional[str] = None -) -> None: +def make_gtfs(filename: str, gtfs_data: dict, fmt: str | None = None) -> None: if not fmt: fmt = "tar" if filename.endswith(".tar") else "zip" diff --git a/processors/mapsme.py b/processors/mapsme.py index 2f3ec6f..e87ffe0 100755 --- a/processors/mapsme.py +++ b/processors/mapsme.py @@ -2,13 +2,19 @@ import json import logging import os from collections import defaultdict +from collections.abc import Callable +from typing import Any, TypeAlias from subway_structure import ( City, DISPLACEMENT_TOLERANCE, distance, el_center, + IdT, + LonLat, + OsmElementT, Station, + StopArea, TransfersT, ) from ._common import ( @@ -19,14 +25,16 @@ from ._common import ( TRANSFER_PENALTY, ) - OSM_TYPES = {"n": (0, "node"), "w": (2, "way"), "r": (3, "relation")} ENTRANCE_PENALTY = 60 # seconds SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS # m/s SPEED_ON_LINE = 40 * KMPH_TO_MPS # m/s +# (stoparea1_uid, stoparea2_uid) -> seconds; stoparea1_uid < stoparea2_uid +TransferTimesT: TypeAlias = dict[tuple[int, int], int] -def uid(elid, typ=None): + +def uid(elid: IdT, typ: str | None = None) -> int: t = elid[0] osm_id = int(elid[1:]) if not typ: @@ -39,24 +47,24 @@ def uid(elid, typ=None): class DummyCache: """This class may be used when you need to omit all cache processing""" - def __init__(self, cache_path, cities): + def __init__(self, cache_path: str, cities: list[City]) -> None: pass - def __getattr__(self, name): + def __getattr__(self, name: str) -> Callable[..., None]: """This results in that a call to any method effectively does nothing and does not generate exceptions.""" - def method(*args, **kwargs): + def method(*args, **kwargs) -> None: return None return method -def if_object_is_used(method): +def if_object_is_used(method: Callable) -> Callable: """Decorator to skip method execution under certain condition. 
Relies on "is_used" object property.""" - def inner(self, *args, **kwargs): + def inner(self, *args, **kwargs) -> Any: if not self.is_used: return return method(self, *args, **kwargs) @@ -65,7 +73,7 @@ def if_object_is_used(method): class MapsmeCache: - def __init__(self, cache_path, cities): + def __init__(self, cache_path: str, cities: list[City]) -> None: if not cache_path: # Cache is not used, # all actions with cache must be silently skipped @@ -90,7 +98,7 @@ class MapsmeCache: self.city_dict = {c.name: c for c in cities} self.good_city_names = {c.name for c in cities if c.is_good} - def _is_cached_city_usable(self, city): + def _is_cached_city_usable(self, city: City) -> bool: """Check if cached stations still exist in osm data and not moved far away. """ @@ -105,8 +113,9 @@ class MapsmeCache: ): return False station_coords = el_center(city_station) - cached_station_coords = tuple( - cached_stoparea[coord] for coord in ("lon", "lat") + cached_station_coords = ( + cached_stoparea["lon"], + cached_stoparea["lat"], ) displacement = distance(station_coords, cached_station_coords) if displacement > DISPLACEMENT_TOLERANCE: @@ -115,7 +124,9 @@ class MapsmeCache: return True @if_object_is_used - def provide_stops_and_networks(self, stops, networks): + def provide_stops_and_networks( + self, stops: dict, networks: list[dict] + ) -> None: """Put stops and networks for bad cities into containers passed as arguments.""" for city in self.city_dict.values(): @@ -128,7 +139,7 @@ class MapsmeCache: self.recovered_city_names.add(city.name) @if_object_is_used - def provide_transfers(self, transfers): + def provide_transfers(self, transfers: TransferTimesT) -> None: """Add transfers from usable cached cities to 'transfers' dict passed as argument.""" for city_name in self.recovered_city_names: @@ -138,7 +149,7 @@ class MapsmeCache: transfers[(stop1_uid, stop2_uid)] = transfer_time @if_object_is_used - def initialize_good_city(self, city_name, network): + def initialize_good_city(self, city_name: str, network: dict) -> None: """Create/replace one cache element with new data container. This should be done for each good city.""" self.cache[city_name] = { @@ -149,20 +160,22 @@ class MapsmeCache: } @if_object_is_used - def link_stop_with_city(self, stoparea_id, city_name): + def link_stop_with_city(self, stoparea_id: IdT, city_name: str) -> None: """Remember that some stop_area is used in a city.""" stoparea_uid = uid(stoparea_id) self.stop_cities[stoparea_uid].add(city_name) @if_object_is_used - def add_stop(self, stoparea_id, st): + def add_stop(self, stoparea_id: IdT, st: dict) -> None: """Add stoparea to the cache of each city the stoparea is in.""" stoparea_uid = uid(stoparea_id) for city_name in self.stop_cities[stoparea_uid]: self.cache[city_name]["stops"][stoparea_id] = st @if_object_is_used - def add_transfer(self, stoparea1_uid, stoparea2_uid, transfer_time): + def add_transfer( + self, stoparea1_uid: int, stoparea2_uid: int, transfer_time: int + ) -> None: """If a transfer is inside a good city, add it to the city's cache.""" for city_name in ( self.good_city_names @@ -174,7 +187,7 @@ class MapsmeCache: ) @if_object_is_used - def save(self): + def save(self) -> None: try: with open(self.cache_path, "w", encoding="utf-8") as f: json.dump(self.cache, f, ensure_ascii=False) @@ -191,7 +204,9 @@ def transit_data_to_mapsme( :param cache_path: Path to json-file with good cities cache or None. 
""" - def find_exits_for_platform(center, nodes): + def find_exits_for_platform( + center: LonLat, nodes: list[OsmElementT] + ) -> list[OsmElementT]: exits = [] min_distance = None for n in nodes: @@ -212,8 +227,8 @@ def transit_data_to_mapsme( cache = MapsmeCache(cache_path, cities) - stop_areas = {} # stoparea el_id -> StopArea instance - stops = {} # stoparea el_id -> stop jsonified data + stop_areas: dict[IdT, StopArea] = {} + stops: dict[IdT, dict] = {} # stoparea el_id -> stop jsonified data networks = [] good_cities = [c for c in cities if c.is_good] platform_nodes = {} @@ -362,9 +377,7 @@ def transit_data_to_mapsme( stops[stop_id] = st cache.add_stop(stop_id, st) - pairwise_transfers = ( - {} - ) # (stoparea1_uid, stoparea2_uid) -> time; uid1 < uid2 + pairwise_transfers: TransferTimesT = {} for stoparea_id_set in transfers: stoparea_ids = list(stoparea_id_set) for i_first in range(len(stoparea_ids) - 1): @@ -388,14 +401,14 @@ def transit_data_to_mapsme( cache.provide_transfers(pairwise_transfers) cache.save() - pairwise_transfers = [ + pairwise_transfers_list = [ (stop1_uid, stop2_uid, transfer_time) for (stop1_uid, stop2_uid), transfer_time in pairwise_transfers.items() ] result = { "stops": list(stops.values()), - "transfers": pairwise_transfers, + "transfers": pairwise_transfers_list, "networks": networks, } return result @@ -406,10 +419,10 @@ def process( transfers: TransfersT, filename: str, cache_path: str | None, -): +) -> None: """Generate all output and save to file. - :param cities: List of City instances - :param transfers: List of sets of StopArea.id + :param cities: list of City instances + :param transfers: all collected transfers in the world :param filename: Path to file to save the result :param cache_path: Path to json-file with good cities cache or None. """ diff --git a/subway_io.py b/subway_io.py index 4b02596..8ef5f6f 100644 --- a/subway_io.py +++ b/subway_io.py @@ -1,15 +1,18 @@ import json import logging from collections import OrderedDict +from typing import Any, TextIO + +from subway_structure import City, OsmElementT, StopArea -def load_xml(f): +def load_xml(f: TextIO | str) -> list[OsmElementT]: try: from lxml import etree except ImportError: import xml.etree.ElementTree as etree - elements = [] + elements: list[OsmElementT] = [] for event, element in etree.iterparse(f): if element.tag in ("node", "way", "relation"): @@ -49,7 +52,7 @@ _YAML_SPECIAL_CHARACTERS = "!&*{}[],#|>@`'\"" _YAML_SPECIAL_SEQUENCES = ("- ", ": ", "? 
") -def _get_yaml_compatible_string(scalar): +def _get_yaml_compatible_string(scalar: Any) -> str: """Enclose string in single quotes in some cases""" string = str(scalar) if string and ( @@ -62,8 +65,8 @@ def _get_yaml_compatible_string(scalar): return string -def dump_yaml(city, f): - def write_yaml(data, f, indent=""): +def dump_yaml(city: City, f: TextIO) -> None: + def write_yaml(data: dict, f: TextIO, indent: str = "") -> None: if isinstance(data, (set, list)): f.write("\n") for i in data: @@ -138,10 +141,10 @@ def dump_yaml(city, f): write_yaml(result, f) -def make_geojson(city, include_tracks_geometry=True): - transfers = set() +def make_geojson(city: City, include_tracks_geometry: bool = True) -> dict: + stopareas_in_transfers: set[StopArea] = set() for t in city.transfers: - transfers.update(t) + stopareas_in_transfers.update(t) features = [] stopareas = set() stops = set() @@ -196,7 +199,7 @@ def make_geojson(city, include_tracks_geometry=True): "name": stoparea.name, "marker-size": "small", "marker-color": "#ff2600" - if stoparea in transfers + if stoparea in stopareas_in_transfers else "#797979", }, } @@ -204,7 +207,7 @@ def make_geojson(city, include_tracks_geometry=True): return {"type": "FeatureCollection", "features": features} -def _dumps_route_id(route_id): +def _dumps_route_id(route_id: tuple[str | None, str | None]) -> str: """Argument is a route_id that depends on route colour and ref. Name can be taken from route_master or can be route's own, we don't take it into consideration. Some of route attributes can be None. The function makes @@ -212,13 +215,13 @@ def _dumps_route_id(route_id): return json.dumps(route_id, ensure_ascii=False) -def _loads_route_id(route_id_dump): +def _loads_route_id(route_id_dump: str) -> tuple[str | None, str | None]: """Argument is a json-encoded identifier of a route. Return a tuple (colour, ref).""" return tuple(json.loads(route_id_dump)) -def read_recovery_data(path): +def read_recovery_data(path: str) -> dict: """Recovery data is a json with data from previous transport builds. It helps to recover cities from some errors, e.g. by resorting shuffled stations in routes.""" @@ -246,11 +249,15 @@ def read_recovery_data(path): return data -def write_recovery_data(path, current_data, cities): +def write_recovery_data( + path: str, current_data: dict, cities: list[City] +) -> None: """Updates recovery data with good cities data and writes to file.""" - def make_city_recovery_data(city): - routes = {} + def make_city_recovery_data( + city: City, + ) -> dict[tuple[str | None, str | None], list[dict]]: + routes: dict[tuple(str | None, str | None), list[dict]] = {} for route in city: # Recovery is based primarily on route/station names/refs. # If route's ref/colour changes, the route won't be used. 
diff --git a/subway_structure.py b/subway_structure.py index d486d90..94c6f47 100644 --- a/subway_structure.py +++ b/subway_structure.py @@ -3,9 +3,9 @@ from __future__ import annotations import math import re from collections import Counter, defaultdict -from collections.abc import Collection, Iterator +from collections.abc import Callable, Collection, Iterator from itertools import chain, islice -from typing import TypeVar +from typing import TypeAlias, TypeVar from css_colours import normalize_colour @@ -47,13 +47,18 @@ used_entrances = set() START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*") -IdT = str # Type of feature ids -TransferT = set[IdT] # A transfer is a set of StopArea IDs -TransfersT = Collection[TransferT] +OsmElementT: TypeAlias = dict +IdT: TypeAlias = str # Type of feature ids +TransferT: TypeAlias = set[IdT] # A transfer is a set of StopArea IDs +TransfersT: TypeAlias = list[TransferT] +LonLat: TypeAlias = tuple[float, float] +RailT: TypeAlias = list[LonLat] T = TypeVar("T") -def get_start_end_times(opening_hours): +def get_start_end_times( + opening_hours: str, +) -> tuple[tuple[int, int], tuple[int, int]] | tuple[None, None]: """Very simplified method to parse OSM opening_hours tag. We simply take the first HH:MM-HH:MM substring which is the most probable opening hours interval for the most of the weekdays. @@ -67,7 +72,7 @@ def get_start_end_times(opening_hours): return start_time, end_time -def osm_interval_to_seconds(interval_str): +def osm_interval_to_seconds(interval_str: str) -> int | None: """Convert to int an OSM value for 'interval'/'headway' tag which may be in these formats: HH:MM:SS, @@ -97,7 +102,7 @@ class CriticalValidationError(Exception): that prevents further validation of a city.""" -def el_id(el): +def el_id(el: OsmElementT) -> IdT | None: if not el: return None if "type" not in el: @@ -105,7 +110,7 @@ def el_id(el): return el["type"][0] + str(el.get("id", el.get("ref", ""))) -def el_center(el): +def el_center(el: OsmElementT) -> LonLat | None: if not el: return None if "lat" in el: @@ -115,7 +120,7 @@ def el_center(el): return None -def distance(p1, p2): +def distance(p1: LonLat, p2: LonLat) -> float: if p1 is None or p2 is None: raise Exception( "One of arguments to distance({}, {}) is None".format(p1, p2) @@ -127,14 +132,14 @@ def distance(p1, p2): return 6378137 * math.sqrt(dx * dx + dy * dy) -def is_near(p1, p2): +def is_near(p1: LonLat, p2: LonLat) -> bool: return ( p1[0] - 1e-8 <= p2[0] <= p1[0] + 1e-8 and p1[1] - 1e-8 <= p2[1] <= p1[1] + 1e-8 ) -def project_on_segment(p, p1, p2): +def project_on_segment(p: LonLat, p1: LonLat, p2: LonLat) -> float | None: """Given three points, return u - the position of projection of point p onto segment p1p2 regarding point p1 and (p2-p1) direction vector """ @@ -148,7 +153,7 @@ def project_on_segment(p, p1, p2): return u -def project_on_line(p, line): +def project_on_line(p: LonLat, line: RailT) -> dict: result = { # In the first approximation, position on rails is the index of the # closest vertex of line to the point p. 
Fractional value means that @@ -212,7 +217,9 @@ def project_on_line(p, line): return result -def find_segment(p, line, start_vertex=0): +def find_segment( + p: LonLat, line: RailT, start_vertex: int = 0 +) -> tuple[int, float] | tuple[None, None]: """Returns index of a segment and a position inside it.""" EPS = 1e-9 for seg in range(start_vertex, len(line) - 1): @@ -237,7 +244,9 @@ def find_segment(p, line, start_vertex=0): return None, None -def distance_on_line(p1, p2, line, start_vertex=0): +def distance_on_line( + p1: LonLat, p2: LonLat, line: RailT, start_vertex: int = 0 +) -> tuple[float, int] | None: """Calculates distance via line between projections of points p1 and p2. Returns a TUPLE of (d, vertex): d is the distance and vertex is the number of the second @@ -270,7 +279,7 @@ def distance_on_line(p1, p2, line, start_vertex=0): return d, seg2 % line_len -def angle_between(p1, c, p2): +def angle_between(p1: LonLat, c: LonLat, p2: LonLat) -> float: a = round( abs( math.degrees( @@ -282,7 +291,7 @@ def angle_between(p1, c, p2): return a if a <= 180 else 360 - a -def format_elid_list(ids): +def format_elid_list(ids: Collection[IdT]) -> str: msg = ", ".join(sorted(ids)[:20]) if len(ids) > 20: msg += ", ..." @@ -291,14 +300,14 @@ def format_elid_list(ids): class Station: @staticmethod - def get_modes(el: dict) -> set[str]: + def get_modes(el: OsmElementT) -> set[str]: modes = {m for m in ALL_MODES if el["tags"].get(m) == "yes"} if mode := el["tags"].get("station"): modes.add(mode) return modes @staticmethod - def is_station(el, modes): + def is_station(el: OsmElementT, modes: set[str]) -> bool: # public_transport=station is too ambiguous and unspecific to use, # so we expect for it to be backed by railway=station. if ( @@ -316,7 +325,7 @@ class Station: return False return True - def __init__(self, el, city): + def __init__(self, el: OsmElementT, city: City) -> None: """Call this with a railway=station node.""" if not Station.is_station(el, city.modes): raise Exception( @@ -324,8 +333,8 @@ class Station: "Got: {}".format(el) ) - self.id = el_id(el) - self.element = el + self.id: IdT = el_id(el) + self.element: OsmElementT = el self.modes = Station.get_modes(el) self.name = el["tags"].get("name", "?") self.int_name = el["tags"].get( @@ -340,7 +349,7 @@ class Station: if self.center is None: raise Exception("Could not find center of {}".format(el)) - def __repr__(self): + def __repr__(self) -> str: return "Station(id={}, modes={}, name={}, center={})".format( self.id, ",".join(self.modes), self.name, self.center ) @@ -348,7 +357,7 @@ class Station: class StopArea: @staticmethod - def is_stop(el): + def is_stop(el: OsmElementT) -> bool: if "tags" not in el: return False if el["tags"].get("railway") == "stop": @@ -358,7 +367,7 @@ class StopArea: return False @staticmethod - def is_platform(el): + def is_platform(el: OsmElementT) -> bool: if "tags" not in el: return False if el["tags"].get("railway") in ("platform", "platform_edge"): @@ -368,19 +377,22 @@ class StopArea: return False @staticmethod - def is_track(el): + def is_track(el: OsmElementT) -> bool: if el["type"] != "way" or "tags" not in el: return False return el["tags"].get("railway") in RAILWAY_TYPES def __init__( - self, station: Station, city: City, stop_area: StopArea | None = None + self, + station: Station, + city: City, + stop_area: OsmElementT | None = None, ) -> None: """Call this with a Station object.""" - self.element = stop_area or station.element - self.id = el_id(self.element) - self.station = station + 
self.element: OsmElementT = stop_area or station.element + self.id: IdT = el_id(self.element) + self.station: Station = station self.stops = set() # set of el_ids of stop_positions self.platforms = set() # set of el_ids of platforms self.exits = set() # el_id of subway_entrance/train_station_entrance @@ -440,7 +452,7 @@ class StopArea: self.center[i] /= len(self.stops) + len(self.platforms) def _process_members( - self, station: Station, city: City, stop_area: dict + self, station: Station, city: City, stop_area: OsmElementT ) -> None: # If we have a stop area, add all elements from it tracks_detected = False @@ -503,7 +515,7 @@ class StopArea: if etag != "entrance": self.exits.add(entrance_id) - def get_elements(self): + def get_elements(self) -> set[IdT]: result = {self.id, self.station.id} result.update(self.entrances) result.update(self.exits) @@ -511,7 +523,7 @@ class StopArea: result.update(self.platforms) return result - def __repr__(self): + def __repr__(self) -> str: return ( f"StopArea(id={self.id}, name={self.name}, station={self.station}," f" transfer={self.transfer}, center={self.center})" @@ -519,9 +531,9 @@ class StopArea: class RouteStop: - def __init__(self, stoparea): - self.stoparea = stoparea - self.stop = None # Stop position (lon, lat), possibly projected + def __init__(self, stoparea: StopArea) -> None: + self.stoparea: StopArea = stoparea + self.stop: LonLat = None # Stop position, possibly projected self.distance = 0 # In meters from the start of the route self.platform_entry = None # Platform el_id self.platform_exit = None # Platform el_id @@ -533,11 +545,13 @@ class RouteStop: self.seen_station = False @property - def seen_platform(self): + def seen_platform(self) -> bool: return self.seen_platform_entry or self.seen_platform_exit @staticmethod - def get_actual_role(el, role, modes): + def get_actual_role( + el: OsmElementT, role: str, modes: set[str] + ) -> str | None: if StopArea.is_stop(el): return "stop" elif StopArea.is_platform(el): @@ -549,7 +563,7 @@ class RouteStop: return "stop" return None - def add(self, member, relation, city): + def add(self, member: dict, relation: OsmElementT, city: City) -> None: el = city.elements[el_id(member)] role = member["role"] @@ -616,7 +630,7 @@ class RouteStop: relation, ) - def __repr__(self): + def __repr__(self) -> str: return ( "RouteStop(stop={}, pl_entry={}, pl_exit={}, stoparea={})".format( self.stop, @@ -628,10 +642,10 @@ class RouteStop: class Route: - """Corresponds to OSM "type=route" relation""" + """The longest route for a city with a unique ref.""" @staticmethod - def is_route(el, modes): + def is_route(el: OsmElementT, modes: set[str]) -> bool: if ( el["type"] != "relation" or el.get("tags", {}).get("type") != "route" @@ -649,14 +663,14 @@ class Route: return True @staticmethod - def get_network(relation): + def get_network(relation: OsmElementT) -> str | None: for k in ("network:metro", "network", "operator"): if k in relation["tags"]: return relation["tags"][k] return None @staticmethod - def get_interval(tags): + def get_interval(tags: dict) -> int | None: v = None for k in ("interval", "headway"): if k in tags: @@ -681,16 +695,16 @@ class Route: def __init__( self, - relation: dict, + relation: OsmElementT, city: City, - master: dict | None = None, + master: OsmElementT | None = None, ) -> None: assert Route.is_route( relation, city.modes ), f"The relation does not seem to be a route: {relation}" self.city = city - self.element = relation - self.id = el_id(relation) + self.element: OsmElementT = 
relation + self.id: IdT = el_id(relation) self.ref = None self.name = None @@ -702,7 +716,7 @@ class Route: self.start_time = None self.end_time = None self.is_circular = False - self.stops = [] # List of RouteStop + self.stops: list[RouteStop] = [] # Would be a list of (lon, lat) for the longest stretch. Can be empty. self.tracks = None # Index of the first stop that is located on/near the self.tracks @@ -714,10 +728,10 @@ class Route: stop_position_elements = self.process_stop_members() self.process_tracks(stop_position_elements) - def build_longest_line(self): - line_nodes = set() - last_track = [] - track = [] + def build_longest_line(self) -> tuple[list[IdT], set[IdT]]: + line_nodes: set[IdT] = set() + last_track: list[IdT] = [] + track: list[IdT] = [] warned_about_holes = False for m in self.element["members"]: el = self.city.elements.get(el_id(m), None) @@ -726,7 +740,7 @@ class Route: if "nodes" not in el or len(el["nodes"]) < 2: self.city.error("Cannot find nodes in a railway", el) continue - nodes = ["n{}".format(n) for n in el["nodes"]] + nodes: list[IdT] = ["n{}".format(n) for n in el["nodes"]] if m["role"] == "backward": nodes.reverse() line_nodes.update(nodes) @@ -773,10 +787,10 @@ class Route: ] return last_track, line_nodes - def get_stop_projections(self): + def get_stop_projections(self) -> tuple[list[dict], Callable[[int], bool]]: projected = [project_on_line(x.stop, self.tracks) for x in self.stops] - def stop_near_tracks_criterion(stop_index: int): + def stop_near_tracks_criterion(stop_index: int) -> bool: return ( projected[stop_index]["projected_point"] is not None and distance( @@ -788,14 +802,14 @@ class Route: return projected, stop_near_tracks_criterion - def project_stops_on_line(self): + def project_stops_on_line(self) -> dict: projected, stop_near_tracks_criterion = self.get_stop_projections() projected_stops_data = { "first_stop_on_rails_index": None, "last_stop_on_rails_index": None, "stops_on_longest_line": [], # list [{'route_stop': RouteStop, - # 'coords': (lon, lat), + # 'coords': LonLat, # 'positions_on_rails': [] } } first_index = 0 @@ -848,7 +862,7 @@ class Route: projected_stops_data["stops_on_longest_line"].append(stop_data) return projected_stops_data - def calculate_distances(self): + def calculate_distances(self) -> None: dist = 0 vertex = 0 for i, stop in enumerate(self.stops): @@ -870,7 +884,7 @@ class Route: dist += round(direct) stop.distance = dist - def process_tags(self, master): + def process_tags(self, master: OsmElementT) -> None: relation = self.element master_tags = {} if not master else master["tags"] if "ref" not in relation["tags"] and "ref" not in master_tags: @@ -918,12 +932,12 @@ class Route: relation, ) - def process_stop_members(self): - stations = set() # temporary for recording stations + def process_stop_members(self) -> list[OsmElementT]: + stations: set[StopArea] = set() # temporary for recording stations seen_stops = False seen_platforms = False repeat_pos = None - stop_position_elements = [] + stop_position_elements: list[OsmElementT] = [] for m in self.element["members"]: if "inactive" in m["role"]: continue @@ -1072,7 +1086,9 @@ class Route: ) return stop_position_elements - def process_tracks(self, stop_position_elements: list[dict]) -> None: + def process_tracks( + self, stop_position_elements: list[OsmElementT] + ) -> None: tracks, line_nodes = self.build_longest_line() for stop_el in stop_position_elements: @@ -1130,7 +1146,7 @@ class Route: if stop_coords := stop_data["coords"]: route_stop.stop = stop_coords - 
def get_extended_tracks(self): + def get_extended_tracks(self) -> RailT: """Amend tracks with points of leading/trailing self.stops that were not projected onto the longest tracks line. Return a new array. @@ -1153,7 +1169,7 @@ class Route: ) return tracks - def get_truncated_tracks(self, tracks): + def get_truncated_tracks(self, tracks: RailT) -> RailT: """Truncate leading/trailing segments of `tracks` param that are beyond the first and last stop locations. Return a new array. @@ -1194,12 +1210,12 @@ class Route: and self.last_stop_on_rails_index == len(self) - 1 ) - def get_tracks_geometry(self): + def get_tracks_geometry(self) -> RailT: tracks = self.get_extended_tracks() tracks = self.get_truncated_tracks(tracks) return tracks - def check_stops_order_by_angle(self) -> tuple[list, list]: + def check_stops_order_by_angle(self) -> tuple[list[str], list[str]]: disorder_warnings = [] disorder_errors = [] for i, route_stop in enumerate( @@ -1222,7 +1238,9 @@ class Route: disorder_warnings.append(msg) return disorder_warnings, disorder_errors - def check_stops_order_on_tracks_direct(self, stop_sequence) -> str | None: + def check_stops_order_on_tracks_direct( + self, stop_sequence: Iterator[dict] + ) -> str | None: """Checks stops order on tracks, following stop_sequence in direct order only. :param stop_sequence: list of dict{'route_stop', 'positions_on_rails', @@ -1253,7 +1271,9 @@ class Route: ) max_position_on_rails = positions_on_rails[suitable_occurrence] - def check_stops_order_on_tracks(self, projected_stops_data) -> str | None: + def check_stops_order_on_tracks( + self, projected_stops_data: dict + ) -> str | None: """Checks stops order on tracks, trying direct and reversed order of stops in the stop_sequence. :param projected_stops_data: info about RouteStops that belong to the @@ -1280,7 +1300,9 @@ class Route: return error_message - def check_stops_order(self, projected_stops_data): + def check_stops_order( + self, projected_stops_data: dict + ) -> tuple[list[str], list[str]]: ( angle_disorder_warnings, angle_disorder_errors, @@ -1294,7 +1316,9 @@ class Route: disorder_errors.append(disorder_on_tracks_error) return disorder_warnings, disorder_errors - def check_and_recover_stops_order(self, projected_stops_data: dict): + def check_and_recover_stops_order( + self, projected_stops_data: dict + ) -> None: """ :param projected_stops_data: may change if we need to reverse tracks """ @@ -1319,7 +1343,7 @@ class Route: for msg in disorder_errors: self.city.error(msg, self.element) - def try_resort_stops(self): + def try_resort_stops(self) -> bool: """Precondition: self.city.recovery_data is not None. Return success of station order recovering.""" self_stops = {} # station name => RouteStop @@ -1388,7 +1412,7 @@ class Route: ] return True - def get_end_transfers(self) -> tuple[str, str]: + def get_end_transfers(self) -> tuple[IdT, IdT]: """Using transfer ids because a train can arrive at different stations within a transfer. 
But disregard transfer that may give an impression of a circular route (for example, @@ -1406,7 +1430,7 @@ class Route: ) ) - def get_transfers_sequence(self) -> list[str]: + def get_transfers_sequence(self) -> list[IdT]: """Return a list of stoparea or transfer (if not None) ids.""" transfer_seq = [ stop.stoparea.transfer or stop.stoparea.id for stop in self @@ -1418,16 +1442,16 @@ class Route: transfer_seq[0], transfer_seq[-1] = self.get_end_transfers() return transfer_seq - def __len__(self): + def __len__(self) -> int: return len(self.stops) - def __getitem__(self, i): + def __getitem__(self, i) -> RouteStop: return self.stops[i] - def __iter__(self): + def __iter__(self) -> Iterator[RouteStop]: return iter(self.stops) - def __repr__(self): + def __repr__(self) -> str: return ( "Route(id={}, mode={}, ref={}, name={}, network={}, interval={}, " "circular={}, num_stops={}, line_length={} m, from={}, to={}" @@ -1447,11 +1471,11 @@ class Route: class RouteMaster: - def __init__(self, city: City, master: dict = None) -> None: + def __init__(self, city: City, master: OsmElementT = None) -> None: self.city = city self.routes = [] - self.best = None - self.id = el_id(master) + self.best: Route = None + self.id: IdT = el_id(master) self.has_master = master is not None self.interval_from_master = False if master: @@ -1871,16 +1895,16 @@ class RouteMaster: stops_that_dont_match, ) - def __len__(self): + def __len__(self) -> int: return len(self.routes) - def __getitem__(self, i): + def __getitem__(self, i) -> Route: return self.routes[i] - def __iter__(self): + def __iter__(self) -> Iterator[Route]: return iter(self.routes) - def __repr__(self): + def __repr__(self) -> str: return ( f"RouteMaster(id={self.id}, mode={self.mode}, ref={self.ref}, " f"name={self.name}, network={self.network}, " @@ -1891,11 +1915,11 @@ class RouteMaster: class City: route_class = Route - def __init__(self, city_data, overground=False): + def __init__(self, city_data: dict, overground: bool = False) -> None: self.validate_called = False - self.errors = [] - self.warnings = [] - self.notices = [] + self.errors: list[str] = [] + self.warnings: list[str] = [] + self.notices: list[str] = [] self.id = None self.try_fill_int_attribute(city_data, "id") self.name = city_data["name"] @@ -1940,16 +1964,14 @@ class City: else: self.bbox = None - self.elements = {} # Dict el_id → el - self.stations = defaultdict(list) # Dict el_id → list of StopAreas - self.routes = {} # Dict route_master_ref → RouteMaster - self.masters = {} # Dict el_id of route → route_master - self.stop_areas = defaultdict( - list - ) # El_id → list of stop_area elements it belongs to - self.transfers: TransfersT = [] # List of sets of stop areas - self.station_ids = set() # Set of stations' uid - self.stops_and_platforms = set() # Set of stops and platforms el_id + self.elements: dict[IdT, OsmElementT] = {} + self.stations: dict[IdT, list[StopArea]] = defaultdict(list) + self.routes: dict[str, RouteMaster] = {} # keys are route_master refs + self.masters: dict[IdT, OsmElementT] = {} # Route id → master element + self.stop_areas: [IdT, list[OsmElementT]] = defaultdict(list) + self.transfers: list[set[StopArea]] = [] + self.station_ids: set[IdT] = set() + self.stops_and_platforms: set[IdT] = set() self.recovery_data = None def try_fill_int_attribute( @@ -1980,7 +2002,7 @@ class City: setattr(self, attr, attr_int) @staticmethod - def log_message(message, el): + def log_message(message: str, el: OsmElementT) -> str: if el: tags = el.get("tags", {}) message += ' 
({} {}, "{}")'.format( @@ -1990,24 +2012,24 @@ class City: ) return message - def notice(self, message, el=None): + def notice(self, message: str, el: OsmElementT | None = None) -> None: """This type of message may point to a potential problem.""" msg = City.log_message(message, el) self.notices.append(msg) - def warn(self, message, el=None): + def warn(self, message: str, el: OsmElementT | None = None) -> None: """A warning is definitely a problem but is doesn't prevent from building a routing file and doesn't invalidate the city. """ msg = City.log_message(message, el) self.warnings.append(msg) - def error(self, message, el=None): + def error(self, message: str, el: OsmElementT | None = None) -> None: """Error is a critical problem that invalidates the city.""" msg = City.log_message(message, el) self.errors.append(msg) - def contains(self, el): + def contains(self, el: OsmElementT) -> bool: center = el_center(el) if center: return ( @@ -2016,7 +2038,7 @@ class City: ) return False - def add(self, el): + def add(self, el: OsmElementT) -> None: if el["type"] == "relation" and "members" not in el: return @@ -2052,8 +2074,8 @@ class City: else: stop_areas.append(el) - def make_transfer(self, stoparea_group: dict) -> None: - transfer = set() + def make_transfer(self, stoparea_group: OsmElementT) -> None: + transfer: set[StopArea] = set() for m in stoparea_group["members"]: k = el_id(m) el = self.elements.get(k) @@ -2195,7 +2217,7 @@ class City: if len(inner_transfer) > 1 ] - def __iter__(self): + def __iter__(self) -> Iterator[RouteMaster]: return iter(self.routes.values()) def stopareas(self) -> Iterator[StopArea]: @@ -2207,7 +2229,7 @@ class City: yielded_stopareas.add(stoparea) @property - def is_good(self): + def is_good(self) -> bool: if not (self.errors or self.validate_called): raise RuntimeError( "You mustn't refer to City.is_good property before calling " @@ -2215,7 +2237,7 @@ class City: ) return len(self.errors) == 0 - def get_validation_result(self): + def get_validation_result(self) -> dict: result = { "name": self.name, "country": self.country, @@ -2260,7 +2282,7 @@ class City: result["notices"] = self.notices return result - def count_unused_entrances(self): + def count_unused_entrances(self) -> None: global used_entrances stop_areas = set() for el in self.elements.values(): @@ -2299,7 +2321,7 @@ class City: f"relations: {format_elid_list(not_in_sa)}" ) - def validate_lines(self): + def validate_lines(self) -> None: self.found_light_lines = len( [x for x in self.routes.values() if x.mode != "subway"] ) @@ -2317,7 +2339,7 @@ class City: ) ) - def validate_overground_lines(self): + def validate_overground_lines(self) -> None: self.found_tram_lines = len( [x for x in self.routes.values() if x.mode == "tram"] ) @@ -2344,7 +2366,7 @@ class City: ), ) - def validate(self): + def validate(self) -> None: networks = Counter() self.found_stations = 0 unused_stations = set(self.station_ids) @@ -2421,7 +2443,7 @@ class City: def find_transfers( - elements: list[dict], cities: Collection[City] + elements: list[OsmElementT], cities: Collection[City] ) -> TransfersT: """As for now, two Cities may contain the same stoparea, but those StopArea instances would have different python id. 
So we don't store @@ -2457,7 +2479,7 @@ def find_transfers( return transfers -def get_unused_subway_entrances_geojson(elements: list[dict]) -> dict: +def get_unused_subway_entrances_geojson(elements: list[OsmElementT]) -> dict: global used_entrances features = [] for el in elements: diff --git a/validation_to_html.py b/validation_to_html.py index f772a4f..0f9ec3b 100755 --- a/validation_to_html.py +++ b/validation_to_html.py @@ -7,7 +7,7 @@ import json import os import re from collections import defaultdict -from typing import Any, Optional +from typing import Any from process_subways import DEFAULT_SPREADSHEET_ID from v2h_templates import ( @@ -22,8 +22,7 @@ from v2h_templates import ( class CityData: - def __init__(self, city: Optional[str] = None) -> None: - self.city = city is not None + def __init__(self, city: dict | None = None) -> None: self.data = { "good_cities": 0, "total_cities": 1 if city else 0, @@ -93,7 +92,7 @@ class CityData: return s -def tmpl(s: str, data: Optional[CityData] = None, **kwargs) -> str: +def tmpl(s: str, data: CityData | None = None, **kwargs) -> str: if data: s = data.format(s) if kwargs: From 60821b60d67727e4887a9341e89d16f760fc1c9b Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Tue, 5 Mar 2024 16:43:20 +0300 Subject: [PATCH 09/15] Refactor project structure --- .github/workflows/python-app.yml | 4 +- README.md | 38 +- process_subways.py | 577 ---- scripts/process_subways.py | 276 ++ scripts/process_subways.sh | 16 +- subway_structure.py | 2505 ----------------- subways/__init__.py | 92 + subways/consts.py | 26 + css_colours.py => subways/css_colours.py | 0 subways/geom_utils.py | 175 ++ subways/osm_element.py | 19 + subways/overpass.py | 60 + .../processors}/__init__.py | 6 +- {processors => subways/processors}/_common.py | 10 +- {processors => subways/processors}/gtfs.py | 13 +- {processors => subways/processors}/mapsme.py | 25 +- requirements.txt => subways/requirements.txt | 0 subways/structure/__init__.py | 17 + subways/structure/city.py | 626 ++++ subways/structure/route.py | 903 ++++++ subways/structure/route_master.py | 464 +++ subways/structure/route_stop.py | 122 + subways/structure/station.py | 62 + subways/structure/stop_area.py | 191 ++ subway_io.py => subways/subway_io.py | 14 +- {tests => subways/tests}/README.md | 0 {tests => subways/tests}/__init__.py | 0 .../assets/cities_info_with_bad_values.csv | 0 .../tests}/assets/route_masters.osm | 0 .../tests}/assets/tiny_world.osm | 0 .../tests}/assets/tiny_world_gtfs/agency.txt | 0 .../assets/tiny_world_gtfs/calendar.txt | 0 .../assets/tiny_world_gtfs/frequencies.txt | 0 .../tests}/assets/tiny_world_gtfs/routes.txt | 0 .../tests}/assets/tiny_world_gtfs/shapes.txt | 0 .../assets/tiny_world_gtfs/stop_times.txt | 0 .../tests}/assets/tiny_world_gtfs/stops.txt | 0 .../assets/tiny_world_gtfs/transfers.txt | 0 .../tests}/assets/tiny_world_gtfs/trips.txt | 0 .../tests}/assets/twin_routes.osm | 0 .../assets/twin_routes_with_divergence.osm | 0 .../tests}/sample_data_for_build_tracks.py | 0 .../sample_data_for_center_calculation.py | 0 .../tests}/sample_data_for_error_messages.py | 99 +- .../tests}/sample_data_for_outputs.py | 0 .../tests}/sample_data_for_twin_routes.py | 0 {tests => subways/tests}/test_build_tracks.py | 4 +- .../tests}/test_center_calculation.py | 6 +- .../tests}/test_error_messages.py | 6 +- .../tests}/test_find_transfers.py | 4 +- .../tests}/test_gtfs_processor.py | 12 +- .../tests}/test_mapsme_processor.py | 6 +- {tests => subways/tests}/test_overpass.py | 6 +- 
.../tests}/test_prepare_cities.py | 2 +- {tests => subways/tests}/test_projection.py | 22 +- {tests => subways/tests}/test_route_master.py | 6 +- {tests => subways/tests}/test_station.py | 2 +- {tests => subways/tests}/test_storage.py | 6 +- {tests => subways/tests}/util.py | 6 +- subways/types.py | 14 + subways/validation.py | 253 ++ {checkers => tools/checkers}/common.py | 0 .../checkers}/compare_city_caches.py | 0 .../checkers}/compare_json_outputs.py | 0 .../legacy/mapsme_json_to_cities.py | 10 +- .../make_poly/make_all_metro_poly.py | 2 +- tools/make_poly/tests/__init__.py | 0 .../tests}/assets/cities_info_1city.csv | 0 .../tests}/assets/cities_info_2cities.csv | 0 .../tests}/test_make_all_metro_poly.py | 7 +- .../stop_areas}/make_stop_areas.py | 0 .../stop_areas}/make_tram_areas.py | 0 .../stop_areas}/requirements.txt | 0 {stop_areas => tools/stop_areas}/serve.py | 0 .../stop_areas}/templates/index.html | 0 .../v2h/v2h_templates.py | 0 .../v2h/validation_to_html.py | 2 +- 77 files changed, 3535 insertions(+), 3181 deletions(-) delete mode 100755 process_subways.py create mode 100755 scripts/process_subways.py delete mode 100644 subway_structure.py create mode 100644 subways/__init__.py create mode 100644 subways/consts.py rename css_colours.py => subways/css_colours.py (100%) create mode 100644 subways/geom_utils.py create mode 100644 subways/osm_element.py create mode 100644 subways/overpass.py rename {processors => subways/processors}/__init__.py (56%) rename {processors => subways/processors}/_common.py (95%) rename {processors => subways/processors}/gtfs.py (98%) rename {processors => subways/processors}/mapsme.py (97%) rename requirements.txt => subways/requirements.txt (100%) create mode 100644 subways/structure/__init__.py create mode 100644 subways/structure/city.py create mode 100644 subways/structure/route.py create mode 100644 subways/structure/route_master.py create mode 100644 subways/structure/route_stop.py create mode 100644 subways/structure/station.py create mode 100644 subways/structure/stop_area.py rename subway_io.py => subways/subway_io.py (96%) rename {tests => subways/tests}/README.md (100%) rename {tests => subways/tests}/__init__.py (100%) rename {tests => subways/tests}/assets/cities_info_with_bad_values.csv (100%) rename {tests => subways/tests}/assets/route_masters.osm (100%) rename {tests => subways/tests}/assets/tiny_world.osm (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/agency.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/calendar.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/frequencies.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/routes.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/shapes.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/stop_times.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/stops.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/transfers.txt (100%) rename {tests => subways/tests}/assets/tiny_world_gtfs/trips.txt (100%) rename {tests => subways/tests}/assets/twin_routes.osm (100%) rename {tests => subways/tests}/assets/twin_routes_with_divergence.osm (100%) rename {tests => subways/tests}/sample_data_for_build_tracks.py (100%) rename {tests => subways/tests}/sample_data_for_center_calculation.py (100%) rename {tests => subways/tests}/sample_data_for_error_messages.py (78%) rename {tests => subways/tests}/sample_data_for_outputs.py (100%) rename {tests => 
subways/tests}/sample_data_for_twin_routes.py (100%) rename {tests => subways/tests}/test_build_tracks.py (96%) rename {tests => subways/tests}/test_center_calculation.py (91%) rename {tests => subways/tests}/test_error_messages.py (86%) rename {tests => subways/tests}/test_find_transfers.py (88%) rename {tests => subways/tests}/test_gtfs_processor.py (95%) rename {tests => subways/tests}/test_mapsme_processor.py (89%) rename {tests => subways/tests}/test_overpass.py (97%) rename {tests => subways/tests}/test_prepare_cities.py (96%) rename {tests => subways/tests}/test_projection.py (86%) rename {tests => subways/tests}/test_route_master.py (96%) rename {tests => subways/tests}/test_station.py (96%) rename {tests => subways/tests}/test_storage.py (86%) rename {tests => subways/tests}/util.py (98%) create mode 100644 subways/types.py create mode 100644 subways/validation.py rename {checkers => tools/checkers}/common.py (100%) rename {checkers => tools/checkers}/compare_city_caches.py (100%) rename {checkers => tools/checkers}/compare_json_outputs.py (100%) rename mapsme_json_to_cities.py => tools/legacy/mapsme_json_to_cities.py (89%) rename make_all_metro_poly.py => tools/make_poly/make_all_metro_poly.py (95%) create mode 100644 tools/make_poly/tests/__init__.py rename {tests => tools/make_poly/tests}/assets/cities_info_1city.csv (100%) rename {tests => tools/make_poly/tests}/assets/cities_info_2cities.csv (100%) rename {tests => tools/make_poly/tests}/test_make_all_metro_poly.py (94%) rename {stop_areas => tools/stop_areas}/make_stop_areas.py (100%) rename {stop_areas => tools/stop_areas}/make_tram_areas.py (100%) rename {stop_areas => tools/stop_areas}/requirements.txt (100%) rename {stop_areas => tools/stop_areas}/serve.py (100%) rename {stop_areas => tools/stop_areas}/templates/index.html (100%) rename v2h_templates.py => tools/v2h/v2h_templates.py (100%) rename validation_to_html.py => tools/v2h/validation_to_html.py (99%) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index b735261..55ce353 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -27,7 +27,7 @@ jobs: run: | python -m pip install --upgrade pip pip install flake8==6.0.0 black==23.1.0 shapely==2.0.1 - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install -r subways/requirements.txt - name: Lint with flake8 run: | flake8 @@ -36,4 +36,4 @@ jobs: black --check --line-length 79 . - name: Test with unittest run: | - python -m unittest discover tests + python -m unittest discover tests \ No newline at end of file diff --git a/README.md b/README.md index b987e5f..157e1ad 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Subway Preprocessor Here you see a list of scripts that can be used for preprocessing all the metro -systems in the world from OpenStreetMap. `subway_structure.py` produces +systems in the world from OpenStreetMap. `scripts/subway_structure.py` produces a list of disjunct systems that can be used for routing and for displaying of metro maps. @@ -16,14 +16,14 @@ of metro maps. 2. If you don't specify `--xml` or `--source` option to the `process_subways.py` script it tries to fetch data over [Overpass API](https://wiki.openstreetmap.org/wiki/Overpass_API). 
**Not suitable for the whole planet or large countries.** -* Run `process_subways.py` with appropriate set of command line arguments +* Run `scripts/process_subways.py` with appropriate set of command line arguments to build metro structures and receive a validation log. -* Run `validation_to_html.py` on that log to create readable HTML tables. +* Run `tools/v2h/validation_to_html.py` on that log to create readable HTML tables. ## Validating of all metro networks -There is a `process_subways.sh` in the `scripts` directory that is suitable +There is a `scripts/process_subways.sh` script that is suitable for validation of all or many metro networks. It relies on a bunch of environment variables and takes advantage of previous validation runs for effective recurring validations. See @@ -51,17 +51,21 @@ a city's bbox has been extended. ## Validating of a single city A single city or a country with few metro networks can be validated much faster -if you allow the `process_subway.py` to fetch data from Overpass API. Here are the steps: +if you allow the `scripts/process_subways.py` to fetch data from Overpass API. Here are the steps: 1. Python3 interpreter required (3.11+) 2. Clone the repo - ``` + ```bash git clone https://github.com/alexey-zakharenkov/subways.git subways_validator cd subways_validator ``` -3. Execute +3. Install python dependencies + ```bash + pip install -r subways/requirements.txt + ``` +4. Execute ```bash - python3 ./process_subways.py -c "London" \ + python3 scripts/process_subways.py -c "London" \ -l validation.log -d London.yaml ``` here @@ -73,21 +77,21 @@ if you allow the `process_subway.py` to fetch data from Overpass API. Here are t `validation.log` would contain the list of errors and warnings. To convert it into pretty HTML format -4. do +5. do ```bash mkdir html - python3 ./validation_to_html.py validation.log html + python3 tools/v2h/validation_to_html.py validation.log html ``` ## Publishing validation reports to the Web Expose a directory with static contents via a web-server and put into it: -- HTML files from the directory specified in the 2nd parameter of `validation_to_html.py` +- HTML files from the directory specified in the 2nd parameter of `tools/v2h/validation_to_html.py` - To vitalize "Y" (YAML), "J" (GeoJSON) and "M" (Map) links beside each city name: - The contents of `render` directory from the repository - - `cities.txt` file generated with `--dump-city-list` parameter of `process_subways.py` - - YAML files created due to -d option of `process_subways.py` - - GeoJSON files created due to -j option of `process_subways.py` + - `cities.txt` file generated with `--dump-city-list` parameter of `scripts/process_subways.py` + - YAML files created due to -d option of `scripts/process_subways.py` + - GeoJSON files created due to -j option of `scripts/process_subways.py` ## Related external resources You can find more info about this validator instance in @@ -103,9 +107,9 @@ You can find more info about this validator instance in ## Adding Stop Areas To OSM -To quickly add `stop_area` relations for the entire city, use the `make_stop_areas.py` script -from the `stop_area` directory. Give it a bounding box or a `.json` file download from Overpass API. -It would produce an JOSM XML file that you should manually check in JOSM. After that +To quickly add `stop_area` relations for the entire city, use the `tools/stop_areas/make_stop_areas.py` script. +Give it a bounding box or a `.json` file download from Overpass API. +It would produce a JOSM XML file that you should manually check in JOSM.
After that just upload it. ## Author and License diff --git a/process_subways.py b/process_subways.py deleted file mode 100755 index 3726f3a..0000000 --- a/process_subways.py +++ /dev/null @@ -1,577 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import csv -import inspect -import json -import logging -import os -import re -import sys -import time -import urllib.parse -import urllib.request -from functools import partial - -import processors -from subway_io import ( - dump_yaml, - load_xml, - make_geojson, - read_recovery_data, - write_recovery_data, -) -from subway_structure import ( - City, - CriticalValidationError, - find_transfers, - get_unused_subway_entrances_geojson, - LonLat, - MODES_OVERGROUND, - MODES_RAPID, - OsmElementT, -) - -DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k" -DEFAULT_CITIES_INFO_URL = ( - "https://docs.google.com/spreadsheets/d/" - f"{DEFAULT_SPREADSHEET_ID}/export?format=csv" -) -BAD_MARK = "[bad]" - - -def compose_overpass_request( - overground: bool, bboxes: list[list[float]] -) -> str: - if not bboxes: - raise RuntimeError("No bboxes given for overpass request") - - query = "[out:json][timeout:1000];(" - modes = MODES_OVERGROUND if overground else MODES_RAPID - for bbox in bboxes: - bbox_part = f"({','.join(str(coord) for coord in bbox)})" - query += "(" - for mode in sorted(modes): - query += f'rel[route="{mode}"]{bbox_part};' - query += ");" - query += "rel(br)[type=route_master];" - if not overground: - query += f"node[railway=subway_entrance]{bbox_part};" - query += f"node[railway=train_station_entrance]{bbox_part};" - query += f"rel[public_transport=stop_area]{bbox_part};" - query += ( - "rel(br)[type=public_transport][public_transport=stop_area_group];" - ) - query += ");(._;>>;);out body center qt;" - logging.debug("Query: %s", query) - return query - - -def overpass_request( - overground: bool, overpass_api: str, bboxes: list[list[float]] -) -> list[OsmElementT]: - query = compose_overpass_request(overground, bboxes) - url = f"{overpass_api}?data={urllib.parse.quote(query)}" - response = urllib.request.urlopen(url, timeout=1000) - if (r_code := response.getcode()) != 200: - raise Exception(f"Failed to query Overpass API: HTTP {r_code}") - return json.load(response)["elements"] - - -def multi_overpass( - overground: bool, overpass_api: str, bboxes: list[list[float]] -) -> list[OsmElementT]: - SLICE_SIZE = 10 - INTERREQUEST_WAIT = 5 # in seconds - result = [] - for i in range(0, len(bboxes), SLICE_SIZE): - if i > 0: - time.sleep(INTERREQUEST_WAIT) - bboxes_i = bboxes[i : i + SLICE_SIZE] # noqa E203 - result.extend(overpass_request(overground, overpass_api, bboxes_i)) - return result - - -def slugify(name: str) -> str: - return re.sub(r"[^a-z0-9_-]+", "", name.lower().replace(" ", "_")) - - -def get_way_center( - element: OsmElementT, node_centers: dict[int, LonLat] -) -> LonLat | None: - """ - :param element: dict describing OSM element - :param node_centers: osm_id => (lat, lon) - :return: tuple with center coordinates, or None - """ - - # If elements have been queried via overpass-api with - # 'out center;' clause then ways already have 'center' attribute - if "center" in element: - return element["center"]["lon"], element["center"]["lat"] - - if "nodes" not in element: - return None - - center = [0, 0] - count = 0 - way_nodes = element["nodes"] - way_nodes_len = len(element["nodes"]) - for i, nd in enumerate(way_nodes): - if nd not in node_centers: - continue - # Don't count the first node of a closed way twice - if ( - i 
== way_nodes_len - 1 - and way_nodes_len > 1 - and way_nodes[0] == way_nodes[-1] - ): - break - center[0] += node_centers[nd][0] - center[1] += node_centers[nd][1] - count += 1 - if count == 0: - return None - element["center"] = {"lat": center[1] / count, "lon": center[0] / count} - return element["center"]["lon"], element["center"]["lat"] - - -def get_relation_center( - element: OsmElementT, - node_centers: dict[int, LonLat], - way_centers: dict[int, LonLat], - relation_centers: dict[int, LonLat], - ignore_unlocalized_child_relations: bool = False, -) -> LonLat | None: - """ - :param element: dict describing OSM element - :param node_centers: osm_id => LonLat - :param way_centers: osm_id => LonLat - :param relation_centers: osm_id => LonLat - :param ignore_unlocalized_child_relations: if a member that is a relation - has no center, skip it and calculate center based on member nodes, - ways and other, "localized" (with known centers), relations - :return: tuple with center coordinates, or None - """ - - # If elements have been queried via overpass-api with - # 'out center;' clause then some relations already have 'center' - # attribute. But this is not the case for relations composed only - # of other relations (e.g., route_master, stop_area_group or - # stop_area with only members that are multipolygons) - if "center" in element: - return element["center"]["lon"], element["center"]["lat"] - - center = [0, 0] - count = 0 - for m in element.get("members", list()): - m_id = m["ref"] - m_type = m["type"] - if m_type == "relation" and m_id not in relation_centers: - if ignore_unlocalized_child_relations: - continue - else: - # Cannot calculate fair center because the center - # of a child relation is not known yet - return None - member_container = ( - node_centers - if m_type == "node" - else way_centers - if m_type == "way" - else relation_centers - ) - if m_id in member_container: - center[0] += member_container[m_id][0] - center[1] += member_container[m_id][1] - count += 1 - if count == 0: - return None - element["center"] = {"lat": center[1] / count, "lon": center[0] / count} - return element["center"]["lon"], element["center"]["lat"] - - -def calculate_centers(elements: list[OsmElementT]) -> None: - """Adds 'center' key to each way/relation in elements, - except for empty ways or relations. - Relies on nodes-ways-relations order in the elements list. 
- """ - nodes: dict[int, LonLat] = {} # id => LonLat - ways: dict[int, LonLat] = {} # id => approx center LonLat - relations: dict[int, LonLat] = {} # id => approx center LonLat - - unlocalized_relations: list[OsmElementT] = [] # 'unlocalized' means - # the center of the relation has not been calculated yet - - for el in elements: - if el["type"] == "node": - nodes[el["id"]] = (el["lon"], el["lat"]) - elif el["type"] == "way": - if center := get_way_center(el, nodes): - ways[el["id"]] = center - elif el["type"] == "relation": - if center := get_relation_center(el, nodes, ways, relations): - relations[el["id"]] = center - else: - unlocalized_relations.append(el) - - def iterate_relation_centers_calculation( - ignore_unlocalized_child_relations: bool, - ) -> list[OsmElementT]: - unlocalized_relations_upd = [] - for rel in unlocalized_relations: - if center := get_relation_center( - rel, nodes, ways, relations, ignore_unlocalized_child_relations - ): - relations[rel["id"]] = center - else: - unlocalized_relations_upd.append(rel) - return unlocalized_relations_upd - - # Calculate centers for relations that have no one yet - while unlocalized_relations: - unlocalized_relations_upd = iterate_relation_centers_calculation(False) - progress = len(unlocalized_relations_upd) < len(unlocalized_relations) - if not progress: - unlocalized_relations_upd = iterate_relation_centers_calculation( - True - ) - progress = len(unlocalized_relations_upd) < len( - unlocalized_relations - ) - if not progress: - break - unlocalized_relations = unlocalized_relations_upd - - -def add_osm_elements_to_cities( - osm_elements: list[OsmElementT], cities: list[City] -) -> None: - for el in osm_elements: - for c in cities: - if c.contains(el): - c.add(el) - - -def validate_cities(cities: list[City]) -> list[City]: - """Validate cities. 
Return list of good cities.""" - good_cities = [] - for c in cities: - try: - c.extract_routes() - except CriticalValidationError as e: - logging.error( - "Critical validation error while processing %s: %s", - c.name, - e, - ) - c.error(str(e)) - except AssertionError as e: - logging.error( - "Validation logic error while processing %s: %s", - c.name, - e, - ) - c.error(f"Validation logic error: {e}") - else: - c.validate() - if c.is_good: - c.calculate_distances() - good_cities.append(c) - - return good_cities - - -def get_cities_info( - cities_info_url: str = DEFAULT_CITIES_INFO_URL, -) -> list[dict]: - response = urllib.request.urlopen(cities_info_url) - if ( - not cities_info_url.startswith("file://") - and (r_code := response.getcode()) != 200 - ): - raise Exception( - f"Failed to download cities spreadsheet: HTTP {r_code}" - ) - data = response.read().decode("utf-8") - reader = csv.DictReader( - data.splitlines(), - fieldnames=( - "id", - "name", - "country", - "continent", - "num_stations", - "num_lines", - "num_light_lines", - "num_interchanges", - "bbox", - "networks", - ), - ) - - cities_info = list() - names = set() - next(reader) # skipping the header - for city_info in reader: - if city_info["id"] and city_info["bbox"]: - cities_info.append(city_info) - name = city_info["name"].strip() - if name in names: - logging.warning( - "Duplicate city name in city list: %s", - city_info, - ) - names.add(name) - return cities_info - - -def prepare_cities( - cities_info_url: str = DEFAULT_CITIES_INFO_URL, overground: bool = False -) -> list[City]: - if overground: - raise NotImplementedError("Overground transit not implemented yet") - cities_info = get_cities_info(cities_info_url) - return list(map(partial(City, overground=overground), cities_info)) - - -def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument( - "--cities-info-url", - default=DEFAULT_CITIES_INFO_URL, - help=( - "URL of CSV file with reference information about rapid transit " - "networks. file:// protocol is also supported." 
- ), - ) - parser.add_argument( - "-i", - "--source", - help="File to write backup of OSM data, or to read data from", - ) - parser.add_argument( - "-x", "--xml", help="OSM extract with routes, to read data from" - ) - parser.add_argument( - "--overpass-api", - default="http://overpass-api.de/api/interpreter", - help="Overpass API URL", - ) - parser.add_argument( - "-q", - "--quiet", - action="store_true", - help="Show only warnings and errors", - ) - parser.add_argument( - "-c", "--city", help="Validate only a single city or a country" - ) - parser.add_argument( - "-t", - "--overground", - action="store_true", - help="Process overground transport instead of subways", - ) - parser.add_argument( - "-e", - "--entrances", - type=argparse.FileType("w", encoding="utf-8"), - help="Export unused subway entrances as GeoJSON here", - ) - parser.add_argument( - "-l", - "--log", - type=argparse.FileType("w", encoding="utf-8"), - help="Validation JSON file name", - ) - parser.add_argument( - "--dump-city-list", - type=argparse.FileType("w", encoding="utf-8"), - help=( - "Dump sorted list of all city names, possibly with " - f"{BAD_MARK} mark" - ), - ) - - for processor_name, processor in inspect.getmembers( - processors, inspect.ismodule - ): - if not processor_name.startswith("_"): - parser.add_argument( - f"--output-{processor_name}", - help=( - "Processed metro systems output filename " - f"in {processor_name.upper()} format" - ), - ) - - parser.add_argument("--cache", help="Cache file name for processed data") - parser.add_argument( - "-r", "--recovery-path", help="Cache file name for error recovery" - ) - parser.add_argument( - "-d", "--dump", help="Make a YAML file for a city data" - ) - parser.add_argument( - "-j", "--geojson", help="Make a GeoJSON file for a city data" - ) - parser.add_argument( - "--crude", - action="store_true", - help="Do not use OSM railway geometry for GeoJSON", - ) - options = parser.parse_args() - - if options.quiet: - log_level = logging.WARNING - else: - log_level = logging.INFO - logging.basicConfig( - level=log_level, - datefmt="%H:%M:%S", - format="%(asctime)s %(levelname)-7s %(message)s", - ) - - cities = prepare_cities(options.cities_info_url, options.overground) - if options.city: - cities = [ - c - for c in cities - if c.name == options.city or c.country == options.city - ] - if not cities: - logging.error("No cities to process") - sys.exit(2) - - # Augment cities with recovery data - recovery_data = None - if options.recovery_path: - recovery_data = read_recovery_data(options.recovery_path) - for city in cities: - city.recovery_data = recovery_data.get(city.name, None) - - logging.info("Read %s metro networks", len(cities)) - - # Reading cached json, loading XML or querying Overpass API - if options.source and os.path.exists(options.source): - logging.info("Reading %s", options.source) - with open(options.source, "r") as f: - osm = json.load(f) - if "elements" in osm: - osm = osm["elements"] - calculate_centers(osm) - elif options.xml: - logging.info("Reading %s", options.xml) - osm = load_xml(options.xml) - calculate_centers(osm) - if options.source: - with open(options.source, "w", encoding="utf-8") as f: - json.dump(osm, f) - else: - if len(cities) > 10: - logging.error( - "Would not download that many cities from Overpass API, " - "choose a smaller set" - ) - sys.exit(3) - bboxes = [c.bbox for c in cities] - logging.info("Downloading data from Overpass API") - osm = multi_overpass(options.overground, options.overpass_api, bboxes) - calculate_centers(osm) - 
if options.source: - with open(options.source, "w", encoding="utf-8") as f: - json.dump(osm, f) - logging.info("Downloaded %s elements", len(osm)) - - logging.info("Sorting elements by city") - add_osm_elements_to_cities(osm, cities) - - logging.info("Building routes for each city") - good_cities = validate_cities(cities) - - logging.info("Finding transfer stations") - transfers = find_transfers(osm, good_cities) - - good_city_names = set(c.name for c in good_cities) - logging.info( - "%s good cities: %s", - len(good_city_names), - ", ".join(sorted(good_city_names)), - ) - bad_city_names = set(c.name for c in cities) - good_city_names - logging.info( - "%s bad cities: %s", - len(bad_city_names), - ", ".join(sorted(bad_city_names)), - ) - - if options.dump_city_list: - lines = sorted( - f"{city.name}, {city.country}" - f"{' ' + BAD_MARK if city.name in bad_city_names else ''}\n" - for city in cities - ) - options.dump_city_list.writelines(lines) - - if options.recovery_path: - write_recovery_data(options.recovery_path, recovery_data, cities) - - if options.entrances: - json.dump(get_unused_subway_entrances_geojson(osm), options.entrances) - - if options.dump: - if os.path.isdir(options.dump): - for c in cities: - with open( - os.path.join(options.dump, slugify(c.name) + ".yaml"), - "w", - encoding="utf-8", - ) as f: - dump_yaml(c, f) - elif len(cities) == 1: - with open(options.dump, "w", encoding="utf-8") as f: - dump_yaml(cities[0], f) - else: - logging.error("Cannot dump %s cities at once", len(cities)) - - if options.geojson: - if os.path.isdir(options.geojson): - for c in cities: - with open( - os.path.join( - options.geojson, slugify(c.name) + ".geojson" - ), - "w", - encoding="utf-8", - ) as f: - json.dump(make_geojson(c, not options.crude), f) - elif len(cities) == 1: - with open(options.geojson, "w", encoding="utf-8") as f: - json.dump(make_geojson(cities[0], not options.crude), f) - else: - logging.error( - "Cannot make a geojson of %s cities at once", len(cities) - ) - - if options.log: - res = [] - for c in cities: - v = c.get_validation_result() - v["slug"] = slugify(c.name) - res.append(v) - json.dump(res, options.log, indent=2, ensure_ascii=False) - - for processor_name, processor in inspect.getmembers( - processors, inspect.ismodule - ): - option_name = f"output_{processor_name}" - - if not getattr(options, option_name, None): - continue - - filename = getattr(options, option_name) - processor.process(cities, transfers, filename, options.cache) - - -if __name__ == "__main__": - main() diff --git a/scripts/process_subways.py b/scripts/process_subways.py new file mode 100755 index 0000000..65d1600 --- /dev/null +++ b/scripts/process_subways.py @@ -0,0 +1,276 @@ +import argparse +import inspect +import json +import logging +import os +import re +import sys + +from subways import processors +from subways.overpass import multi_overpass +from subways.subway_io import ( + dump_yaml, + load_xml, + make_geojson, + read_recovery_data, + write_recovery_data, +) +from subways.structure.city import ( + find_transfers, + get_unused_subway_entrances_geojson, +) +from subways.validation import ( + add_osm_elements_to_cities, + BAD_MARK, + calculate_centers, + DEFAULT_CITIES_INFO_URL, + prepare_cities, + validate_cities, +) + + +def slugify(name: str) -> str: + return re.sub(r"[^a-z0-9_-]+", "", name.lower().replace(" ", "_")) + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + "--cities-info-url", + default=DEFAULT_CITIES_INFO_URL, + help=( + "URL of CSV 
file with reference information about rapid transit " + "networks. file:// protocol is also supported." + ), + ) + parser.add_argument( + "-i", + "--source", + help="File to write backup of OSM data, or to read data from", + ) + parser.add_argument( + "-x", "--xml", help="OSM extract with routes, to read data from" + ) + parser.add_argument( + "--overpass-api", + default="http://overpass-api.de/api/interpreter", + help="Overpass API URL", + ) + parser.add_argument( + "-q", + "--quiet", + action="store_true", + help="Show only warnings and errors", + ) + parser.add_argument( + "-c", "--city", help="Validate only a single city or a country" + ) + parser.add_argument( + "-t", + "--overground", + action="store_true", + help="Process overground transport instead of subways", + ) + parser.add_argument( + "-e", + "--entrances", + type=argparse.FileType("w", encoding="utf-8"), + help="Export unused subway entrances as GeoJSON here", + ) + parser.add_argument( + "-l", + "--log", + type=argparse.FileType("w", encoding="utf-8"), + help="Validation JSON file name", + ) + parser.add_argument( + "--dump-city-list", + type=argparse.FileType("w", encoding="utf-8"), + help=( + "Dump sorted list of all city names, possibly with " + f"{BAD_MARK} mark" + ), + ) + + for processor_name, processor in inspect.getmembers( + processors, inspect.ismodule + ): + if not processor_name.startswith("_"): + parser.add_argument( + f"--output-{processor_name}", + help=( + "Processed metro systems output filename " + f"in {processor_name.upper()} format" + ), + ) + + parser.add_argument("--cache", help="Cache file name for processed data") + parser.add_argument( + "-r", "--recovery-path", help="Cache file name for error recovery" + ) + parser.add_argument( + "-d", "--dump", help="Make a YAML file for a city data" + ) + parser.add_argument( + "-j", "--geojson", help="Make a GeoJSON file for a city data" + ) + parser.add_argument( + "--crude", + action="store_true", + help="Do not use OSM railway geometry for GeoJSON", + ) + options = parser.parse_args() + + if options.quiet: + log_level = logging.WARNING + else: + log_level = logging.INFO + logging.basicConfig( + level=log_level, + datefmt="%H:%M:%S", + format="%(asctime)s %(levelname)-7s %(message)s", + ) + + cities = prepare_cities(options.cities_info_url, options.overground) + if options.city: + cities = [ + c + for c in cities + if c.name == options.city or c.country == options.city + ] + if not cities: + logging.error("No cities to process") + sys.exit(2) + + # Augment cities with recovery data + recovery_data = None + if options.recovery_path: + recovery_data = read_recovery_data(options.recovery_path) + for city in cities: + city.recovery_data = recovery_data.get(city.name, None) + + logging.info("Read %s metro networks", len(cities)) + + # Reading cached json, loading XML or querying Overpass API + if options.source and os.path.exists(options.source): + logging.info("Reading %s", options.source) + with open(options.source, "r") as f: + osm = json.load(f) + if "elements" in osm: + osm = osm["elements"] + calculate_centers(osm) + elif options.xml: + logging.info("Reading %s", options.xml) + osm = load_xml(options.xml) + calculate_centers(osm) + if options.source: + with open(options.source, "w", encoding="utf-8") as f: + json.dump(osm, f) + else: + if len(cities) > 10: + logging.error( + "Would not download that many cities from Overpass API, " + "choose a smaller set" + ) + sys.exit(3) + bboxes = [c.bbox for c in cities] + logging.info("Downloading data from Overpass 
API") + osm = multi_overpass(options.overground, options.overpass_api, bboxes) + calculate_centers(osm) + if options.source: + with open(options.source, "w", encoding="utf-8") as f: + json.dump(osm, f) + logging.info("Downloaded %s elements", len(osm)) + + logging.info("Sorting elements by city") + add_osm_elements_to_cities(osm, cities) + + logging.info("Building routes for each city") + good_cities = validate_cities(cities) + + logging.info("Finding transfer stations") + transfers = find_transfers(osm, good_cities) + + good_city_names = set(c.name for c in good_cities) + logging.info( + "%s good cities: %s", + len(good_city_names), + ", ".join(sorted(good_city_names)), + ) + bad_city_names = set(c.name for c in cities) - good_city_names + logging.info( + "%s bad cities: %s", + len(bad_city_names), + ", ".join(sorted(bad_city_names)), + ) + + if options.dump_city_list: + lines = sorted( + f"{city.name}, {city.country}" + f"{' ' + BAD_MARK if city.name in bad_city_names else ''}\n" + for city in cities + ) + options.dump_city_list.writelines(lines) + + if options.recovery_path: + write_recovery_data(options.recovery_path, recovery_data, cities) + + if options.entrances: + json.dump(get_unused_subway_entrances_geojson(osm), options.entrances) + + if options.dump: + if os.path.isdir(options.dump): + for c in cities: + with open( + os.path.join(options.dump, slugify(c.name) + ".yaml"), + "w", + encoding="utf-8", + ) as f: + dump_yaml(c, f) + elif len(cities) == 1: + with open(options.dump, "w", encoding="utf-8") as f: + dump_yaml(cities[0], f) + else: + logging.error("Cannot dump %s cities at once", len(cities)) + + if options.geojson: + if os.path.isdir(options.geojson): + for c in cities: + with open( + os.path.join( + options.geojson, slugify(c.name) + ".geojson" + ), + "w", + encoding="utf-8", + ) as f: + json.dump(make_geojson(c, not options.crude), f) + elif len(cities) == 1: + with open(options.geojson, "w", encoding="utf-8") as f: + json.dump(make_geojson(cities[0], not options.crude), f) + else: + logging.error( + "Cannot make a geojson of %s cities at once", len(cities) + ) + + if options.log: + res = [] + for c in cities: + v = c.get_validation_result() + v["slug"] = slugify(c.name) + res.append(v) + json.dump(res, options.log, indent=2, ensure_ascii=False) + + for processor_name, processor in inspect.getmembers( + processors, inspect.ismodule + ): + option_name = f"output_{processor_name}" + + if not getattr(options, option_name, None): + continue + + filename = getattr(options, option_name) + processor.process(cities, transfers, filename, options.cache) + + +if __name__ == "__main__": + main() diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index 345dd2d..62a45e7 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -94,7 +94,7 @@ function check_poly() { if [ -n "$("$PYTHON" -c "import shapely" 2>&1)" ]; then "$PYTHON" -m pip install shapely==2.0.1 fi - "$PYTHON" "$SUBWAYS_PATH"/make_all_metro_poly.py \ + "$PYTHON" "$SUBWAYS_REPO_PATH"/tools/make_poly/make_all_metro_poly.py \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} > "$POLY" fi fi @@ -107,13 +107,15 @@ PYTHON=${PYTHON:-python3} # This will fail if there is no python "$PYTHON" --version > /dev/null -SUBWAYS_PATH="$(dirname "$0")/.." -if [ ! -f "$SUBWAYS_PATH/process_subways.py" ]; then +# "readlink -f" echoes canonicalized absolute path to a file/directory +SUBWAYS_REPO_PATH="$(readlink -f $(dirname "$0")/..)" + +if [ ! 
-f "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ]; then echo "Please clone the subways repo to $SUBWAYS_PATH" exit 2 fi -TMPDIR="${TMPDIR:-$SUBWAYS_PATH}" +TMPDIR="${TMPDIR:-$SUBWAYS_REPO_PATH}" mkdir -p "$TMPDIR" # Downloading the latest version of the subways script @@ -242,7 +244,7 @@ if [ -n "${DUMP-}" ]; then fi VALIDATION="$TMPDIR/validation.json" -"$PYTHON" "$SUBWAYS_PATH/process_subways.py" ${QUIET:+-q} \ +"$PYTHON" "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ${QUIET:+-q} \ -x "$FILTERED_DATA" -l "$VALIDATION" \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ ${MAPSME:+--output-mapsme "$MAPSME"} \ @@ -262,13 +264,13 @@ fi # Preparing HTML files if [ -z "${HTML_DIR-}" ]; then - HTML_DIR="$SUBWAYS_PATH/html" + HTML_DIR="$SUBWAYS_REPO_PATH/html" REMOVE_HTML=1 fi mkdir -p $HTML_DIR rm -f "$HTML_DIR"/*.html -"$PYTHON" "$SUBWAYS_PATH/validation_to_html.py" \ +"$PYTHON" "$SUBWAYS_REPO_PATH/tools/v2h/validation_to_html.py" \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ "$VALIDATION" "$HTML_DIR" diff --git a/subway_structure.py b/subway_structure.py deleted file mode 100644 index 94c6f47..0000000 --- a/subway_structure.py +++ /dev/null @@ -1,2505 +0,0 @@ -from __future__ import annotations - -import math -import re -from collections import Counter, defaultdict -from collections.abc import Callable, Collection, Iterator -from itertools import chain, islice -from typing import TypeAlias, TypeVar - -from css_colours import normalize_colour - -MAX_DISTANCE_TO_ENTRANCES = 300 # in meters -MAX_DISTANCE_STOP_TO_LINE = 50 # in meters -ALLOWED_STATIONS_MISMATCH = 0.02 # part of total station count -ALLOWED_TRANSFERS_MISMATCH = 0.07 # part of total interchanges count -ALLOWED_ANGLE_BETWEEN_STOPS = 45 # in degrees -DISALLOWED_ANGLE_BETWEEN_STOPS = 20 # in degrees -SUGGEST_TRANSFER_MIN_DISTANCE = 100 # in meters - -# If an object was moved not too far compared to previous script run, -# it is likely the same object -DISPLACEMENT_TOLERANCE = 300 # in meters - -MODES_RAPID = {"subway", "light_rail", "monorail", "train"} -MODES_OVERGROUND = {"tram", "bus", "trolleybus", "aerialway", "ferry"} -DEFAULT_MODES_RAPID = {"subway", "light_rail"} -DEFAULT_MODES_OVERGROUND = {"tram"} # TODO: bus and trolleybus? -ALL_MODES = MODES_RAPID | MODES_OVERGROUND -RAILWAY_TYPES = { - "rail", - "light_rail", - "subway", - "narrow_gauge", - "funicular", - "monorail", - "tram", -} -CONSTRUCTION_KEYS = ( - "construction", - "proposed", - "construction:railway", - "proposed:railway", -) - -used_entrances = set() - - -START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*") - -OsmElementT: TypeAlias = dict -IdT: TypeAlias = str # Type of feature ids -TransferT: TypeAlias = set[IdT] # A transfer is a set of StopArea IDs -TransfersT: TypeAlias = list[TransferT] -LonLat: TypeAlias = tuple[float, float] -RailT: TypeAlias = list[LonLat] -T = TypeVar("T") - - -def get_start_end_times( - opening_hours: str, -) -> tuple[tuple[int, int], tuple[int, int]] | tuple[None, None]: - """Very simplified method to parse OSM opening_hours tag. - We simply take the first HH:MM-HH:MM substring which is the most probable - opening hours interval for the most of the weekdays. 
- """ - start_time, end_time = None, None - m = START_END_TIMES_RE.match(opening_hours) - if m: - ints = tuple(map(int, m.groups())) - start_time = (ints[0], ints[1]) - end_time = (ints[2], ints[3]) - return start_time, end_time - - -def osm_interval_to_seconds(interval_str: str) -> int | None: - """Convert to int an OSM value for 'interval'/'headway' tag - which may be in these formats: - HH:MM:SS, - HH:MM, - MM, - M - (https://wiki.openstreetmap.org/wiki/Key:interval#Format) - """ - hours, minutes, seconds = 0, 0, 0 - semicolon_count = interval_str.count(":") - try: - if semicolon_count == 0: - minutes = int(interval_str) - elif semicolon_count == 1: - hours, minutes = map(int, interval_str.split(":")) - elif semicolon_count == 2: - hours, minutes, seconds = map(int, interval_str.split(":")) - else: - return None - except ValueError: - return None - return seconds + 60 * minutes + 60 * 60 * hours - - -class CriticalValidationError(Exception): - """Is thrown if an error occurs - that prevents further validation of a city.""" - - -def el_id(el: OsmElementT) -> IdT | None: - if not el: - return None - if "type" not in el: - raise Exception("What is this element? {}".format(el)) - return el["type"][0] + str(el.get("id", el.get("ref", ""))) - - -def el_center(el: OsmElementT) -> LonLat | None: - if not el: - return None - if "lat" in el: - return el["lon"], el["lat"] - elif "center" in el: - return el["center"]["lon"], el["center"]["lat"] - return None - - -def distance(p1: LonLat, p2: LonLat) -> float: - if p1 is None or p2 is None: - raise Exception( - "One of arguments to distance({}, {}) is None".format(p1, p2) - ) - dx = math.radians(p1[0] - p2[0]) * math.cos( - 0.5 * math.radians(p1[1] + p2[1]) - ) - dy = math.radians(p1[1] - p2[1]) - return 6378137 * math.sqrt(dx * dx + dy * dy) - - -def is_near(p1: LonLat, p2: LonLat) -> bool: - return ( - p1[0] - 1e-8 <= p2[0] <= p1[0] + 1e-8 - and p1[1] - 1e-8 <= p2[1] <= p1[1] + 1e-8 - ) - - -def project_on_segment(p: LonLat, p1: LonLat, p2: LonLat) -> float | None: - """Given three points, return u - the position of projection of - point p onto segment p1p2 regarding point p1 and (p2-p1) direction vector - """ - dp = (p2[0] - p1[0], p2[1] - p1[1]) - d2 = dp[0] * dp[0] + dp[1] * dp[1] - if d2 < 1e-14: - return None - u = ((p[0] - p1[0]) * dp[0] + (p[1] - p1[1]) * dp[1]) / d2 - if not 0 <= u <= 1: - return None - return u - - -def project_on_line(p: LonLat, line: RailT) -> dict: - result = { - # In the first approximation, position on rails is the index of the - # closest vertex of line to the point p. Fractional value means that - # the projected point lies on a segment between two vertices. - # More than one value can occur if a route follows the same tracks - # more than once. 
- "positions_on_line": None, - "projected_point": None, # (lon, lat) - } - - if len(line) < 2: - return result - d_min = MAX_DISTANCE_STOP_TO_LINE * 5 - closest_to_vertex = False - # First, check vertices in the line - for i, vertex in enumerate(line): - d = distance(p, vertex) - if d < d_min: - result["positions_on_line"] = [i] - result["projected_point"] = vertex - d_min = d - closest_to_vertex = True - elif vertex == result["projected_point"]: - # Repeated occurrence of the track vertex in line, like Oslo Line 5 - result["positions_on_line"].append(i) - # And then calculate distances to each segment - for seg in range(len(line) - 1): - # Check bbox for speed - if not ( - ( - min(line[seg][0], line[seg + 1][0]) - MAX_DISTANCE_STOP_TO_LINE - <= p[0] - <= max(line[seg][0], line[seg + 1][0]) - + MAX_DISTANCE_STOP_TO_LINE - ) - and ( - min(line[seg][1], line[seg + 1][1]) - MAX_DISTANCE_STOP_TO_LINE - <= p[1] - <= max(line[seg][1], line[seg + 1][1]) - + MAX_DISTANCE_STOP_TO_LINE - ) - ): - continue - u = project_on_segment(p, line[seg], line[seg + 1]) - if u: - projected_point = ( - line[seg][0] + u * (line[seg + 1][0] - line[seg][0]), - line[seg][1] + u * (line[seg + 1][1] - line[seg][1]), - ) - d = distance(p, projected_point) - if d < d_min: - result["positions_on_line"] = [seg + u] - result["projected_point"] = projected_point - d_min = d - closest_to_vertex = False - elif projected_point == result["projected_point"]: - # Repeated occurrence of the track segment in line, - # like Oslo Line 5 - if not closest_to_vertex: - result["positions_on_line"].append(seg + u) - return result - - -def find_segment( - p: LonLat, line: RailT, start_vertex: int = 0 -) -> tuple[int, float] | tuple[None, None]: - """Returns index of a segment and a position inside it.""" - EPS = 1e-9 - for seg in range(start_vertex, len(line) - 1): - if is_near(p, line[seg]): - return seg, 0.0 - if line[seg][0] == line[seg + 1][0]: - if not (p[0] - EPS <= line[seg][0] <= p[0] + EPS): - continue - px = None - else: - px = (p[0] - line[seg][0]) / (line[seg + 1][0] - line[seg][0]) - if px is None or (0 <= px <= 1): - if line[seg][1] == line[seg + 1][1]: - if not (p[1] - EPS <= line[seg][1] <= p[1] + EPS): - continue - py = None - else: - py = (p[1] - line[seg][1]) / (line[seg + 1][1] - line[seg][1]) - if py is None or (0 <= py <= 1): - if py is None or px is None or (px - EPS <= py <= px + EPS): - return seg, px or py - return None, None - - -def distance_on_line( - p1: LonLat, p2: LonLat, line: RailT, start_vertex: int = 0 -) -> tuple[float, int] | None: - """Calculates distance via line between projections - of points p1 and p2. 
Returns a TUPLE of (d, vertex): - d is the distance and vertex is the number of the second - vertex, to continue calculations for the next point.""" - line_len = len(line) - seg1, pos1 = find_segment(p1, line, start_vertex) - if seg1 is None: - # logging.warn('p1 %s is not projected, st=%s', p1, start_vertex) - return None - seg2, pos2 = find_segment(p2, line, seg1) - if seg2 is None: - if line[0] == line[-1]: - line = line + line[1:] - seg2, pos2 = find_segment(p2, line, seg1) - if seg2 is None: - # logging.warn('p2 %s is not projected, st=%s', p2, start_vertex) - return None - if seg1 == seg2: - return distance(line[seg1], line[seg1 + 1]) * abs(pos2 - pos1), seg1 - if seg2 < seg1: - # Should not happen - raise Exception("Pos1 %s is after pos2 %s", seg1, seg2) - d = 0 - if pos1 < 1: - d += distance(line[seg1], line[seg1 + 1]) * (1 - pos1) - for i in range(seg1 + 1, seg2): - d += distance(line[i], line[i + 1]) - if pos2 > 0: - d += distance(line[seg2], line[seg2 + 1]) * pos2 - return d, seg2 % line_len - - -def angle_between(p1: LonLat, c: LonLat, p2: LonLat) -> float: - a = round( - abs( - math.degrees( - math.atan2(p1[1] - c[1], p1[0] - c[0]) - - math.atan2(p2[1] - c[1], p2[0] - c[0]) - ) - ) - ) - return a if a <= 180 else 360 - a - - -def format_elid_list(ids: Collection[IdT]) -> str: - msg = ", ".join(sorted(ids)[:20]) - if len(ids) > 20: - msg += ", ..." - return msg - - -class Station: - @staticmethod - def get_modes(el: OsmElementT) -> set[str]: - modes = {m for m in ALL_MODES if el["tags"].get(m) == "yes"} - if mode := el["tags"].get("station"): - modes.add(mode) - return modes - - @staticmethod - def is_station(el: OsmElementT, modes: set[str]) -> bool: - # public_transport=station is too ambiguous and unspecific to use, - # so we expect for it to be backed by railway=station. - if ( - "tram" in modes - and el.get("tags", {}).get("railway") == "tram_stop" - ): - return True - if el.get("tags", {}).get("railway") not in ("station", "halt"): - return False - for k in CONSTRUCTION_KEYS: - if k in el["tags"]: - return False - # Not checking for station=train, obviously - if "train" not in modes and Station.get_modes(el).isdisjoint(modes): - return False - return True - - def __init__(self, el: OsmElementT, city: City) -> None: - """Call this with a railway=station node.""" - if not Station.is_station(el, city.modes): - raise Exception( - "Station object should be instantiated from a station node. 
" - "Got: {}".format(el) - ) - - self.id: IdT = el_id(el) - self.element: OsmElementT = el - self.modes = Station.get_modes(el) - self.name = el["tags"].get("name", "?") - self.int_name = el["tags"].get( - "int_name", el["tags"].get("name:en", None) - ) - try: - self.colour = normalize_colour(el["tags"].get("colour", None)) - except ValueError as e: - self.colour = None - city.warn(str(e), el) - self.center = el_center(el) - if self.center is None: - raise Exception("Could not find center of {}".format(el)) - - def __repr__(self) -> str: - return "Station(id={}, modes={}, name={}, center={})".format( - self.id, ",".join(self.modes), self.name, self.center - ) - - -class StopArea: - @staticmethod - def is_stop(el: OsmElementT) -> bool: - if "tags" not in el: - return False - if el["tags"].get("railway") == "stop": - return True - if el["tags"].get("public_transport") == "stop_position": - return True - return False - - @staticmethod - def is_platform(el: OsmElementT) -> bool: - if "tags" not in el: - return False - if el["tags"].get("railway") in ("platform", "platform_edge"): - return True - if el["tags"].get("public_transport") == "platform": - return True - return False - - @staticmethod - def is_track(el: OsmElementT) -> bool: - if el["type"] != "way" or "tags" not in el: - return False - return el["tags"].get("railway") in RAILWAY_TYPES - - def __init__( - self, - station: Station, - city: City, - stop_area: OsmElementT | None = None, - ) -> None: - """Call this with a Station object.""" - - self.element: OsmElementT = stop_area or station.element - self.id: IdT = el_id(self.element) - self.station: Station = station - self.stops = set() # set of el_ids of stop_positions - self.platforms = set() # set of el_ids of platforms - self.exits = set() # el_id of subway_entrance/train_station_entrance - # for leaving the platform - self.entrances = set() # el_id of subway/train_station entrance - # for entering the platform - self.center = None # lon, lat of the station centre point - self.centers = {} # el_id -> (lon, lat) for all elements - self.transfer = None # el_id of a transfer relation - - self.modes = station.modes - self.name = station.name - self.int_name = station.int_name - self.colour = station.colour - - if stop_area: - self.name = stop_area["tags"].get("name", self.name) - self.int_name = stop_area["tags"].get( - "int_name", stop_area["tags"].get("name:en", self.int_name) - ) - try: - self.colour = ( - normalize_colour(stop_area["tags"].get("colour")) - or self.colour - ) - except ValueError as e: - city.warn(str(e), stop_area) - - self._process_members(station, city, stop_area) - else: - self._add_nearby_entrances(station, city) - - if self.exits and not self.entrances: - city.warn( - "Only exits for a station, no entrances", - stop_area or station.element, - ) - if self.entrances and not self.exits: - city.warn("No exits for a station", stop_area or station.element) - - for el in self.get_elements(): - self.centers[el] = el_center(city.elements[el]) - - """Calculate the center point of the station. This algorithm - cannot rely on a station node, since many stop_areas can share one. 
- Basically it averages center points of all platforms - and stop positions.""" - if len(self.stops) + len(self.platforms) == 0: - self.center = station.center - else: - self.center = [0, 0] - for sp in chain(self.stops, self.platforms): - spc = self.centers[sp] - for i in range(2): - self.center[i] += spc[i] - for i in range(2): - self.center[i] /= len(self.stops) + len(self.platforms) - - def _process_members( - self, station: Station, city: City, stop_area: OsmElementT - ) -> None: - # If we have a stop area, add all elements from it - tracks_detected = False - for m in stop_area["members"]: - k = el_id(m) - m_el = city.elements.get(k) - if not m_el or "tags" not in m_el: - continue - if Station.is_station(m_el, city.modes): - if k != station.id: - city.error("Stop area has multiple stations", stop_area) - elif StopArea.is_stop(m_el): - self.stops.add(k) - elif StopArea.is_platform(m_el): - self.platforms.add(k) - elif (entrance_type := m_el["tags"].get("railway")) in ( - "subway_entrance", - "train_station_entrance", - ): - if m_el["type"] != "node": - city.warn(f"{entrance_type} is not a node", m_el) - if ( - m_el["tags"].get("entrance") != "exit" - and m["role"] != "exit_only" - ): - self.entrances.add(k) - if ( - m_el["tags"].get("entrance") != "entrance" - and m["role"] != "entry_only" - ): - self.exits.add(k) - elif StopArea.is_track(m_el): - tracks_detected = True - - if tracks_detected: - city.warn("Tracks in a stop_area relation", stop_area) - - def _add_nearby_entrances(self, station: Station, city: City) -> None: - center = station.center - for entrance_el in ( - el - for el in city.elements.values() - if "tags" in el - and (entrance_type := el["tags"].get("railway")) - in ("subway_entrance", "train_station_entrance") - ): - entrance_id = el_id(entrance_el) - if entrance_id in city.stop_areas: - continue # This entrance belongs to some stop_area - c_center = el_center(entrance_el) - if ( - c_center - and distance(center, c_center) <= MAX_DISTANCE_TO_ENTRANCES - ): - if entrance_el["type"] != "node": - city.warn(f"{entrance_type} is not a node", entrance_el) - etag = entrance_el["tags"].get("entrance") - if etag != "exit": - self.entrances.add(entrance_id) - if etag != "entrance": - self.exits.add(entrance_id) - - def get_elements(self) -> set[IdT]: - result = {self.id, self.station.id} - result.update(self.entrances) - result.update(self.exits) - result.update(self.stops) - result.update(self.platforms) - return result - - def __repr__(self) -> str: - return ( - f"StopArea(id={self.id}, name={self.name}, station={self.station}," - f" transfer={self.transfer}, center={self.center})" - ) - - -class RouteStop: - def __init__(self, stoparea: StopArea) -> None: - self.stoparea: StopArea = stoparea - self.stop: LonLat = None # Stop position, possibly projected - self.distance = 0 # In meters from the start of the route - self.platform_entry = None # Platform el_id - self.platform_exit = None # Platform el_id - self.can_enter = False - self.can_exit = False - self.seen_stop = False - self.seen_platform_entry = False - self.seen_platform_exit = False - self.seen_station = False - - @property - def seen_platform(self) -> bool: - return self.seen_platform_entry or self.seen_platform_exit - - @staticmethod - def get_actual_role( - el: OsmElementT, role: str, modes: set[str] - ) -> str | None: - if StopArea.is_stop(el): - return "stop" - elif StopArea.is_platform(el): - return "platform" - elif Station.is_station(el, modes): - if "platform" in role: - return "platform" - else: - return 
"stop" - return None - - def add(self, member: dict, relation: OsmElementT, city: City) -> None: - el = city.elements[el_id(member)] - role = member["role"] - - if StopArea.is_stop(el): - if "platform" in role: - city.warn("Stop position in a platform role in a route", el) - if el["type"] != "node": - city.error("Stop position is not a node", el) - self.stop = el_center(el) - if "entry_only" not in role: - self.can_exit = True - if "exit_only" not in role: - self.can_enter = True - - elif Station.is_station(el, city.modes): - if el["type"] != "node": - city.notice("Station in route is not a node", el) - - if not self.seen_stop and not self.seen_platform: - self.stop = el_center(el) - self.can_enter = True - self.can_exit = True - - elif StopArea.is_platform(el): - if "stop" in role: - city.warn("Platform in a stop role in a route", el) - if "exit_only" not in role: - self.platform_entry = el_id(el) - self.can_enter = True - if "entry_only" not in role: - self.platform_exit = el_id(el) - self.can_exit = True - if not self.seen_stop: - self.stop = el_center(el) - - multiple_check = False - actual_role = RouteStop.get_actual_role(el, role, city.modes) - if actual_role == "platform": - if role == "platform_entry_only": - multiple_check = self.seen_platform_entry - self.seen_platform_entry = True - elif role == "platform_exit_only": - multiple_check = self.seen_platform_exit - self.seen_platform_exit = True - else: - if role != "platform" and "stop" not in role: - city.warn( - f'Platform "{el["tags"].get("name", "")}" ' - f'({el_id(el)}) with invalid role "{role}" in route', - relation, - ) - multiple_check = self.seen_platform - self.seen_platform_entry = True - self.seen_platform_exit = True - elif actual_role == "stop": - multiple_check = self.seen_stop - self.seen_stop = True - if multiple_check: - log_function = city.error if actual_role == "stop" else city.notice - log_function( - f'Multiple {actual_role}s for a station "' - f'{el["tags"].get("name", "")} ' - f"({el_id(el)}) in a route relation", - relation, - ) - - def __repr__(self) -> str: - return ( - "RouteStop(stop={}, pl_entry={}, pl_exit={}, stoparea={})".format( - self.stop, - self.platform_entry, - self.platform_exit, - self.stoparea, - ) - ) - - -class Route: - """The longest route for a city with a unique ref.""" - - @staticmethod - def is_route(el: OsmElementT, modes: set[str]) -> bool: - if ( - el["type"] != "relation" - or el.get("tags", {}).get("type") != "route" - ): - return False - if "members" not in el: - return False - if el["tags"].get("route") not in modes: - return False - for k in CONSTRUCTION_KEYS: - if k in el["tags"]: - return False - if "ref" not in el["tags"] and "name" not in el["tags"]: - return False - return True - - @staticmethod - def get_network(relation: OsmElementT) -> str | None: - for k in ("network:metro", "network", "operator"): - if k in relation["tags"]: - return relation["tags"][k] - return None - - @staticmethod - def get_interval(tags: dict) -> int | None: - v = None - for k in ("interval", "headway"): - if k in tags: - v = tags[k] - break - else: - for kk in tags: - if kk.startswith(k + ":"): - v = tags[kk] - break - if not v: - return None - return osm_interval_to_seconds(v) - - def stopareas(self) -> Iterator[StopArea]: - yielded_stopareas = set() - for route_stop in self: - stoparea = route_stop.stoparea - if stoparea not in yielded_stopareas: - yield stoparea - yielded_stopareas.add(stoparea) - - def __init__( - self, - relation: OsmElementT, - city: City, - master: OsmElementT | None 
= None, - ) -> None: - assert Route.is_route( - relation, city.modes - ), f"The relation does not seem to be a route: {relation}" - self.city = city - self.element: OsmElementT = relation - self.id: IdT = el_id(relation) - - self.ref = None - self.name = None - self.mode = None - self.colour = None - self.infill = None - self.network = None - self.interval = None - self.start_time = None - self.end_time = None - self.is_circular = False - self.stops: list[RouteStop] = [] - # Would be a list of (lon, lat) for the longest stretch. Can be empty. - self.tracks = None - # Index of the first stop that is located on/near the self.tracks - self.first_stop_on_rails_index = None - # Index of the last stop that is located on/near the self.tracks - self.last_stop_on_rails_index = None - - self.process_tags(master) - stop_position_elements = self.process_stop_members() - self.process_tracks(stop_position_elements) - - def build_longest_line(self) -> tuple[list[IdT], set[IdT]]: - line_nodes: set[IdT] = set() - last_track: list[IdT] = [] - track: list[IdT] = [] - warned_about_holes = False - for m in self.element["members"]: - el = self.city.elements.get(el_id(m), None) - if not el or not StopArea.is_track(el): - continue - if "nodes" not in el or len(el["nodes"]) < 2: - self.city.error("Cannot find nodes in a railway", el) - continue - nodes: list[IdT] = ["n{}".format(n) for n in el["nodes"]] - if m["role"] == "backward": - nodes.reverse() - line_nodes.update(nodes) - if not track: - is_first = True - track.extend(nodes) - else: - new_segment = list(nodes) # copying - if new_segment[0] == track[-1]: - track.extend(new_segment[1:]) - elif new_segment[-1] == track[-1]: - track.extend(reversed(new_segment[:-1])) - elif is_first and track[0] in ( - new_segment[0], - new_segment[-1], - ): - # We can reverse the track and try again - track.reverse() - if new_segment[0] == track[-1]: - track.extend(new_segment[1:]) - else: - track.extend(reversed(new_segment[:-1])) - else: - # Store the track if it is long and clean it - if not warned_about_holes: - self.city.warn( - "Hole in route rails near node {}".format( - track[-1] - ), - self.element, - ) - warned_about_holes = True - if len(track) > len(last_track): - last_track = track - track = [] - is_first = False - if len(track) > len(last_track): - last_track = track - # Remove duplicate points - last_track = [ - last_track[i] - for i in range(0, len(last_track)) - if i == 0 or last_track[i - 1] != last_track[i] - ] - return last_track, line_nodes - - def get_stop_projections(self) -> tuple[list[dict], Callable[[int], bool]]: - projected = [project_on_line(x.stop, self.tracks) for x in self.stops] - - def stop_near_tracks_criterion(stop_index: int) -> bool: - return ( - projected[stop_index]["projected_point"] is not None - and distance( - self.stops[stop_index].stop, - projected[stop_index]["projected_point"], - ) - <= MAX_DISTANCE_STOP_TO_LINE - ) - - return projected, stop_near_tracks_criterion - - def project_stops_on_line(self) -> dict: - projected, stop_near_tracks_criterion = self.get_stop_projections() - - projected_stops_data = { - "first_stop_on_rails_index": None, - "last_stop_on_rails_index": None, - "stops_on_longest_line": [], # list [{'route_stop': RouteStop, - # 'coords': LonLat, - # 'positions_on_rails': [] } - } - first_index = 0 - while first_index < len(self.stops) and not stop_near_tracks_criterion( - first_index - ): - first_index += 1 - projected_stops_data["first_stop_on_rails_index"] = first_index - - last_index = len(self.stops) - 1 - 
while last_index > projected_stops_data[ - "first_stop_on_rails_index" - ] and not stop_near_tracks_criterion(last_index): - last_index -= 1 - projected_stops_data["last_stop_on_rails_index"] = last_index - - for i, route_stop in enumerate(self.stops): - if not first_index <= i <= last_index: - continue - - if projected[i]["projected_point"] is None: - self.city.error( - 'Stop "{}" {} is nowhere near the tracks'.format( - route_stop.stoparea.name, route_stop.stop - ), - self.element, - ) - else: - stop_data = { - "route_stop": route_stop, - "coords": None, - "positions_on_rails": None, - } - projected_point = projected[i]["projected_point"] - # We've got two separate stations with a good stretch of - # railway tracks between them. Put these on tracks. - d = round(distance(route_stop.stop, projected_point)) - if d > MAX_DISTANCE_STOP_TO_LINE: - self.city.notice( - 'Stop "{}" {} is {} meters from the tracks'.format( - route_stop.stoparea.name, route_stop.stop, d - ), - self.element, - ) - else: - stop_data["coords"] = projected_point - stop_data["positions_on_rails"] = projected[i][ - "positions_on_line" - ] - projected_stops_data["stops_on_longest_line"].append(stop_data) - return projected_stops_data - - def calculate_distances(self) -> None: - dist = 0 - vertex = 0 - for i, stop in enumerate(self.stops): - if i > 0: - direct = distance(stop.stop, self.stops[i - 1].stop) - d_line = None - if ( - self.first_stop_on_rails_index - <= i - <= self.last_stop_on_rails_index - ): - d_line = distance_on_line( - self.stops[i - 1].stop, stop.stop, self.tracks, vertex - ) - if d_line and direct - 10 <= d_line[0] <= direct * 2: - vertex = d_line[1] - dist += round(d_line[0]) - else: - dist += round(direct) - stop.distance = dist - - def process_tags(self, master: OsmElementT) -> None: - relation = self.element - master_tags = {} if not master else master["tags"] - if "ref" not in relation["tags"] and "ref" not in master_tags: - self.city.notice("Missing ref on a route", relation) - self.ref = relation["tags"].get( - "ref", master_tags.get("ref", relation["tags"].get("name", None)) - ) - self.name = relation["tags"].get("name", None) - self.mode = relation["tags"]["route"] - if ( - "colour" not in relation["tags"] - and "colour" not in master_tags - and self.mode != "tram" - ): - self.city.notice("Missing colour on a route", relation) - try: - self.colour = normalize_colour( - relation["tags"].get("colour", master_tags.get("colour", None)) - ) - except ValueError as e: - self.colour = None - self.city.warn(str(e), relation) - try: - self.infill = normalize_colour( - relation["tags"].get( - "colour:infill", master_tags.get("colour:infill", None) - ) - ) - except ValueError as e: - self.infill = None - self.city.warn(str(e), relation) - self.network = Route.get_network(relation) - self.interval = Route.get_interval( - relation["tags"] - ) or Route.get_interval(master_tags) - self.start_time, self.end_time = get_start_end_times( - relation["tags"].get( - "opening_hours", master_tags.get("opening_hours", "") - ) - ) - if relation["tags"].get("public_transport:version") == "1": - self.city.warn( - "Public transport version is 1, which means the route " - "is an unsorted pile of objects", - relation, - ) - - def process_stop_members(self) -> list[OsmElementT]: - stations: set[StopArea] = set() # temporary for recording stations - seen_stops = False - seen_platforms = False - repeat_pos = None - stop_position_elements: list[OsmElementT] = [] - for m in self.element["members"]: - if "inactive" in m["role"]: - 
continue - k = el_id(m) - if k in self.city.stations: - st_list = self.city.stations[k] - st = st_list[0] - if len(st_list) > 1: - self.city.error( - f"Ambiguous station {st.name} in route. Please " - "use stop_position or split interchange stations", - self.element, - ) - el = self.city.elements[k] - actual_role = RouteStop.get_actual_role( - el, m["role"], self.city.modes - ) - if actual_role: - if m["role"] and actual_role not in m["role"]: - self.city.warn( - "Wrong role '{}' for {} {}".format( - m["role"], actual_role, k - ), - self.element, - ) - if repeat_pos is None: - if not self.stops or st not in stations: - stop = RouteStop(st) - self.stops.append(stop) - stations.add(st) - elif self.stops[-1].stoparea.id == st.id: - stop = self.stops[-1] - else: - # We've got a repeat - if ( - (seen_stops and seen_platforms) - or ( - actual_role == "stop" - and not seen_platforms - ) - or ( - actual_role == "platform" - and not seen_stops - ) - ): - # Circular route! - stop = RouteStop(st) - self.stops.append(stop) - stations.add(st) - else: - repeat_pos = 0 - if repeat_pos is not None: - if repeat_pos >= len(self.stops): - continue - # Check that the type matches - if (actual_role == "stop" and seen_stops) or ( - actual_role == "platform" and seen_platforms - ): - self.city.error( - 'Found an out-of-place {}: "{}" ({})'.format( - actual_role, el["tags"].get("name", ""), k - ), - self.element, - ) - continue - # Find the matching stop starting with index repeat_pos - while ( - repeat_pos < len(self.stops) - and self.stops[repeat_pos].stoparea.id != st.id - ): - repeat_pos += 1 - if repeat_pos >= len(self.stops): - self.city.error( - "Incorrect order of {}s at {}".format( - actual_role, k - ), - self.element, - ) - continue - stop = self.stops[repeat_pos] - - stop.add(m, self.element, self.city) - if repeat_pos is None: - seen_stops |= stop.seen_stop or stop.seen_station - seen_platforms |= stop.seen_platform - - if StopArea.is_stop(el): - stop_position_elements.append(el) - - continue - - if k not in self.city.elements: - if "stop" in m["role"] or "platform" in m["role"]: - raise CriticalValidationError( - f"{m['role']} {m['type']} {m['ref']} for route " - f"relation {self.element['id']} is not in the dataset" - ) - continue - el = self.city.elements[k] - if "tags" not in el: - self.city.error( - f"Untagged object {k} in a route", self.element - ) - continue - - is_under_construction = False - for ck in CONSTRUCTION_KEYS: - if ck in el["tags"]: - self.city.warn( - f"Under construction {m['role'] or 'feature'} {k} " - "in route. Consider setting 'inactive' role or " - "removing construction attributes", - self.element, - ) - is_under_construction = True - break - if is_under_construction: - continue - - if Station.is_station(el, self.city.modes): - # A station may be not included in this route due to previous - # 'stop area has multiple stations' error. No other error - # message is needed. 
- pass - elif el["tags"].get("railway") in ("station", "halt"): - self.city.error( - "Missing station={} on a {}".format(self.mode, m["role"]), - el, - ) - else: - actual_role = RouteStop.get_actual_role( - el, m["role"], self.city.modes - ) - if actual_role: - self.city.error( - f"{actual_role} {m['type']} {m['ref']} is not " - "connected to a station in route", - self.element, - ) - elif not StopArea.is_track(el): - self.city.warn( - "Unknown member type for {} {} in route".format( - m["type"], m["ref"] - ), - self.element, - ) - return stop_position_elements - - def process_tracks( - self, stop_position_elements: list[OsmElementT] - ) -> None: - tracks, line_nodes = self.build_longest_line() - - for stop_el in stop_position_elements: - stop_id = el_id(stop_el) - if stop_id not in line_nodes: - self.city.warn( - 'Stop position "{}" ({}) is not on tracks'.format( - stop_el["tags"].get("name", ""), stop_id - ), - self.element, - ) - - # self.tracks would be a list of (lon, lat) for the longest stretch. - # Can be empty. - self.tracks = [el_center(self.city.elements.get(k)) for k in tracks] - if ( - None in self.tracks - ): # usually, extending BBOX for the city is needed - self.tracks = [] - for n in filter(lambda x: x not in self.city.elements, tracks): - self.city.warn( - f"The dataset is missing the railway tracks node {n}", - self.element, - ) - break - - if len(self.stops) > 1: - self.is_circular = ( - self.stops[0].stoparea == self.stops[-1].stoparea - ) - if ( - self.is_circular - and self.tracks - and self.tracks[0] != self.tracks[-1] - ): - self.city.warn( - "Non-closed rail sequence in a circular route", - self.element, - ) - - projected_stops_data = self.project_stops_on_line() - self.check_and_recover_stops_order(projected_stops_data) - self.apply_projected_stops_data(projected_stops_data) - - def apply_projected_stops_data(self, projected_stops_data: dict) -> None: - """Store better stop coordinates and indexes of first/last stops - that lie on a continuous track line, to the instance attributes. - """ - for attr in ("first_stop_on_rails_index", "last_stop_on_rails_index"): - setattr(self, attr, projected_stops_data[attr]) - - for stop_data in projected_stops_data["stops_on_longest_line"]: - route_stop = stop_data["route_stop"] - route_stop.positions_on_rails = stop_data["positions_on_rails"] - if stop_coords := stop_data["coords"]: - route_stop.stop = stop_coords - - def get_extended_tracks(self) -> RailT: - """Amend tracks with points of leading/trailing self.stops - that were not projected onto the longest tracks line. - Return a new array. - """ - if self.first_stop_on_rails_index >= len(self.stops): - tracks = [route_stop.stop for route_stop in self.stops] - else: - tracks = ( - [ - route_stop.stop - for i, route_stop in enumerate(self.stops) - if i < self.first_stop_on_rails_index - ] - + self.tracks - + [ - route_stop.stop - for i, route_stop in enumerate(self.stops) - if i > self.last_stop_on_rails_index - ] - ) - return tracks - - def get_truncated_tracks(self, tracks: RailT) -> RailT: - """Truncate leading/trailing segments of `tracks` param - that are beyond the first and last stop locations. - Return a new array. 
- """ - if self.is_circular: - return tracks.copy() - - first_stop_location = find_segment(self.stops[0].stop, tracks, 0) - last_stop_location = find_segment(self.stops[-1].stop, tracks, 0) - - if last_stop_location != (None, None): - seg2, u2 = last_stop_location - if u2 == 0.0: - # Make seg2 the segment the last_stop_location is - # at the middle or end of - seg2 -= 1 - # u2 = 1.0 - if seg2 + 2 < len(tracks): - tracks = tracks[0 : seg2 + 2] # noqa E203 - tracks[-1] = self.stops[-1].stop - - if first_stop_location != (None, None): - seg1, u1 = first_stop_location - if u1 == 1.0: - # Make seg1 the segment the first_stop_location is - # at the beginning or middle of - seg1 += 1 - # u1 = 0.0 - if seg1 > 0: - tracks = tracks[seg1:] - tracks[0] = self.stops[0].stop - - return tracks - - def are_tracks_complete(self) -> bool: - return ( - self.first_stop_on_rails_index == 0 - and self.last_stop_on_rails_index == len(self) - 1 - ) - - def get_tracks_geometry(self) -> RailT: - tracks = self.get_extended_tracks() - tracks = self.get_truncated_tracks(tracks) - return tracks - - def check_stops_order_by_angle(self) -> tuple[list[str], list[str]]: - disorder_warnings = [] - disorder_errors = [] - for i, route_stop in enumerate( - islice(self.stops, 1, len(self.stops) - 1), start=1 - ): - angle = angle_between( - self.stops[i - 1].stop, - route_stop.stop, - self.stops[i + 1].stop, - ) - if angle < ALLOWED_ANGLE_BETWEEN_STOPS: - msg = ( - "Angle between stops around " - f'"{route_stop.stoparea.name}" {route_stop.stop} ' - f"is too narrow, {angle} degrees" - ) - if angle < DISALLOWED_ANGLE_BETWEEN_STOPS: - disorder_errors.append(msg) - else: - disorder_warnings.append(msg) - return disorder_warnings, disorder_errors - - def check_stops_order_on_tracks_direct( - self, stop_sequence: Iterator[dict] - ) -> str | None: - """Checks stops order on tracks, following stop_sequence - in direct order only. - :param stop_sequence: list of dict{'route_stop', 'positions_on_rails', - 'coords'} for RouteStops that belong to the longest contiguous - sequence of tracks in a route. - :return: error message on the first order violation or None. - """ - allowed_order_violations = 1 if self.is_circular else 0 - max_position_on_rails = -1 - for stop_data in stop_sequence: - positions_on_rails = stop_data["positions_on_rails"] - suitable_occurrence = 0 - while ( - suitable_occurrence < len(positions_on_rails) - and positions_on_rails[suitable_occurrence] - < max_position_on_rails - ): - suitable_occurrence += 1 - if suitable_occurrence == len(positions_on_rails): - if allowed_order_violations > 0: - suitable_occurrence -= 1 - allowed_order_violations -= 1 - else: - route_stop = stop_data["route_stop"] - return ( - "Stops on tracks are unordered near " - f'"{route_stop.stoparea.name}" {route_stop.stop}' - ) - max_position_on_rails = positions_on_rails[suitable_occurrence] - - def check_stops_order_on_tracks( - self, projected_stops_data: dict - ) -> str | None: - """Checks stops order on tracks, trying direct and reversed - order of stops in the stop_sequence. - :param projected_stops_data: info about RouteStops that belong to the - longest contiguous sequence of tracks in a route. May be changed - if tracks reversing is performed. - :return: error message on the first order violation or None. 
- """ - error_message = self.check_stops_order_on_tracks_direct( - projected_stops_data["stops_on_longest_line"] - ) - if error_message: - error_message_reversed = self.check_stops_order_on_tracks_direct( - reversed(projected_stops_data["stops_on_longest_line"]) - ) - if error_message_reversed is None: - error_message = None - self.city.warn( - "Tracks seem to go in the opposite direction to stops", - self.element, - ) - self.tracks.reverse() - new_projected_stops_data = self.project_stops_on_line() - projected_stops_data.update(new_projected_stops_data) - - return error_message - - def check_stops_order( - self, projected_stops_data: dict - ) -> tuple[list[str], list[str]]: - ( - angle_disorder_warnings, - angle_disorder_errors, - ) = self.check_stops_order_by_angle() - disorder_on_tracks_error = self.check_stops_order_on_tracks( - projected_stops_data - ) - disorder_warnings = angle_disorder_warnings - disorder_errors = angle_disorder_errors - if disorder_on_tracks_error: - disorder_errors.append(disorder_on_tracks_error) - return disorder_warnings, disorder_errors - - def check_and_recover_stops_order( - self, projected_stops_data: dict - ) -> None: - """ - :param projected_stops_data: may change if we need to reverse tracks - """ - disorder_warnings, disorder_errors = self.check_stops_order( - projected_stops_data - ) - if disorder_warnings or disorder_errors: - resort_success = False - if self.city.recovery_data: - resort_success = self.try_resort_stops() - if resort_success: - for msg in disorder_warnings: - self.city.notice(msg, self.element) - for msg in disorder_errors: - self.city.warn( - "Fixed with recovery data: " + msg, self.element - ) - - if not resort_success: - for msg in disorder_warnings: - self.city.notice(msg, self.element) - for msg in disorder_errors: - self.city.error(msg, self.element) - - def try_resort_stops(self) -> bool: - """Precondition: self.city.recovery_data is not None. - Return success of station order recovering.""" - self_stops = {} # station name => RouteStop - for stop in self.stops: - station = stop.stoparea.station - stop_name = station.name - if stop_name == "?" 
and station.int_name: - stop_name = station.int_name - # We won't programmatically recover routes with repeating stations: - # such cases are rare and deserves manual verification - if stop_name in self_stops: - return False - self_stops[stop_name] = stop - - route_id = (self.colour, self.ref) - if route_id not in self.city.recovery_data: - return False - - stop_names = list(self_stops.keys()) - suitable_itineraries = [] - for itinerary in self.city.recovery_data[route_id]: - itinerary_stop_names = [ - stop["name"] for stop in itinerary["stations"] - ] - if not ( - len(stop_names) == len(itinerary_stop_names) - and sorted(stop_names) == sorted(itinerary_stop_names) - ): - continue - big_station_displacement = False - for it_stop in itinerary["stations"]: - name = it_stop["name"] - it_stop_center = it_stop["center"] - self_stop_center = self_stops[name].stoparea.station.center - if ( - distance(it_stop_center, self_stop_center) - > DISPLACEMENT_TOLERANCE - ): - big_station_displacement = True - break - if not big_station_displacement: - suitable_itineraries.append(itinerary) - - if len(suitable_itineraries) == 0: - return False - elif len(suitable_itineraries) == 1: - matching_itinerary = suitable_itineraries[0] - else: - from_tag = self.element["tags"].get("from") - to_tag = self.element["tags"].get("to") - if not from_tag and not to_tag: - return False - matching_itineraries = [ - itin - for itin in suitable_itineraries - if from_tag - and itin["from"] == from_tag - or to_tag - and itin["to"] == to_tag - ] - if len(matching_itineraries) != 1: - return False - matching_itinerary = matching_itineraries[0] - self.stops = [ - self_stops[stop["name"]] for stop in matching_itinerary["stations"] - ] - return True - - def get_end_transfers(self) -> tuple[IdT, IdT]: - """Using transfer ids because a train can arrive at different - stations within a transfer. But disregard transfer that may give - an impression of a circular route (for example, - Simonis / Elisabeth station and route 2 in Brussels). 
- """ - return ( - (self[0].stoparea.id, self[-1].stoparea.id) - if ( - self[0].stoparea.transfer is not None - and self[0].stoparea.transfer == self[-1].stoparea.transfer - ) - else ( - self[0].stoparea.transfer or self[0].stoparea.id, - self[-1].stoparea.transfer or self[-1].stoparea.id, - ) - ) - - def get_transfers_sequence(self) -> list[IdT]: - """Return a list of stoparea or transfer (if not None) ids.""" - transfer_seq = [ - stop.stoparea.transfer or stop.stoparea.id for stop in self - ] - if ( - self[0].stoparea.transfer is not None - and self[0].stoparea.transfer == self[-1].stoparea.transfer - ): - transfer_seq[0], transfer_seq[-1] = self.get_end_transfers() - return transfer_seq - - def __len__(self) -> int: - return len(self.stops) - - def __getitem__(self, i) -> RouteStop: - return self.stops[i] - - def __iter__(self) -> Iterator[RouteStop]: - return iter(self.stops) - - def __repr__(self) -> str: - return ( - "Route(id={}, mode={}, ref={}, name={}, network={}, interval={}, " - "circular={}, num_stops={}, line_length={} m, from={}, to={}" - ).format( - self.id, - self.mode, - self.ref, - self.name, - self.network, - self.interval, - self.is_circular, - len(self.stops), - self.stops[-1].distance, - self.stops[0], - self.stops[-1], - ) - - -class RouteMaster: - def __init__(self, city: City, master: OsmElementT = None) -> None: - self.city = city - self.routes = [] - self.best: Route = None - self.id: IdT = el_id(master) - self.has_master = master is not None - self.interval_from_master = False - if master: - self.ref = master["tags"].get( - "ref", master["tags"].get("name", None) - ) - try: - self.colour = normalize_colour( - master["tags"].get("colour", None) - ) - except ValueError: - self.colour = None - try: - self.infill = normalize_colour( - master["tags"].get("colour:infill", None) - ) - except ValueError: - self.infill = None - self.network = Route.get_network(master) - self.mode = master["tags"].get( - "route_master", None - ) # This tag is required, but okay - self.name = master["tags"].get("name", None) - self.interval = Route.get_interval(master["tags"]) - self.interval_from_master = self.interval is not None - else: - self.ref = None - self.colour = None - self.infill = None - self.network = None - self.mode = None - self.name = None - self.interval = None - - def stopareas(self) -> Iterator[StopArea]: - yielded_stopareas = set() - for route in self: - for stoparea in route.stopareas(): - if stoparea not in yielded_stopareas: - yield stoparea - yielded_stopareas.add(stoparea) - - def add(self, route: Route) -> None: - if not self.network: - self.network = route.network - elif route.network and route.network != self.network: - self.city.error( - 'Route has different network ("{}") from master "{}"'.format( - route.network, self.network - ), - route.element, - ) - - if not self.colour: - self.colour = route.colour - elif route.colour and route.colour != self.colour: - self.city.notice( - 'Route "{}" has different colour from master "{}"'.format( - route.colour, self.colour - ), - route.element, - ) - - if not self.infill: - self.infill = route.infill - elif route.infill and route.infill != self.infill: - self.city.notice( - ( - f'Route "{route.infill}" has different infill colour ' - f'from master "{self.infill}"' - ), - route.element, - ) - - if not self.ref: - self.ref = route.ref - elif route.ref != self.ref: - self.city.notice( - 'Route "{}" has different ref from master "{}"'.format( - route.ref, self.ref - ), - route.element, - ) - - if not self.name: - 
self.name = route.name - - if not self.mode: - self.mode = route.mode - elif route.mode != self.mode: - self.city.error( - "Incompatible PT mode: master has {} and route has {}".format( - self.mode, route.mode - ), - route.element, - ) - return - - if not self.interval_from_master and route.interval: - if not self.interval: - self.interval = route.interval - else: - self.interval = min(self.interval, route.interval) - - # Choose minimal id for determinancy - if not self.has_master and (not self.id or self.id > route.id): - self.id = route.id - - self.routes.append(route) - if ( - not self.best - or len(route.stops) > len(self.best.stops) - or ( - # Choose route with minimal id for determinancy - len(route.stops) == len(self.best.stops) - and route.element["id"] < self.best.element["id"] - ) - ): - self.best = route - - def get_meaningful_routes(self) -> list[Route]: - return [route for route in self if len(route) >= 2] - - def find_twin_routes(self) -> dict[Route, Route]: - """Two non-circular routes are twins if they have the same end - stations and opposite directions, and the number of stations is - the same or almost the same. We'll then find stops that are present - in one direction and is missing in another direction - to warn. - """ - - twin_routes = {} # route => "twin" route - - for route in self.get_meaningful_routes(): - if route.is_circular: - continue # Difficult to calculate. TODO(?) in the future - if route in twin_routes: - continue - - route_transfer_ids = set(route.get_transfers_sequence()) - ends = route.get_end_transfers() - ends_reversed = ends[::-1] - - twin_candidates = [ - r - for r in self - if not r.is_circular - and r not in twin_routes - and r.get_end_transfers() == ends_reversed - # If absolute or relative difference in station count is large, - # possibly it's an express version of a route - skip it. - and ( - abs(len(r) - len(route)) <= 2 - or abs(len(r) - len(route)) / max(len(r), len(route)) - <= 0.2 - ) - ] - - if not twin_candidates: - continue - - twin_route = min( - twin_candidates, - key=lambda r: len( - route_transfer_ids ^ set(r.get_transfers_sequence()) - ), - ) - twin_routes[route] = twin_route - twin_routes[twin_route] = route - - return twin_routes - - def check_return_routes(self) -> None: - """Check if a route has return direction, and if twin routes - miss stations. - """ - meaningful_routes = self.get_meaningful_routes() - - if len(meaningful_routes) == 0: - self.city.error( - f"An empty route master {self.id}. " - "Please set construction:route if it is under construction" - ) - elif len(meaningful_routes) == 1: - log_function = ( - self.city.error - if not self.best.is_circular - else self.city.notice - ) - log_function( - "Only one route in route_master. 
" - "Please check if it needs a return route", - self.best.element, - ) - else: - self.check_return_circular_routes() - self.check_return_noncircular_routes() - - def check_return_noncircular_routes(self) -> None: - routes = [ - route - for route in self.get_meaningful_routes() - if not route.is_circular - ] - all_ends = {route.get_end_transfers(): route for route in routes} - for route in routes: - ends = route.get_end_transfers() - if ends[::-1] not in all_ends: - self.city.notice( - "Route does not have a return direction", route.element - ) - - twin_routes = self.find_twin_routes() - for route1, route2 in twin_routes.items(): - if route1.id > route2.id: - continue # to process a pair of routes only once - # and to ensure the order of routes in the pair - self.alert_twin_routes_differ(route1, route2) - - def check_return_circular_routes(self) -> None: - routes = { - route - for route in self.get_meaningful_routes() - if route.is_circular - } - routes_having_backward = set() - - for route in routes: - if route in routes_having_backward: - continue - transfer_sequence1 = [ - stop.stoparea.transfer or stop.stoparea.id for stop in route - ] - transfer_sequence1.pop() - for potential_backward_route in routes - {route}: - transfer_sequence2 = [ - stop.stoparea.transfer or stop.stoparea.id - for stop in potential_backward_route - ][ - -2::-1 - ] # truncate repeated first stop and reverse - common_subsequence = self.find_common_circular_subsequence( - transfer_sequence1, transfer_sequence2 - ) - if len(common_subsequence) >= 0.8 * min( - len(transfer_sequence1), len(transfer_sequence2) - ): - routes_having_backward.add(route) - routes_having_backward.add(potential_backward_route) - break - - for route in routes - routes_having_backward: - self.city.notice( - "Route does not have a return direction", route.element - ) - - @staticmethod - def find_common_circular_subsequence( - seq1: list[T], seq2: list[T] - ) -> list[T]: - """seq1 and seq2 are supposed to be stops of some circular routes. - Prerequisites to rely on the result: - - elements of each sequence are not repeated - - the order of stations is not violated. - Under these conditions we don't need LCS algorithm. Linear scan is - sufficient. 
- """ - i1, i2 = -1, -1 - for i1, x in enumerate(seq1): - try: - i2 = seq2.index(x) - except ValueError: - continue - else: - # x is found both in seq1 and seq2 - break - - if i2 == -1: - return [] - - # Shift cyclically so that the common element takes the first position - # both in seq1 and seq2 - seq1 = seq1[i1:] + seq1[:i1] - seq2 = seq2[i2:] + seq2[:i2] - - common_subsequence = [] - i2 = 0 - for x in seq1: - try: - i2 = seq2.index(x, i2) - except ValueError: - continue - common_subsequence.append(x) - i2 += 1 - if i2 >= len(seq2): - break - return common_subsequence - - def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: - """Arguments are that route1.id < route2.id""" - ( - stops_missing_from_route1, - stops_missing_from_route2, - stops_that_dont_match, - ) = self.calculate_twin_routes_diff(route1, route2) - - for st in stops_missing_from_route1: - if ( - not route1.are_tracks_complete() - or ( - projected_point := project_on_line( - st.stoparea.center, route1.tracks - )["projected_point"] - ) - is not None - and distance(st.stoparea.center, projected_point) - <= MAX_DISTANCE_STOP_TO_LINE - ): - self.city.notice( - f"Stop {st.stoparea.station.name} {st.stop} is included " - f"in the {route2.id} but not included in {route1.id}", - route1.element, - ) - - for st in stops_missing_from_route2: - if ( - not route2.are_tracks_complete() - or ( - projected_point := project_on_line( - st.stoparea.center, route2.tracks - )["projected_point"] - ) - is not None - and distance(st.stoparea.center, projected_point) - <= MAX_DISTANCE_STOP_TO_LINE - ): - self.city.notice( - f"Stop {st.stoparea.station.name} {st.stop} is included " - f"in the {route1.id} but not included in {route2.id}", - route2.element, - ) - - for st1, st2 in stops_that_dont_match: - if ( - st1.stoparea.station == st2.stoparea.station - or distance(st1.stop, st2.stop) < SUGGEST_TRANSFER_MIN_DISTANCE - ): - self.city.notice( - "Should there be one stoparea or a transfer between " - f"{st1.stoparea.station.name} {st1.stop} and " - f"{st2.stoparea.station.name} {st2.stop}?", - route1.element, - ) - - @staticmethod - def calculate_twin_routes_diff(route1: Route, route2: Route) -> tuple: - """Wagner–Fischer algorithm for stops diff in two twin routes.""" - - stops1 = route1.stops - stops2 = route2.stops[::-1] - - def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: - return ( - stop1.stoparea == stop2.stoparea - or stop1.stoparea.transfer is not None - and stop1.stoparea.transfer == stop2.stoparea.transfer - ) - - d = [[0] * (len(stops2) + 1) for _ in range(len(stops1) + 1)] - d[0] = list(range(len(stops2) + 1)) - for i in range(len(stops1) + 1): - d[i][0] = i - - for i in range(1, len(stops1) + 1): - for j in range(1, len(stops2) + 1): - d[i][j] = ( - d[i - 1][j - 1] - if stops_match(stops1[i - 1], stops2[j - 1]) - else min((d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])) + 1 - ) - - stops_missing_from_route1: list[RouteStop] = [] - stops_missing_from_route2: list[RouteStop] = [] - stops_that_dont_match: list[tuple[RouteStop, RouteStop]] = [] - - i = len(stops1) - j = len(stops2) - while not (i == 0 and j == 0): - action = None - if i > 0 and j > 0: - match = stops_match(stops1[i - 1], stops2[j - 1]) - if match and d[i - 1][j - 1] == d[i][j]: - action = "no" - elif not match and d[i - 1][j - 1] + 1 == d[i][j]: - action = "change" - if not action and i > 0 and d[i - 1][j] + 1 == d[i][j]: - action = "add_2" - if not action and j > 0 and d[i][j - 1] + 1 == d[i][j]: - action = "add_1" - - match action: - case 
"add_1": - stops_missing_from_route1.append(stops2[j - 1]) - j -= 1 - case "add_2": - stops_missing_from_route2.append(stops1[i - 1]) - i -= 1 - case _: - if action == "change": - stops_that_dont_match.append( - (stops1[i - 1], stops2[j - 1]) - ) - i -= 1 - j -= 1 - return ( - stops_missing_from_route1, - stops_missing_from_route2, - stops_that_dont_match, - ) - - def __len__(self) -> int: - return len(self.routes) - - def __getitem__(self, i) -> Route: - return self.routes[i] - - def __iter__(self) -> Iterator[Route]: - return iter(self.routes) - - def __repr__(self) -> str: - return ( - f"RouteMaster(id={self.id}, mode={self.mode}, ref={self.ref}, " - f"name={self.name}, network={self.network}, " - f"num_variants={len(self.routes)}" - ) - - -class City: - route_class = Route - - def __init__(self, city_data: dict, overground: bool = False) -> None: - self.validate_called = False - self.errors: list[str] = [] - self.warnings: list[str] = [] - self.notices: list[str] = [] - self.id = None - self.try_fill_int_attribute(city_data, "id") - self.name = city_data["name"] - self.country = city_data["country"] - self.continent = city_data["continent"] - self.overground = overground - if not overground: - self.try_fill_int_attribute(city_data, "num_stations") - self.try_fill_int_attribute(city_data, "num_lines", "0") - self.try_fill_int_attribute(city_data, "num_light_lines", "0") - self.try_fill_int_attribute(city_data, "num_interchanges", "0") - else: - self.try_fill_int_attribute(city_data, "num_tram_lines", "0") - self.try_fill_int_attribute(city_data, "num_trolleybus_lines", "0") - self.try_fill_int_attribute(city_data, "num_bus_lines", "0") - self.try_fill_int_attribute(city_data, "num_other_lines", "0") - - # Acquiring list of networks and modes - networks = ( - None - if not city_data["networks"] - else city_data["networks"].split(":") - ) - if not networks or len(networks[-1]) == 0: - self.networks = [] - else: - self.networks = set( - filter(None, [x.strip() for x in networks[-1].split(";")]) - ) - if not networks or len(networks) < 2 or len(networks[0]) == 0: - if self.overground: - self.modes = DEFAULT_MODES_OVERGROUND - else: - self.modes = DEFAULT_MODES_RAPID - else: - self.modes = {x.strip() for x in networks[0].split(",")} - - # Reversing bbox so it is (xmin, ymin, xmax, ymax) - bbox = city_data["bbox"].split(",") - if len(bbox) == 4: - self.bbox = [float(bbox[i]) for i in (1, 0, 3, 2)] - else: - self.bbox = None - - self.elements: dict[IdT, OsmElementT] = {} - self.stations: dict[IdT, list[StopArea]] = defaultdict(list) - self.routes: dict[str, RouteMaster] = {} # keys are route_master refs - self.masters: dict[IdT, OsmElementT] = {} # Route id → master element - self.stop_areas: [IdT, list[OsmElementT]] = defaultdict(list) - self.transfers: list[set[StopArea]] = [] - self.station_ids: set[IdT] = set() - self.stops_and_platforms: set[IdT] = set() - self.recovery_data = None - - def try_fill_int_attribute( - self, city_data: dict, attr: str, default: str | None = None - ) -> None: - """Try to convert string value to int. Conversion is considered - to fail if one of the following is true: - * attr is not empty and data type casting fails; - * attr is empty and no default value is given. - In such cases the city is marked as bad by adding an error - to the city validation log. 
- """ - attr_value = city_data[attr] - if not attr_value and default is not None: - attr_value = default - - try: - attr_int = int(attr_value) - except ValueError: - print_value = ( - f"{city_data[attr]}" if city_data[attr] else "" - ) - self.error( - f"Configuration error: wrong value for {attr}: {print_value}" - ) - setattr(self, attr, 0) - else: - setattr(self, attr, attr_int) - - @staticmethod - def log_message(message: str, el: OsmElementT) -> str: - if el: - tags = el.get("tags", {}) - message += ' ({} {}, "{}")'.format( - el["type"], - el.get("id", el.get("ref")), - tags.get("name", tags.get("ref", "")), - ) - return message - - def notice(self, message: str, el: OsmElementT | None = None) -> None: - """This type of message may point to a potential problem.""" - msg = City.log_message(message, el) - self.notices.append(msg) - - def warn(self, message: str, el: OsmElementT | None = None) -> None: - """A warning is definitely a problem but is doesn't prevent - from building a routing file and doesn't invalidate the city. - """ - msg = City.log_message(message, el) - self.warnings.append(msg) - - def error(self, message: str, el: OsmElementT | None = None) -> None: - """Error is a critical problem that invalidates the city.""" - msg = City.log_message(message, el) - self.errors.append(msg) - - def contains(self, el: OsmElementT) -> bool: - center = el_center(el) - if center: - return ( - self.bbox[0] <= center[1] <= self.bbox[2] - and self.bbox[1] <= center[0] <= self.bbox[3] - ) - return False - - def add(self, el: OsmElementT) -> None: - if el["type"] == "relation" and "members" not in el: - return - - self.elements[el_id(el)] = el - if not (el["type"] == "relation" and "tags" in el): - return - - relation_type = el["tags"].get("type") - if relation_type == "route_master": - for m in el["members"]: - if m["type"] != "relation": - continue - - if el_id(m) in self.masters: - self.error("Route in two route_masters", m) - self.masters[el_id(m)] = el - - elif el["tags"].get("public_transport") == "stop_area": - if relation_type != "public_transport": - self.warn( - "stop_area relation with " - f"type={relation_type}, needed type=public_transport", - el, - ) - return - - warned_about_duplicates = False - for m in el["members"]: - stop_areas = self.stop_areas[el_id(m)] - if el in stop_areas and not warned_about_duplicates: - self.warn("Duplicate element in a stop area", el) - warned_about_duplicates = True - else: - stop_areas.append(el) - - def make_transfer(self, stoparea_group: OsmElementT) -> None: - transfer: set[StopArea] = set() - for m in stoparea_group["members"]: - k = el_id(m) - el = self.elements.get(k) - if not el: - # A stoparea_group member may validly not belong to the city - # while the stoparea_group does - near the city bbox boundary - continue - if "tags" not in el: - self.warn( - "An untagged object {} in a stop_area_group".format(k), - stoparea_group, - ) - continue - if ( - el["type"] != "relation" - or el["tags"].get("type") != "public_transport" - or el["tags"].get("public_transport") != "stop_area" - ): - continue - if k in self.stations: - stoparea = self.stations[k][0] - transfer.add(stoparea) - if stoparea.transfer: - # TODO: properly process such cases. 
- # Counterexample 1: Paris, - # Châtelet subway station <-> - # "Châtelet - Les Halles" railway station <-> - # Les Halles subway station - # Counterexample 2: Saint-Petersburg, transfers - # Витебский вокзал <-> - # Пушкинская <-> - # Звенигородская - self.warn( - "Stop area {} belongs to multiple interchanges".format( - k - ) - ) - stoparea.transfer = el_id(stoparea_group) - if len(transfer) > 1: - self.transfers.append(transfer) - - def extract_routes(self) -> None: - # Extract stations - processed_stop_areas = set() - for el in self.elements.values(): - if Station.is_station(el, self.modes): - # See PR https://github.com/mapsme/subways/pull/98 - if ( - el["type"] == "relation" - and el["tags"].get("type") != "multipolygon" - ): - rel_type = el["tags"].get("type") - self.warn( - "A railway station cannot be a relation of type " - f"{rel_type}", - el, - ) - continue - st = Station(el, self) - self.station_ids.add(st.id) - if st.id in self.stop_areas: - stations = [] - for sa in self.stop_areas[st.id]: - stations.append(StopArea(st, self, sa)) - else: - stations = [StopArea(st, self)] - - for station in stations: - if station.id not in processed_stop_areas: - processed_stop_areas.add(station.id) - for st_el in station.get_elements(): - self.stations[st_el].append(station) - - # Check that stops and platforms belong to - # a single stop_area - for sp in chain(station.stops, station.platforms): - if sp in self.stops_and_platforms: - self.notice( - f"A stop or a platform {sp} belongs to " - "multiple stop areas, might be correct" - ) - else: - self.stops_and_platforms.add(sp) - - # Extract routes - for el in self.elements.values(): - if Route.is_route(el, self.modes): - if el["tags"].get("access") in ("no", "private"): - continue - route_id = el_id(el) - master = self.masters.get(route_id, None) - if self.networks: - network = Route.get_network(el) - if master: - master_network = Route.get_network(master) - else: - master_network = None - if ( - network not in self.networks - and master_network not in self.networks - ): - continue - - route = self.route_class(el, self, master) - if not route.stops: - self.warn("Route has no stops", el) - continue - elif len(route.stops) == 1: - self.warn("Route has only one stop", el) - continue - - k = el_id(master) if master else route.ref - if k not in self.routes: - self.routes[k] = RouteMaster(self, master) - self.routes[k].add(route) - - # Sometimes adding a route to a newly initialized RouteMaster - # can fail - if len(self.routes[k]) == 0: - del self.routes[k] - - # And while we're iterating over relations, find interchanges - if ( - el["type"] == "relation" - and el.get("tags", {}).get("public_transport", None) - == "stop_area_group" - ): - self.make_transfer(el) - - # Filter transfers, leaving only stations that belong to routes - own_stopareas = set(self.stopareas()) - - self.transfers = [ - inner_transfer - for inner_transfer in ( - own_stopareas.intersection(transfer) - for transfer in self.transfers - ) - if len(inner_transfer) > 1 - ] - - def __iter__(self) -> Iterator[RouteMaster]: - return iter(self.routes.values()) - - def stopareas(self) -> Iterator[StopArea]: - yielded_stopareas = set() - for route_master in self: - for stoparea in route_master.stopareas(): - if stoparea not in yielded_stopareas: - yield stoparea - yielded_stopareas.add(stoparea) - - @property - def is_good(self) -> bool: - if not (self.errors or self.validate_called): - raise RuntimeError( - "You mustn't refer to City.is_good property before calling " - "the 
City.validate() method unless an error already occurred." - ) - return len(self.errors) == 0 - - def get_validation_result(self) -> dict: - result = { - "name": self.name, - "country": self.country, - "continent": self.continent, - "stations_found": getattr(self, "found_stations", 0), - "transfers_found": getattr(self, "found_interchanges", 0), - "unused_entrances": getattr(self, "unused_entrances", 0), - "networks": getattr(self, "found_networks", 0), - } - if not self.overground: - result.update( - { - "subwayl_expected": getattr(self, "num_lines", 0), - "lightrl_expected": getattr(self, "num_light_lines", 0), - "subwayl_found": getattr(self, "found_lines", 0), - "lightrl_found": getattr(self, "found_light_lines", 0), - "stations_expected": getattr(self, "num_stations", 0), - "transfers_expected": getattr(self, "num_interchanges", 0), - } - ) - else: - result.update( - { - "stations_expected": 0, - "transfers_expected": 0, - "busl_expected": getattr(self, "num_bus_lines", 0), - "trolleybusl_expected": getattr( - self, "num_trolleybus_lines", 0 - ), - "traml_expected": getattr(self, "num_tram_lines", 0), - "otherl_expected": getattr(self, "num_other_lines", 0), - "busl_found": getattr(self, "found_bus_lines", 0), - "trolleybusl_found": getattr( - self, "found_trolleybus_lines", 0 - ), - "traml_found": getattr(self, "found_tram_lines", 0), - "otherl_found": getattr(self, "found_other_lines", 0), - } - ) - result["warnings"] = self.warnings - result["errors"] = self.errors - result["notices"] = self.notices - return result - - def count_unused_entrances(self) -> None: - global used_entrances - stop_areas = set() - for el in self.elements.values(): - if ( - el["type"] == "relation" - and "tags" in el - and el["tags"].get("public_transport") == "stop_area" - and "members" in el - ): - stop_areas.update([el_id(m) for m in el["members"]]) - unused = [] - not_in_sa = [] - for el in self.elements.values(): - if ( - el["type"] == "node" - and "tags" in el - and el["tags"].get("railway") == "subway_entrance" - ): - i = el_id(el) - if i in self.stations: - used_entrances.add(i) - if i not in stop_areas: - not_in_sa.append(i) - if i not in self.stations: - unused.append(i) - self.unused_entrances = len(unused) - self.entrances_not_in_stop_areas = len(not_in_sa) - if unused: - self.notice( - f"{len(unused)} subway entrances are not connected to a " - f"station: {format_elid_list(unused)}" - ) - if not_in_sa: - self.notice( - f"{len(not_in_sa)} subway entrances are not in stop_area " - f"relations: {format_elid_list(not_in_sa)}" - ) - - def validate_lines(self) -> None: - self.found_light_lines = len( - [x for x in self.routes.values() if x.mode != "subway"] - ) - self.found_lines = len(self.routes) - self.found_light_lines - if self.found_lines != self.num_lines: - self.error( - "Found {} subway lines, expected {}".format( - self.found_lines, self.num_lines - ) - ) - if self.found_light_lines != self.num_light_lines: - self.error( - "Found {} light rail lines, expected {}".format( - self.found_light_lines, self.num_light_lines - ) - ) - - def validate_overground_lines(self) -> None: - self.found_tram_lines = len( - [x for x in self.routes.values() if x.mode == "tram"] - ) - self.found_bus_lines = len( - [x for x in self.routes.values() if x.mode == "bus"] - ) - self.found_trolleybus_lines = len( - [x for x in self.routes.values() if x.mode == "trolleybus"] - ) - self.found_other_lines = len( - [ - x - for x in self.routes.values() - if x.mode not in ("bus", "trolleybus", "tram") - ] - ) - if 
self.found_tram_lines != self.num_tram_lines: - log_function = ( - self.error if self.found_tram_lines == 0 else self.notice - ) - log_function( - "Found {} tram lines, expected {}".format( - self.found_tram_lines, self.num_tram_lines - ), - ) - - def validate(self) -> None: - networks = Counter() - self.found_stations = 0 - unused_stations = set(self.station_ids) - for rmaster in self.routes.values(): - networks[str(rmaster.network)] += 1 - if not self.overground: - rmaster.check_return_routes() - route_stations = set() - for sa in rmaster.stopareas(): - route_stations.add(sa.transfer or sa.id) - unused_stations.discard(sa.station.id) - self.found_stations += len(route_stations) - if unused_stations: - self.unused_stations = len(unused_stations) - self.notice( - "{} unused stations: {}".format( - self.unused_stations, format_elid_list(unused_stations) - ) - ) - self.count_unused_entrances() - self.found_interchanges = len(self.transfers) - - if self.overground: - self.validate_overground_lines() - else: - self.validate_lines() - - if self.found_stations != self.num_stations: - msg = "Found {} stations in routes, expected {}".format( - self.found_stations, self.num_stations - ) - log_function = ( - self.error - if self.num_stations > 0 - and not ( - 0 - <= (self.num_stations - self.found_stations) - / self.num_stations - <= ALLOWED_STATIONS_MISMATCH - ) - else self.warn - ) - log_function(msg) - - if self.found_interchanges != self.num_interchanges: - msg = "Found {} interchanges, expected {}".format( - self.found_interchanges, self.num_interchanges - ) - log_function = ( - self.error - if self.num_interchanges != 0 - and not ( - (self.num_interchanges - self.found_interchanges) - / self.num_interchanges - <= ALLOWED_TRANSFERS_MISMATCH - ) - else self.warn - ) - log_function(msg) - - self.found_networks = len(networks) - if len(networks) > max(1, len(self.networks)): - n_str = "; ".join( - ["{} ({})".format(k, v) for k, v in networks.items()] - ) - self.notice("More than one network: {}".format(n_str)) - - self.validate_called = True - - def calculate_distances(self) -> None: - for route_master in self: - for route in route_master: - route.calculate_distances() - - -def find_transfers( - elements: list[OsmElementT], cities: Collection[City] -) -> TransfersT: - """As for now, two Cities may contain the same stoparea, but those - StopArea instances would have different python id. So we don't store - references to StopAreas, but only their ids. This is important at - inter-city interchanges. 
- """ - stop_area_groups = [ - el - for el in elements - if el["type"] == "relation" - and "members" in el - and el.get("tags", {}).get("public_transport") == "stop_area_group" - ] - - stopareas_in_cities_ids = set( - stoparea.id - for city in cities - if city.is_good - for stoparea in city.stopareas() - ) - - transfers = [] - for stop_area_group in stop_area_groups: - transfer: TransferT = set( - member_id - for member_id in ( - el_id(member) for member in stop_area_group["members"] - ) - if member_id in stopareas_in_cities_ids - ) - if len(transfer) > 1: - transfers.append(transfer) - return transfers - - -def get_unused_subway_entrances_geojson(elements: list[OsmElementT]) -> dict: - global used_entrances - features = [] - for el in elements: - if ( - el["type"] == "node" - and "tags" in el - and el["tags"].get("railway") == "subway_entrance" - ): - if el_id(el) not in used_entrances: - geometry = {"type": "Point", "coordinates": el_center(el)} - properties = { - k: v - for k, v in el["tags"].items() - if k not in ("railway", "entrance") - } - features.append( - { - "type": "Feature", - "geometry": geometry, - "properties": properties, - } - ) - return {"type": "FeatureCollection", "features": features} diff --git a/subways/__init__.py b/subways/__init__.py new file mode 100644 index 0000000..c734b54 --- /dev/null +++ b/subways/__init__.py @@ -0,0 +1,92 @@ +from .consts import ( + ALL_MODES, + CONSTRUCTION_KEYS, + DEFAULT_MODES_RAPID, + DEFAULT_MODES_OVERGROUND, + DISPLACEMENT_TOLERANCE, + MAX_DISTANCE_STOP_TO_LINE, + MODES_OVERGROUND, + MODES_RAPID, + RAILWAY_TYPES, +) +from .css_colours import normalize_colour +from .geom_utils import ( + angle_between, + distance, + distance_on_line, + find_segment, + is_near, + project_on_line, +) +from .osm_element import el_center, el_id +from .overpass import multi_overpass, overpass_request +from .subway_io import ( + dump_yaml, + load_xml, + make_geojson, + read_recovery_data, + write_recovery_data, +) +from .types import ( + CriticalValidationError, + IdT, + LonLat, + OsmElementT, + RailT, + TransferT, + TransfersT, +) +from .validation import ( + add_osm_elements_to_cities, + BAD_MARK, + calculate_centers, + DEFAULT_CITIES_INFO_URL, + DEFAULT_SPREADSHEET_ID, + get_cities_info, + prepare_cities, + validate_cities, +) + + +__all__ = [ + "ALL_MODES", + "CONSTRUCTION_KEYS", + "DEFAULT_MODES_RAPID", + "DEFAULT_MODES_OVERGROUND", + "DISPLACEMENT_TOLERANCE", + "MAX_DISTANCE_STOP_TO_LINE", + "MODES_OVERGROUND", + "MODES_RAPID", + "RAILWAY_TYPES", + "angle_between", + "distance", + "distance_on_line", + "find_segment", + "is_near", + "project_on_line", + "normalize_colour", + "el_center", + "el_id", + "overpass_request", + "multi_overpass", + "dump_yaml", + "load_xml", + "make_geojson", + "read_recovery_data", + "write_recovery_data", + "CriticalValidationError", + "IdT", + "LonLat", + "OsmElementT", + "RailT", + "TransferT", + "TransfersT", + "add_osm_elements_to_cities", + "BAD_MARK", + "calculate_centers", + "DEFAULT_CITIES_INFO_URL", + "DEFAULT_SPREADSHEET_ID", + "get_cities_info", + "prepare_cities", + "validate_cities", +] diff --git a/subways/consts.py b/subways/consts.py new file mode 100644 index 0000000..4d75426 --- /dev/null +++ b/subways/consts.py @@ -0,0 +1,26 @@ +MAX_DISTANCE_STOP_TO_LINE = 50 # in meters + +# If an object was moved not too far compared to previous validator run, +# it is likely the same object +DISPLACEMENT_TOLERANCE = 300 # in meters + +MODES_RAPID = {"subway", "light_rail", "monorail", "train"} +MODES_OVERGROUND = 
{"tram", "bus", "trolleybus", "aerialway", "ferry"} +DEFAULT_MODES_RAPID = {"subway", "light_rail"} +DEFAULT_MODES_OVERGROUND = {"tram"} # TODO: bus and trolleybus? +ALL_MODES = MODES_RAPID | MODES_OVERGROUND +RAILWAY_TYPES = { + "rail", + "light_rail", + "subway", + "narrow_gauge", + "funicular", + "monorail", + "tram", +} +CONSTRUCTION_KEYS = ( + "construction", + "proposed", + "construction:railway", + "proposed:railway", +) diff --git a/css_colours.py b/subways/css_colours.py similarity index 100% rename from css_colours.py rename to subways/css_colours.py diff --git a/subways/geom_utils.py b/subways/geom_utils.py new file mode 100644 index 0000000..30d1a2d --- /dev/null +++ b/subways/geom_utils.py @@ -0,0 +1,175 @@ +import math + +from subways.consts import MAX_DISTANCE_STOP_TO_LINE +from subways.types import LonLat, RailT + + +def distance(p1: LonLat, p2: LonLat) -> float: + if p1 is None or p2 is None: + raise Exception( + "One of arguments to distance({}, {}) is None".format(p1, p2) + ) + dx = math.radians(p1[0] - p2[0]) * math.cos( + 0.5 * math.radians(p1[1] + p2[1]) + ) + dy = math.radians(p1[1] - p2[1]) + return 6378137 * math.sqrt(dx * dx + dy * dy) + + +def is_near(p1: LonLat, p2: LonLat) -> bool: + return ( + p1[0] - 1e-8 <= p2[0] <= p1[0] + 1e-8 + and p1[1] - 1e-8 <= p2[1] <= p1[1] + 1e-8 + ) + + +def project_on_segment(p: LonLat, p1: LonLat, p2: LonLat) -> float | None: + """Given three points, return u - the position of projection of + point p onto segment p1p2 regarding point p1 and (p2-p1) direction vector + """ + dp = (p2[0] - p1[0], p2[1] - p1[1]) + d2 = dp[0] * dp[0] + dp[1] * dp[1] + if d2 < 1e-14: + return None + u = ((p[0] - p1[0]) * dp[0] + (p[1] - p1[1]) * dp[1]) / d2 + if not 0 <= u <= 1: + return None + return u + + +def project_on_line(p: LonLat, line: RailT) -> dict: + result = { + # In the first approximation, position on rails is the index of the + # closest vertex of line to the point p. Fractional value means that + # the projected point lies on a segment between two vertices. + # More than one value can occur if a route follows the same tracks + # more than once. 
+ "positions_on_line": None, + "projected_point": None, # (lon, lat) + } + + if len(line) < 2: + return result + d_min = MAX_DISTANCE_STOP_TO_LINE * 5 + closest_to_vertex = False + # First, check vertices in the line + for i, vertex in enumerate(line): + d = distance(p, vertex) + if d < d_min: + result["positions_on_line"] = [i] + result["projected_point"] = vertex + d_min = d + closest_to_vertex = True + elif vertex == result["projected_point"]: + # Repeated occurrence of the track vertex in line, like Oslo Line 5 + result["positions_on_line"].append(i) + # And then calculate distances to each segment + for seg in range(len(line) - 1): + # Check bbox for speed + if not ( + ( + min(line[seg][0], line[seg + 1][0]) - MAX_DISTANCE_STOP_TO_LINE + <= p[0] + <= max(line[seg][0], line[seg + 1][0]) + + MAX_DISTANCE_STOP_TO_LINE + ) + and ( + min(line[seg][1], line[seg + 1][1]) - MAX_DISTANCE_STOP_TO_LINE + <= p[1] + <= max(line[seg][1], line[seg + 1][1]) + + MAX_DISTANCE_STOP_TO_LINE + ) + ): + continue + u = project_on_segment(p, line[seg], line[seg + 1]) + if u: + projected_point = ( + line[seg][0] + u * (line[seg + 1][0] - line[seg][0]), + line[seg][1] + u * (line[seg + 1][1] - line[seg][1]), + ) + d = distance(p, projected_point) + if d < d_min: + result["positions_on_line"] = [seg + u] + result["projected_point"] = projected_point + d_min = d + closest_to_vertex = False + elif projected_point == result["projected_point"]: + # Repeated occurrence of the track segment in line, + # like Oslo Line 5 + if not closest_to_vertex: + result["positions_on_line"].append(seg + u) + return result + + +def find_segment( + p: LonLat, line: RailT, start_vertex: int = 0 +) -> tuple[int, float] | tuple[None, None]: + """Returns index of a segment and a position inside it.""" + EPS = 1e-9 + for seg in range(start_vertex, len(line) - 1): + if is_near(p, line[seg]): + return seg, 0.0 + if line[seg][0] == line[seg + 1][0]: + if not (p[0] - EPS <= line[seg][0] <= p[0] + EPS): + continue + px = None + else: + px = (p[0] - line[seg][0]) / (line[seg + 1][0] - line[seg][0]) + if px is None or (0 <= px <= 1): + if line[seg][1] == line[seg + 1][1]: + if not (p[1] - EPS <= line[seg][1] <= p[1] + EPS): + continue + py = None + else: + py = (p[1] - line[seg][1]) / (line[seg + 1][1] - line[seg][1]) + if py is None or (0 <= py <= 1): + if py is None or px is None or (px - EPS <= py <= px + EPS): + return seg, px or py + return None, None + + +def distance_on_line( + p1: LonLat, p2: LonLat, line: RailT, start_vertex: int = 0 +) -> tuple[float, int] | None: + """Calculates distance via line between projections + of points p1 and p2. 
Returns a TUPLE of (d, vertex): + d is the distance and vertex is the number of the second + vertex, to continue calculations for the next point.""" + line_len = len(line) + seg1, pos1 = find_segment(p1, line, start_vertex) + if seg1 is None: + # logging.warn('p1 %s is not projected, st=%s', p1, start_vertex) + return None + seg2, pos2 = find_segment(p2, line, seg1) + if seg2 is None: + if line[0] == line[-1]: + line = line + line[1:] + seg2, pos2 = find_segment(p2, line, seg1) + if seg2 is None: + # logging.warn('p2 %s is not projected, st=%s', p2, start_vertex) + return None + if seg1 == seg2: + return distance(line[seg1], line[seg1 + 1]) * abs(pos2 - pos1), seg1 + if seg2 < seg1: + # Should not happen + raise Exception("Pos1 %s is after pos2 %s", seg1, seg2) + d = 0 + if pos1 < 1: + d += distance(line[seg1], line[seg1 + 1]) * (1 - pos1) + for i in range(seg1 + 1, seg2): + d += distance(line[i], line[i + 1]) + if pos2 > 0: + d += distance(line[seg2], line[seg2 + 1]) * pos2 + return d, seg2 % line_len + + +def angle_between(p1: LonLat, c: LonLat, p2: LonLat) -> float: + a = round( + abs( + math.degrees( + math.atan2(p1[1] - c[1], p1[0] - c[0]) + - math.atan2(p2[1] - c[1], p2[0] - c[0]) + ) + ) + ) + return a if a <= 180 else 360 - a diff --git a/subways/osm_element.py b/subways/osm_element.py new file mode 100644 index 0000000..5ea8bc4 --- /dev/null +++ b/subways/osm_element.py @@ -0,0 +1,19 @@ +from subways.types import IdT, LonLat, OsmElementT + + +def el_id(el: OsmElementT) -> IdT | None: + if not el: + return None + if "type" not in el: + raise Exception("What is this element? {}".format(el)) + return el["type"][0] + str(el.get("id", el.get("ref", ""))) + + +def el_center(el: OsmElementT) -> LonLat | None: + if not el: + return None + if "lat" in el: + return el["lon"], el["lat"] + elif "center" in el: + return el["center"]["lon"], el["center"]["lat"] + return None diff --git a/subways/overpass.py b/subways/overpass.py new file mode 100644 index 0000000..88c128e --- /dev/null +++ b/subways/overpass.py @@ -0,0 +1,60 @@ +import json +import logging +import time +import urllib.parse +import urllib.request + +from subways.consts import MODES_OVERGROUND, MODES_RAPID +from subways.types import OsmElementT + + +def compose_overpass_request( + overground: bool, bboxes: list[list[float]] +) -> str: + if not bboxes: + raise RuntimeError("No bboxes given for overpass request") + + query = "[out:json][timeout:1000];(" + modes = MODES_OVERGROUND if overground else MODES_RAPID + for bbox in bboxes: + bbox_part = f"({','.join(str(coord) for coord in bbox)})" + query += "(" + for mode in sorted(modes): + query += f'rel[route="{mode}"]{bbox_part};' + query += ");" + query += "rel(br)[type=route_master];" + if not overground: + query += f"node[railway=subway_entrance]{bbox_part};" + query += f"node[railway=train_station_entrance]{bbox_part};" + query += f"rel[public_transport=stop_area]{bbox_part};" + query += ( + "rel(br)[type=public_transport][public_transport=stop_area_group];" + ) + query += ");(._;>>;);out body center qt;" + logging.debug("Query: %s", query) + return query + + +def overpass_request( + overground: bool, overpass_api: str, bboxes: list[list[float]] +) -> list[OsmElementT]: + query = compose_overpass_request(overground, bboxes) + url = f"{overpass_api}?data={urllib.parse.quote(query)}" + response = urllib.request.urlopen(url, timeout=1000) + if (r_code := response.getcode()) != 200: + raise Exception(f"Failed to query Overpass API: HTTP {r_code}") + return 
json.load(response)["elements"] + + +def multi_overpass( + overground: bool, overpass_api: str, bboxes: list[list[float]] +) -> list[OsmElementT]: + SLICE_SIZE = 10 + INTERREQUEST_WAIT = 5 # in seconds + result = [] + for i in range(0, len(bboxes), SLICE_SIZE): + if i > 0: + time.sleep(INTERREQUEST_WAIT) + bboxes_i = bboxes[i : i + SLICE_SIZE] # noqa E203 + result.extend(overpass_request(overground, overpass_api, bboxes_i)) + return result diff --git a/processors/__init__.py b/subways/processors/__init__.py similarity index 56% rename from processors/__init__.py rename to subways/processors/__init__.py index 4f5ed84..89ae016 100644 --- a/processors/__init__.py +++ b/subways/processors/__init__.py @@ -1,4 +1,8 @@ # Import only those processors (modules) you want to use. # Ignore F401 "module imported but unused" violation since these modules # are addressed via introspection. -from . import mapsme, gtfs # noqa F401 +from . import gtfs, mapsme # noqa F401 +from ._common import transit_to_dict + + +__all__ = ["gtfs", "mapsme", "transit_to_dict"] diff --git a/processors/_common.py b/subways/processors/_common.py similarity index 95% rename from processors/_common.py rename to subways/processors/_common.py index d60ff07..1d58da4 100644 --- a/processors/_common.py +++ b/subways/processors/_common.py @@ -1,4 +1,12 @@ -from subway_structure import City, el_center, TransfersT +from __future__ import annotations + +import typing + +from subways.osm_element import el_center +from subways.types import TransfersT + +if typing.TYPE_CHECKING: + from subways.structure.city import City DEFAULT_INTERVAL = 2.5 * 60 # seconds KMPH_TO_MPS = 1 / 3.6 # km/h to m/s conversion multiplier diff --git a/processors/gtfs.py b/subways/processors/gtfs.py similarity index 98% rename from processors/gtfs.py rename to subways/processors/gtfs.py index 463443e..3722815 100644 --- a/processors/gtfs.py +++ b/subways/processors/gtfs.py @@ -1,4 +1,7 @@ +from __future__ import annotations + import csv +import typing from functools import partial from io import BytesIO, StringIO from itertools import permutations @@ -12,11 +15,11 @@ from ._common import ( TRANSFER_PENALTY, transit_to_dict, ) -from subway_structure import ( - City, - distance, - TransfersT, -) +from subways.types import TransfersT +from subways.geom_utils import distance + +if typing.TYPE_CHECKING: + from subways.structure.city import City DEFAULT_TRIP_START_TIME = (5, 0) # 05:00 diff --git a/processors/mapsme.py b/subways/processors/mapsme.py similarity index 97% rename from processors/mapsme.py rename to subways/processors/mapsme.py index e87ffe0..e176832 100755 --- a/processors/mapsme.py +++ b/subways/processors/mapsme.py @@ -1,22 +1,18 @@ +from __future__ import annotations + import json import logging import os +import typing from collections import defaultdict from collections.abc import Callable from typing import Any, TypeAlias -from subway_structure import ( - City, - DISPLACEMENT_TOLERANCE, - distance, - el_center, - IdT, - LonLat, - OsmElementT, - Station, - StopArea, - TransfersT, -) +from subways.consts import DISPLACEMENT_TOLERANCE +from subways.geom_utils import distance +from subways.osm_element import el_center +from subways.structure.station import Station +from subways.types import IdT, LonLat, OsmElementT, TransfersT from ._common import ( DEFAULT_INTERVAL, format_colour, @@ -25,6 +21,11 @@ from ._common import ( TRANSFER_PENALTY, ) +if typing.TYPE_CHECKING: + from subways.structure.city import City + from subways.structure.stop_area import 
StopArea + + OSM_TYPES = {"n": (0, "node"), "w": (2, "way"), "r": (3, "relation")} ENTRANCE_PENALTY = 60 # seconds SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS # m/s diff --git a/requirements.txt b/subways/requirements.txt similarity index 100% rename from requirements.txt rename to subways/requirements.txt diff --git a/subways/structure/__init__.py b/subways/structure/__init__.py new file mode 100644 index 0000000..6ef67d1 --- /dev/null +++ b/subways/structure/__init__.py @@ -0,0 +1,17 @@ +from .city import City, get_unused_subway_entrances_geojson +from .route import Route +from .route_master import RouteMaster +from .route_stop import RouteStop +from .station import Station +from .stop_area import StopArea + + +__all__ = [ + "City", + "get_unused_subway_entrances_geojson", + "Route", + "RouteMaster", + "RouteStop", + "Station", + "StopArea", +] diff --git a/subways/structure/city.py b/subways/structure/city.py new file mode 100644 index 0000000..441c08b --- /dev/null +++ b/subways/structure/city.py @@ -0,0 +1,626 @@ +from __future__ import annotations + +from collections import Counter, defaultdict +from collections.abc import Collection, Iterator +from itertools import chain + +from subways.consts import ( + DEFAULT_MODES_OVERGROUND, + DEFAULT_MODES_RAPID, +) +from subways.osm_element import el_center, el_id +from subways.structure.route import Route +from subways.structure.route_master import RouteMaster +from subways.structure.station import Station +from subways.structure.stop_area import StopArea +from subways.types import ( + IdT, + OsmElementT, + TransfersT, + TransferT, +) + +ALLOWED_STATIONS_MISMATCH = 0.02 # part of total station count +ALLOWED_TRANSFERS_MISMATCH = 0.07 # part of total interchanges count + +used_entrances = set() + + +def format_elid_list(ids: Collection[IdT]) -> str: + msg = ", ".join(sorted(ids)[:20]) + if len(ids) > 20: + msg += ", ..." 
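The `el_id`/`el_center` helpers moved into `subways/osm_element.py` earlier in this diff are used throughout `city.py` below, so their conventions are worth a reminder. Illustrative only, not part of the diff; the elements are hand-made stubs and the package is assumed importable.

```python
# Illustrative only; assumes the subways package from this patch is importable.
from subways.osm_element import el_center, el_id

node = {"type": "node", "id": 42, "lat": 55.7306, "lon": 37.4461}
way = {"type": "way", "id": 7, "center": {"lat": 55.731, "lon": 37.446}}

print(el_id(node))      # "n42": one-letter type prefix plus the numeric id
print(el_id(way))       # "w7"
print(el_center(node))  # (37.4461, 55.7306), always (lon, lat)
print(el_center(way))   # (37.446, 55.731), taken from the "center" member
```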
+ return msg + + +class City: + route_class = Route + + def __init__(self, city_data: dict, overground: bool = False) -> None: + self.validate_called = False + self.errors: list[str] = [] + self.warnings: list[str] = [] + self.notices: list[str] = [] + self.id = None + self.try_fill_int_attribute(city_data, "id") + self.name = city_data["name"] + self.country = city_data["country"] + self.continent = city_data["continent"] + self.overground = overground + if not overground: + self.try_fill_int_attribute(city_data, "num_stations") + self.try_fill_int_attribute(city_data, "num_lines", "0") + self.try_fill_int_attribute(city_data, "num_light_lines", "0") + self.try_fill_int_attribute(city_data, "num_interchanges", "0") + else: + self.try_fill_int_attribute(city_data, "num_tram_lines", "0") + self.try_fill_int_attribute(city_data, "num_trolleybus_lines", "0") + self.try_fill_int_attribute(city_data, "num_bus_lines", "0") + self.try_fill_int_attribute(city_data, "num_other_lines", "0") + + # Acquiring list of networks and modes + networks = ( + None + if not city_data["networks"] + else city_data["networks"].split(":") + ) + if not networks or len(networks[-1]) == 0: + self.networks = [] + else: + self.networks = set( + filter(None, [x.strip() for x in networks[-1].split(";")]) + ) + if not networks or len(networks) < 2 or len(networks[0]) == 0: + if self.overground: + self.modes = DEFAULT_MODES_OVERGROUND + else: + self.modes = DEFAULT_MODES_RAPID + else: + self.modes = {x.strip() for x in networks[0].split(",")} + + # Reversing bbox so it is (xmin, ymin, xmax, ymax) + bbox = city_data["bbox"].split(",") + if len(bbox) == 4: + self.bbox = [float(bbox[i]) for i in (1, 0, 3, 2)] + else: + self.bbox = None + + self.elements: dict[IdT, OsmElementT] = {} + self.stations: dict[IdT, list[StopArea]] = defaultdict(list) + self.routes: dict[str, RouteMaster] = {} # keys are route_master refs + self.masters: dict[IdT, OsmElementT] = {} # Route id → master element + self.stop_areas: [IdT, list[OsmElementT]] = defaultdict(list) + self.transfers: list[set[StopArea]] = [] + self.station_ids: set[IdT] = set() + self.stops_and_platforms: set[IdT] = set() + self.recovery_data = None + + def try_fill_int_attribute( + self, city_data: dict, attr: str, default: str | None = None + ) -> None: + """Try to convert string value to int. Conversion is considered + to fail if one of the following is true: + * attr is not empty and data type casting fails; + * attr is empty and no default value is given. + In such cases the city is marked as bad by adding an error + to the city validation log. 
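To see how the constructor above interprets a cities-list row, here is a hedged sketch. It is not part of the diff; the row values below are invented, only the field names follow the code, and the package is assumed importable.

```python
# Illustrative only; the field values are made up and the subways package
# from this patch is assumed to be importable.
from subways.structure.city import City

city_data = {
    "id": "1",
    "name": "Tiny World",
    "country": "Neverland",
    "continent": "Africa",
    # "<modes>:<networks>": modes before the colon, ';'-separated networks after
    "networks": "subway,light_rail:Intersecting Lines; Circular Line",
    # bbox arrives as lat_min, lon_min, lat_max, lon_max ...
    "bbox": "10.0, 20.0, 11.0, 21.0",
    "num_stations": "6",
    "num_lines": "1",
    "num_light_lines": "0",
    "num_interchanges": "1",
}

city = City(city_data)
print(sorted(city.modes))     # ['light_rail', 'subway']
print(sorted(city.networks))  # ['Circular Line', 'Intersecting Lines']
print(city.bbox)              # ... and is stored as [20.0, 10.0, 21.0, 11.0],
                              # i.e. (xmin, ymin, xmax, ymax)
```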
+ """ + attr_value = city_data[attr] + if not attr_value and default is not None: + attr_value = default + + try: + attr_int = int(attr_value) + except ValueError: + print_value = ( + f"{city_data[attr]}" if city_data[attr] else "" + ) + self.error( + f"Configuration error: wrong value for {attr}: {print_value}" + ) + setattr(self, attr, 0) + else: + setattr(self, attr, attr_int) + + @staticmethod + def log_message(message: str, el: OsmElementT) -> str: + if el: + tags = el.get("tags", {}) + message += ' ({} {}, "{}")'.format( + el["type"], + el.get("id", el.get("ref")), + tags.get("name", tags.get("ref", "")), + ) + return message + + def notice(self, message: str, el: OsmElementT | None = None) -> None: + """This type of message may point to a potential problem.""" + msg = City.log_message(message, el) + self.notices.append(msg) + + def warn(self, message: str, el: OsmElementT | None = None) -> None: + """A warning is definitely a problem but is doesn't prevent + from building a routing file and doesn't invalidate the city. + """ + msg = City.log_message(message, el) + self.warnings.append(msg) + + def error(self, message: str, el: OsmElementT | None = None) -> None: + """Error is a critical problem that invalidates the city.""" + msg = City.log_message(message, el) + self.errors.append(msg) + + def contains(self, el: OsmElementT) -> bool: + center = el_center(el) + if center: + return ( + self.bbox[0] <= center[1] <= self.bbox[2] + and self.bbox[1] <= center[0] <= self.bbox[3] + ) + return False + + def add(self, el: OsmElementT) -> None: + if el["type"] == "relation" and "members" not in el: + return + + self.elements[el_id(el)] = el + if not (el["type"] == "relation" and "tags" in el): + return + + relation_type = el["tags"].get("type") + if relation_type == "route_master": + for m in el["members"]: + if m["type"] != "relation": + continue + + if el_id(m) in self.masters: + self.error("Route in two route_masters", m) + self.masters[el_id(m)] = el + + elif el["tags"].get("public_transport") == "stop_area": + if relation_type != "public_transport": + self.warn( + "stop_area relation with " + f"type={relation_type}, needed type=public_transport", + el, + ) + return + + warned_about_duplicates = False + for m in el["members"]: + stop_areas = self.stop_areas[el_id(m)] + if el in stop_areas and not warned_about_duplicates: + self.warn("Duplicate element in a stop area", el) + warned_about_duplicates = True + else: + stop_areas.append(el) + + def make_transfer(self, stoparea_group: OsmElementT) -> None: + transfer: set[StopArea] = set() + for m in stoparea_group["members"]: + k = el_id(m) + el = self.elements.get(k) + if not el: + # A stoparea_group member may validly not belong to the city + # while the stoparea_group does - near the city bbox boundary + continue + if "tags" not in el: + self.warn( + "An untagged object {} in a stop_area_group".format(k), + stoparea_group, + ) + continue + if ( + el["type"] != "relation" + or el["tags"].get("type") != "public_transport" + or el["tags"].get("public_transport") != "stop_area" + ): + continue + if k in self.stations: + stoparea = self.stations[k][0] + transfer.add(stoparea) + if stoparea.transfer: + # TODO: properly process such cases. 
+ # Counterexample 1: Paris, + # Châtelet subway station <-> + # "Châtelet - Les Halles" railway station <-> + # Les Halles subway station + # Counterexample 2: Saint-Petersburg, transfers + # Витебский вокзал <-> + # Пушкинская <-> + # Звенигородская + self.warn( + "Stop area {} belongs to multiple interchanges".format( + k + ) + ) + stoparea.transfer = el_id(stoparea_group) + if len(transfer) > 1: + self.transfers.append(transfer) + + def extract_routes(self) -> None: + # Extract stations + processed_stop_areas = set() + for el in self.elements.values(): + if Station.is_station(el, self.modes): + # See PR https://github.com/mapsme/subways/pull/98 + if ( + el["type"] == "relation" + and el["tags"].get("type") != "multipolygon" + ): + rel_type = el["tags"].get("type") + self.warn( + "A railway station cannot be a relation of type " + f"{rel_type}", + el, + ) + continue + st = Station(el, self) + self.station_ids.add(st.id) + if st.id in self.stop_areas: + stations = [] + for sa in self.stop_areas[st.id]: + stations.append(StopArea(st, self, sa)) + else: + stations = [StopArea(st, self)] + + for station in stations: + if station.id not in processed_stop_areas: + processed_stop_areas.add(station.id) + for st_el in station.get_elements(): + self.stations[st_el].append(station) + + # Check that stops and platforms belong to + # a single stop_area + for sp in chain(station.stops, station.platforms): + if sp in self.stops_and_platforms: + self.notice( + f"A stop or a platform {sp} belongs to " + "multiple stop areas, might be correct" + ) + else: + self.stops_and_platforms.add(sp) + + # Extract routes + for el in self.elements.values(): + if Route.is_route(el, self.modes): + if el["tags"].get("access") in ("no", "private"): + continue + route_id = el_id(el) + master = self.masters.get(route_id, None) + if self.networks: + network = Route.get_network(el) + if master: + master_network = Route.get_network(master) + else: + master_network = None + if ( + network not in self.networks + and master_network not in self.networks + ): + continue + + route = self.route_class(el, self, master) + if not route.stops: + self.warn("Route has no stops", el) + continue + elif len(route.stops) == 1: + self.warn("Route has only one stop", el) + continue + + k = el_id(master) if master else route.ref + if k not in self.routes: + self.routes[k] = RouteMaster(self, master) + self.routes[k].add(route) + + # Sometimes adding a route to a newly initialized RouteMaster + # can fail + if len(self.routes[k]) == 0: + del self.routes[k] + + # And while we're iterating over relations, find interchanges + if ( + el["type"] == "relation" + and el.get("tags", {}).get("public_transport", None) + == "stop_area_group" + ): + self.make_transfer(el) + + # Filter transfers, leaving only stations that belong to routes + own_stopareas = set(self.stopareas()) + + self.transfers = [ + inner_transfer + for inner_transfer in ( + own_stopareas.intersection(transfer) + for transfer in self.transfers + ) + if len(inner_transfer) > 1 + ] + + def __iter__(self) -> Iterator[RouteMaster]: + return iter(self.routes.values()) + + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route_master in self: + for stoparea in route_master.stopareas(): + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + + @property + def is_good(self) -> bool: + if not (self.errors or self.validate_called): + raise RuntimeError( + "You mustn't refer to City.is_good property before calling " + "the 
City.validate() method unless an error already occurred." + ) + return len(self.errors) == 0 + + def get_validation_result(self) -> dict: + result = { + "name": self.name, + "country": self.country, + "continent": self.continent, + "stations_found": getattr(self, "found_stations", 0), + "transfers_found": getattr(self, "found_interchanges", 0), + "unused_entrances": getattr(self, "unused_entrances", 0), + "networks": getattr(self, "found_networks", 0), + } + if not self.overground: + result.update( + { + "subwayl_expected": getattr(self, "num_lines", 0), + "lightrl_expected": getattr(self, "num_light_lines", 0), + "subwayl_found": getattr(self, "found_lines", 0), + "lightrl_found": getattr(self, "found_light_lines", 0), + "stations_expected": getattr(self, "num_stations", 0), + "transfers_expected": getattr(self, "num_interchanges", 0), + } + ) + else: + result.update( + { + "stations_expected": 0, + "transfers_expected": 0, + "busl_expected": getattr(self, "num_bus_lines", 0), + "trolleybusl_expected": getattr( + self, "num_trolleybus_lines", 0 + ), + "traml_expected": getattr(self, "num_tram_lines", 0), + "otherl_expected": getattr(self, "num_other_lines", 0), + "busl_found": getattr(self, "found_bus_lines", 0), + "trolleybusl_found": getattr( + self, "found_trolleybus_lines", 0 + ), + "traml_found": getattr(self, "found_tram_lines", 0), + "otherl_found": getattr(self, "found_other_lines", 0), + } + ) + result["warnings"] = self.warnings + result["errors"] = self.errors + result["notices"] = self.notices + return result + + def count_unused_entrances(self) -> None: + global used_entrances + stop_areas = set() + for el in self.elements.values(): + if ( + el["type"] == "relation" + and "tags" in el + and el["tags"].get("public_transport") == "stop_area" + and "members" in el + ): + stop_areas.update([el_id(m) for m in el["members"]]) + unused = [] + not_in_sa = [] + for el in self.elements.values(): + if ( + el["type"] == "node" + and "tags" in el + and el["tags"].get("railway") == "subway_entrance" + ): + i = el_id(el) + if i in self.stations: + used_entrances.add(i) + if i not in stop_areas: + not_in_sa.append(i) + if i not in self.stations: + unused.append(i) + self.unused_entrances = len(unused) + self.entrances_not_in_stop_areas = len(not_in_sa) + if unused: + self.notice( + f"{len(unused)} subway entrances are not connected to a " + f"station: {format_elid_list(unused)}" + ) + if not_in_sa: + self.notice( + f"{len(not_in_sa)} subway entrances are not in stop_area " + f"relations: {format_elid_list(not_in_sa)}" + ) + + def validate_lines(self) -> None: + self.found_light_lines = len( + [x for x in self.routes.values() if x.mode != "subway"] + ) + self.found_lines = len(self.routes) - self.found_light_lines + if self.found_lines != self.num_lines: + self.error( + "Found {} subway lines, expected {}".format( + self.found_lines, self.num_lines + ) + ) + if self.found_light_lines != self.num_light_lines: + self.error( + "Found {} light rail lines, expected {}".format( + self.found_light_lines, self.num_light_lines + ) + ) + + def validate_overground_lines(self) -> None: + self.found_tram_lines = len( + [x for x in self.routes.values() if x.mode == "tram"] + ) + self.found_bus_lines = len( + [x for x in self.routes.values() if x.mode == "bus"] + ) + self.found_trolleybus_lines = len( + [x for x in self.routes.values() if x.mode == "trolleybus"] + ) + self.found_other_lines = len( + [ + x + for x in self.routes.values() + if x.mode not in ("bus", "trolleybus", "tram") + ] + ) + if 
self.found_tram_lines != self.num_tram_lines: + log_function = ( + self.error if self.found_tram_lines == 0 else self.notice + ) + log_function( + "Found {} tram lines, expected {}".format( + self.found_tram_lines, self.num_tram_lines + ), + ) + + def validate(self) -> None: + networks = Counter() + self.found_stations = 0 + unused_stations = set(self.station_ids) + for rmaster in self.routes.values(): + networks[str(rmaster.network)] += 1 + if not self.overground: + rmaster.check_return_routes() + route_stations = set() + for sa in rmaster.stopareas(): + route_stations.add(sa.transfer or sa.id) + unused_stations.discard(sa.station.id) + self.found_stations += len(route_stations) + if unused_stations: + self.unused_stations = len(unused_stations) + self.notice( + "{} unused stations: {}".format( + self.unused_stations, format_elid_list(unused_stations) + ) + ) + self.count_unused_entrances() + self.found_interchanges = len(self.transfers) + + if self.overground: + self.validate_overground_lines() + else: + self.validate_lines() + + if self.found_stations != self.num_stations: + msg = "Found {} stations in routes, expected {}".format( + self.found_stations, self.num_stations + ) + log_function = ( + self.error + if self.num_stations > 0 + and not ( + 0 + <= (self.num_stations - self.found_stations) + / self.num_stations + <= ALLOWED_STATIONS_MISMATCH + ) + else self.warn + ) + log_function(msg) + + if self.found_interchanges != self.num_interchanges: + msg = "Found {} interchanges, expected {}".format( + self.found_interchanges, self.num_interchanges + ) + log_function = ( + self.error + if self.num_interchanges != 0 + and not ( + (self.num_interchanges - self.found_interchanges) + / self.num_interchanges + <= ALLOWED_TRANSFERS_MISMATCH + ) + else self.warn + ) + log_function(msg) + + self.found_networks = len(networks) + if len(networks) > max(1, len(self.networks)): + n_str = "; ".join( + ["{} ({})".format(k, v) for k, v in networks.items()] + ) + self.notice("More than one network: {}".format(n_str)) + + self.validate_called = True + + def calculate_distances(self) -> None: + for route_master in self: + for route in route_master: + route.calculate_distances() + + +def find_transfers( + elements: list[OsmElementT], cities: Collection[City] +) -> TransfersT: + """As for now, two Cities may contain the same stoparea, but those + StopArea instances would have different python id. So we don't store + references to StopAreas, but only their ids. This is important at + inter-city interchanges. 
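The station-count check inside `validate()` above is easy to misread, so here is a standalone restatement of the rule with a few sample numbers. It mirrors the condition in the code with the same constant but is not part of the diff; `mismatch_level` is a made-up name for illustration.

```python
# Standalone sketch of the mismatch rule used in City.validate() above.
ALLOWED_STATIONS_MISMATCH = 0.02  # same constant as in city.py

def mismatch_level(expected: int, found: int) -> str:
    if found == expected:
        return "ok"
    is_error = expected > 0 and not (
        0 <= (expected - found) / expected <= ALLOWED_STATIONS_MISMATCH
    )
    return "error" if is_error else "warning"

print(mismatch_level(100, 99))   # warning: a 1% undercount is tolerated
print(mismatch_level(100, 97))   # error: a 3% undercount exceeds the allowance
print(mismatch_level(100, 101))  # error: any overcount makes the ratio negative
print(mismatch_level(0, 3))      # warning: nothing was expected at all
```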
+ """ + stop_area_groups = [ + el + for el in elements + if el["type"] == "relation" + and "members" in el + and el.get("tags", {}).get("public_transport") == "stop_area_group" + ] + + stopareas_in_cities_ids = set( + stoparea.id + for city in cities + if city.is_good + for stoparea in city.stopareas() + ) + + transfers = [] + for stop_area_group in stop_area_groups: + transfer: TransferT = set( + member_id + for member_id in ( + el_id(member) for member in stop_area_group["members"] + ) + if member_id in stopareas_in_cities_ids + ) + if len(transfer) > 1: + transfers.append(transfer) + return transfers + + +def get_unused_subway_entrances_geojson(elements: list[OsmElementT]) -> dict: + global used_entrances + features = [] + for el in elements: + if ( + el["type"] == "node" + and "tags" in el + and el["tags"].get("railway") == "subway_entrance" + ): + if el_id(el) not in used_entrances: + geometry = {"type": "Point", "coordinates": el_center(el)} + properties = { + k: v + for k, v in el["tags"].items() + if k not in ("railway", "entrance") + } + features.append( + { + "type": "Feature", + "geometry": geometry, + "properties": properties, + } + ) + return {"type": "FeatureCollection", "features": features} diff --git a/subways/structure/route.py b/subways/structure/route.py new file mode 100644 index 0000000..926733e --- /dev/null +++ b/subways/structure/route.py @@ -0,0 +1,903 @@ +from __future__ import annotations + +import re +import typing +from collections.abc import Callable, Iterator +from itertools import islice + +from subways.consts import ( + CONSTRUCTION_KEYS, + DISPLACEMENT_TOLERANCE, + MAX_DISTANCE_STOP_TO_LINE, +) +from subways.css_colours import normalize_colour +from subways.geom_utils import ( + angle_between, + distance, + distance_on_line, + find_segment, + project_on_line, +) +from subways.osm_element import el_id, el_center +from subways.structure.route_stop import RouteStop +from subways.structure.station import Station +from subways.structure.stop_area import StopArea +from subways.types import CriticalValidationError, IdT, OsmElementT, RailT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + +START_END_TIMES_RE = re.compile(r".*?(\d{2}):(\d{2})-(\d{2}):(\d{2}).*") + +ALLOWED_ANGLE_BETWEEN_STOPS = 45 # in degrees +DISALLOWED_ANGLE_BETWEEN_STOPS = 20 # in degrees + + +def get_start_end_times( + opening_hours: str, +) -> tuple[tuple[int, int], tuple[int, int]] | tuple[None, None]: + """Very simplified method to parse OSM opening_hours tag. + We simply take the first HH:MM-HH:MM substring which is the most probable + opening hours interval for the most of the weekdays. 
+ """ + start_time, end_time = None, None + m = START_END_TIMES_RE.match(opening_hours) + if m: + ints = tuple(map(int, m.groups())) + start_time = (ints[0], ints[1]) + end_time = (ints[2], ints[3]) + return start_time, end_time + + +def osm_interval_to_seconds(interval_str: str) -> int | None: + """Convert to int an OSM value for 'interval'/'headway' tag + which may be in these formats: + HH:MM:SS, + HH:MM, + MM, + M + (https://wiki.openstreetmap.org/wiki/Key:interval#Format) + """ + hours, minutes, seconds = 0, 0, 0 + semicolon_count = interval_str.count(":") + try: + if semicolon_count == 0: + minutes = int(interval_str) + elif semicolon_count == 1: + hours, minutes = map(int, interval_str.split(":")) + elif semicolon_count == 2: + hours, minutes, seconds = map(int, interval_str.split(":")) + else: + return None + except ValueError: + return None + return seconds + 60 * minutes + 60 * 60 * hours + + +class Route: + """The longest route for a city with a unique ref.""" + + @staticmethod + def is_route(el: OsmElementT, modes: set[str]) -> bool: + if ( + el["type"] != "relation" + or el.get("tags", {}).get("type") != "route" + ): + return False + if "members" not in el: + return False + if el["tags"].get("route") not in modes: + return False + for k in CONSTRUCTION_KEYS: + if k in el["tags"]: + return False + if "ref" not in el["tags"] and "name" not in el["tags"]: + return False + return True + + @staticmethod + def get_network(relation: OsmElementT) -> str | None: + for k in ("network:metro", "network", "operator"): + if k in relation["tags"]: + return relation["tags"][k] + return None + + @staticmethod + def get_interval(tags: dict) -> int | None: + v = None + for k in ("interval", "headway"): + if k in tags: + v = tags[k] + break + else: + for kk in tags: + if kk.startswith(k + ":"): + v = tags[kk] + break + if not v: + return None + return osm_interval_to_seconds(v) + + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route_stop in self: + stoparea = route_stop.stoparea + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + + def __init__( + self, + relation: OsmElementT, + city: City, + master: OsmElementT | None = None, + ) -> None: + assert Route.is_route( + relation, city.modes + ), f"The relation does not seem to be a route: {relation}" + self.city = city + self.element: OsmElementT = relation + self.id: IdT = el_id(relation) + + self.ref = None + self.name = None + self.mode = None + self.colour = None + self.infill = None + self.network = None + self.interval = None + self.start_time = None + self.end_time = None + self.is_circular = False + self.stops: list[RouteStop] = [] + # Would be a list of (lon, lat) for the longest stretch. Can be empty. 
+ self.tracks = None + # Index of the first stop that is located on/near the self.tracks + self.first_stop_on_rails_index = None + # Index of the last stop that is located on/near the self.tracks + self.last_stop_on_rails_index = None + + self.process_tags(master) + stop_position_elements = self.process_stop_members() + self.process_tracks(stop_position_elements) + + def build_longest_line(self) -> tuple[list[IdT], set[IdT]]: + line_nodes: set[IdT] = set() + last_track: list[IdT] = [] + track: list[IdT] = [] + warned_about_holes = False + for m in self.element["members"]: + el = self.city.elements.get(el_id(m), None) + if not el or not StopArea.is_track(el): + continue + if "nodes" not in el or len(el["nodes"]) < 2: + self.city.error("Cannot find nodes in a railway", el) + continue + nodes: list[IdT] = ["n{}".format(n) for n in el["nodes"]] + if m["role"] == "backward": + nodes.reverse() + line_nodes.update(nodes) + if not track: + is_first = True + track.extend(nodes) + else: + new_segment = list(nodes) # copying + if new_segment[0] == track[-1]: + track.extend(new_segment[1:]) + elif new_segment[-1] == track[-1]: + track.extend(reversed(new_segment[:-1])) + elif is_first and track[0] in ( + new_segment[0], + new_segment[-1], + ): + # We can reverse the track and try again + track.reverse() + if new_segment[0] == track[-1]: + track.extend(new_segment[1:]) + else: + track.extend(reversed(new_segment[:-1])) + else: + # Store the track if it is long and clean it + if not warned_about_holes: + self.city.warn( + "Hole in route rails near node {}".format( + track[-1] + ), + self.element, + ) + warned_about_holes = True + if len(track) > len(last_track): + last_track = track + track = [] + is_first = False + if len(track) > len(last_track): + last_track = track + # Remove duplicate points + last_track = [ + last_track[i] + for i in range(0, len(last_track)) + if i == 0 or last_track[i - 1] != last_track[i] + ] + return last_track, line_nodes + + def get_stop_projections(self) -> tuple[list[dict], Callable[[int], bool]]: + projected = [project_on_line(x.stop, self.tracks) for x in self.stops] + + def stop_near_tracks_criterion(stop_index: int) -> bool: + return ( + projected[stop_index]["projected_point"] is not None + and distance( + self.stops[stop_index].stop, + projected[stop_index]["projected_point"], + ) + <= MAX_DISTANCE_STOP_TO_LINE + ) + + return projected, stop_near_tracks_criterion + + def project_stops_on_line(self) -> dict: + projected, stop_near_tracks_criterion = self.get_stop_projections() + + projected_stops_data = { + "first_stop_on_rails_index": None, + "last_stop_on_rails_index": None, + "stops_on_longest_line": [], # list [{'route_stop': RouteStop, + # 'coords': LonLat, + # 'positions_on_rails': [] } + } + first_index = 0 + while first_index < len(self.stops) and not stop_near_tracks_criterion( + first_index + ): + first_index += 1 + projected_stops_data["first_stop_on_rails_index"] = first_index + + last_index = len(self.stops) - 1 + while last_index > projected_stops_data[ + "first_stop_on_rails_index" + ] and not stop_near_tracks_criterion(last_index): + last_index -= 1 + projected_stops_data["last_stop_on_rails_index"] = last_index + + for i, route_stop in enumerate(self.stops): + if not first_index <= i <= last_index: + continue + + if projected[i]["projected_point"] is None: + self.city.error( + 'Stop "{}" {} is nowhere near the tracks'.format( + route_stop.stoparea.name, route_stop.stop + ), + self.element, + ) + else: + stop_data = { + "route_stop": route_stop, + 
"coords": None, + "positions_on_rails": None, + } + projected_point = projected[i]["projected_point"] + # We've got two separate stations with a good stretch of + # railway tracks between them. Put these on tracks. + d = round(distance(route_stop.stop, projected_point)) + if d > MAX_DISTANCE_STOP_TO_LINE: + self.city.notice( + 'Stop "{}" {} is {} meters from the tracks'.format( + route_stop.stoparea.name, route_stop.stop, d + ), + self.element, + ) + else: + stop_data["coords"] = projected_point + stop_data["positions_on_rails"] = projected[i][ + "positions_on_line" + ] + projected_stops_data["stops_on_longest_line"].append(stop_data) + return projected_stops_data + + def calculate_distances(self) -> None: + dist = 0 + vertex = 0 + for i, stop in enumerate(self.stops): + if i > 0: + direct = distance(stop.stop, self.stops[i - 1].stop) + d_line = None + if ( + self.first_stop_on_rails_index + <= i + <= self.last_stop_on_rails_index + ): + d_line = distance_on_line( + self.stops[i - 1].stop, stop.stop, self.tracks, vertex + ) + if d_line and direct - 10 <= d_line[0] <= direct * 2: + vertex = d_line[1] + dist += round(d_line[0]) + else: + dist += round(direct) + stop.distance = dist + + def process_tags(self, master: OsmElementT) -> None: + relation = self.element + master_tags = {} if not master else master["tags"] + if "ref" not in relation["tags"] and "ref" not in master_tags: + self.city.notice("Missing ref on a route", relation) + self.ref = relation["tags"].get( + "ref", master_tags.get("ref", relation["tags"].get("name", None)) + ) + self.name = relation["tags"].get("name", None) + self.mode = relation["tags"]["route"] + if ( + "colour" not in relation["tags"] + and "colour" not in master_tags + and self.mode != "tram" + ): + self.city.notice("Missing colour on a route", relation) + try: + self.colour = normalize_colour( + relation["tags"].get("colour", master_tags.get("colour", None)) + ) + except ValueError as e: + self.colour = None + self.city.warn(str(e), relation) + try: + self.infill = normalize_colour( + relation["tags"].get( + "colour:infill", master_tags.get("colour:infill", None) + ) + ) + except ValueError as e: + self.infill = None + self.city.warn(str(e), relation) + self.network = Route.get_network(relation) + self.interval = Route.get_interval( + relation["tags"] + ) or Route.get_interval(master_tags) + self.start_time, self.end_time = get_start_end_times( + relation["tags"].get( + "opening_hours", master_tags.get("opening_hours", "") + ) + ) + if relation["tags"].get("public_transport:version") == "1": + self.city.warn( + "Public transport version is 1, which means the route " + "is an unsorted pile of objects", + relation, + ) + + def process_stop_members(self) -> list[OsmElementT]: + stations: set[StopArea] = set() # temporary for recording stations + seen_stops = False + seen_platforms = False + repeat_pos = None + stop_position_elements: list[OsmElementT] = [] + for m in self.element["members"]: + if "inactive" in m["role"]: + continue + k = el_id(m) + if k in self.city.stations: + st_list = self.city.stations[k] + st = st_list[0] + if len(st_list) > 1: + self.city.error( + f"Ambiguous station {st.name} in route. 
Please " + "use stop_position or split interchange stations", + self.element, + ) + el = self.city.elements[k] + actual_role = RouteStop.get_actual_role( + el, m["role"], self.city.modes + ) + if actual_role: + if m["role"] and actual_role not in m["role"]: + self.city.warn( + "Wrong role '{}' for {} {}".format( + m["role"], actual_role, k + ), + self.element, + ) + if repeat_pos is None: + if not self.stops or st not in stations: + stop = RouteStop(st) + self.stops.append(stop) + stations.add(st) + elif self.stops[-1].stoparea.id == st.id: + stop = self.stops[-1] + else: + # We've got a repeat + if ( + (seen_stops and seen_platforms) + or ( + actual_role == "stop" + and not seen_platforms + ) + or ( + actual_role == "platform" + and not seen_stops + ) + ): + # Circular route! + stop = RouteStop(st) + self.stops.append(stop) + stations.add(st) + else: + repeat_pos = 0 + if repeat_pos is not None: + if repeat_pos >= len(self.stops): + continue + # Check that the type matches + if (actual_role == "stop" and seen_stops) or ( + actual_role == "platform" and seen_platforms + ): + self.city.error( + 'Found an out-of-place {}: "{}" ({})'.format( + actual_role, el["tags"].get("name", ""), k + ), + self.element, + ) + continue + # Find the matching stop starting with index repeat_pos + while ( + repeat_pos < len(self.stops) + and self.stops[repeat_pos].stoparea.id != st.id + ): + repeat_pos += 1 + if repeat_pos >= len(self.stops): + self.city.error( + "Incorrect order of {}s at {}".format( + actual_role, k + ), + self.element, + ) + continue + stop = self.stops[repeat_pos] + + stop.add(m, self.element, self.city) + if repeat_pos is None: + seen_stops |= stop.seen_stop or stop.seen_station + seen_platforms |= stop.seen_platform + + if StopArea.is_stop(el): + stop_position_elements.append(el) + + continue + + if k not in self.city.elements: + if "stop" in m["role"] or "platform" in m["role"]: + raise CriticalValidationError( + f"{m['role']} {m['type']} {m['ref']} for route " + f"relation {self.element['id']} is not in the dataset" + ) + continue + el = self.city.elements[k] + if "tags" not in el: + self.city.error( + f"Untagged object {k} in a route", self.element + ) + continue + + is_under_construction = False + for ck in CONSTRUCTION_KEYS: + if ck in el["tags"]: + self.city.warn( + f"Under construction {m['role'] or 'feature'} {k} " + "in route. Consider setting 'inactive' role or " + "removing construction attributes", + self.element, + ) + is_under_construction = True + break + if is_under_construction: + continue + + if Station.is_station(el, self.city.modes): + # A station may be not included in this route due to previous + # 'stop area has multiple stations' error. No other error + # message is needed. 
+ pass + elif el["tags"].get("railway") in ("station", "halt"): + self.city.error( + "Missing station={} on a {}".format(self.mode, m["role"]), + el, + ) + else: + actual_role = RouteStop.get_actual_role( + el, m["role"], self.city.modes + ) + if actual_role: + self.city.error( + f"{actual_role} {m['type']} {m['ref']} is not " + "connected to a station in route", + self.element, + ) + elif not StopArea.is_track(el): + self.city.warn( + "Unknown member type for {} {} in route".format( + m["type"], m["ref"] + ), + self.element, + ) + return stop_position_elements + + def process_tracks( + self, stop_position_elements: list[OsmElementT] + ) -> None: + tracks, line_nodes = self.build_longest_line() + + for stop_el in stop_position_elements: + stop_id = el_id(stop_el) + if stop_id not in line_nodes: + self.city.warn( + 'Stop position "{}" ({}) is not on tracks'.format( + stop_el["tags"].get("name", ""), stop_id + ), + self.element, + ) + + # self.tracks would be a list of (lon, lat) for the longest stretch. + # Can be empty. + self.tracks = [el_center(self.city.elements.get(k)) for k in tracks] + if ( + None in self.tracks + ): # usually, extending BBOX for the city is needed + self.tracks = [] + for n in filter(lambda x: x not in self.city.elements, tracks): + self.city.warn( + f"The dataset is missing the railway tracks node {n}", + self.element, + ) + break + + if len(self.stops) > 1: + self.is_circular = ( + self.stops[0].stoparea == self.stops[-1].stoparea + ) + if ( + self.is_circular + and self.tracks + and self.tracks[0] != self.tracks[-1] + ): + self.city.warn( + "Non-closed rail sequence in a circular route", + self.element, + ) + + projected_stops_data = self.project_stops_on_line() + self.check_and_recover_stops_order(projected_stops_data) + self.apply_projected_stops_data(projected_stops_data) + + def apply_projected_stops_data(self, projected_stops_data: dict) -> None: + """Store better stop coordinates and indexes of first/last stops + that lie on a continuous track line, to the instance attributes. + """ + for attr in ("first_stop_on_rails_index", "last_stop_on_rails_index"): + setattr(self, attr, projected_stops_data[attr]) + + for stop_data in projected_stops_data["stops_on_longest_line"]: + route_stop = stop_data["route_stop"] + route_stop.positions_on_rails = stop_data["positions_on_rails"] + if stop_coords := stop_data["coords"]: + route_stop.stop = stop_coords + + def get_extended_tracks(self) -> RailT: + """Amend tracks with points of leading/trailing self.stops + that were not projected onto the longest tracks line. + Return a new array. + """ + if self.first_stop_on_rails_index >= len(self.stops): + tracks = [route_stop.stop for route_stop in self.stops] + else: + tracks = ( + [ + route_stop.stop + for i, route_stop in enumerate(self.stops) + if i < self.first_stop_on_rails_index + ] + + self.tracks + + [ + route_stop.stop + for i, route_stop in enumerate(self.stops) + if i > self.last_stop_on_rails_index + ] + ) + return tracks + + def get_truncated_tracks(self, tracks: RailT) -> RailT: + """Truncate leading/trailing segments of `tracks` param + that are beyond the first and last stop locations. + Return a new array. 
+ """ + if self.is_circular: + return tracks.copy() + + first_stop_location = find_segment(self.stops[0].stop, tracks, 0) + last_stop_location = find_segment(self.stops[-1].stop, tracks, 0) + + if last_stop_location != (None, None): + seg2, u2 = last_stop_location + if u2 == 0.0: + # Make seg2 the segment the last_stop_location is + # at the middle or end of + seg2 -= 1 + # u2 = 1.0 + if seg2 + 2 < len(tracks): + tracks = tracks[0 : seg2 + 2] # noqa E203 + tracks[-1] = self.stops[-1].stop + + if first_stop_location != (None, None): + seg1, u1 = first_stop_location + if u1 == 1.0: + # Make seg1 the segment the first_stop_location is + # at the beginning or middle of + seg1 += 1 + # u1 = 0.0 + if seg1 > 0: + tracks = tracks[seg1:] + tracks[0] = self.stops[0].stop + + return tracks + + def are_tracks_complete(self) -> bool: + return ( + self.first_stop_on_rails_index == 0 + and self.last_stop_on_rails_index == len(self) - 1 + ) + + def get_tracks_geometry(self) -> RailT: + tracks = self.get_extended_tracks() + tracks = self.get_truncated_tracks(tracks) + return tracks + + def check_stops_order_by_angle(self) -> tuple[list[str], list[str]]: + disorder_warnings = [] + disorder_errors = [] + for i, route_stop in enumerate( + islice(self.stops, 1, len(self.stops) - 1), start=1 + ): + angle = angle_between( + self.stops[i - 1].stop, + route_stop.stop, + self.stops[i + 1].stop, + ) + if angle < ALLOWED_ANGLE_BETWEEN_STOPS: + msg = ( + "Angle between stops around " + f'"{route_stop.stoparea.name}" {route_stop.stop} ' + f"is too narrow, {angle} degrees" + ) + if angle < DISALLOWED_ANGLE_BETWEEN_STOPS: + disorder_errors.append(msg) + else: + disorder_warnings.append(msg) + return disorder_warnings, disorder_errors + + def check_stops_order_on_tracks_direct( + self, stop_sequence: Iterator[dict] + ) -> str | None: + """Checks stops order on tracks, following stop_sequence + in direct order only. + :param stop_sequence: list of dict{'route_stop', 'positions_on_rails', + 'coords'} for RouteStops that belong to the longest contiguous + sequence of tracks in a route. + :return: error message on the first order violation or None. + """ + allowed_order_violations = 1 if self.is_circular else 0 + max_position_on_rails = -1 + for stop_data in stop_sequence: + positions_on_rails = stop_data["positions_on_rails"] + suitable_occurrence = 0 + while ( + suitable_occurrence < len(positions_on_rails) + and positions_on_rails[suitable_occurrence] + < max_position_on_rails + ): + suitable_occurrence += 1 + if suitable_occurrence == len(positions_on_rails): + if allowed_order_violations > 0: + suitable_occurrence -= 1 + allowed_order_violations -= 1 + else: + route_stop = stop_data["route_stop"] + return ( + "Stops on tracks are unordered near " + f'"{route_stop.stoparea.name}" {route_stop.stop}' + ) + max_position_on_rails = positions_on_rails[suitable_occurrence] + + def check_stops_order_on_tracks( + self, projected_stops_data: dict + ) -> str | None: + """Checks stops order on tracks, trying direct and reversed + order of stops in the stop_sequence. + :param projected_stops_data: info about RouteStops that belong to the + longest contiguous sequence of tracks in a route. May be changed + if tracks reversing is performed. + :return: error message on the first order violation or None. 
+ """ + error_message = self.check_stops_order_on_tracks_direct( + projected_stops_data["stops_on_longest_line"] + ) + if error_message: + error_message_reversed = self.check_stops_order_on_tracks_direct( + reversed(projected_stops_data["stops_on_longest_line"]) + ) + if error_message_reversed is None: + error_message = None + self.city.warn( + "Tracks seem to go in the opposite direction to stops", + self.element, + ) + self.tracks.reverse() + new_projected_stops_data = self.project_stops_on_line() + projected_stops_data.update(new_projected_stops_data) + + return error_message + + def check_stops_order( + self, projected_stops_data: dict + ) -> tuple[list[str], list[str]]: + ( + angle_disorder_warnings, + angle_disorder_errors, + ) = self.check_stops_order_by_angle() + disorder_on_tracks_error = self.check_stops_order_on_tracks( + projected_stops_data + ) + disorder_warnings = angle_disorder_warnings + disorder_errors = angle_disorder_errors + if disorder_on_tracks_error: + disorder_errors.append(disorder_on_tracks_error) + return disorder_warnings, disorder_errors + + def check_and_recover_stops_order( + self, projected_stops_data: dict + ) -> None: + """ + :param projected_stops_data: may change if we need to reverse tracks + """ + disorder_warnings, disorder_errors = self.check_stops_order( + projected_stops_data + ) + if disorder_warnings or disorder_errors: + resort_success = False + if self.city.recovery_data: + resort_success = self.try_resort_stops() + if resort_success: + for msg in disorder_warnings: + self.city.notice(msg, self.element) + for msg in disorder_errors: + self.city.warn( + "Fixed with recovery data: " + msg, self.element + ) + + if not resort_success: + for msg in disorder_warnings: + self.city.notice(msg, self.element) + for msg in disorder_errors: + self.city.error(msg, self.element) + + def try_resort_stops(self) -> bool: + """Precondition: self.city.recovery_data is not None. + Return success of station order recovering.""" + self_stops = {} # station name => RouteStop + for stop in self.stops: + station = stop.stoparea.station + stop_name = station.name + if stop_name == "?" 
and station.int_name: + stop_name = station.int_name + # We won't programmatically recover routes with repeating stations: + # such cases are rare and deserves manual verification + if stop_name in self_stops: + return False + self_stops[stop_name] = stop + + route_id = (self.colour, self.ref) + if route_id not in self.city.recovery_data: + return False + + stop_names = list(self_stops.keys()) + suitable_itineraries = [] + for itinerary in self.city.recovery_data[route_id]: + itinerary_stop_names = [ + stop["name"] for stop in itinerary["stations"] + ] + if not ( + len(stop_names) == len(itinerary_stop_names) + and sorted(stop_names) == sorted(itinerary_stop_names) + ): + continue + big_station_displacement = False + for it_stop in itinerary["stations"]: + name = it_stop["name"] + it_stop_center = it_stop["center"] + self_stop_center = self_stops[name].stoparea.station.center + if ( + distance(it_stop_center, self_stop_center) + > DISPLACEMENT_TOLERANCE + ): + big_station_displacement = True + break + if not big_station_displacement: + suitable_itineraries.append(itinerary) + + if len(suitable_itineraries) == 0: + return False + elif len(suitable_itineraries) == 1: + matching_itinerary = suitable_itineraries[0] + else: + from_tag = self.element["tags"].get("from") + to_tag = self.element["tags"].get("to") + if not from_tag and not to_tag: + return False + matching_itineraries = [ + itin + for itin in suitable_itineraries + if from_tag + and itin["from"] == from_tag + or to_tag + and itin["to"] == to_tag + ] + if len(matching_itineraries) != 1: + return False + matching_itinerary = matching_itineraries[0] + self.stops = [ + self_stops[stop["name"]] for stop in matching_itinerary["stations"] + ] + return True + + def get_end_transfers(self) -> tuple[IdT, IdT]: + """Using transfer ids because a train can arrive at different + stations within a transfer. But disregard transfer that may give + an impression of a circular route (for example, + Simonis / Elisabeth station and route 2 in Brussels). 
+ """ + return ( + (self[0].stoparea.id, self[-1].stoparea.id) + if ( + self[0].stoparea.transfer is not None + and self[0].stoparea.transfer == self[-1].stoparea.transfer + ) + else ( + self[0].stoparea.transfer or self[0].stoparea.id, + self[-1].stoparea.transfer or self[-1].stoparea.id, + ) + ) + + def get_transfers_sequence(self) -> list[IdT]: + """Return a list of stoparea or transfer (if not None) ids.""" + transfer_seq = [ + stop.stoparea.transfer or stop.stoparea.id for stop in self + ] + if ( + self[0].stoparea.transfer is not None + and self[0].stoparea.transfer == self[-1].stoparea.transfer + ): + transfer_seq[0], transfer_seq[-1] = self.get_end_transfers() + return transfer_seq + + def __len__(self) -> int: + return len(self.stops) + + def __getitem__(self, i) -> RouteStop: + return self.stops[i] + + def __iter__(self) -> Iterator[RouteStop]: + return iter(self.stops) + + def __repr__(self) -> str: + return ( + "Route(id={}, mode={}, ref={}, name={}, network={}, interval={}, " + "circular={}, num_stops={}, line_length={} m, from={}, to={}" + ).format( + self.id, + self.mode, + self.ref, + self.name, + self.network, + self.interval, + self.is_circular, + len(self.stops), + self.stops[-1].distance, + self.stops[0], + self.stops[-1], + ) diff --git a/subways/structure/route_master.py b/subways/structure/route_master.py new file mode 100644 index 0000000..36ab148 --- /dev/null +++ b/subways/structure/route_master.py @@ -0,0 +1,464 @@ +from __future__ import annotations + +import typing +from collections.abc import Iterator +from typing import TypeVar + +from subways.consts import MAX_DISTANCE_STOP_TO_LINE +from subways.css_colours import normalize_colour +from subways.geom_utils import distance, project_on_line +from subways.osm_element import el_id +from subways.structure.route import Route +from subways.structure.stop_area import StopArea +from subways.types import IdT, OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + from subways.structure.route_stop import RouteStop + + +SUGGEST_TRANSFER_MIN_DISTANCE = 100 # in meters + +T = TypeVar("T") + + +class RouteMaster: + def __init__(self, city: City, master: OsmElementT = None) -> None: + self.city = city + self.routes = [] + self.best: Route = None + self.id: IdT = el_id(master) + self.has_master = master is not None + self.interval_from_master = False + if master: + self.ref = master["tags"].get( + "ref", master["tags"].get("name", None) + ) + try: + self.colour = normalize_colour( + master["tags"].get("colour", None) + ) + except ValueError: + self.colour = None + try: + self.infill = normalize_colour( + master["tags"].get("colour:infill", None) + ) + except ValueError: + self.infill = None + self.network = Route.get_network(master) + self.mode = master["tags"].get( + "route_master", None + ) # This tag is required, but okay + self.name = master["tags"].get("name", None) + self.interval = Route.get_interval(master["tags"]) + self.interval_from_master = self.interval is not None + else: + self.ref = None + self.colour = None + self.infill = None + self.network = None + self.mode = None + self.name = None + self.interval = None + + def stopareas(self) -> Iterator[StopArea]: + yielded_stopareas = set() + for route in self: + for stoparea in route.stopareas(): + if stoparea not in yielded_stopareas: + yield stoparea + yielded_stopareas.add(stoparea) + + def add(self, route: Route) -> None: + if not self.network: + self.network = route.network + elif route.network and route.network != self.network: + 
self.city.error( + 'Route has different network ("{}") from master "{}"'.format( + route.network, self.network + ), + route.element, + ) + + if not self.colour: + self.colour = route.colour + elif route.colour and route.colour != self.colour: + self.city.notice( + 'Route "{}" has different colour from master "{}"'.format( + route.colour, self.colour + ), + route.element, + ) + + if not self.infill: + self.infill = route.infill + elif route.infill and route.infill != self.infill: + self.city.notice( + ( + f'Route "{route.infill}" has different infill colour ' + f'from master "{self.infill}"' + ), + route.element, + ) + + if not self.ref: + self.ref = route.ref + elif route.ref != self.ref: + self.city.notice( + 'Route "{}" has different ref from master "{}"'.format( + route.ref, self.ref + ), + route.element, + ) + + if not self.name: + self.name = route.name + + if not self.mode: + self.mode = route.mode + elif route.mode != self.mode: + self.city.error( + "Incompatible PT mode: master has {} and route has {}".format( + self.mode, route.mode + ), + route.element, + ) + return + + if not self.interval_from_master and route.interval: + if not self.interval: + self.interval = route.interval + else: + self.interval = min(self.interval, route.interval) + + # Choose minimal id for determinancy + if not self.has_master and (not self.id or self.id > route.id): + self.id = route.id + + self.routes.append(route) + if ( + not self.best + or len(route.stops) > len(self.best.stops) + or ( + # Choose route with minimal id for determinancy + len(route.stops) == len(self.best.stops) + and route.element["id"] < self.best.element["id"] + ) + ): + self.best = route + + def get_meaningful_routes(self) -> list[Route]: + return [route for route in self if len(route) >= 2] + + def find_twin_routes(self) -> dict[Route, Route]: + """Two non-circular routes are twins if they have the same end + stations and opposite directions, and the number of stations is + the same or almost the same. We'll then find stops that are present + in one direction and is missing in another direction - to warn. + """ + + twin_routes = {} # route => "twin" route + + for route in self.get_meaningful_routes(): + if route.is_circular: + continue # Difficult to calculate. TODO(?) in the future + if route in twin_routes: + continue + + route_transfer_ids = set(route.get_transfers_sequence()) + ends = route.get_end_transfers() + ends_reversed = ends[::-1] + + twin_candidates = [ + r + for r in self + if not r.is_circular + and r not in twin_routes + and r.get_end_transfers() == ends_reversed + # If absolute or relative difference in station count is large, + # possibly it's an express version of a route - skip it. + and ( + abs(len(r) - len(route)) <= 2 + or abs(len(r) - len(route)) / max(len(r), len(route)) + <= 0.2 + ) + ] + + if not twin_candidates: + continue + + twin_route = min( + twin_candidates, + key=lambda r: len( + route_transfer_ids ^ set(r.get_transfers_sequence()) + ), + ) + twin_routes[route] = twin_route + twin_routes[twin_route] = route + + return twin_routes + + def check_return_routes(self) -> None: + """Check if a route has return direction, and if twin routes + miss stations. + """ + meaningful_routes = self.get_meaningful_routes() + + if len(meaningful_routes) == 0: + self.city.error( + f"An empty route master {self.id}. 
" + "Please set construction:route if it is under construction" + ) + elif len(meaningful_routes) == 1: + log_function = ( + self.city.error + if not self.best.is_circular + else self.city.notice + ) + log_function( + "Only one route in route_master. " + "Please check if it needs a return route", + self.best.element, + ) + else: + self.check_return_circular_routes() + self.check_return_noncircular_routes() + + def check_return_noncircular_routes(self) -> None: + routes = [ + route + for route in self.get_meaningful_routes() + if not route.is_circular + ] + all_ends = {route.get_end_transfers(): route for route in routes} + for route in routes: + ends = route.get_end_transfers() + if ends[::-1] not in all_ends: + self.city.notice( + "Route does not have a return direction", route.element + ) + + twin_routes = self.find_twin_routes() + for route1, route2 in twin_routes.items(): + if route1.id > route2.id: + continue # to process a pair of routes only once + # and to ensure the order of routes in the pair + self.alert_twin_routes_differ(route1, route2) + + def check_return_circular_routes(self) -> None: + routes = { + route + for route in self.get_meaningful_routes() + if route.is_circular + } + routes_having_backward = set() + + for route in routes: + if route in routes_having_backward: + continue + transfer_sequence1 = [ + stop.stoparea.transfer or stop.stoparea.id for stop in route + ] + transfer_sequence1.pop() + for potential_backward_route in routes - {route}: + transfer_sequence2 = [ + stop.stoparea.transfer or stop.stoparea.id + for stop in potential_backward_route + ][ + -2::-1 + ] # truncate repeated first stop and reverse + common_subsequence = self.find_common_circular_subsequence( + transfer_sequence1, transfer_sequence2 + ) + if len(common_subsequence) >= 0.8 * min( + len(transfer_sequence1), len(transfer_sequence2) + ): + routes_having_backward.add(route) + routes_having_backward.add(potential_backward_route) + break + + for route in routes - routes_having_backward: + self.city.notice( + "Route does not have a return direction", route.element + ) + + @staticmethod + def find_common_circular_subsequence( + seq1: list[T], seq2: list[T] + ) -> list[T]: + """seq1 and seq2 are supposed to be stops of some circular routes. + Prerequisites to rely on the result: + - elements of each sequence are not repeated + - the order of stations is not violated. + Under these conditions we don't need LCS algorithm. Linear scan is + sufficient. 
+ """ + i1, i2 = -1, -1 + for i1, x in enumerate(seq1): + try: + i2 = seq2.index(x) + except ValueError: + continue + else: + # x is found both in seq1 and seq2 + break + + if i2 == -1: + return [] + + # Shift cyclically so that the common element takes the first position + # both in seq1 and seq2 + seq1 = seq1[i1:] + seq1[:i1] + seq2 = seq2[i2:] + seq2[:i2] + + common_subsequence = [] + i2 = 0 + for x in seq1: + try: + i2 = seq2.index(x, i2) + except ValueError: + continue + common_subsequence.append(x) + i2 += 1 + if i2 >= len(seq2): + break + return common_subsequence + + def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: + """Arguments are that route1.id < route2.id""" + ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) = self.calculate_twin_routes_diff(route1, route2) + + for st in stops_missing_from_route1: + if ( + not route1.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route1.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.city.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"in the {route2.id} but not included in {route1.id}", + route1.element, + ) + + for st in stops_missing_from_route2: + if ( + not route2.are_tracks_complete() + or ( + projected_point := project_on_line( + st.stoparea.center, route2.tracks + )["projected_point"] + ) + is not None + and distance(st.stoparea.center, projected_point) + <= MAX_DISTANCE_STOP_TO_LINE + ): + self.city.notice( + f"Stop {st.stoparea.station.name} {st.stop} is included " + f"in the {route1.id} but not included in {route2.id}", + route2.element, + ) + + for st1, st2 in stops_that_dont_match: + if ( + st1.stoparea.station == st2.stoparea.station + or distance(st1.stop, st2.stop) < SUGGEST_TRANSFER_MIN_DISTANCE + ): + self.city.notice( + "Should there be one stoparea or a transfer between " + f"{st1.stoparea.station.name} {st1.stop} and " + f"{st2.stoparea.station.name} {st2.stop}?", + route1.element, + ) + + @staticmethod + def calculate_twin_routes_diff(route1: Route, route2: Route) -> tuple: + """Wagner–Fischer algorithm for stops diff in two twin routes.""" + + stops1 = route1.stops + stops2 = route2.stops[::-1] + + def stops_match(stop1: RouteStop, stop2: RouteStop) -> bool: + return ( + stop1.stoparea == stop2.stoparea + or stop1.stoparea.transfer is not None + and stop1.stoparea.transfer == stop2.stoparea.transfer + ) + + d = [[0] * (len(stops2) + 1) for _ in range(len(stops1) + 1)] + d[0] = list(range(len(stops2) + 1)) + for i in range(len(stops1) + 1): + d[i][0] = i + + for i in range(1, len(stops1) + 1): + for j in range(1, len(stops2) + 1): + d[i][j] = ( + d[i - 1][j - 1] + if stops_match(stops1[i - 1], stops2[j - 1]) + else min((d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])) + 1 + ) + + stops_missing_from_route1: list[RouteStop] = [] + stops_missing_from_route2: list[RouteStop] = [] + stops_that_dont_match: list[tuple[RouteStop, RouteStop]] = [] + + i = len(stops1) + j = len(stops2) + while not (i == 0 and j == 0): + action = None + if i > 0 and j > 0: + match = stops_match(stops1[i - 1], stops2[j - 1]) + if match and d[i - 1][j - 1] == d[i][j]: + action = "no" + elif not match and d[i - 1][j - 1] + 1 == d[i][j]: + action = "change" + if not action and i > 0 and d[i - 1][j] + 1 == d[i][j]: + action = "add_2" + if not action and j > 0 and d[i][j - 1] + 1 == d[i][j]: + action = "add_1" + + match action: + case 
"add_1": + stops_missing_from_route1.append(stops2[j - 1]) + j -= 1 + case "add_2": + stops_missing_from_route2.append(stops1[i - 1]) + i -= 1 + case _: + if action == "change": + stops_that_dont_match.append( + (stops1[i - 1], stops2[j - 1]) + ) + i -= 1 + j -= 1 + return ( + stops_missing_from_route1, + stops_missing_from_route2, + stops_that_dont_match, + ) + + def __len__(self) -> int: + return len(self.routes) + + def __getitem__(self, i) -> Route: + return self.routes[i] + + def __iter__(self) -> Iterator[Route]: + return iter(self.routes) + + def __repr__(self) -> str: + return ( + f"RouteMaster(id={self.id}, mode={self.mode}, ref={self.ref}, " + f"name={self.name}, network={self.network}, " + f"num_variants={len(self.routes)}" + ) diff --git a/subways/structure/route_stop.py b/subways/structure/route_stop.py new file mode 100644 index 0000000..c67d597 --- /dev/null +++ b/subways/structure/route_stop.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +import typing + +from subways.osm_element import el_center, el_id +from subways.structure.station import Station +from subways.structure.stop_area import StopArea +from subways.types import LonLat, OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + + +class RouteStop: + def __init__(self, stoparea: StopArea) -> None: + self.stoparea: StopArea = stoparea + self.stop: LonLat = None # Stop position, possibly projected + self.distance = 0 # In meters from the start of the route + self.platform_entry = None # Platform el_id + self.platform_exit = None # Platform el_id + self.can_enter = False + self.can_exit = False + self.seen_stop = False + self.seen_platform_entry = False + self.seen_platform_exit = False + self.seen_station = False + + @property + def seen_platform(self) -> bool: + return self.seen_platform_entry or self.seen_platform_exit + + @staticmethod + def get_actual_role( + el: OsmElementT, role: str, modes: set[str] + ) -> str | None: + if StopArea.is_stop(el): + return "stop" + elif StopArea.is_platform(el): + return "platform" + elif Station.is_station(el, modes): + if "platform" in role: + return "platform" + else: + return "stop" + return None + + def add(self, member: dict, relation: OsmElementT, city: City) -> None: + el = city.elements[el_id(member)] + role = member["role"] + + if StopArea.is_stop(el): + if "platform" in role: + city.warn("Stop position in a platform role in a route", el) + if el["type"] != "node": + city.error("Stop position is not a node", el) + self.stop = el_center(el) + if "entry_only" not in role: + self.can_exit = True + if "exit_only" not in role: + self.can_enter = True + + elif Station.is_station(el, city.modes): + if el["type"] != "node": + city.notice("Station in route is not a node", el) + + if not self.seen_stop and not self.seen_platform: + self.stop = el_center(el) + self.can_enter = True + self.can_exit = True + + elif StopArea.is_platform(el): + if "stop" in role: + city.warn("Platform in a stop role in a route", el) + if "exit_only" not in role: + self.platform_entry = el_id(el) + self.can_enter = True + if "entry_only" not in role: + self.platform_exit = el_id(el) + self.can_exit = True + if not self.seen_stop: + self.stop = el_center(el) + + multiple_check = False + actual_role = RouteStop.get_actual_role(el, role, city.modes) + if actual_role == "platform": + if role == "platform_entry_only": + multiple_check = self.seen_platform_entry + self.seen_platform_entry = True + elif role == "platform_exit_only": + multiple_check = 
self.seen_platform_exit + self.seen_platform_exit = True + else: + if role != "platform" and "stop" not in role: + city.warn( + f'Platform "{el["tags"].get("name", "")}" ' + f'({el_id(el)}) with invalid role "{role}" in route', + relation, + ) + multiple_check = self.seen_platform + self.seen_platform_entry = True + self.seen_platform_exit = True + elif actual_role == "stop": + multiple_check = self.seen_stop + self.seen_stop = True + if multiple_check: + log_function = city.error if actual_role == "stop" else city.notice + log_function( + f'Multiple {actual_role}s for a station "' + f'{el["tags"].get("name", "")} ' + f"({el_id(el)}) in a route relation", + relation, + ) + + def __repr__(self) -> str: + return ( + "RouteStop(stop={}, pl_entry={}, pl_exit={}, stoparea={})".format( + self.stop, + self.platform_entry, + self.platform_exit, + self.stoparea, + ) + ) diff --git a/subways/structure/station.py b/subways/structure/station.py new file mode 100644 index 0000000..f1cd2fa --- /dev/null +++ b/subways/structure/station.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +import typing + +from subways.consts import ALL_MODES, CONSTRUCTION_KEYS +from subways.css_colours import normalize_colour +from subways.osm_element import el_center, el_id +from subways.types import IdT, OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + + +class Station: + def __init__(self, el: OsmElementT, city: City) -> None: + """Call this with a railway=station OSM feature.""" + self.id: IdT = el_id(el) + self.element: OsmElementT = el + self.modes = Station.get_modes(el) + self.name = el["tags"].get("name", "?") + self.int_name = el["tags"].get( + "int_name", el["tags"].get("name:en", None) + ) + try: + self.colour = normalize_colour(el["tags"].get("colour", None)) + except ValueError as e: + self.colour = None + city.warn(str(e), el) + self.center = el_center(el) + if self.center is None: + raise Exception("Could not find center of {}".format(el)) + + @staticmethod + def get_modes(el: OsmElementT) -> set[str]: + modes = {m for m in ALL_MODES if el["tags"].get(m) == "yes"} + if mode := el["tags"].get("station"): + modes.add(mode) + return modes + + @staticmethod + def is_station(el: OsmElementT, modes: set[str]) -> bool: + # public_transport=station is too ambiguous and unspecific to use, + # so we expect for it to be backed by railway=station. 
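A standalone illustration of the mode-detection rule in `Station.get_modes` above: a station's modes are the mode tags set to "yes" plus the value of the `station=*` tag. The `ALL_MODES` constant here is assumed for the example only; the real constant lives in `subways.consts` and may contain a different set of values.

```python
ALL_MODES = {"subway", "light_rail", "monorail", "train", "tram"}


def get_modes(tags: dict[str, str]) -> set[str]:
    """Collect transit modes declared on a station's tags."""
    modes = {m for m in ALL_MODES if tags.get(m) == "yes"}
    if mode := tags.get("station"):
        modes.add(mode)
    return modes


assert get_modes({"railway": "station", "station": "subway"}) == {"subway"}
assert get_modes({"railway": "station", "light_rail": "yes"}) == {
    "light_rail"
}
```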
+ if ( + "tram" in modes + and el.get("tags", {}).get("railway") == "tram_stop" + ): + return True + if el.get("tags", {}).get("railway") not in ("station", "halt"): + return False + for k in CONSTRUCTION_KEYS: + if k in el["tags"]: + return False + # Not checking for station=train, obviously + if "train" not in modes and Station.get_modes(el).isdisjoint(modes): + return False + return True + + def __repr__(self) -> str: + return "Station(id={}, modes={}, name={}, center={})".format( + self.id, ",".join(self.modes), self.name, self.center + ) diff --git a/subways/structure/stop_area.py b/subways/structure/stop_area.py new file mode 100644 index 0000000..913b2c7 --- /dev/null +++ b/subways/structure/stop_area.py @@ -0,0 +1,191 @@ +from __future__ import annotations + +import typing +from itertools import chain + +from subways.consts import RAILWAY_TYPES +from subways.css_colours import normalize_colour +from subways.geom_utils import distance +from subways.osm_element import el_id, el_center +from subways.structure.station import Station +from subways.types import IdT, OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + +MAX_DISTANCE_TO_ENTRANCES = 300 # in meters + + +class StopArea: + @staticmethod + def is_stop(el: OsmElementT) -> bool: + if "tags" not in el: + return False + if el["tags"].get("railway") == "stop": + return True + if el["tags"].get("public_transport") == "stop_position": + return True + return False + + @staticmethod + def is_platform(el: OsmElementT) -> bool: + if "tags" not in el: + return False + if el["tags"].get("railway") in ("platform", "platform_edge"): + return True + if el["tags"].get("public_transport") == "platform": + return True + return False + + @staticmethod + def is_track(el: OsmElementT) -> bool: + if el["type"] != "way" or "tags" not in el: + return False + return el["tags"].get("railway") in RAILWAY_TYPES + + def __init__( + self, + station: Station, + city: City, + stop_area: OsmElementT | None = None, + ) -> None: + """Call this with a Station object.""" + + self.element: OsmElementT = stop_area or station.element + self.id: IdT = el_id(self.element) + self.station: Station = station + self.stops = set() # set of el_ids of stop_positions + self.platforms = set() # set of el_ids of platforms + self.exits = set() # el_id of subway_entrance/train_station_entrance + # for leaving the platform + self.entrances = set() # el_id of subway/train_station entrance + # for entering the platform + self.center = None # lon, lat of the station centre point + self.centers = {} # el_id -> (lon, lat) for all elements + self.transfer = None # el_id of a transfer relation + + self.modes = station.modes + self.name = station.name + self.int_name = station.int_name + self.colour = station.colour + + if stop_area: + self.name = stop_area["tags"].get("name", self.name) + self.int_name = stop_area["tags"].get( + "int_name", stop_area["tags"].get("name:en", self.int_name) + ) + try: + self.colour = ( + normalize_colour(stop_area["tags"].get("colour")) + or self.colour + ) + except ValueError as e: + city.warn(str(e), stop_area) + + self._process_members(station, city, stop_area) + else: + self._add_nearby_entrances(station, city) + + if self.exits and not self.entrances: + city.warn( + "Only exits for a station, no entrances", + stop_area or station.element, + ) + if self.entrances and not self.exits: + city.warn("No exits for a station", stop_area or station.element) + + for el in self.get_elements(): + self.centers[el] = 
el_center(city.elements[el]) + + """Calculate the center point of the station. This algorithm + cannot rely on a station node, since many stop_areas can share one. + Basically it averages center points of all platforms + and stop positions.""" + if len(self.stops) + len(self.platforms) == 0: + self.center = station.center + else: + self.center = [0, 0] + for sp in chain(self.stops, self.platforms): + spc = self.centers[sp] + for i in range(2): + self.center[i] += spc[i] + for i in range(2): + self.center[i] /= len(self.stops) + len(self.platforms) + + def _process_members( + self, station: Station, city: City, stop_area: OsmElementT + ) -> None: + # If we have a stop area, add all elements from it + tracks_detected = False + for m in stop_area["members"]: + k = el_id(m) + m_el = city.elements.get(k) + if not m_el or "tags" not in m_el: + continue + if Station.is_station(m_el, city.modes): + if k != station.id: + city.error("Stop area has multiple stations", stop_area) + elif StopArea.is_stop(m_el): + self.stops.add(k) + elif StopArea.is_platform(m_el): + self.platforms.add(k) + elif (entrance_type := m_el["tags"].get("railway")) in ( + "subway_entrance", + "train_station_entrance", + ): + if m_el["type"] != "node": + city.warn(f"{entrance_type} is not a node", m_el) + if ( + m_el["tags"].get("entrance") != "exit" + and m["role"] != "exit_only" + ): + self.entrances.add(k) + if ( + m_el["tags"].get("entrance") != "entrance" + and m["role"] != "entry_only" + ): + self.exits.add(k) + elif StopArea.is_track(m_el): + tracks_detected = True + + if tracks_detected: + city.warn("Tracks in a stop_area relation", stop_area) + + def _add_nearby_entrances(self, station: Station, city: City) -> None: + center = station.center + for entrance_el in ( + el + for el in city.elements.values() + if "tags" in el + and (entrance_type := el["tags"].get("railway")) + in ("subway_entrance", "train_station_entrance") + ): + entrance_id = el_id(entrance_el) + if entrance_id in city.stop_areas: + continue # This entrance belongs to some stop_area + c_center = el_center(entrance_el) + if ( + c_center + and distance(center, c_center) <= MAX_DISTANCE_TO_ENTRANCES + ): + if entrance_el["type"] != "node": + city.warn(f"{entrance_type} is not a node", entrance_el) + etag = entrance_el["tags"].get("entrance") + if etag != "exit": + self.entrances.add(entrance_id) + if etag != "entrance": + self.exits.add(entrance_id) + + def get_elements(self) -> set[IdT]: + result = {self.id, self.station.id} + result.update(self.entrances) + result.update(self.exits) + result.update(self.stops) + result.update(self.platforms) + return result + + def __repr__(self) -> str: + return ( + f"StopArea(id={self.id}, name={self.name}, station={self.station}," + f" transfer={self.transfer}, center={self.center})" + ) diff --git a/subway_io.py b/subways/subway_io.py similarity index 96% rename from subway_io.py rename to subways/subway_io.py index 8ef5f6f..3980b4f 100644 --- a/subway_io.py +++ b/subways/subway_io.py @@ -1,12 +1,20 @@ +from __future__ import annotations + import json import logging +import typing from collections import OrderedDict +from io import BufferedIOBase from typing import Any, TextIO -from subway_structure import City, OsmElementT, StopArea +from subways.types import OsmElementT + +if typing.TYPE_CHECKING: + from subways.structure.city import City + from subways.structure.stop_area import StopArea -def load_xml(f: TextIO | str) -> list[OsmElementT]: +def load_xml(f: BufferedIOBase | str) -> list[OsmElementT]: try: from 
lxml import etree except ImportError: @@ -257,7 +265,7 @@ def write_recovery_data( def make_city_recovery_data( city: City, ) -> dict[tuple[str | None, str | None], list[dict]]: - routes: dict[tuple(str | None, str | None), list[dict]] = {} + routes: dict[tuple[str | None, str | None], list[dict]] = {} for route in city: # Recovery is based primarily on route/station names/refs. # If route's ref/colour changes, the route won't be used. diff --git a/tests/README.md b/subways/tests/README.md similarity index 100% rename from tests/README.md rename to subways/tests/README.md diff --git a/tests/__init__.py b/subways/tests/__init__.py similarity index 100% rename from tests/__init__.py rename to subways/tests/__init__.py diff --git a/tests/assets/cities_info_with_bad_values.csv b/subways/tests/assets/cities_info_with_bad_values.csv similarity index 100% rename from tests/assets/cities_info_with_bad_values.csv rename to subways/tests/assets/cities_info_with_bad_values.csv diff --git a/tests/assets/route_masters.osm b/subways/tests/assets/route_masters.osm similarity index 100% rename from tests/assets/route_masters.osm rename to subways/tests/assets/route_masters.osm diff --git a/tests/assets/tiny_world.osm b/subways/tests/assets/tiny_world.osm similarity index 100% rename from tests/assets/tiny_world.osm rename to subways/tests/assets/tiny_world.osm diff --git a/tests/assets/tiny_world_gtfs/agency.txt b/subways/tests/assets/tiny_world_gtfs/agency.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/agency.txt rename to subways/tests/assets/tiny_world_gtfs/agency.txt diff --git a/tests/assets/tiny_world_gtfs/calendar.txt b/subways/tests/assets/tiny_world_gtfs/calendar.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/calendar.txt rename to subways/tests/assets/tiny_world_gtfs/calendar.txt diff --git a/tests/assets/tiny_world_gtfs/frequencies.txt b/subways/tests/assets/tiny_world_gtfs/frequencies.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/frequencies.txt rename to subways/tests/assets/tiny_world_gtfs/frequencies.txt diff --git a/tests/assets/tiny_world_gtfs/routes.txt b/subways/tests/assets/tiny_world_gtfs/routes.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/routes.txt rename to subways/tests/assets/tiny_world_gtfs/routes.txt diff --git a/tests/assets/tiny_world_gtfs/shapes.txt b/subways/tests/assets/tiny_world_gtfs/shapes.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/shapes.txt rename to subways/tests/assets/tiny_world_gtfs/shapes.txt diff --git a/tests/assets/tiny_world_gtfs/stop_times.txt b/subways/tests/assets/tiny_world_gtfs/stop_times.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/stop_times.txt rename to subways/tests/assets/tiny_world_gtfs/stop_times.txt diff --git a/tests/assets/tiny_world_gtfs/stops.txt b/subways/tests/assets/tiny_world_gtfs/stops.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/stops.txt rename to subways/tests/assets/tiny_world_gtfs/stops.txt diff --git a/tests/assets/tiny_world_gtfs/transfers.txt b/subways/tests/assets/tiny_world_gtfs/transfers.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/transfers.txt rename to subways/tests/assets/tiny_world_gtfs/transfers.txt diff --git a/tests/assets/tiny_world_gtfs/trips.txt b/subways/tests/assets/tiny_world_gtfs/trips.txt similarity index 100% rename from tests/assets/tiny_world_gtfs/trips.txt rename to subways/tests/assets/tiny_world_gtfs/trips.txt diff --git 
a/tests/assets/twin_routes.osm b/subways/tests/assets/twin_routes.osm similarity index 100% rename from tests/assets/twin_routes.osm rename to subways/tests/assets/twin_routes.osm diff --git a/tests/assets/twin_routes_with_divergence.osm b/subways/tests/assets/twin_routes_with_divergence.osm similarity index 100% rename from tests/assets/twin_routes_with_divergence.osm rename to subways/tests/assets/twin_routes_with_divergence.osm diff --git a/tests/sample_data_for_build_tracks.py b/subways/tests/sample_data_for_build_tracks.py similarity index 100% rename from tests/sample_data_for_build_tracks.py rename to subways/tests/sample_data_for_build_tracks.py diff --git a/tests/sample_data_for_center_calculation.py b/subways/tests/sample_data_for_center_calculation.py similarity index 100% rename from tests/sample_data_for_center_calculation.py rename to subways/tests/sample_data_for_center_calculation.py diff --git a/tests/sample_data_for_error_messages.py b/subways/tests/sample_data_for_error_messages.py similarity index 78% rename from tests/sample_data_for_error_messages.py rename to subways/tests/sample_data_for_error_messages.py index 0f5a434..907a077 100644 --- a/tests/sample_data_for_error_messages.py +++ b/subways/tests/sample_data_for_error_messages.py @@ -42,15 +42,108 @@ metro_samples = [ "cities_info": [ { "num_stations": 2, - "num_lines": 1, - "num_light_lines": 0, - "num_interchanges": 0, }, ], "errors": [], "warnings": [], "notices": [], }, + { + "name": "Station colour tag present/absent, correct/incorrect, on bear station / with stop_area", # noqa E501 + "xml": """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""", + "cities_info": [ + { + "num_stations": 6, + }, + ], + "errors": [], + "warnings": [ + 'Unknown colour code: incorrect (node 4, "Station 4")', + 'Unknown colour code: incorrect (node 6, "Station 6")', + ], + "notices": [], + }, { "name": "Bad station order", "xml": """ diff --git a/tests/sample_data_for_outputs.py b/subways/tests/sample_data_for_outputs.py similarity index 100% rename from tests/sample_data_for_outputs.py rename to subways/tests/sample_data_for_outputs.py diff --git a/tests/sample_data_for_twin_routes.py b/subways/tests/sample_data_for_twin_routes.py similarity index 100% rename from tests/sample_data_for_twin_routes.py rename to subways/tests/sample_data_for_twin_routes.py diff --git a/tests/test_build_tracks.py b/subways/tests/test_build_tracks.py similarity index 96% rename from tests/test_build_tracks.py rename to subways/tests/test_build_tracks.py index b694bbe..2bd4108 100644 --- a/tests/test_build_tracks.py +++ b/subways/tests/test_build_tracks.py @@ -1,5 +1,5 @@ -from tests.sample_data_for_build_tracks import metro_samples -from tests.util import JsonLikeComparisonMixin, TestCase +from subways.tests.sample_data_for_build_tracks import metro_samples +from subways.tests.util import JsonLikeComparisonMixin, TestCase class TestOneRouteTracks(JsonLikeComparisonMixin, TestCase): diff --git a/tests/test_center_calculation.py b/subways/tests/test_center_calculation.py similarity index 91% rename from tests/test_center_calculation.py rename to subways/tests/test_center_calculation.py index 0e42360..5c83627 100644 --- a/tests/test_center_calculation.py +++ b/subways/tests/test_center_calculation.py @@ -1,9 +1,9 @@ import io from unittest import TestCase -from process_subways import calculate_centers -from subway_io import load_xml -from 
tests.sample_data_for_center_calculation import metro_samples +from subways.validation import calculate_centers +from subways.subway_io import load_xml +from subways.tests.sample_data_for_center_calculation import metro_samples class TestCenterCalculation(TestCase): diff --git a/tests/test_error_messages.py b/subways/tests/test_error_messages.py similarity index 86% rename from tests/test_error_messages.py rename to subways/tests/test_error_messages.py index c833001..d879c85 100644 --- a/tests/test_error_messages.py +++ b/subways/tests/test_error_messages.py @@ -1,12 +1,12 @@ import itertools -from tests.sample_data_for_error_messages import ( +from subways.tests.sample_data_for_error_messages import ( metro_samples as metro_samples_error, ) -from tests.sample_data_for_twin_routes import ( +from subways.tests.sample_data_for_twin_routes import ( metro_samples as metro_samples_route_masters, ) -from tests.util import TestCase +from subways.tests.util import TestCase class TestValidationMessages(TestCase): diff --git a/tests/test_find_transfers.py b/subways/tests/test_find_transfers.py similarity index 88% rename from tests/test_find_transfers.py rename to subways/tests/test_find_transfers.py index bb46dc3..294304f 100644 --- a/tests/test_find_transfers.py +++ b/subways/tests/test_find_transfers.py @@ -1,7 +1,7 @@ from copy import deepcopy -from tests.sample_data_for_outputs import metro_samples -from tests.util import TestCase, JsonLikeComparisonMixin +from subways.tests.sample_data_for_outputs import metro_samples +from subways.tests.util import TestCase, JsonLikeComparisonMixin class TestTransfers(JsonLikeComparisonMixin, TestCase): diff --git a/tests/test_gtfs_processor.py b/subways/tests/test_gtfs_processor.py similarity index 95% rename from tests/test_gtfs_processor.py rename to subways/tests/test_gtfs_processor.py index 1ff7414..d5a4dcf 100644 --- a/tests/test_gtfs_processor.py +++ b/subways/tests/test_gtfs_processor.py @@ -2,10 +2,14 @@ import csv from functools import partial from pathlib import Path -from processors._common import transit_to_dict -from processors.gtfs import dict_to_row, GTFS_COLUMNS, transit_data_to_gtfs -from tests.sample_data_for_outputs import metro_samples -from tests.util import TestCase +from subways.processors._common import transit_to_dict +from subways.processors.gtfs import ( + dict_to_row, + GTFS_COLUMNS, + transit_data_to_gtfs, +) +from subways.tests.sample_data_for_outputs import metro_samples +from subways.tests.util import TestCase class TestGTFS(TestCase): diff --git a/tests/test_mapsme_processor.py b/subways/tests/test_mapsme_processor.py similarity index 89% rename from tests/test_mapsme_processor.py rename to subways/tests/test_mapsme_processor.py index 64eb9cb..c77fc6a 100644 --- a/tests/test_mapsme_processor.py +++ b/subways/tests/test_mapsme_processor.py @@ -1,8 +1,8 @@ from operator import itemgetter -from processors.mapsme import transit_data_to_mapsme -from tests.sample_data_for_outputs import metro_samples -from tests.util import JsonLikeComparisonMixin, TestCase +from subways.processors.mapsme import transit_data_to_mapsme +from subways.tests.sample_data_for_outputs import metro_samples +from subways.tests.util import JsonLikeComparisonMixin, TestCase class TestMapsme(JsonLikeComparisonMixin, TestCase): diff --git a/tests/test_overpass.py b/subways/tests/test_overpass.py similarity index 97% rename from tests/test_overpass.py rename to subways/tests/test_overpass.py index 2b0afa3..beb03ef 100644 --- a/tests/test_overpass.py +++ 
b/subways/tests/test_overpass.py @@ -1,6 +1,6 @@ from unittest import TestCase, mock -from process_subways import compose_overpass_request, overpass_request +from subways.overpass import compose_overpass_request, overpass_request class TestOverpassQuery(TestCase): @@ -150,11 +150,11 @@ class TestOverpassQuery(TestCase): "%28._%3B%3E%3E%3B%29%3Bout%20body%20center%20qt%3B" ) - with mock.patch("process_subways.json.load") as load_mock: + with mock.patch("subways.overpass.json.load") as load_mock: load_mock.return_value = {"elements": []} with mock.patch( - "process_subways.urllib.request.urlopen" + "subways.overpass.urllib.request.urlopen" ) as urlopen_mock: urlopen_mock.return_value.getcode.return_value = 200 diff --git a/tests/test_prepare_cities.py b/subways/tests/test_prepare_cities.py similarity index 96% rename from tests/test_prepare_cities.py rename to subways/tests/test_prepare_cities.py index 63ddce6..09679c7 100644 --- a/tests/test_prepare_cities.py +++ b/subways/tests/test_prepare_cities.py @@ -2,7 +2,7 @@ import inspect from pathlib import Path from unittest import TestCase -from process_subways import prepare_cities +from subways.validation import prepare_cities class TestPrepareCities(TestCase): diff --git a/tests/test_projection.py b/subways/tests/test_projection.py similarity index 86% rename from tests/test_projection.py rename to subways/tests/test_projection.py index b0091aa..770232c 100644 --- a/tests/test_projection.py +++ b/subways/tests/test_projection.py @@ -2,17 +2,23 @@ import collections import itertools import unittest -from subway_structure import project_on_segment +from subways.geom_utils import project_on_segment +from subways.types import LonLat class TestProjection(unittest.TestCase): - """Test subway_structure.project_on_segment function""" + """Test subways.geom_utils.project_on_segment function""" PRECISION = 10 # decimal places in assertAlmostEqual SHIFT = 1e-6 # Small distance between projected point and segment endpoint - def _test_projection_in_bulk(self, points, segments, answers): + def _test_projection_in_bulk( + self, + points: list[LonLat], + segments: list[tuple[LonLat, LonLat]], + answers: list[float | None], + ) -> None: """Test 'project_on_segment' function for array of points and array of parallel segments projections on which are equal. """ @@ -39,7 +45,7 @@ class TestProjection(unittest.TestCase): f"{segment}: {u} returned, {answer} expected", ) - def test_projection_on_horizontal_segments(self): + def test_projection_on_horizontal_segments(self) -> None: points = [ (-2, 0), (-1 - self.SHIFT, 0), @@ -74,7 +80,7 @@ class TestProjection(unittest.TestCase): self._test_projection_in_bulk(points, horizontal_segments, answers) - def test_projection_on_vertical_segments(self): + def test_projection_on_vertical_segments(self) -> None: points = [ (0, -2), (0, -1 - self.SHIFT), @@ -109,7 +115,7 @@ class TestProjection(unittest.TestCase): self._test_projection_in_bulk(points, vertical_segments, answers) - def test_projection_on_inclined_segment(self): + def test_projection_on_inclined_segment(self) -> None: points = [ (-2, -2), (-1, -1), @@ -128,7 +134,7 @@ class TestProjection(unittest.TestCase): self._test_projection_in_bulk(points, segments, answers) - def test_projection_with_different_collections(self): + def test_projection_with_different_collections(self) -> None: """The tested function should accept points as any consecutive container with index operator. 
""" @@ -148,7 +154,7 @@ class TestProjection(unittest.TestCase): s2 = s2_type(segment_end2) project_on_segment(p, s1, s2) - def test_projection_on_degenerate_segment(self): + def test_projection_on_degenerate_segment(self) -> None: coords = [-1, 0, 1] points = [(x, y) for x, y in itertools.product(coords, coords)] segments = [ diff --git a/tests/test_route_master.py b/subways/tests/test_route_master.py similarity index 96% rename from tests/test_route_master.py rename to subways/tests/test_route_master.py index 22d2f8b..77ddf21 100644 --- a/tests/test_route_master.py +++ b/subways/tests/test_route_master.py @@ -1,6 +1,6 @@ -from subway_structure import RouteMaster -from tests.sample_data_for_twin_routes import metro_samples -from tests.util import TestCase +from subways.structure.route_master import RouteMaster +from subways.tests.sample_data_for_twin_routes import metro_samples +from subways.tests.util import TestCase class TestRouteMaster(TestCase): diff --git a/tests/test_station.py b/subways/tests/test_station.py similarity index 96% rename from tests/test_station.py rename to subways/tests/test_station.py index 2081aaa..65f4b87 100644 --- a/tests/test_station.py +++ b/subways/tests/test_station.py @@ -1,6 +1,6 @@ from unittest import TestCase -from subway_structure import Station +from subways.structure.station import Station class TestStation(TestCase): diff --git a/tests/test_storage.py b/subways/tests/test_storage.py similarity index 86% rename from tests/test_storage.py rename to subways/tests/test_storage.py index 042f428..692bddd 100644 --- a/tests/test_storage.py +++ b/subways/tests/test_storage.py @@ -1,9 +1,9 @@ import json from operator import itemgetter -from processors._common import transit_to_dict -from tests.sample_data_for_outputs import metro_samples -from tests.util import JsonLikeComparisonMixin, TestCase +from subways.processors._common import transit_to_dict +from subways.tests.sample_data_for_outputs import metro_samples +from subways.tests.util import JsonLikeComparisonMixin, TestCase class TestStorage(JsonLikeComparisonMixin, TestCase): diff --git a/tests/util.py b/subways/tests/util.py similarity index 98% rename from tests/util.py rename to subways/tests/util.py index bfc3fd8..73c142c 100644 --- a/tests/util.py +++ b/subways/tests/util.py @@ -4,13 +4,13 @@ from pathlib import Path from typing import Any, TypeAlias, Self from unittest import TestCase as unittestTestCase -from process_subways import ( +from subways.structure.city import City, find_transfers +from subways.subway_io import load_xml +from subways.validation import ( add_osm_elements_to_cities, validate_cities, calculate_centers, ) -from subway_io import load_xml -from subway_structure import City, find_transfers TestCaseMixin: TypeAlias = Self | unittestTestCase diff --git a/subways/types.py b/subways/types.py new file mode 100644 index 0000000..cb1189a --- /dev/null +++ b/subways/types.py @@ -0,0 +1,14 @@ +from typing import TypeAlias + + +OsmElementT: TypeAlias = dict +IdT: TypeAlias = str # Type of feature ids +TransferT: TypeAlias = set[IdT] # A transfer is a set of StopArea IDs +TransfersT: TypeAlias = list[TransferT] +LonLat: TypeAlias = tuple[float, float] +RailT: TypeAlias = list[LonLat] + + +class CriticalValidationError(Exception): + """Is thrown if an error occurs + that prevents further validation of a city.""" diff --git a/subways/validation.py b/subways/validation.py new file mode 100644 index 0000000..67df60e --- /dev/null +++ b/subways/validation.py @@ -0,0 +1,253 @@ +import csv 
+import logging +import urllib.request +from functools import partial + +from subways.structure.city import City +from subways.types import CriticalValidationError, LonLat, OsmElementT + +DEFAULT_SPREADSHEET_ID = "1SEW1-NiNOnA2qDwievcxYV1FOaQl1mb1fdeyqAxHu3k" +DEFAULT_CITIES_INFO_URL = ( + "https://docs.google.com/spreadsheets/d/" + f"{DEFAULT_SPREADSHEET_ID}/export?format=csv" +) +BAD_MARK = "[bad]" + + +def get_way_center( + element: OsmElementT, node_centers: dict[int, LonLat] +) -> LonLat | None: + """ + :param element: dict describing OSM element + :param node_centers: osm_id => LonLat + :return: tuple with center coordinates, or None + """ + + # If elements have been queried via overpass-api with + # 'out center;' clause then ways already have 'center' attribute + if "center" in element: + return element["center"]["lon"], element["center"]["lat"] + + if "nodes" not in element: + return None + + center = [0, 0] + count = 0 + way_nodes = element["nodes"] + way_nodes_len = len(element["nodes"]) + for i, nd in enumerate(way_nodes): + if nd not in node_centers: + continue + # Don't count the first node of a closed way twice + if ( + i == way_nodes_len - 1 + and way_nodes_len > 1 + and way_nodes[0] == way_nodes[-1] + ): + break + center[0] += node_centers[nd][0] + center[1] += node_centers[nd][1] + count += 1 + if count == 0: + return None + element["center"] = {"lat": center[1] / count, "lon": center[0] / count} + return element["center"]["lon"], element["center"]["lat"] + + +def get_relation_center( + element: OsmElementT, + node_centers: dict[int, LonLat], + way_centers: dict[int, LonLat], + relation_centers: dict[int, LonLat], + ignore_unlocalized_child_relations: bool = False, +) -> LonLat | None: + """ + :param element: dict describing OSM element + :param node_centers: osm_id => LonLat + :param way_centers: osm_id => LonLat + :param relation_centers: osm_id => LonLat + :param ignore_unlocalized_child_relations: if a member that is a relation + has no center, skip it and calculate center based on member nodes, + ways and other, "localized" (with known centers), relations + :return: tuple with center coordinates, or None + """ + + # If elements have been queried via overpass-api with + # 'out center;' clause then some relations already have 'center' + # attribute. But this is not the case for relations composed only + # of other relations (e.g., route_master, stop_area_group or + # stop_area with only members that are multipolygons) + if "center" in element: + return element["center"]["lon"], element["center"]["lat"] + + center = [0, 0] + count = 0 + for m in element.get("members", list()): + m_id = m["ref"] + m_type = m["type"] + if m_type == "relation" and m_id not in relation_centers: + if ignore_unlocalized_child_relations: + continue + else: + # Cannot calculate fair center because the center + # of a child relation is not known yet + return None + member_container = ( + node_centers + if m_type == "node" + else way_centers + if m_type == "way" + else relation_centers + ) + if m_id in member_container: + center[0] += member_container[m_id][0] + center[1] += member_container[m_id][1] + count += 1 + if count == 0: + return None + element["center"] = {"lat": center[1] / count, "lon": center[0] / count} + return element["center"]["lon"], element["center"]["lat"] + + +def calculate_centers(elements: list[OsmElementT]) -> None: + """Adds 'center' key to each way/relation in elements, + except for empty ways or relations. + Relies on nodes-ways-relations order in the elements list. 
+ """ + nodes: dict[int, LonLat] = {} # id => LonLat + ways: dict[int, LonLat] = {} # id => approx center LonLat + relations: dict[int, LonLat] = {} # id => approx center LonLat + + unlocalized_relations: list[OsmElementT] = [] # 'unlocalized' means + # the center of the relation has not been calculated yet + + for el in elements: + if el["type"] == "node": + nodes[el["id"]] = (el["lon"], el["lat"]) + elif el["type"] == "way": + if center := get_way_center(el, nodes): + ways[el["id"]] = center + elif el["type"] == "relation": + if center := get_relation_center(el, nodes, ways, relations): + relations[el["id"]] = center + else: + unlocalized_relations.append(el) + + def iterate_relation_centers_calculation( + ignore_unlocalized_child_relations: bool, + ) -> list[OsmElementT]: + unlocalized_relations_upd = [] + for rel in unlocalized_relations: + if center := get_relation_center( + rel, nodes, ways, relations, ignore_unlocalized_child_relations + ): + relations[rel["id"]] = center + else: + unlocalized_relations_upd.append(rel) + return unlocalized_relations_upd + + # Calculate centers for relations that have no one yet + while unlocalized_relations: + unlocalized_relations_upd = iterate_relation_centers_calculation(False) + progress = len(unlocalized_relations_upd) < len(unlocalized_relations) + if not progress: + unlocalized_relations_upd = iterate_relation_centers_calculation( + True + ) + progress = len(unlocalized_relations_upd) < len( + unlocalized_relations + ) + if not progress: + break + unlocalized_relations = unlocalized_relations_upd + + +def add_osm_elements_to_cities( + osm_elements: list[OsmElementT], cities: list[City] +) -> None: + for el in osm_elements: + for c in cities: + if c.contains(el): + c.add(el) + + +def validate_cities(cities: list[City]) -> list[City]: + """Validate cities. 
Return list of good cities.""" + good_cities = [] + for c in cities: + try: + c.extract_routes() + except CriticalValidationError as e: + logging.error( + "Critical validation error while processing %s: %s", + c.name, + e, + ) + c.error(str(e)) + except AssertionError as e: + logging.error( + "Validation logic error while processing %s: %s", + c.name, + e, + ) + c.error(f"Validation logic error: {e}") + else: + c.validate() + if c.is_good: + c.calculate_distances() + good_cities.append(c) + + return good_cities + + +def get_cities_info( + cities_info_url: str = DEFAULT_CITIES_INFO_URL, +) -> list[dict]: + response = urllib.request.urlopen(cities_info_url) + if ( + not cities_info_url.startswith("file://") + and (r_code := response.getcode()) != 200 + ): + raise Exception( + f"Failed to download cities spreadsheet: HTTP {r_code}" + ) + data = response.read().decode("utf-8") + reader = csv.DictReader( + data.splitlines(), + fieldnames=( + "id", + "name", + "country", + "continent", + "num_stations", + "num_lines", + "num_light_lines", + "num_interchanges", + "bbox", + "networks", + ), + ) + + cities_info = list() + names = set() + next(reader) # skipping the header + for city_info in reader: + if city_info["id"] and city_info["bbox"]: + cities_info.append(city_info) + name = city_info["name"].strip() + if name in names: + logging.warning( + "Duplicate city name in city list: %s", + city_info, + ) + names.add(name) + return cities_info + + +def prepare_cities( + cities_info_url: str = DEFAULT_CITIES_INFO_URL, overground: bool = False +) -> list[City]: + if overground: + raise NotImplementedError("Overground transit not implemented yet") + cities_info = get_cities_info(cities_info_url) + return list(map(partial(City, overground=overground), cities_info)) diff --git a/checkers/common.py b/tools/checkers/common.py similarity index 100% rename from checkers/common.py rename to tools/checkers/common.py diff --git a/checkers/compare_city_caches.py b/tools/checkers/compare_city_caches.py similarity index 100% rename from checkers/compare_city_caches.py rename to tools/checkers/compare_city_caches.py diff --git a/checkers/compare_json_outputs.py b/tools/checkers/compare_json_outputs.py similarity index 100% rename from checkers/compare_json_outputs.py rename to tools/checkers/compare_json_outputs.py diff --git a/mapsme_json_to_cities.py b/tools/legacy/mapsme_json_to_cities.py similarity index 89% rename from mapsme_json_to_cities.py rename to tools/legacy/mapsme_json_to_cities.py index 736b74b..65cbf5f 100644 --- a/mapsme_json_to_cities.py +++ b/tools/legacy/mapsme_json_to_cities.py @@ -2,14 +2,18 @@ Generate sorted list of all cities, with [bad] mark for bad cities. !!! Deprecated for use in validation cycle. -Use "process_subways.py --dump-city-list " instead. +Use "scripts/process_subways.py --dump-city-list " instead. """ import argparse import json -from process_subways import BAD_MARK, DEFAULT_CITIES_INFO_URL, get_cities_info +from subways.validation import ( + BAD_MARK, + DEFAULT_CITIES_INFO_URL, + get_cities_info, +) if __name__ == "__main__": @@ -19,7 +23,7 @@ if __name__ == "__main__": used by subway render to generate the list of network at frontend. 
It uses two sources: a mapsme.json validator output with good networks, and a google spreadsheet with networks for the - process_subways.download_cities() function.""" + subways.validation.get_cities_info() function.""" ), formatter_class=argparse.RawTextHelpFormatter, ) diff --git a/make_all_metro_poly.py b/tools/make_poly/make_all_metro_poly.py similarity index 95% rename from make_all_metro_poly.py rename to tools/make_poly/make_all_metro_poly.py index e8450a2..88f9b8a 100644 --- a/make_all_metro_poly.py +++ b/tools/make_poly/make_all_metro_poly.py @@ -3,7 +3,7 @@ import argparse from shapely import unary_union from shapely.geometry import MultiPolygon, Polygon -from process_subways import DEFAULT_CITIES_INFO_URL, get_cities_info +from subways.validation import DEFAULT_CITIES_INFO_URL, get_cities_info def make_disjoint_metro_polygons(cities_info_url: str) -> None: diff --git a/tools/make_poly/tests/__init__.py b/tools/make_poly/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/assets/cities_info_1city.csv b/tools/make_poly/tests/assets/cities_info_1city.csv similarity index 100% rename from tests/assets/cities_info_1city.csv rename to tools/make_poly/tests/assets/cities_info_1city.csv diff --git a/tests/assets/cities_info_2cities.csv b/tools/make_poly/tests/assets/cities_info_2cities.csv similarity index 100% rename from tests/assets/cities_info_2cities.csv rename to tools/make_poly/tests/assets/cities_info_2cities.csv diff --git a/tests/test_make_all_metro_poly.py b/tools/make_poly/tests/test_make_all_metro_poly.py similarity index 94% rename from tests/test_make_all_metro_poly.py rename to tools/make_poly/tests/test_make_all_metro_poly.py index dac8dae..d6df831 100644 --- a/tests/test_make_all_metro_poly.py +++ b/tools/make_poly/tests/test_make_all_metro_poly.py @@ -1,6 +1,6 @@ import contextlib import io -import os +from pathlib import Path from unittest import TestCase from make_all_metro_poly import make_disjoint_metro_polygons @@ -63,9 +63,8 @@ class TestMakeAllMetroPoly(TestCase): def test_make_disjoint_metro_polygons(self) -> None: for case in cases: with self.subTest(msg=case["csv_file"]): - file_url = ( - f"file://{os.getcwd()}/tests/assets/{case['csv_file']}" - ) + assets_dir = Path(__file__).resolve().parent / "assets" + file_url = f"file://{assets_dir}/{case['csv_file']}" stream = io.StringIO() with contextlib.redirect_stdout(stream): make_disjoint_metro_polygons(file_url) diff --git a/stop_areas/make_stop_areas.py b/tools/stop_areas/make_stop_areas.py similarity index 100% rename from stop_areas/make_stop_areas.py rename to tools/stop_areas/make_stop_areas.py diff --git a/stop_areas/make_tram_areas.py b/tools/stop_areas/make_tram_areas.py similarity index 100% rename from stop_areas/make_tram_areas.py rename to tools/stop_areas/make_tram_areas.py diff --git a/stop_areas/requirements.txt b/tools/stop_areas/requirements.txt similarity index 100% rename from stop_areas/requirements.txt rename to tools/stop_areas/requirements.txt diff --git a/stop_areas/serve.py b/tools/stop_areas/serve.py similarity index 100% rename from stop_areas/serve.py rename to tools/stop_areas/serve.py diff --git a/stop_areas/templates/index.html b/tools/stop_areas/templates/index.html similarity index 100% rename from stop_areas/templates/index.html rename to tools/stop_areas/templates/index.html diff --git a/v2h_templates.py b/tools/v2h/v2h_templates.py similarity index 100% rename from v2h_templates.py rename to tools/v2h/v2h_templates.py diff --git 
a/validation_to_html.py b/tools/v2h/validation_to_html.py similarity index 99% rename from validation_to_html.py rename to tools/v2h/validation_to_html.py index 0f9ec3b..42158f8 100755 --- a/validation_to_html.py +++ b/tools/v2h/validation_to_html.py @@ -9,7 +9,7 @@ import re from collections import defaultdict from typing import Any -from process_subways import DEFAULT_SPREADSHEET_ID +from subways.validation import DEFAULT_SPREADSHEET_ID from v2h_templates import ( COUNTRY_CITY, COUNTRY_FOOTER, From 179dcb6a6f3683bd4c1b7bb676a156b4b5509de9 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Wed, 20 Mar 2024 20:24:19 +0300 Subject: [PATCH 10/15] Use virtual environments --- .github/workflows/python-app.yml | 18 ++++++++--- README.md | 6 ++-- scripts/process_subways.sh | 53 +++++++++++++++++++++++--------- scripts/requirements.txt | 1 + subways/tests/README.md | 15 ++++----- tools/make_poly/requirements.txt | 1 + 6 files changed, 63 insertions(+), 31 deletions(-) create mode 100644 scripts/requirements.txt create mode 100644 tools/make_poly/requirements.txt diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 55ce353..37b8af8 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -23,17 +23,25 @@ jobs: uses: actions/setup-python@v3 with: python-version: "3.11" - - name: Install dependencies + - name: Install dependencies for linters run: | python -m pip install --upgrade pip - pip install flake8==6.0.0 black==23.1.0 shapely==2.0.1 - pip install -r subways/requirements.txt + pip install flake8==6.0.0 black==23.1.0 - name: Lint with flake8 run: | flake8 - name: Check with black run: | black --check --line-length 79 . - - name: Test with unittest + - name: Test subways with unittest run: | - python -m unittest discover tests \ No newline at end of file + export PYTHONPATH=$(pwd) + pip freeze | xargs pip uninstall -y + pip install -r subways/requirements.txt + python -m unittest discover subways + - name: Test tools with unittest + run: | + export PYTHONPATH=$(pwd) + pip freeze | xargs pip uninstall -y + pip install -r tools/make_poly/requirements.txt + python -m unittest discover tools/make_poly diff --git a/README.md b/README.md index 157e1ad..516c636 100644 --- a/README.md +++ b/README.md @@ -59,9 +59,11 @@ if you allow the `scripts/process_subway.py` to fetch data from Overpass API. He git clone https://github.com/alexey-zakharenkov/subways.git subways_validator cd subways_validator ``` -3. Install python dependencies +3. Configure python environment, e.g. ```bash - pip install -r subways/requirements.txt + python3 -m venv scripts/.venv + source scripts/.venv/bin/activate + pip install scripts/requirements.txt ``` 4. Execute ```bash diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index 62a45e7..837463a 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -63,6 +63,22 @@ EOF fi +function activate_venv_at_path() { + path=$1 + + if [ ! -d "$path/".venv ]; then + "${PYTHON:-python3.11}" -m venv "$path"/.venv + fi + + source "$path"/.venv/bin/activate + + if [ -f "$path"/requirements.txt ]; then + pip install --upgrade pip + pip install -r "$path"/requirements.txt + fi +} + + function check_osmctools() { OSMCTOOLS="${OSMCTOOLS:-$HOME/osmctools}" if [ ! -f "$OSMCTOOLS/osmupdate" ]; then @@ -91,39 +107,39 @@ function check_poly() { if [ -z "${POLY-}" -o ! 
-f "${POLY-}" ]; then POLY=${POLY:-$(mktemp "$TMPDIR/all-metro.XXXXXXXX.poly")} - if [ -n "$("$PYTHON" -c "import shapely" 2>&1)" ]; then - "$PYTHON" -m pip install shapely==2.0.1 - fi - "$PYTHON" "$SUBWAYS_REPO_PATH"/tools/make_poly/make_all_metro_poly.py \ + activate_venv_at_path "$SUBWAYS_REPO_PATH/tools/make_poly" + python "$SUBWAYS_REPO_PATH"/tools/make_poly/make_all_metro_poly.py \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} > "$POLY" + deactivate fi fi POLY_CHECKED=1 fi } - -PYTHON=${PYTHON:-python3} -# This will fail if there is no python -"$PYTHON" --version > /dev/null - # "readlink -f" echoes canonicalized absolute path to a file/directory SUBWAYS_REPO_PATH="$(readlink -f $(dirname "$0")/..)" if [ ! -f "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ]; then - echo "Please clone the subways repo to $SUBWAYS_PATH" + echo "Please clone the subways repo to $SUBWAYS_REPO_PATH" exit 2 fi -TMPDIR="${TMPDIR:-$SUBWAYS_REPO_PATH}" -mkdir -p "$TMPDIR" +# Contains 'subways' dir and is required by the main validator python script +# as well as by some tools +export PYTHONPATH="$SUBWAYS_REPO_PATH" # Downloading the latest version of the subways script if [ -n "${GIT_PULL-}" ]; then ( - cd "$SUBWAYS_PATH" + pushd "$SUBWAYS_REPO_PATH" git pull origin master + popd ) fi + +TMPDIR="${TMPDIR:-"$SUBWAYS_REPO_PATH"}" +mkdir -p "$TMPDIR" + if [ -z "${FILTERED_DATA-}" ]; then FILTERED_DATA="$TMPDIR/subways.osm" NEED_TO_REMOVE_FILTERED_DATA=1 @@ -244,7 +260,9 @@ if [ -n "${DUMP-}" ]; then fi VALIDATION="$TMPDIR/validation.json" -"$PYTHON" "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ${QUIET:+-q} \ + +activate_venv_at_path "$SUBWAYS_REPO_PATH/scripts" +python "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ${QUIET:+-q} \ -x "$FILTERED_DATA" -l "$VALIDATION" \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ ${MAPSME:+--output-mapsme "$MAPSME"} \ @@ -256,6 +274,8 @@ VALIDATION="$TMPDIR/validation.json" ${ELEMENTS_CACHE:+-i "$ELEMENTS_CACHE"} \ ${CITY_CACHE:+--cache "$CITY_CACHE"} \ ${RECOVERY_PATH:+-r "$RECOVERY_PATH"} +deactivate + if [ -n "${NEED_TO_REMOVE_FILTERED_DATA-}" ]; then rm "$FILTERED_DATA" @@ -270,9 +290,12 @@ fi mkdir -p $HTML_DIR rm -f "$HTML_DIR"/*.html -"$PYTHON" "$SUBWAYS_REPO_PATH/tools/v2h/validation_to_html.py" \ + +activate_venv_at_path "$SUBWAYS_REPO_PATH/tools/v2h" +python "$SUBWAYS_REPO_PATH/tools/v2h/validation_to_html.py" \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ "$VALIDATION" "$HTML_DIR" +deactivate # Uploading files to the server diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000..1f71eee --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1 @@ +-r ../subways/requirements.txt diff --git a/subways/tests/README.md b/subways/tests/README.md index d6da466..4423809 100644 --- a/subways/tests/README.md +++ b/subways/tests/README.md @@ -1,13 +1,10 @@ -To perform tests manually, run this command from the top directory +To perform tests, run this command from the top directory of the repository: ```bash -python -m unittest discover tests +export PYTHONPATH=$(pwd) +[ -d "subways/tests/.venv" ] || python3 -m venv subways/tests/.venv +source subways/tests/.venv/bin/activate +pip install -r subways/requirements.txt +python -m unittest discover subways ``` - -or simply - -```bash -python -m unittest -``` - diff --git a/tools/make_poly/requirements.txt b/tools/make_poly/requirements.txt new file mode 100644 index 0000000..67ee66c --- /dev/null +++ b/tools/make_poly/requirements.txt @@ -0,0 +1 @@ 
+shapely==2.0.1 From 6a4c2a255f24862a7cdf4519973076e0e289b7f4 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Thu, 21 Mar 2024 13:12:05 +0300 Subject: [PATCH 11/15] Remove obsolete scripts --- README.md | 2 +- scripts/build_city.sh | 16 ------- scripts/build_trams.sh | 16 ------- scripts/process_trams.sh | 94 ---------------------------------------- 4 files changed, 1 insertion(+), 127 deletions(-) delete mode 100755 scripts/build_city.sh delete mode 100755 scripts/build_trams.sh delete mode 100755 scripts/process_trams.sh diff --git a/README.md b/README.md index 516c636..44e4742 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Subway Preprocessor Here you see a list of scripts that can be used for preprocessing all the metro -systems in the world from OpenStreetMap. `scripts/subway_structure.py` produces +systems in the world from OpenStreetMap. `subways` package produces a list of disjunct systems that can be used for routing and for displaying of metro maps. diff --git a/scripts/build_city.sh b/scripts/build_city.sh deleted file mode 100755 index 6b8d8af..0000000 --- a/scripts/build_city.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -set -e -u -[ $# -lt 1 ] && echo "Usage: $0 [ []]" && exit 1 - -export OSMCTOOLS="${OSMCTOOLS:-$HOME/osm/planet}" -export DUMP=html -export JSON=html -if [ -n "${2-}" ]; then - export CITY="$2" -fi -if [ -n "${3-}" ]; then - export BBOX="$3" -elif [ -n "${CITY-}" ]; then - export BBOX="$(python3 -c 'import subway_structure; c = [x for x in subway_structure.download_cities() if x.name == "'"$CITY"'"]; print("{1},{0},{3},{2}".format(*c[0].bbox))')" || true -fi -"$(dirname "$0")/process_subways.sh" "$1" diff --git a/scripts/build_trams.sh b/scripts/build_trams.sh deleted file mode 100755 index 6b62d24..0000000 --- a/scripts/build_trams.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -set -e -u -[ $# -lt 1 ] && echo "Usage: $0 [ []]" && exit 1 - -export OSMCTOOLS="${OSMCTOOLS:-$HOME/osm/planet}" -export DUMP=html -export JSON=html -if [ -n "${2-}" ]; then - export CITY="$2" -fi -if [ -n "${3-}" ]; then - export BBOX="$3" -elif [ -n "${CITY-}" ]; then - export BBOX="$(python3 -c 'import subway_structure; c = [x for x in subway_structure.download_cities(True) if x.name == "'"$CITY"'"]; print("{1},{0},{3},{2}".format(*c[0].bbox))')" || true -fi -"$(dirname "$0")/process_trams.sh" "$1" diff --git a/scripts/process_trams.sh b/scripts/process_trams.sh deleted file mode 100755 index 84e007a..0000000 --- a/scripts/process_trams.sh +++ /dev/null @@ -1,94 +0,0 @@ -#!/bin/bash -set -e -u - -if [ $# -lt 1 -a -z "${PLANET-}" ]; then - echo "This script updates a planet or an extract, processes tram networks in it" - echo "and produses a set of HTML files with validation results." - echo - echo "Usage: $0 " - echo - echo "Variable reference:" - echo "- PLANET: path for the source o5m file (the entire planet or an extract)" - echo "- CITY: name of a city to process" - echo "- BBOX: bounding box of an extract; x1,y1,x2,y2" - echo "- DUMP: file name to dump city data" - echo "- MAPSME: file name for maps.me json output" - echo "- OSMCTOOLS: path to osmconvert and osmupdate binaries" - echo "- PYTHON: python 3 executable" - echo "- GIT_PULL: set to 1 to update the scripts" - echo "- TMPDIR: path to temporary files" - echo "- HTML_DIR: target path for generated HTML files" - echo "- SERVER: server name and path to upload HTML files (e.g. 
ilya@osmz.ru:/var/www/)" - echo "- SERVER_KEY: rsa key to supply for uploading the files" - echo "- REMOVE_HTML: set to 1 to remove HTML_DIR after uploading" - exit 1 -fi - -[ -n "${WHAT-}" ] && echo WHAT - -PLANET="${PLANET:-${1-}}" -[ ! -f "$PLANET" ] && echo "Cannot find planet file $PLANET" && exit 2 -OSMCTOOLS="${OSMCTOOLS:-$HOME/osmctools}" -if [ ! -f "$OSMCTOOLS/osmupdate" ]; then - if which osmupdate > /dev/null; then - OSMCTOOLS="$(dirname "$(which osmupdate)")" - else - echo "Please compile osmctools to $OSMCTOOLS" - exit 1 - fi -fi -PYTHON=${PYTHON:-python3} -# This will fail if there is no python -"$PYTHON" --version > /dev/null -SUBWAYS_PATH="$(dirname "$0")/.." -[ ! -f "$SUBWAYS_PATH/process_subways.py" ] && echo "Please clone the subways repo to $SUBWAYS_PATH" && exit 2 -TMPDIR="${TMPDIR:-$SUBWAYS_PATH}" - -# Downloading the latest version of the subways script - - -if [ -n "${GIT_PULL-}" ]; then ( - cd "$SUBWAYS_PATH" - git pull origin master -) fi - - -# Updating the planet file - -PLANET_ABS="$(cd "$(dirname "$PLANET")"; pwd)/$(basename "$PLANET")" -( - cd "$OSMCTOOLS" # osmupdate requires osmconvert in a current directory - ./osmupdate --drop-author --out-o5m "$PLANET_ABS" ${BBOX+"-b=$BBOX"} "$PLANET_ABS.new.o5m" && mv "$PLANET_ABS.new.o5m" "$PLANET_ABS" || true -) - -# Filtering it - -FILTERED_DATA="$TMPDIR/subways.osm" -QRELATIONS="route=tram route_master=tram public_transport=stop_area =stop_area_group" -QNODES="railway=tram_stop railway=subway_entrance tram=yes" -"$OSMCTOOLS/osmfilter" "$PLANET" --keep= --keep-relations="$QRELATIONS" --keep-nodes="$QNODES" --drop-author "-o=$FILTERED_DATA" - -# Running the validation - -VALIDATION="$TMPDIR/validation.json" -"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -t -q -x "$FILTERED_DATA" -l "$VALIDATION" ${MAPSME+-o "$MAPSME"} ${CITY+-c "$CITY"} ${DUMP+-d "$DUMP"} ${JSON+-j "$JSON"} -rm "$FILTERED_DATA" - -# Preparing HTML files - -if [ -z "${HTML_DIR-}" ]; then - HTML_DIR="$SUBWAYS_PATH/html" - REMOVE_HTML=1 -fi - -mkdir -p $HTML_DIR -rm -f "$HTML_DIR"/*.html -"$PYTHON" "$SUBWAYS_PATH/validation_to_html.py" "$VALIDATION" "$HTML_DIR" -rm "$VALIDATION" - -# Uploading files to the server - -if [ -n "${SERVER-}" ]; then - scp -q ${SERVER_KEY+-i "$SERVER_KEY"} "$HTML_DIR"/* "$SERVER" - [ -n "${REMOVE_HTML-}" ] && rm -r "$HTML_DIR" -fi From aff6a9f129c09cc046176358c40250fc61765e43 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Thu, 4 Apr 2024 12:48:29 +0300 Subject: [PATCH 12/15] Process route duration (average vehicle speed) --- subways/osm_element.py | 7 + subways/processors/_common.py | 2 + subways/processors/gtfs.py | 14 ++ subways/processors/mapsme.py | 4 +- subways/structure/city.py | 26 ++-- subways/structure/route.py | 137 ++++++++++------- subways/structure/route_master.py | 33 ++-- subways/tests/assets/tiny_world.osm | 9 +- .../tests/assets/tiny_world_gtfs/trips.txt | 14 +- subways/tests/sample_data_for_outputs.py | 6 + subways/tests/test_route.py | 141 ++++++++++++++++++ 11 files changed, 303 insertions(+), 90 deletions(-) create mode 100644 subways/tests/test_route.py diff --git a/subways/osm_element.py b/subways/osm_element.py index 5ea8bc4..19861da 100644 --- a/subways/osm_element.py +++ b/subways/osm_element.py @@ -17,3 +17,10 @@ def el_center(el: OsmElementT) -> LonLat | None: elif "center" in el: return el["center"]["lon"], el["center"]["lat"] return None + + +def get_network(relation: OsmElementT) -> str | None: + for k in ("network:metro", "network", "operator"): + if k in relation["tags"]: + return 
relation["tags"][k] + return None diff --git a/subways/processors/_common.py b/subways/processors/_common.py index 1d58da4..5565894 100644 --- a/subways/processors/_common.py +++ b/subways/processors/_common.py @@ -10,6 +10,7 @@ if typing.TYPE_CHECKING: DEFAULT_INTERVAL = 2.5 * 60 # seconds KMPH_TO_MPS = 1 / 3.6 # km/h to m/s conversion multiplier +DEFAULT_AVE_VEHICLE_SPEED = 40 * KMPH_TO_MPS # m/s SPEED_ON_TRANSFER = 3.5 * KMPH_TO_MPS # m/s TRANSFER_PENALTY = 30 # seconds @@ -52,6 +53,7 @@ def transit_to_dict(cities: list[City], transfers: TransfersT) -> dict: "start_time": route.start_time, "end_time": route.end_time, "interval": route.interval, + "duration": route.duration, "stops": [ { "stoparea_id": route_stop.stoparea.id, diff --git a/subways/processors/gtfs.py b/subways/processors/gtfs.py index 3722815..df70cc7 100644 --- a/subways/processors/gtfs.py +++ b/subways/processors/gtfs.py @@ -9,8 +9,10 @@ from tarfile import TarFile, TarInfo from zipfile import ZipFile from ._common import ( + DEFAULT_AVE_VEHICLE_SPEED, DEFAULT_INTERVAL, format_colour, + KMPH_TO_MPS, SPEED_ON_TRANSFER, TRANSFER_PENALTY, transit_to_dict, @@ -63,6 +65,7 @@ GTFS_COLUMNS = { "trip_route_type", "route_pattern_id", "bikes_allowed", + "average_speed", # extension field (km/h) ], "stops": [ "stop_id", @@ -242,11 +245,22 @@ def transit_data_to_gtfs(data: dict) -> dict: for itinerary in route_master["itineraries"]: shape_id = itinerary["id"][1:] # truncate leading 'r' + average_speed = round( + ( + DEFAULT_AVE_VEHICLE_SPEED + if not itinerary["duration"] + else itinerary["stops"][-1]["distance"] + / itinerary["duration"] + ) + / KMPH_TO_MPS, + 1, + ) # km/h trip = { "trip_id": itinerary["id"], "route_id": route_master["id"], "service_id": "always", "shape_id": shape_id, + "average_speed": average_speed, } gtfs_data["trips"].append(trip) diff --git a/subways/processors/mapsme.py b/subways/processors/mapsme.py index e176832..32f5b69 100755 --- a/subways/processors/mapsme.py +++ b/subways/processors/mapsme.py @@ -14,6 +14,7 @@ from subways.osm_element import el_center from subways.structure.station import Station from subways.types import IdT, LonLat, OsmElementT, TransfersT from ._common import ( + DEFAULT_AVE_VEHICLE_SPEED, DEFAULT_INTERVAL, format_colour, KMPH_TO_MPS, @@ -29,7 +30,6 @@ if typing.TYPE_CHECKING: OSM_TYPES = {"n": (0, "node"), "w": (2, "way"), "r": (3, "relation")} ENTRANCE_PENALTY = 60 # seconds SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS # m/s -SPEED_ON_LINE = 40 * KMPH_TO_MPS # m/s # (stoparea1_uid, stoparea2_uid) -> seconds; stoparea1_uid < stoparea2_uid TransferTimesT: TypeAlias = dict[tuple[int, int], int] @@ -258,7 +258,7 @@ def transit_data_to_mapsme( itin.append( [ uid(stop.stoparea.id), - round(stop.distance / SPEED_ON_LINE), + round(stop.distance / DEFAULT_AVE_VEHICLE_SPEED), ] ) # Make exits from platform nodes, diff --git a/subways/structure/city.py b/subways/structure/city.py index 441c08b..480a0fd 100644 --- a/subways/structure/city.py +++ b/subways/structure/city.py @@ -8,7 +8,7 @@ from subways.consts import ( DEFAULT_MODES_OVERGROUND, DEFAULT_MODES_RAPID, ) -from subways.osm_element import el_center, el_id +from subways.osm_element import el_center, el_id, get_network from subways.structure.route import Route from subways.structure.route_master import RouteMaster from subways.structure.station import Station @@ -287,11 +287,11 @@ class City: if el["tags"].get("access") in ("no", "private"): continue route_id = el_id(el) - master = self.masters.get(route_id, None) + master_element = 
self.masters.get(route_id, None) if self.networks: - network = Route.get_network(el) - if master: - master_network = Route.get_network(master) + network = get_network(el) + if master_element: + master_network = get_network(master_element) else: master_network = None if ( @@ -300,7 +300,7 @@ class City: ): continue - route = self.route_class(el, self, master) + route = self.route_class(el, self, master_element) if not route.stops: self.warn("Route has no stops", el) continue @@ -308,15 +308,11 @@ class City: self.warn("Route has only one stop", el) continue - k = el_id(master) if master else route.ref - if k not in self.routes: - self.routes[k] = RouteMaster(self, master) - self.routes[k].add(route) - - # Sometimes adding a route to a newly initialized RouteMaster - # can fail - if len(self.routes[k]) == 0: - del self.routes[k] + master_id = el_id(master_element) or route.ref + route_master = self.routes.setdefault( + master_id, RouteMaster(self, master_element) + ) + route_master.add(route) # And while we're iterating over relations, find interchanges if ( diff --git a/subways/structure/route.py b/subways/structure/route.py index 926733e..f2ff3c3 100644 --- a/subways/structure/route.py +++ b/subways/structure/route.py @@ -2,7 +2,7 @@ from __future__ import annotations import re import typing -from collections.abc import Callable, Iterator +from collections.abc import Callable, Collection, Iterator from itertools import islice from subways.consts import ( @@ -18,7 +18,7 @@ from subways.geom_utils import ( find_segment, project_on_line, ) -from subways.osm_element import el_id, el_center +from subways.osm_element import el_id, el_center, get_network from subways.structure.route_stop import RouteStop from subways.structure.station import Station from subways.structure.stop_area import StopArea @@ -33,24 +33,29 @@ ALLOWED_ANGLE_BETWEEN_STOPS = 45 # in degrees DISALLOWED_ANGLE_BETWEEN_STOPS = 20 # in degrees -def get_start_end_times( +def parse_time_range( opening_hours: str, -) -> tuple[tuple[int, int], tuple[int, int]] | tuple[None, None]: +) -> tuple[tuple[int, int], tuple[int, int]] | None: """Very simplified method to parse OSM opening_hours tag. We simply take the first HH:MM-HH:MM substring which is the most probable opening hours interval for the most of the weekdays. 
""" - start_time, end_time = None, None + if opening_hours == "24/7": + return (0, 0), (24, 0) + m = START_END_TIMES_RE.match(opening_hours) - if m: - ints = tuple(map(int, m.groups())) - start_time = (ints[0], ints[1]) - end_time = (ints[2], ints[3]) + if not m: + return None + ints = tuple(map(int, m.groups())) + if ints[1] > 59 or ints[3] > 59: + return None + start_time = (ints[0], ints[1]) + end_time = (ints[2], ints[3]) return start_time, end_time def osm_interval_to_seconds(interval_str: str) -> int | None: - """Convert to int an OSM value for 'interval'/'headway' tag + """Convert to int an OSM value for 'interval'/'headway'/'duration' tag which may be in these formats: HH:MM:SS, HH:MM, @@ -71,7 +76,54 @@ def osm_interval_to_seconds(interval_str: str) -> int | None: return None except ValueError: return None - return seconds + 60 * minutes + 60 * 60 * hours + + if seconds < 0 or minutes < 0 or hours < 0: + return None + if semicolon_count > 0 and (seconds >= 60 or minutes >= 60): + return None + + interval = seconds + 60 * minutes + 60 * 60 * hours + if interval == 0: + return None + return interval + + +def get_interval_in_seconds_from_tags( + tags: dict, keys: str | Collection[str] +) -> int | None: + """Extract time interval value from tags for keys among "keys". + E.g., "interval" and "headway" means the same in OSM. + Examples: + interval=5 => 300 + headway:peak=00:01:30 => 90 + """ + if isinstance(keys, str): + keys = (keys,) + + value = None + for key in keys: + if key in tags: + value = tags[key] + break + if value is None: + for key in keys: + if value: + break + for tag_name in tags: + if tag_name.startswith(key + ":"): + value = tags[tag_name] + break + if not value: + return None + return osm_interval_to_seconds(value) + + +def get_route_interval(tags: dict) -> int | None: + return get_interval_in_seconds_from_tags(tags, ("interval", "headway")) + + +def get_route_duration(tags: dict) -> int | None: + return get_interval_in_seconds_from_tags(tags, "duration") class Route: @@ -95,29 +147,6 @@ class Route: return False return True - @staticmethod - def get_network(relation: OsmElementT) -> str | None: - for k in ("network:metro", "network", "operator"): - if k in relation["tags"]: - return relation["tags"][k] - return None - - @staticmethod - def get_interval(tags: dict) -> int | None: - v = None - for k in ("interval", "headway"): - if k in tags: - v = tags[k] - break - else: - for kk in tags: - if kk.startswith(k + ":"): - v = tags[kk] - break - if not v: - return None - return osm_interval_to_seconds(v) - def stopareas(self) -> Iterator[StopArea]: yielded_stopareas = set() for route_stop in self: @@ -146,6 +175,7 @@ class Route: self.infill = None self.network = None self.interval = None + self.duration = None self.start_time = None self.end_time = None self.is_circular = False @@ -319,46 +349,51 @@ class Route: def process_tags(self, master: OsmElementT) -> None: relation = self.element + tags = relation["tags"] master_tags = {} if not master else master["tags"] - if "ref" not in relation["tags"] and "ref" not in master_tags: + if "ref" not in tags and "ref" not in master_tags: self.city.notice("Missing ref on a route", relation) - self.ref = relation["tags"].get( - "ref", master_tags.get("ref", relation["tags"].get("name", None)) + self.ref = tags.get( + "ref", master_tags.get("ref", tags.get("name", None)) ) - self.name = relation["tags"].get("name", None) - self.mode = relation["tags"]["route"] + self.name = tags.get("name", None) + self.mode = tags["route"] if ( - 
"colour" not in relation["tags"] + "colour" not in tags and "colour" not in master_tags and self.mode != "tram" ): self.city.notice("Missing colour on a route", relation) try: self.colour = normalize_colour( - relation["tags"].get("colour", master_tags.get("colour", None)) + tags.get("colour", master_tags.get("colour", None)) ) except ValueError as e: self.colour = None self.city.warn(str(e), relation) try: self.infill = normalize_colour( - relation["tags"].get( + tags.get( "colour:infill", master_tags.get("colour:infill", None) ) ) except ValueError as e: self.infill = None self.city.warn(str(e), relation) - self.network = Route.get_network(relation) - self.interval = Route.get_interval( - relation["tags"] - ) or Route.get_interval(master_tags) - self.start_time, self.end_time = get_start_end_times( - relation["tags"].get( - "opening_hours", master_tags.get("opening_hours", "") - ) + self.network = get_network(relation) + self.interval = get_route_interval(tags) or get_route_interval( + master_tags ) - if relation["tags"].get("public_transport:version") == "1": + self.duration = get_route_duration(tags) or get_route_duration( + master_tags + ) + parsed_time_range = parse_time_range( + tags.get("opening_hours", master_tags.get("opening_hours", "")) + ) + if parsed_time_range: + self.start_time, self.end_time = parsed_time_range + + if tags.get("public_transport:version") == "1": self.city.warn( "Public transport version is 1, which means the route " "is an unsorted pile of objects", diff --git a/subways/structure/route_master.py b/subways/structure/route_master.py index 36ab148..891ae20 100644 --- a/subways/structure/route_master.py +++ b/subways/structure/route_master.py @@ -7,8 +7,8 @@ from typing import TypeVar from subways.consts import MAX_DISTANCE_STOP_TO_LINE from subways.css_colours import normalize_colour from subways.geom_utils import distance, project_on_line -from subways.osm_element import el_id -from subways.structure.route import Route +from subways.osm_element import el_id, get_network +from subways.structure.route import get_route_duration, get_route_interval from subways.structure.stop_area import StopArea from subways.types import IdT, OsmElementT @@ -26,7 +26,7 @@ class RouteMaster: def __init__(self, city: City, master: OsmElementT = None) -> None: self.city = city self.routes = [] - self.best: Route = None + self.best: Route = None # noqa: F821 self.id: IdT = el_id(master) self.has_master = master is not None self.interval_from_master = False @@ -46,13 +46,14 @@ class RouteMaster: ) except ValueError: self.infill = None - self.network = Route.get_network(master) + self.network = get_network(master) self.mode = master["tags"].get( "route_master", None ) # This tag is required, but okay self.name = master["tags"].get("name", None) - self.interval = Route.get_interval(master["tags"]) + self.interval = get_route_interval(master["tags"]) self.interval_from_master = self.interval is not None + self.duration = get_route_duration(master["tags"]) else: self.ref = None self.colour = None @@ -61,6 +62,7 @@ class RouteMaster: self.mode = None self.name = None self.interval = None + self.duration = None def stopareas(self) -> Iterator[StopArea]: yielded_stopareas = set() @@ -70,7 +72,7 @@ class RouteMaster: yield stoparea yielded_stopareas.add(stoparea) - def add(self, route: Route) -> None: + def add(self, route: Route) -> None: # noqa: F821 if not self.network: self.network = route.network elif route.network and route.network != self.network: @@ -148,10 +150,10 @@ class 
RouteMaster: ): self.best = route - def get_meaningful_routes(self) -> list[Route]: + def get_meaningful_routes(self) -> list[Route]: # noqa: F821 return [route for route in self if len(route) >= 2] - def find_twin_routes(self) -> dict[Route, Route]: + def find_twin_routes(self) -> dict[Route, Route]: # noqa: F821 """Two non-circular routes are twins if they have the same end stations and opposite directions, and the number of stations is the same or almost the same. We'll then find stops that are present @@ -325,7 +327,11 @@ class RouteMaster: break return common_subsequence - def alert_twin_routes_differ(self, route1: Route, route2: Route) -> None: + def alert_twin_routes_differ( + self, + route1: Route, # noqa: F821 + route2: Route, # noqa: F821 + ) -> None: """Arguments are that route1.id < route2.id""" ( stops_missing_from_route1, @@ -382,7 +388,10 @@ class RouteMaster: ) @staticmethod - def calculate_twin_routes_diff(route1: Route, route2: Route) -> tuple: + def calculate_twin_routes_diff( + route1: Route, # noqa: F821 + route2: Route, # noqa: F821 + ) -> tuple: """Wagner–Fischer algorithm for stops diff in two twin routes.""" stops1 = route1.stops @@ -450,10 +459,10 @@ class RouteMaster: def __len__(self) -> int: return len(self.routes) - def __getitem__(self, i) -> Route: + def __getitem__(self, i) -> Route: # noqa: F821 return self.routes[i] - def __iter__(self) -> Iterator[Route]: + def __iter__(self) -> Iterator[Route]: # noqa: F821 return iter(self.routes) def __repr__(self) -> str: diff --git a/subways/tests/assets/tiny_world.osm b/subways/tests/assets/tiny_world.osm index 276fb80..4cd0631 100644 --- a/subways/tests/assets/tiny_world.osm +++ b/subways/tests/assets/tiny_world.osm @@ -187,9 +187,10 @@ + + - @@ -198,6 +199,7 @@ + @@ -208,6 +210,7 @@ + @@ -217,18 +220,18 @@ + - + - diff --git a/subways/tests/assets/tiny_world_gtfs/trips.txt b/subways/tests/assets/tiny_world_gtfs/trips.txt index 41da841..8061559 100644 --- a/subways/tests/assets/tiny_world_gtfs/trips.txt +++ b/subways/tests/assets/tiny_world_gtfs/trips.txt @@ -1,7 +1,7 @@ -route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,trip_route_type,route_pattern_id,bikes_allowed -r15,always,r7,,,,,7,,,, -r15,always,r8,,,,,8,,,, -r14,always,r12,,,,,12,,,, -r14,always,r13,,,,,13,,,, -r11,always,r9,,,,,9,,,, -r11,always,r10,,,,,10,,,, +route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,trip_route_type,route_pattern_id,bikes_allowed,average_speed +r15,always,r7,,,,,7,,,,,40.0 +r15,always,r8,,,,,8,,,,,40.0 +r14,always,r12,,,,,12,,,,,9.4 +r14,always,r13,,,,,13,,,,,11.8 +r11,always,r9,,,,,9,,,,,6.5 +r11,always,r10,,,,,10,,,,,6.5 diff --git a/subways/tests/sample_data_for_outputs.py b/subways/tests/sample_data_for_outputs.py index b50ddbe..fd2cf43 100644 --- a/subways/tests/sample_data_for_outputs.py +++ b/subways/tests/sample_data_for_outputs.py @@ -163,6 +163,7 @@ metro_samples = [ "start_time": null, "end_time": null, "interval": null, + "duration": null, "stops": [ { "stoparea_id": "n1", @@ -197,6 +198,7 @@ metro_samples = [ "start_time": null, "end_time": null, "interval": null, + "duration": null, "stops": [ { "stoparea_id": "r3", @@ -237,6 +239,7 @@ metro_samples = [ "start_time": null, "end_time": null, "interval": null, + "duration": 600, "stops": [ { "stoparea_id": "n4", @@ -267,6 +270,7 @@ metro_samples = [ "start_time": null, "end_time": null, "interval": null, + "duration": 480, "stops": [ { 
"stoparea_id": "n6", @@ -313,6 +317,7 @@ metro_samples = [ "start_time": null, "end_time": null, "interval": null, + "duration": 300, "stops": [ { "stoparea_id": "r4", @@ -339,6 +344,7 @@ metro_samples = [ "start_time": null, "end_time": null, "interval": null, + "duration": 300, "stops": [ { "stoparea_id": "r16", diff --git a/subways/tests/test_route.py b/subways/tests/test_route.py new file mode 100644 index 0000000..ec82e41 --- /dev/null +++ b/subways/tests/test_route.py @@ -0,0 +1,141 @@ +from unittest import TestCase + +from subways.structure.route import ( + get_interval_in_seconds_from_tags, + osm_interval_to_seconds, + parse_time_range, +) + + +class TestTimeIntervalsParsing(TestCase): + def test__osm_interval_to_seconds__invalid_value(self) -> None: + intervals = ( + ["", "abc", "x30", "30x", "3x0"] + + ["5:", ":5", "01:05:", ":01:05", "01:01:00:", ":01:01:00"] + + ["01x:05", "01:x5", "x5:01:00", "01:0x:00", "01:01:x"] + + ["-5", "01:-05", "-01:05", "-01:00:00", "01:-01:00", "01:01:-01"] + + ["0", "00:00", "00:00:00"] + + ["00:60", "01:00:60", "01:60:00"] + + ["01:60:61", "01:61:60", "01:61:61"] + ) + for interval in intervals: + with self.subTest(msg=f"value='{interval}'"): + self.assertIsNone(osm_interval_to_seconds(interval)) + + def test__osm_interval_to_seconds__valid_value(self) -> None: + intervals = { + "5": 300, + "65": 3900, + "10:55": 39300, + "02:02:02": 7322, + "2:2:2": 7322, + "00:59": 3540, + "01:00": 3600, + "00:00:50": 50, + "00:10:00": 600, + "01:00:00": 3600, + } + + for interval_str, interval_sec in intervals.items(): + with self.subTest(msg=f"value='{interval_str}'"): + self.assertEqual( + interval_sec, osm_interval_to_seconds(interval_str) + ) + + def test__parse_time_range__invalid_values(self) -> None: + ranges = ( + ["", "a", "ab:cd-ab:cd", "1", "1-2", "01-02"] + + ["24/8", "24/7/365"] + + ["1:00-02:00", "01:0-02:00", "01:00-2:00", "01:00-02:0"] + + ["1x:00-02:00", "01:0x-02:00", "01:00-1x:00", "01:00-02:ab"] + + ["-1:00-02:00", "01:-1-02:00", "01:00--2:00", "01:00-02:-1"] + + ["01;00-02:00", "01:00-02;00", "01:00=02:00"] + + ["01:00-#02:00", "01:00 - 02:00"] + + ["01:60-02:05", "01:00-01:61"] + ) + for r in ranges: + with self.subTest(msg=f"value='{r}'"): + self.assertIsNone(parse_time_range(r)) + + def test__parse_time_range__valid_values(self) -> None: + ranges = ( + ["24/7"] + + ["00:00-00:00", "00:01-00:02"] + + ["01:00-02:00", "02:01-01:02"] + + ["02:00-26:59", "12:01-13:59"] + + ["Mo-Fr 06:00-21:30", "06:00-21:30 (weekdays)"] + + ["Mo-Fr 06:00-21:00; Sa-Su 07:00-20:00"] + ) + answers = [ + ((0, 0), (24, 0)), + ((0, 0), (0, 0)), + ((0, 1), (0, 2)), + ((1, 0), (2, 0)), + ((2, 1), (1, 2)), + ((2, 0), (26, 59)), + ((12, 1), (13, 59)), + ((6, 0), (21, 30)), + ((6, 0), (21, 30)), + ((6, 0), (21, 0)), + ] + + for r, answer in zip(ranges, answers): + with self.subTest(msg=f"value='{r}'"): + self.assertTupleEqual(answer, parse_time_range(r)) + + +class TestRouteIntervals(TestCase): + def test__get_interval_in_seconds_from_tags__one_key(self) -> None: + cases = [ + {"tags": {}, "answer": None}, + {"tags": {"a": "1"}, "answer": None}, + {"tags": {"duration": "1"}, "answer": 60}, + {"tags": {"durationxxx"}, "answer": None}, + {"tags": {"xxxduration"}, "answer": None}, + # prefixes not considered + {"tags": {"ru:duration"}, "answer": None}, + # suffixes considered + {"tags": {"duration:peak": "1"}, "answer": 60}, + # bare tag has precedence over suffixed version + {"tags": {"duration:peak": "1", "duration": "2"}, "answer": 120}, + # first suffixed version apply 
+ {"tags": {"duration:y": "1", "duration:x": "2"}, "answer": 60}, + # other tags present + {"tags": {"a": "x", "duration": "1", "b": "y"}, "answer": 60}, + ] + + for case in cases: + with self.subTest(msg=f"{case['tags']}"): + self.assertEqual( + case["answer"], + get_interval_in_seconds_from_tags( + case["tags"], "duration" + ), + ) + + def test__get_interval_in_seconds_from_tags__several_keys(self) -> None: + keys = ("interval", "headway") + cases = [ + {"tags": {}, "answer": None}, + # prefixes not considered + {"tags": {"ru:interval"}, "answer": None}, + {"tags": {"interval": "1"}, "answer": 60}, + {"tags": {"headway": "1"}, "answer": 60}, + {"tags": {"interval": "1", "headway": "2"}, "answer": 60}, + # interval has precedence due to its position in 'keys' + {"tags": {"headway": "2", "interval": "1"}, "answer": 60}, + # non-suffixed keys has precedence + {"tags": {"interval:peak": "1", "headway": "2"}, "answer": 120}, + # among suffixed versions, first key in 'keys' is used first + { + "tags": {"headway:peak": "2", "interval:peak": "1"}, + "answer": 60, + }, + ] + + for case in cases: + with self.subTest(msg=f"{case['tags']}"): + self.assertEqual( + case["answer"], + get_interval_in_seconds_from_tags(case["tags"], keys), + ) From a0a6fb043f177d29e734ec388f51fdffe18af4ce Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Fri, 21 Jun 2024 13:29:51 +0300 Subject: [PATCH 13/15] Upgrade shapely to 2.0.4 and fixate indirect requirements --- tools/make_poly/requirements.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/make_poly/requirements.txt b/tools/make_poly/requirements.txt index 67ee66c..03bc2d9 100644 --- a/tools/make_poly/requirements.txt +++ b/tools/make_poly/requirements.txt @@ -1 +1,4 @@ -shapely==2.0.1 +shapely==2.0.4 + +# Fixate versions of indirect requirements +NumPy==2.0.0 From 6b54654a50b2c1bf46122a574181ea6b6b48acbf Mon Sep 17 00:00:00 2001 From: Ilya Zverev Date: Mon, 9 Dec 2024 17:43:39 +0200 Subject: [PATCH 14/15] Switch osmctools to osmium --- .gitignore | 2 + README.md | 16 ++++---- scripts/process_subways.sh | 76 +++++++++++++++++--------------------- scripts/requirements.txt | 1 + subways/requirements.txt | 2 +- 5 files changed, 45 insertions(+), 52 deletions(-) diff --git a/.gitignore b/.gitignore index 129911a..f5a8568 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,5 @@ html/ *.pyc *.txt *.zip +*.osm.pbf +*.o5m diff --git a/README.md b/README.md index 44e4742..4251a7c 100644 --- a/README.md +++ b/README.md @@ -34,8 +34,7 @@ for details. Here is an example of the script usage: ```bash export PLANET=https://ftp5.gwdg.de/pub/misc/openstreetmap/planet.openstreetmap.org/pbf/planet-latest.osm.pbf -export PLANET_METRO="$HOME/metro/planet-metro.o5m -export OSMCTOOLS="$HOME/osmctools" +export PLANET_METRO="$HOME/metro/planet-metro.osm.pbf export TMPDIR="$HOME/metro/tmp" export HTML_DIR="$HOME/metro/tmp_html" export DUMP="$HTML_DIR" @@ -60,14 +59,15 @@ if you allow the `scripts/process_subway.py` to fetch data from Overpass API. He cd subways_validator ``` 3. Configure python environment, e.g. - ```bash - python3 -m venv scripts/.venv - source scripts/.venv/bin/activate - pip install scripts/requirements.txt - ``` + ```bash + python3 -m venv scripts/.venv + source scripts/.venv/bin/activate + pip install -r scripts/requirements.txt + ``` + (this is optional if you only process a single city though.) 4. Execute ```bash - python3 scripts/process_subways.py -c "London" \ + PYTHONPATH=. 
python3 scripts/process_subways.py -c "London" \ -l validation.log -d London.yaml ``` here diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index 837463a..3a54b40 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -6,13 +6,13 @@ if [ $# -gt 0 -a \( "${1-}" = "-h" -o "${1-}" = '--help' \) ]; then This script updates a planet or an extract, processes metro networks in it and produces a set of HTML files with validation results. -Usage: $0 [] +Usage: $0 [] In more detail, the script does the following: - If \$PLANET is a remote file, downloads it. - If \$BBOX variable is set, proceeds with this setting for the planet clipping. Otherwise uses \$POLY: unless \$POLY variable is set and the file exists, generates a *.poly file with union of bboxes of all cities having metro. - - Makes a *.o5m extract of the \$PLANET using the *.poly file. + - Makes an extract of the \$PLANET using the *.poly file. - Updates the extract. - Filters railway infrastructure from the extract. - Uses filtered file for validation and generates a bunch of output files. @@ -28,8 +28,8 @@ variable is not defined or is null, otherwise they are kept. The \$PLANET file from remote URL is saved to a tempfile and is removed at the end. Environment variable reference: - - PLANET: path to a local or remote o5m or pbf source file (the entire planet or an extract) - - PLANET_METRO: path to a local o5m file with extract of cities having metro + - PLANET: path to a local or remote pbf source file (the entire planet or an extract) + - PLANET_METRO: path to a local pbf file with extract of cities having metro It's used instead of \$PLANET if exists otherwise it's created first - PLANET_UPDATE_SERVER: server to get replication data from. Defaults to https://planet.openstreetmap.org/replication/ - CITIES_INFO_URL: http(s) or "file://" URL to a CSV file with reference information about rapid transit systems. A default value is hammered into python code. @@ -48,7 +48,6 @@ Environment variable reference: - CITY_CACHE: json file with good cities obtained on previous validation runs - RECOVERY_PATH: file with some data collected at previous validation runs that may help to recover some simple validation errors - - OSMCTOOLS: path to osmconvert and osmupdate binaries - PYTHON: python 3 executable - GIT_PULL: set to 1 to update the scripts - TMPDIR: path to temporary files @@ -67,7 +66,7 @@ function activate_venv_at_path() { path=$1 if [ ! -d "$path/".venv ]; then - "${PYTHON:-python3.11}" -m venv "$path"/.venv + "${PYTHON:-python3}" -m venv "$path"/.venv fi source "$path"/.venv/bin/activate @@ -79,15 +78,10 @@ function activate_venv_at_path() { } -function check_osmctools() { - OSMCTOOLS="${OSMCTOOLS:-$HOME/osmctools}" - if [ ! -f "$OSMCTOOLS/osmupdate" ]; then - if which osmupdate > /dev/null; then - OSMCTOOLS="$(dirname "$(which osmupdate)")" - else - echo "Please compile osmctools to $OSMCTOOLS" - exit 1 - fi +function check_osmium() { + if ! which osmium > /dev/null; then + echo "Please install osmium-tool" + exit 1 fi } @@ -163,7 +157,7 @@ if [ -n "${NEED_FILTER-}" ]; then fi if [ ! 
-f "${PLANET_METRO-}" ]; then - check_osmctools + check_osmium check_poly PLANET="${PLANET:-${1-}}" @@ -186,7 +180,7 @@ if [ -n "${NEED_FILTER-}" ]; then fi if [ -z "${PLANET_METRO-}" ]; then - PLANET_METRO=$(mktemp "$TMPDIR/planet-metro.XXXXXXXX.o5m") + PLANET_METRO=$(mktemp "$TMPDIR/planet-metro.XXXXXXXX.osm.pbf") NEED_TO_REMOVE_PLANET_METRO=1 fi @@ -195,10 +189,8 @@ if [ -n "${NEED_FILTER-}" ]; then exit 6 fi - mkdir -p $TMPDIR/osmconvert_temp/ - "$OSMCTOOLS"/osmconvert "$PLANET" \ - -t=$TMPDIR/osmconvert_temp/temp \ - ${BBOX:+"-b=$BBOX"} ${POLY:+"-B=$POLY"} -o="$PLANET_METRO" + osmium extract "$PLANET" \ + ${BBOX:+"--bbox=$BBOX"} ${POLY:+"--polygon=$POLY"} -O -o "$PLANET_METRO" fi fi @@ -210,40 +202,38 @@ fi # If there's no need to filter, then update is also unnecessary if [ -z "${SKIP_PLANET_UPDATE-}" -a -n "${NEED_FILTER-}" ]; then - check_osmctools + check_osmium check_poly - PLANET_UPDATE_SERVER=${PLANET_UPDATE_SERVER:-https://planet.openstreetmap.org/replication/} + PLANET_UPDATE_SERVER=${PLANET_UPDATE_SERVER:-https://planet.openstreetmap.org/replication/hour/} PLANET_METRO_ABS="$(cd "$(dirname "$PLANET_METRO")"; pwd)/$(basename "$PLANET_METRO")" + PLANET_METRO_ABS_NEW="$PLANET_METRO_ABS.new.osm.pbf" mkdir -p $TMPDIR/osmupdate_temp/ - pushd $TMPDIR/osmupdate_temp/ - export PATH="$PATH:$OSMCTOOLS" - OSMUPDATE_ERRORS=$(osmupdate --drop-author --out-o5m ${BBOX:+"-b=$BBOX"} \ - ${POLY:+"-B=$POLY"} "$PLANET_METRO_ABS" \ - --base-url=$PLANET_UPDATE_SERVER \ - --tempfiles=$TMPDIR/osmupdate_temp/temp \ - "$PLANET_METRO_ABS.new.o5m" 2>&1 || :) + + activate_venv_at_path "$SUBWAYS_REPO_PATH/scripts" + OSMUPDATE_ERRORS=$(pyosmium-up-to-date \ + "$PLANET_METRO_ABS" \ + --server $PLANET_UPDATE_SERVER \ + --tmpdir $TMPDIR/osmupdate_temp/temp \ + -o "$PLANET_METRO_ABS_NEW" 2>&1 || :) + deactivate if [ -n "$OSMUPDATE_ERRORS" ]; then echo "osmupdate failed: $OSMUPDATE_ERRORS" exit 7 fi - popd - mv "$PLANET_METRO_ABS.new.o5m" "$PLANET_METRO_ABS" + + # Since updating adds things outside the area, trim those again. 
+ osmium extract "$PLANET_METRO_ABS_NEW" \ + ${BBOX:+"--bbox=$BBOX"} ${POLY:+"--polygon=$POLY"} -O -o "$PLANET_METRO_ABS" + rm -f "$PLANET_METRO_ABS_NEW" fi # Filtering planet-metro if [ -n "${NEED_FILTER-}" ]; then - check_osmctools - mkdir -p $TMPDIR/osmfilter_temp/ - QRELATIONS="route=subway =light_rail =monorail =train route_master=subway =light_rail =monorail =train public_transport=stop_area =stop_area_group" - QNODES="railway=station =subway_entrance =train_station_entrance station=subway =light_rail =monorail subway=yes light_rail=yes monorail=yes train=yes" - "$OSMCTOOLS/osmfilter" "$PLANET_METRO" \ - --keep= \ - --keep-relations="$QRELATIONS" \ - --keep-nodes="$QNODES" \ - --drop-author \ - -t=$TMPDIR/osmfilter_temp/temp \ - -o="$FILTERED_DATA" + check_osmium + QRELATIONS="r/route,route_master=subway,light_rail,monorail,train r/public_transport=stop_area,stop_area_group" + QNODES="n/railway=station,subway_entrance,train_station_entrance n/station=subway,light_rail,monorail n/subway=yes n/light_rail=yes n/monorail=yes n/train=yes" + osmium tags-filter "$PLANET_METRO" $QRELATIONS $QNODES -o "$FILTERED_DATA" -O fi if [ -n "${NEED_TO_REMOVE_PLANET_METRO-}" ]; then diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 1f71eee..f913954 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1 +1,2 @@ +osmium -r ../subways/requirements.txt diff --git a/subways/requirements.txt b/subways/requirements.txt index 29b232b..ab90481 100644 --- a/subways/requirements.txt +++ b/subways/requirements.txt @@ -1 +1 @@ -lxml==4.9.2 +lxml From fce16592cf69b4e61c2d0418854993bbfd74766d Mon Sep 17 00:00:00 2001 From: Ilya Zverev Date: Mon, 9 Dec 2024 17:45:39 +0200 Subject: [PATCH 15/15] FMK --- scripts/process_subways.sh | 1 + subways/processors/__init__.py | 4 +- subways/processors/fmk.py | 235 +++++++++++++++++++++++++++++++++ 3 files changed, 238 insertions(+), 2 deletions(-) create mode 100644 subways/processors/fmk.py diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index 3a54b40..66d3de7 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -256,6 +256,7 @@ python "$SUBWAYS_REPO_PATH/scripts/process_subways.py" ${QUIET:+-q} \ -x "$FILTERED_DATA" -l "$VALIDATION" \ ${CITIES_INFO_URL:+--cities-info-url "$CITIES_INFO_URL"} \ ${MAPSME:+--output-mapsme "$MAPSME"} \ + ${FMK:+--output-fmk "$FMK"} \ ${GTFS:+--output-gtfs "$GTFS"} \ ${CITY:+-c "$CITY"} \ ${DUMP:+-d "$DUMP"} \ diff --git a/subways/processors/__init__.py b/subways/processors/__init__.py index 89ae016..2411b6f 100644 --- a/subways/processors/__init__.py +++ b/subways/processors/__init__.py @@ -1,8 +1,8 @@ # Import only those processors (modules) you want to use. # Ignore F401 "module imported but unused" violation since these modules # are addressed via introspection. -from . import gtfs, mapsme # noqa F401 +from . 
import gtfs, mapsme, fmk # noqa F401 from ._common import transit_to_dict -__all__ = ["gtfs", "mapsme", "transit_to_dict"] +__all__ = ["gtfs", "mapsme", "fmk", "transit_to_dict"] diff --git a/subways/processors/fmk.py b/subways/processors/fmk.py new file mode 100644 index 0000000..c3bfde8 --- /dev/null +++ b/subways/processors/fmk.py @@ -0,0 +1,235 @@ +from __future__ import annotations + +import json +import logging +import os +import typing +from collections import defaultdict +from collections.abc import Callable +from typing import Any, TypeAlias + +from subways.consts import DISPLACEMENT_TOLERANCE +from subways.geom_utils import distance +from subways.osm_element import el_center +from subways.structure.station import Station +from subways.types import IdT, LonLat, OsmElementT, TransfersT +from ._common import ( + DEFAULT_AVE_VEHICLE_SPEED, + DEFAULT_INTERVAL, + format_colour, + KMPH_TO_MPS, + SPEED_ON_TRANSFER, + TRANSFER_PENALTY, +) + +if typing.TYPE_CHECKING: + from subways.structure.city import City + from subways.structure.stop_area import StopArea + + +OSM_TYPES = {"n": (0, "node"), "w": (2, "way"), "r": (3, "relation")} +ENTRANCE_PENALTY = 60 # seconds +SPEED_TO_ENTRANCE = 5 * KMPH_TO_MPS # m/s + +# (stoparea1_uid, stoparea2_uid) -> seconds; stoparea1_uid < stoparea2_uid +TransferTimesT: TypeAlias = dict[tuple[int, int], int] + + +def uid(elid: IdT, typ: str | None = None) -> int: + t = elid[0] + osm_id = int(elid[1:]) + if not typ: + osm_id = (osm_id << 2) + OSM_TYPES[t][0] + elif typ != t: + raise Exception("Got {}, expected {}".format(elid, typ)) + return osm_id << 1 + + +def transit_data_to_fmk(cities: list[City], transfers: TransfersT) -> dict: + """Generate all output and save to file. + :param cities: List of City instances + :param transfers: List of sets of StopArea.id + :param cache_path: Path to json-file with good cities cache or None. 
+ """ + + def find_exits_for_platform( + center: LonLat, nodes: list[OsmElementT] + ) -> list[OsmElementT]: + exits: list[OsmElementT] = [] + min_distance = None + for n in nodes: + d = distance(center, (n["lon"], n["lat"])) + if not min_distance: + min_distance = d * 2 / 3 + elif d < min_distance: + continue + too_close = False + for e in exits: + d = distance((e["lon"], e["lat"]), (n["lon"], n["lat"])) + if d < min_distance: + too_close = True + break + if not too_close: + exits.append(n) + return exits + + stop_areas: dict[IdT, StopArea] = {} + stops: dict[IdT, dict] = {} # stoparea el_id -> stop jsonified data + networks = [] + good_cities = [c for c in cities if c.is_good] + platform_nodes = {} + + for city in good_cities: + network = {"network": city.name, "routes": [], "agency_id": city.id} + for route in city: + routes = { + "type": route.mode, + "ref": route.ref, + "name": route.name, + "colour": format_colour(route.colour), + "route_id": uid(route.id, "r"), + "itineraries": [], + } + if route.infill: + routes["casing"] = routes["colour"] + routes["colour"] = format_colour(route.infill) + for i, variant in enumerate(route): + itin = [] + for stop in variant: + stop_areas[stop.stoparea.id] = stop.stoparea + itin.append(uid(stop.stoparea.id)) + # Make exits from platform nodes, + # if we don't have proper exits + if ( + len(stop.stoparea.entrances) + len(stop.stoparea.exits) + == 0 + ): + for pl in stop.stoparea.platforms: + pl_el = city.elements[pl] + if pl_el["type"] == "node": + pl_nodes = [pl_el] + elif pl_el["type"] == "way": + pl_nodes = [ + city.elements.get("n{}".format(n)) + for n in pl_el["nodes"] + ] + else: + pl_nodes = [] + for m in pl_el["members"]: + if m["type"] == "way": + if ( + "{}{}".format( + m["type"][0], m["ref"] + ) + in city.elements + ): + pl_nodes.extend( + [ + city.elements.get( + "n{}".format(n) + ) + for n in city.elements[ + "{}{}".format( + m["type"][0], + m["ref"], + ) + ]["nodes"] + ] + ) + pl_nodes = [n for n in pl_nodes if n] + platform_nodes[pl] = find_exits_for_platform( + stop.stoparea.centers[pl], pl_nodes + ) + + routes["itineraries"].append(itin) + network["routes"].append(routes) + networks.append(network) + + for stop_id, stop in stop_areas.items(): + st = { + "name": stop.name, + "int_name": stop.int_name, + "lat": stop.center[1], + "lon": stop.center[0], + "osm_type": OSM_TYPES[stop.station.id[0]][1], + "osm_id": int(stop.station.id[1:]), + "id": uid(stop.id), + "entrances": [], + "exits": [], + } + for e_l, k in ((stop.entrances, "entrances"), (stop.exits, "exits")): + for e in e_l: + if e[0] == "n": + st[k].append( + { + "osm_type": "node", + "osm_id": int(e[1:]), + "lon": stop.centers[e][0], + "lat": stop.centers[e][1], + } + ) + if len(stop.entrances) + len(stop.exits) == 0: + if stop.platforms: + for pl in stop.platforms: + for n in platform_nodes[pl]: + for k in ("entrances", "exits"): + st[k].append( + { + "osm_type": n["type"], + "osm_id": n["id"], + "lon": n["lon"], + "lat": n["lat"], + } + ) + else: + for k in ("entrances", "exits"): + st[k].append( + { + "osm_type": OSM_TYPES[stop.station.id[0]][1], + "osm_id": int(stop.station.id[1:]), + "lon": stop.centers[stop.id][0], + "lat": stop.centers[stop.id][1], + } + ) + + stops[stop_id] = st + + pairwise_transfers: list[list[int]] = [] + for stoparea_id_set in transfers: + tr = list(sorted([uid(sa_id) for sa_id in stoparea_id_set + if sa_id in stops])) + if len(tr) > 1: + pairwise_transfers.append(tr) + + result = { + "stops": list(stops.values()), + "transfers": 
pairwise_transfers, + "networks": networks, + } + return result + + +def process( + cities: list[City], + transfers: TransfersT, + filename: str, + cache_path: str | None, +) -> None: + """Generate all output and save to file. + :param cities: list of City instances + :param transfers: all collected transfers in the world + :param filename: Path to file to save the result + :param cache_path: Path to json-file with good cities cache or None. + """ + if not filename.lower().endswith("json"): + filename = f"{filename}.json" + + fmk_transit = transit_data_to_fmk(cities, transfers) + + with open(filename, "w", encoding="utf-8") as f: + json.dump( + fmk_transit, + f, + indent=1, + ensure_ascii=False, + )
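
Note on the `uid()` helper introduced in `subways/processors/fmk.py` above: it maps string element ids such as `"n100"` to integers by packing a 2-bit type code into the numeric id and then shifting once more. A minimal sketch restating that logic, with a few worked values (the assertions are illustrative and not part of the patch):

```python
# Id packing as in uid() from subways/processors/fmk.py:
# bits 1-2 carry the OSM type code, the result is shifted left once more,
# leaving the lowest bit unused.
OSM_TYPES = {"n": (0, "node"), "w": (2, "way"), "r": (3, "relation")}


def uid(elid: str, typ: str | None = None) -> int:
    t = elid[0]
    osm_id = int(elid[1:])
    if not typ:
        osm_id = (osm_id << 2) + OSM_TYPES[t][0]  # pack the type code
    elif typ != t:
        raise Exception(f"Got {elid}, expected {typ}")
    return osm_id << 1


assert uid("n100") == 800        # ((100 << 2) + 0) << 1
assert uid("w100") == 804        # ((100 << 2) + 2) << 1
assert uid("r100") == 806        # ((100 << 2) + 3) << 1
assert uid("r100", "r") == 200   # with typ given, the type is only checked, not packed
```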
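
Patch 12 derives the GTFS extension field `average_speed` (km/h) from the route's tagged duration and the distance of its last stop, falling back to the 40 km/h default when no duration is known. A small sketch of that arithmetic; `length_m` stands in for `itinerary["stops"][-1]["distance"]`, and the numbers in the usage lines are hypothetical, not taken from the test fixtures:

```python
KMPH_TO_MPS = 1 / 3.6                          # from subways/processors/_common.py
DEFAULT_AVE_VEHICLE_SPEED = 40 * KMPH_TO_MPS   # 40 km/h expressed in m/s


def average_speed_kmph(length_m: float, duration_s: int | None) -> float:
    """Mirror of the average_speed expression in transit_data_to_gtfs()."""
    speed_mps = (
        DEFAULT_AVE_VEHICLE_SPEED if not duration_s else length_m / duration_s
    )
    return round(speed_mps / KMPH_TO_MPS, 1)   # convert m/s back to km/h


print(average_speed_kmph(1500, 600))   # 9.0  -> a 1.5 km itinerary with duration=00:10:00
print(average_speed_kmph(1500, None))  # 40.0 -> default speed when duration is missing
```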