From 01eb589d13bc3689ab41c0c830bf44860f19fdae Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov Date: Wed, 28 Dec 2022 14:47:29 +0300 Subject: [PATCH] Improve relation center calculation --- README.md | 2 + process_subways.py | 233 +++++++++++++++---------- processors/gtfs.py | 6 +- scripts/process_subways.sh | 5 +- tests/assets/kuntsevskaya_centers.json | 28 +++ tests/assets/kuntsevskaya_transfer.osm | 82 +++++++++ tests/test_center_calculation.py | 55 ++++++ 7 files changed, 313 insertions(+), 98 deletions(-) create mode 100644 tests/assets/kuntsevskaya_centers.json create mode 100644 tests/assets/kuntsevskaya_transfer.osm create mode 100644 tests/test_center_calculation.py diff --git a/README.md b/README.md index 7b91bd8..92d7d9d 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ systems in the world from OpenStreetMap. `subway_structure.py` produces a list of disjunct systems that can be used for routing and for displaying of metro maps. +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + ## How To Validate diff --git a/process_subways.py b/process_subways.py index 6b71afd..a600b37 100755 --- a/process_subways.py +++ b/process_subways.py @@ -9,6 +9,7 @@ import sys import time import urllib.parse import urllib.request +from typing import Dict, List, Optional, Tuple import processors from subway_io import ( @@ -28,6 +29,9 @@ from subway_structure import ( ) +Point = Tuple[float, float] + + def overpass_request(overground, overpass_api, bboxes): query = "[out:json][timeout:1000];(" modes = MODES_OVERGROUND if overground else MODES_RAPID @@ -71,114 +75,155 @@ def slugify(name): return re.sub(r"[^a-z0-9_-]+", "", name.lower().replace(" ", "_")) +def get_way_center( + element: dict, node_centers: Dict[int, Point] +) -> Optional[Point]: + """ + :param element: dict describing OSM element + :param node_centers: osm_id => (lat, lon) + :return: tuple with center coordinates, or None + """ + + # If elements have been queried via overpass-api with + # 'out center;' clause then ways already have 'center' attribute + if "center" in element: + return element["center"]["lat"], element["center"]["lon"] + + if "nodes" not in element: + return None + + center = [0, 0] + count = 0 + way_nodes = element["nodes"] + way_nodes_len = len(element["nodes"]) + for i, nd in enumerate(way_nodes): + if nd not in node_centers: + continue + # Don't count the first node of a closed way twice + if ( + i == way_nodes_len - 1 + and way_nodes_len > 1 + and way_nodes[0] == way_nodes[-1] + ): + break + center[0] += node_centers[nd][0] + center[1] += node_centers[nd][1] + count += 1 + if count == 0: + return None + element["center"] = {"lat": center[0] / count, "lon": center[1] / count} + return element["center"]["lat"], element["center"]["lon"] + + +def get_relation_center( + element: dict, + node_centers: Dict[int, Point], + way_centers: Dict[int, Point], + relation_centers: Dict[int, Point], + ignore_unlocalized_child_relations: bool = False, +) -> Optional[Point]: + """ + :param element: dict describing OSM element + :param node_centers: osm_id => (lat, lon) + :param way_centers: osm_id => (lat, lon) + :param relation_centers: osm_id => (lat, lon) + :param ignore_unlocalized_child_relations: if a member that is a relation + has no center, skip it and calculate center based on member nodes, + ways and other, "localized" (with known centers), relations + :return: tuple with center coordinates, or None + """ + + # If elements have been queried via overpass-api with + # 'out center;' clause then some relations already have 'center' + # attribute. But this is not the case for relations composed only + # of other relations (e.g., route_master, stop_area_group or + # stop_area with only members that are multipolygons) + if "center" in element: + return element["center"]["lat"], element["center"]["lon"] + + if "center" in element: + return element["center"] + + center = [0, 0] + count = 0 + for m in element.get("members", list()): + m_id = m["ref"] + m_type = m["type"] + if m_type == "relation" and m_id not in relation_centers: + if ignore_unlocalized_child_relations: + continue + else: + # Cannot calculate fair center because the center + # of a child relation is not known yet + return None + member_container = ( + node_centers + if m_type == "node" + else way_centers + if m_type == "way" + else relation_centers + ) + if m_id in member_container: + center[0] += member_container[m_id][0] + center[1] += member_container[m_id][1] + count += 1 + if count == 0: + return None + element["center"] = {"lat": center[0] / count, "lon": center[1] / count} + return element["center"]["lat"], element["center"]["lon"] + + def calculate_centers(elements): """Adds 'center' key to each way/relation in elements, except for empty ways or relations. Relies on nodes-ways-relations order in the elements list. """ - nodes = {} # id(int) => (lat, lon) - ways = {} # id(int) => (lat, lon) - relations = {} # id(int) => (lat, lon) - empty_relations = set() # ids(int) of relations without members - # or containing only empty relations + nodes: Dict[int, Point] = {} # id => (lat, lon) + ways: Dict[int, Point] = {} # id => (lat, lon) + relations: Dict[int, Point] = {} # id => (lat, lon) - def calculate_way_center(el): - # If element has been queried via overpass-api with 'out center;' - # clause then ways already have 'center' attribute - if "center" in el: - ways[el["id"]] = (el["center"]["lat"], el["center"]["lon"]) - return - center = [0, 0] - count = 0 - way_nodes = el["nodes"] - way_nodes_len = len(el["nodes"]) - for i, nd in enumerate(way_nodes): - if nd not in nodes: - continue - # Don't count the first node of a closed way twice - if ( - i == way_nodes_len - 1 - and way_nodes_len > 1 - and way_nodes[0] == way_nodes[-1] - ): - break - center[0] += nodes[nd][0] - center[1] += nodes[nd][1] - count += 1 - if count > 0: - el["center"] = {"lat": center[0] / count, "lon": center[1] / count} - ways[el["id"]] = (el["center"]["lat"], el["center"]["lon"]) - - def calculate_relation_center(el): - # If element has been queried via overpass-api with 'out center;' - # clause then some relations already have 'center' attribute - if "center" in el: - relations[el["id"]] = (el["center"]["lat"], el["center"]["lon"]) - return True - center = [0, 0] - count = 0 - for m in el.get("members", []): - if m["type"] == "relation" and m["ref"] not in relations: - if m["ref"] in empty_relations: - # Ignore empty child relations - continue - else: - # Center of child relation is not known yet - return False - member_container = ( - nodes - if m["type"] == "node" - else ways - if m["type"] == "way" - else relations - ) - if m["ref"] in member_container: - center[0] += member_container[m["ref"]][0] - center[1] += member_container[m["ref"]][1] - count += 1 - if count == 0: - empty_relations.add(el["id"]) - else: - el["center"] = {"lat": center[0] / count, "lon": center[1] / count} - relations[el["id"]] = (el["center"]["lat"], el["center"]["lon"]) - return True - - relations_without_center = [] + unlocalized_relations = [] # 'unlocalized' means the center of the + # relation has not been calculated yet for el in elements: if el["type"] == "node": nodes[el["id"]] = (el["lat"], el["lon"]) elif el["type"] == "way": - if "nodes" in el: - calculate_way_center(el) + if center := get_way_center(el, nodes): + ways[el["id"]] = center elif el["type"] == "relation": - if not calculate_relation_center(el): - relations_without_center.append(el) + if center := get_relation_center(el, nodes, ways, relations): + relations[el["id"]] = center + else: + unlocalized_relations.append(el) + + def iterate_relation_centers_calculation( + ignore_unlocalized_child_relations: bool, + ) -> List[int]: + unlocalized_relations_upd = [] + for rel in unlocalized_relations: + if center := get_relation_center( + rel, nodes, ways, relations, ignore_unlocalized_child_relations + ): + relations[rel["id"]] = center + else: + unlocalized_relations_upd.append(rel) + return unlocalized_relations_upd # Calculate centers for relations that have no one yet - while relations_without_center: - new_relations_without_center = [] - for rel in relations_without_center: - if not calculate_relation_center(rel): - new_relations_without_center.append(rel) - if len(new_relations_without_center) == len(relations_without_center): - break - relations_without_center = new_relations_without_center - - if relations_without_center: - logging.error( - "Cannot calculate center for the relations (%d in total): %s%s", - len(relations_without_center), - ", ".join(str(rel["id"]) for rel in relations_without_center[:20]), - ", ..." if len(relations_without_center) > 20 else "", - ) - if empty_relations: - logging.warning( - "Empty relations (%d in total): %s%s", - len(empty_relations), - ", ".join(str(x) for x in list(empty_relations)[:20]), - ", ..." if len(empty_relations) > 20 else "", - ) + while unlocalized_relations: + unlocalized_relations_upd = iterate_relation_centers_calculation(False) + progress = len(unlocalized_relations_upd) < len(unlocalized_relations) + if not progress: + unlocalized_relations_upd = iterate_relation_centers_calculation( + True + ) + progress = len(unlocalized_relations_upd) < len( + unlocalized_relations + ) + if not progress: + break + unlocalized_relations = unlocalized_relations_upd def add_osm_elements_to_cities(osm_elements, cities): diff --git a/processors/gtfs.py b/processors/gtfs.py index 32d539a..5dc3952 100644 --- a/processors/gtfs.py +++ b/processors/gtfs.py @@ -3,7 +3,7 @@ from functools import partial from io import BytesIO, StringIO from itertools import permutations from tarfile import TarFile, TarInfo -from typing import List, Set +from typing import List, Optional, Set from zipfile import ZipFile from ._common import ( @@ -344,7 +344,9 @@ def dict_to_row(dict_data: dict, record_type: str) -> list: ] -def make_gtfs(filename: str, gtfs_data: dict, fmt: str = None) -> None: +def make_gtfs( + filename: str, gtfs_data: dict, fmt: Optional[str] = None +) -> None: if not fmt: fmt = "tar" if filename.endswith(".tar") else "zip" diff --git a/scripts/process_subways.sh b/scripts/process_subways.sh index cdd3e25..50548a5 100755 --- a/scripts/process_subways.sh +++ b/scripts/process_subways.sh @@ -36,7 +36,7 @@ Environment variable reference: - BBOX: bounding box of an extract; x1,y1,x2,y2. Has precedence over \$POLY - POLY: *.poly file with [multi]polygon comprising cities with metro If neither \$BBOX nor \$POLY is set, then \$POLY is generated - - SKIP_PLANET_UPDATE: skip \$PLANET file update. Any non-empty string is True + - SKIP_PLANET_UPDATE: skip \$PLANET_METRO file update. Any non-empty string is True - SKIP_FILTERING: skip filtering railway data. Any non-empty string is True - FILTERED_DATA: path to filtered data. Defaults to \$TMPDIR/subways.osm - MAPSME: file name for maps.me json output @@ -55,6 +55,7 @@ Environment variable reference: - SERVER: server name and path to upload HTML files (e.g. ilya@osmz.ru:/var/www/) - SERVER_KEY: rsa key to supply for uploading the files - REMOVE_HTML: set to 1 to remove \$HTML_DIR after uploading + - QUIET: set to any non-empty value to use WARNING log level in process_subways.py. Default is INFO. EOF exit fi @@ -233,7 +234,7 @@ fi # Running the validation VALIDATION="$TMPDIR/validation.json" -"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q \ +"$PYTHON" "$SUBWAYS_PATH/process_subways.py" ${QUIET:+-q} \ -x "$FILTERED_DATA" -l "$VALIDATION" \ ${MAPSME:+--output-mapsme "$MAPSME"} \ ${GTFS:+--output-gtfs "$GTFS"} \ diff --git a/tests/assets/kuntsevskaya_centers.json b/tests/assets/kuntsevskaya_centers.json new file mode 100644 index 0000000..36317ec --- /dev/null +++ b/tests/assets/kuntsevskaya_centers.json @@ -0,0 +1,28 @@ +{ + "w38836456": { + "lat": 55.73064775, + "lon": 37.446065950000005 + }, + "w489951237": { + "lat": 55.730760724999996, + "lon": 37.44602055 + }, + "r7588527": { + "lat": 55.73066371666667, + "lon": 37.44604881666667 + }, + "r7588528": { + "lat": 55.73075192499999, + "lon": 37.44609837 + }, + "r7588561": { + "lat": 55.73070782083333, + "lon": 37.44607359333334 + }, + "r13426423": { + "lat": 55.730760724999996, + "lon": 37.44602055 + }, + "r100": null, + "r101": null +} diff --git a/tests/assets/kuntsevskaya_transfer.osm b/tests/assets/kuntsevskaya_transfer.osm new file mode 100644 index 0000000..48bf044 --- /dev/null +++ b/tests/assets/kuntsevskaya_transfer.osm @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/test_center_calculation.py b/tests/test_center_calculation.py new file mode 100644 index 0000000..4f01a3c --- /dev/null +++ b/tests/test_center_calculation.py @@ -0,0 +1,55 @@ +import json +from pathlib import Path +from unittest import TestCase + +from process_subways import calculate_centers +from subway_io import load_xml + + +class TestCenterCalculation(TestCase): + """Test center calculation. Test data [should] contain among others + the following edge cases: + - an empty relation. It's element should not obtain "center" key. + - relation as member of relation, the child relation following the parent + in the OSM XML file. + - relation with incomplete members (broken references). + - relations with cyclic references. + """ + + ASSETS_PATH = Path(__file__).resolve().parent / "assets" + OSM_DATA = str(ASSETS_PATH / "kuntsevskaya_transfer.osm") + CORRECT_CENTERS = str(ASSETS_PATH / "kuntsevskaya_centers.json") + + def test__calculate_centers(self) -> None: + elements = load_xml(self.OSM_DATA) + + calculate_centers(elements) + + elements_dict = { + f"{'w' if el['type'] == 'way' else 'r'}{el['id']}": el + for el in elements + } + + calculated_centers = { + k: el["center"] + for k, el in elements_dict.items() + if "center" in el + } + + with open(self.CORRECT_CENTERS) as f: + correct_centers = json.load(f) + + self.assertTrue(set(calculated_centers).issubset(correct_centers)) + + for k, correct_center in correct_centers.items(): + if correct_center is None: + self.assertNotIn("center", elements_dict[k]) + else: + self.assertIn(k, calculated_centers) + calculated_center = calculated_centers[k] + self.assertAlmostEqual( + calculated_center["lat"], correct_center["lat"], places=10 + ) + self.assertAlmostEqual( + calculated_center["lon"], correct_center["lon"], places=10 + )