Improve relation center calculation

This commit is contained in:
Alexey Zakharenkov 2022-12-28 14:47:29 +03:00 committed by Alexander Borsuk
parent d5f2026301
commit ec2f57b266
7 changed files with 313 additions and 98 deletions

View file

@ -5,6 +5,8 @@ systems in the world from OpenStreetMap. `subway_structure.py` produces
a list of disjunct systems that can be used for routing and for displaying
of metro maps.
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
## How To Validate

View file

@ -9,6 +9,7 @@ import sys
import time
import urllib.parse
import urllib.request
from typing import Dict, List, Optional, Tuple
import processors
from subway_io import (
@ -28,6 +29,9 @@ from subway_structure import (
)
Point = Tuple[float, float]
def overpass_request(overground, overpass_api, bboxes):
query = "[out:json][timeout:1000];("
modes = MODES_OVERGROUND if overground else MODES_RAPID
@ -71,114 +75,155 @@ def slugify(name):
return re.sub(r"[^a-z0-9_-]+", "", name.lower().replace(" ", "_"))
def get_way_center(
element: dict, node_centers: Dict[int, Point]
) -> Optional[Point]:
"""
:param element: dict describing OSM element
:param node_centers: osm_id => (lat, lon)
:return: tuple with center coordinates, or None
"""
# If elements have been queried via overpass-api with
# 'out center;' clause then ways already have 'center' attribute
if "center" in element:
return element["center"]["lat"], element["center"]["lon"]
if "nodes" not in element:
return None
center = [0, 0]
count = 0
way_nodes = element["nodes"]
way_nodes_len = len(element["nodes"])
for i, nd in enumerate(way_nodes):
if nd not in node_centers:
continue
# Don't count the first node of a closed way twice
if (
i == way_nodes_len - 1
and way_nodes_len > 1
and way_nodes[0] == way_nodes[-1]
):
break
center[0] += node_centers[nd][0]
center[1] += node_centers[nd][1]
count += 1
if count == 0:
return None
element["center"] = {"lat": center[0] / count, "lon": center[1] / count}
return element["center"]["lat"], element["center"]["lon"]
def get_relation_center(
element: dict,
node_centers: Dict[int, Point],
way_centers: Dict[int, Point],
relation_centers: Dict[int, Point],
ignore_unlocalized_child_relations: bool = False,
) -> Optional[Point]:
"""
:param element: dict describing OSM element
:param node_centers: osm_id => (lat, lon)
:param way_centers: osm_id => (lat, lon)
:param relation_centers: osm_id => (lat, lon)
:param ignore_unlocalized_child_relations: if a member that is a relation
has no center, skip it and calculate center based on member nodes,
ways and other, "localized" (with known centers), relations
:return: tuple with center coordinates, or None
"""
# If elements have been queried via overpass-api with
# 'out center;' clause then some relations already have 'center'
# attribute. But this is not the case for relations composed only
# of other relations (e.g., route_master, stop_area_group or
# stop_area with only members that are multipolygons)
if "center" in element:
return element["center"]["lat"], element["center"]["lon"]
if "center" in element:
return element["center"]
center = [0, 0]
count = 0
for m in element.get("members", list()):
m_id = m["ref"]
m_type = m["type"]
if m_type == "relation" and m_id not in relation_centers:
if ignore_unlocalized_child_relations:
continue
else:
# Cannot calculate fair center because the center
# of a child relation is not known yet
return None
member_container = (
node_centers
if m_type == "node"
else way_centers
if m_type == "way"
else relation_centers
)
if m_id in member_container:
center[0] += member_container[m_id][0]
center[1] += member_container[m_id][1]
count += 1
if count == 0:
return None
element["center"] = {"lat": center[0] / count, "lon": center[1] / count}
return element["center"]["lat"], element["center"]["lon"]
def calculate_centers(elements):
"""Adds 'center' key to each way/relation in elements,
except for empty ways or relations.
Relies on nodes-ways-relations order in the elements list.
"""
nodes = {} # id(int) => (lat, lon)
ways = {} # id(int) => (lat, lon)
relations = {} # id(int) => (lat, lon)
empty_relations = set() # ids(int) of relations without members
# or containing only empty relations
nodes: Dict[int, Point] = {} # id => (lat, lon)
ways: Dict[int, Point] = {} # id => (lat, lon)
relations: Dict[int, Point] = {} # id => (lat, lon)
def calculate_way_center(el):
# If element has been queried via overpass-api with 'out center;'
# clause then ways already have 'center' attribute
if "center" in el:
ways[el["id"]] = (el["center"]["lat"], el["center"]["lon"])
return
center = [0, 0]
count = 0
way_nodes = el["nodes"]
way_nodes_len = len(el["nodes"])
for i, nd in enumerate(way_nodes):
if nd not in nodes:
continue
# Don't count the first node of a closed way twice
if (
i == way_nodes_len - 1
and way_nodes_len > 1
and way_nodes[0] == way_nodes[-1]
):
break
center[0] += nodes[nd][0]
center[1] += nodes[nd][1]
count += 1
if count > 0:
el["center"] = {"lat": center[0] / count, "lon": center[1] / count}
ways[el["id"]] = (el["center"]["lat"], el["center"]["lon"])
def calculate_relation_center(el):
# If element has been queried via overpass-api with 'out center;'
# clause then some relations already have 'center' attribute
if "center" in el:
relations[el["id"]] = (el["center"]["lat"], el["center"]["lon"])
return True
center = [0, 0]
count = 0
for m in el.get("members", []):
if m["type"] == "relation" and m["ref"] not in relations:
if m["ref"] in empty_relations:
# Ignore empty child relations
continue
else:
# Center of child relation is not known yet
return False
member_container = (
nodes
if m["type"] == "node"
else ways
if m["type"] == "way"
else relations
)
if m["ref"] in member_container:
center[0] += member_container[m["ref"]][0]
center[1] += member_container[m["ref"]][1]
count += 1
if count == 0:
empty_relations.add(el["id"])
else:
el["center"] = {"lat": center[0] / count, "lon": center[1] / count}
relations[el["id"]] = (el["center"]["lat"], el["center"]["lon"])
return True
relations_without_center = []
unlocalized_relations = [] # 'unlocalized' means the center of the
# relation has not been calculated yet
for el in elements:
if el["type"] == "node":
nodes[el["id"]] = (el["lat"], el["lon"])
elif el["type"] == "way":
if "nodes" in el:
calculate_way_center(el)
if center := get_way_center(el, nodes):
ways[el["id"]] = center
elif el["type"] == "relation":
if not calculate_relation_center(el):
relations_without_center.append(el)
if center := get_relation_center(el, nodes, ways, relations):
relations[el["id"]] = center
else:
unlocalized_relations.append(el)
def iterate_relation_centers_calculation(
ignore_unlocalized_child_relations: bool,
) -> List[int]:
unlocalized_relations_upd = []
for rel in unlocalized_relations:
if center := get_relation_center(
rel, nodes, ways, relations, ignore_unlocalized_child_relations
):
relations[rel["id"]] = center
else:
unlocalized_relations_upd.append(rel)
return unlocalized_relations_upd
# Calculate centers for relations that have no one yet
while relations_without_center:
new_relations_without_center = []
for rel in relations_without_center:
if not calculate_relation_center(rel):
new_relations_without_center.append(rel)
if len(new_relations_without_center) == len(relations_without_center):
break
relations_without_center = new_relations_without_center
if relations_without_center:
logging.error(
"Cannot calculate center for the relations (%d in total): %s%s",
len(relations_without_center),
", ".join(str(rel["id"]) for rel in relations_without_center[:20]),
", ..." if len(relations_without_center) > 20 else "",
)
if empty_relations:
logging.warning(
"Empty relations (%d in total): %s%s",
len(empty_relations),
", ".join(str(x) for x in list(empty_relations)[:20]),
", ..." if len(empty_relations) > 20 else "",
)
while unlocalized_relations:
unlocalized_relations_upd = iterate_relation_centers_calculation(False)
progress = len(unlocalized_relations_upd) < len(unlocalized_relations)
if not progress:
unlocalized_relations_upd = iterate_relation_centers_calculation(
True
)
progress = len(unlocalized_relations_upd) < len(
unlocalized_relations
)
if not progress:
break
unlocalized_relations = unlocalized_relations_upd
def add_osm_elements_to_cities(osm_elements, cities):

View file

@ -3,7 +3,7 @@ from functools import partial
from io import BytesIO, StringIO
from itertools import permutations
from tarfile import TarFile, TarInfo
from typing import List, Set
from typing import List, Optional, Set
from zipfile import ZipFile
from ._common import (
@ -344,7 +344,9 @@ def dict_to_row(dict_data: dict, record_type: str) -> list:
]
def make_gtfs(filename: str, gtfs_data: dict, fmt: str = None) -> None:
def make_gtfs(
filename: str, gtfs_data: dict, fmt: Optional[str] = None
) -> None:
if not fmt:
fmt = "tar" if filename.endswith(".tar") else "zip"

View file

@ -36,7 +36,7 @@ Environment variable reference:
- BBOX: bounding box of an extract; x1,y1,x2,y2. Has precedence over \$POLY
- POLY: *.poly file with [multi]polygon comprising cities with metro
If neither \$BBOX nor \$POLY is set, then \$POLY is generated
- SKIP_PLANET_UPDATE: skip \$PLANET file update. Any non-empty string is True
- SKIP_PLANET_UPDATE: skip \$PLANET_METRO file update. Any non-empty string is True
- SKIP_FILTERING: skip filtering railway data. Any non-empty string is True
- FILTERED_DATA: path to filtered data. Defaults to \$TMPDIR/subways.osm
- MAPSME: file name for maps.me json output
@ -55,6 +55,7 @@ Environment variable reference:
- SERVER: server name and path to upload HTML files (e.g. ilya@osmz.ru:/var/www/)
- SERVER_KEY: rsa key to supply for uploading the files
- REMOVE_HTML: set to 1 to remove \$HTML_DIR after uploading
- QUIET: set to any non-empty value to use WARNING log level in process_subways.py. Default is INFO.
EOF
exit
fi
@ -237,7 +238,7 @@ if [ -n "${DUMP-}" ]; then
fi
VALIDATION="$TMPDIR/validation.json"
"$PYTHON" "$SUBWAYS_PATH/process_subways.py" -q \
"$PYTHON" "$SUBWAYS_PATH/process_subways.py" ${QUIET:+-q} \
-x "$FILTERED_DATA" -l "$VALIDATION" \
${MAPSME:+--output-mapsme "$MAPSME"} \
${GTFS:+--output-gtfs "$GTFS"} \

View file

@ -0,0 +1,28 @@
{
"w38836456": {
"lat": 55.73064775,
"lon": 37.446065950000005
},
"w489951237": {
"lat": 55.730760724999996,
"lon": 37.44602055
},
"r7588527": {
"lat": 55.73066371666667,
"lon": 37.44604881666667
},
"r7588528": {
"lat": 55.73075192499999,
"lon": 37.44609837
},
"r7588561": {
"lat": 55.73070782083333,
"lon": 37.44607359333334
},
"r13426423": {
"lat": 55.730760724999996,
"lon": 37.44602055
},
"r100": null,
"r101": null
}

View file

@ -0,0 +1,82 @@
<?xml version='1.0' encoding='UTF-8'?>
<osm version='0.6' generator='JOSM'>
<node id='244036218' visible='true' version='27' lat='55.7306986' lon='37.4460134'>
<tag k='public_transport' v='stop_position' />
<tag k='subway' v='yes' />
</node>
<node id='244038961' visible='true' version='26' lat='55.730801' lon='37.4464724'>
<tag k='railway' v='subway_entrance' />
</node>
<node id='461075776' visible='true' version='5' lat='55.7304682' lon='37.4447392' />
<node id='461075811' visible='true' version='7' lat='55.7308273' lon='37.4473927'>
<tag k='barrier' v='gate' />
</node>
<node id='1191237441' visible='true' version='18' lat='55.7308185' lon='37.4459574'>
<tag k='public_transport' v='stop_position' />
<tag k='subway' v='yes' />
</node>
<node id='4821481210' visible='true' version='3' lat='55.7305372' lon='37.4447339' />
<node id='4821481211' visible='true' version='3' lat='55.7306293' lon='37.4446944' />
<node id='4821481212' visible='true' version='3' lat='55.7308921' lon='37.4473467' />
<node id='4821481213' visible='true' version='3' lat='55.7309843' lon='37.4473072' />
<node id='5176248500' visible='true' version='2' lat='55.7306626' lon='37.4460524'>
<tag k='public_transport' v='stop_position' />
<tag k='subway' v='yes' />
</node>
<node id='5176248502' visible='true' version='8' lat='55.7306808' lon='37.4460281'>
<tag k='name' v='Кунцевская' />
<tag k='railway' v='station' />
<tag k='station' v='subway' />
</node>
<way id='38836456' version='4' visible='true'>
<nd ref='461075776' />
<nd ref='461075811' />
<tag k='railway' v='platform' />
</way>
<way id='489951237' visible='true' version='6'>
<nd ref='4821481210' />
<nd ref='4821481211' />
<nd ref='4821481213' />
<nd ref='4821481212' />
<nd ref='4821481210' />
</way>
<relation id='7588527' visible='true' version='7'>
<member type='node' ref='5176248502' role='' />
<member type='node' ref='5176248500' role='stop' />
<member type='way' ref='38836456' role='' />
<tag k='public_transport' v='stop_area' />
<tag k='type' v='public_transport' />
</relation>
<relation id='7588528' visible='true' version='6'>
<member type='node' ref='5176248502' role='' />
<member type='node' ref='244036218' role='stop' />
<member type='node' ref='1191237441' role='stop' />
<member type='relation' ref='13426423' role='platform' />
<member type='node' ref='244038961' role='' />
<member type='relation' ref='7588561' role='' /> <!-- cyclic ref -->
<tag k='public_transport' v='stop_area' />
<tag k='type' v='public_transport' />
</relation>
<relation id='7588561' visible='true' version='5'>
<member type='relation' ref='7588528' role='' />
<member type='relation' ref='7588527' role='' />
<member type='node' ref='1' role='' /> <!-- incomplete ref -->
<member type='way' ref='1' role='' /> <!-- incomplete ref -->
<member type='relation' ref='1' role='' /> <!-- incomplete ref -->
<tag k='name' v='Кунцевская' />
<tag k='public_transport' v='stop_area_group' />
<tag k='type' v='public_transport' />
</relation>
<relation id='13426423' visible='true' version='4'>
<member type='way' ref='489951237' role='outer' />
<tag k='public_transport' v='platform' />
<tag k='type' v='multipolygon' />
</relation>
<relation id='100' visible='true' version='1'>
<tag k='description' v='emtpy relation' />
</relation>
<relation id='101' visible='true' version='1'>
<member type='node' ref='1' role='' /> <!-- incomplete ref -->
<tag k='description' v='only incomplete members' />
</relation>
</osm>

View file

@ -0,0 +1,55 @@
import json
from pathlib import Path
from unittest import TestCase
from process_subways import calculate_centers
from subway_io import load_xml
class TestCenterCalculation(TestCase):
"""Test center calculation. Test data [should] contain among others
the following edge cases:
- an empty relation. It's element should not obtain "center" key.
- relation as member of relation, the child relation following the parent
in the OSM XML file.
- relation with incomplete members (broken references).
- relations with cyclic references.
"""
ASSETS_PATH = Path(__file__).resolve().parent / "assets"
OSM_DATA = str(ASSETS_PATH / "kuntsevskaya_transfer.osm")
CORRECT_CENTERS = str(ASSETS_PATH / "kuntsevskaya_centers.json")
def test__calculate_centers(self) -> None:
elements = load_xml(self.OSM_DATA)
calculate_centers(elements)
elements_dict = {
f"{'w' if el['type'] == 'way' else 'r'}{el['id']}": el
for el in elements
}
calculated_centers = {
k: el["center"]
for k, el in elements_dict.items()
if "center" in el
}
with open(self.CORRECT_CENTERS) as f:
correct_centers = json.load(f)
self.assertTrue(set(calculated_centers).issubset(correct_centers))
for k, correct_center in correct_centers.items():
if correct_center is None:
self.assertNotIn("center", elements_dict[k])
else:
self.assertIn(k, calculated_centers)
calculated_center = calculated_centers[k]
self.assertAlmostEqual(
calculated_center["lat"], correct_center["lat"], places=10
)
self.assertAlmostEqual(
calculated_center["lon"], correct_center["lon"], places=10
)