Improve autosplit edge cases: a cluster with a single subregion now gets that subregion's name; a superregion that is split and merged back into itself now stays as is

This commit is contained in:
Alexey Zakharenkov 2020-07-24 10:48:16 +03:00
parent 2e90ba48e5
commit 66a75c1a52
4 changed files with 52 additions and 137 deletions

View file

@ -33,7 +33,7 @@ CREATE TABLE borders_backup (
CREATE TABLE splitting (
osm_border_id BIGINT NOT NULL REFERENCES osm_borders(osm_id), -- reference to parent osm region
id BIGINT NOT NULL, -- representative subregion id
subregion_ids BIGINT[] NOT NULL,
city_population_thr INT NOT NULL,
cluster_population_thr INT NOT NULL,
geom geometry NOT NULL

View file

@ -4,7 +4,11 @@ import psycopg2
from collections import defaultdict
from config import AUTOSPLIT_TABLE as autosplit_table
from config import (
AUTOSPLIT_TABLE as autosplit_table,
TABLE as table,
OSM_TABLE as osm_table
)
class DisjointClusterUnion:
@ -93,11 +97,12 @@ class DisjointClusterUnion:
def enrich_with_population_and_cities(conn, subregions):
cursor = conn.cursor()
cursor.execute("""
ids = ','.join(str(x) for x in subregions.keys())
cursor.execute(f"""
SELECT b.osm_id, c.name, c.population
FROM osm_borders b, osm_cities c
FROM {osm_table} b, osm_cities c
WHERE b.osm_id IN ({ids}) AND ST_CONTAINS(b.way, c.center)
""".format(ids=','.join(str(x) for x in subregions.keys()))
"""
)
for rec in cursor:
sub_id = int(rec[0])
@ -110,9 +115,9 @@ def enrich_with_population_and_cities(conn, subregions):
def find_subregions(conn, region_id, next_level):
cursor = conn.cursor()
cursor.execute("""
cursor.execute(f"""
SELECT subreg.osm_id, subreg.name
FROM osm_borders reg, osm_borders subreg
FROM {osm_table} reg, {osm_table} subreg
WHERE reg.osm_id = %s AND subreg.admin_level = %s AND
ST_Contains(reg.way, subreg.way)
""",
@ -177,18 +182,17 @@ def get_best_cluster_to_join_with(small_cluster_id, dcu: DisjointClusterUnion, c
def calculate_common_border_matrix(conn, subregion_ids):
cursor = conn.cursor()
subregion_ids_str = ','.join(str(x) for x in subregion_ids)
# ST_Intersection returns 0 if its parameter is a geometry other than
# LINESTRING or MULTILINESTRING
cursor.execute("""
cursor.execute(f"""
SELECT b1.osm_id AS osm_id1, b2.osm_id AS osm_id2,
ST_Length(geography(ST_Intersection(b1.way, b2.way))) AS intersection
FROM osm_borders b1, osm_borders b2
FROM {osm_table} b1, {osm_table} b2
WHERE b1.osm_id IN ({subregion_ids_str}) AND
b2.osm_id IN ({subregion_ids_str})
AND b1.osm_id < b2.osm_id
""".format(
subregion_ids_str=','.join(str(x) for x in subregion_ids),
)
"""
)
common_border_matrix = {} # {subregion_id: { subregion_id: float} } where len > 0
for rec in cursor:
@ -243,7 +247,7 @@ def get_union_sql(subregion_ids):
assert(len(subregion_ids) > 0)
if len(subregion_ids) == 1:
return f"""
SELECT way FROM osm_borders WHERE osm_id={subregion_ids[0]}
SELECT way FROM {osm_table} WHERE osm_id={subregion_ids[0]}
"""
else:
return f"""
@ -306,11 +310,14 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
""")
for cluster_id, data in dcu.clusters.items():
subregion_ids = data['subregion_ids']
#subregion_ids_array_str = f"{{','.join(str(x) for x in subregion_ids)}}"
cluster_geometry_sql = get_union_sql(subregion_ids)
cursor.execute(f"""
INSERT INTO {autosplit_table} (osm_border_id, id, geom, city_population_thr, cluster_population_thr) VALUES (
INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom,
city_population_thr, cluster_population_thr)
VALUES (
{dcu.region_id},
{cluster_id},
'{{{','.join(str(x) for x in subregion_ids)}}}',
({cluster_geometry_sql}),
{dcu.city_population_thr},
{dcu.cluster_population_thr}
@ -319,96 +326,6 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
conn.commit()
def prepare_bulk_split():
need_split = [
# large region name, admin_level (2 in most cases), admin_level to split'n'merge, into subregions of what admin_level
('Germany', 2, 4, 6), # Half of the country is covered by units of AL=5
('Metropolitan France', 3, 4, 6),
('Spain', 2, 4, 6),
('Portugal', 2, 4, 6),
('Belgium', 2, 4, 6),
('Italy', 2, 4, 6),
('Switzerland', 2, 2, 4), # has admin_level=5
('Austria', 2, 4, 6),
('Poland', 2, 4, 6), # 380(!) of AL=6
('Czechia', 2, 6, 7),
('Ukraine', 2, 4, 6), # should merge back to region=4 level clusters
('United Kingdom', 2, 5, 6), # whole country is divided by level 4; level 5 is necessary but not comprehensive
('Denmark', 2, 4, 7),
('Norway', 2, 4, 7),
('Sweden', 2, 4, 7), # though division by level 4 is currently ideal
('Finland', 2, 6, 7), # though division by level 6 is currently ideal
('Estonia', 2, 2, 6),
('Latvia', 2, 4, 6), # the whole country takes 56Mb, all 6-level units should merge into 4-level clusters
('Lithuania', 2, 2, 4), # now Lithuania has 2 mwms of size 60Mb each
('Belarus', 2, 2, 4), # 6 regions + Minsk city. Would it be merged with the region?
('Slovakia', 2, 2, 4), # there are no subregions 5, 6, 7. Must leave all 8 4-level regions
('Hungary', 2, 5, 6),
#('Slovenia', 2, 2, 8), # no levels 3,4,5,6; level 7 incomplete.
('Croatia', 2, 2, 6),
('Bosnia and Herzegovina', 2, 2, 4), # other levels - 5, 6, 7 - are incomplete.
('Serbia', 2, 4, 6),
('Romania', 2, 2, 4),
('Bulgaria', 2, 2, 4),
('Greece', 2, 4, 5), # has 7 4-level regions, must merge 5-level to them again
('Ireland', 2, 5, 6), # 5-level don't cover the whole country! Still...
('Turkey', 2, 3, 4),
]
cursor = conn.cursor()
regions_subset = need_split # [x for x in need_split if x[0] in ('Norway',)]
#cursor.execute("UPDATE osm_borders SET need_split=false WHERE need_split=true")
#cursor.execute("UPDATE osm_borders SET parent=null WHERE parent is not null")
for country_name, country_level, split_level, lower_level in regions_subset:
print(f"start {country_name}")
cursor.execute(f"""
SELECT osm_id FROM osm_borders
WHERE osm_id < 0 AND admin_level={country_level} AND name=%s
""", (country_name,))
country_border_id = None
for rec in cursor:
assert (not country_border_id), f"more than one country {country_name}"
country_border_id = int(rec[0])
cursor.execute(f"""
UPDATE osm_borders b
SET need_split=true,
next_admin_level={lower_level},
parent = {country_border_id}
WHERE parent IS NULL
AND osm_id < 0 AND admin_level={split_level} AND ST_Contains(
(SELECT way FROM osm_borders WHERE osm_id={country_border_id}),
b.way
)""", (country_name,))
cursor.execute(f"""
UPDATE osm_borders b
SET parent = (SELECT osm_id FROM osm_borders
WHERE osm_id < 0 AND admin_level={split_level} AND ST_Contains(way, b.way)
AND osm_id != -72639 -- crunch to exclude double Crimea region
)
WHERE parent IS NULL
AND osm_id < 0 and admin_level={lower_level} AND ST_Contains(
(SELECT way FROM osm_borders WHERE admin_level={country_level} AND name=%s),
b.way
)""",
(country_name,))
conn.commit()
def process_ready_to_split(conn):
cursor = conn.cursor()
cursor.execute(
f"""SELECT osm_id
FROM osm_borders
WHERE need_split
-- AND osm_id IN (-8654) -- crunch to restrict the whole process to some regions
-- AND osm_id < -51701 -- crunch to not process what has been already processed
ORDER BY osm_id DESC
"""
)
for rec in cursor:
region_id = int(rec[0])
split_region(region_id)
def get_region_and_country_names(conn, region_id):
#if region_id != -1574364: return
cursor = conn.cursor()
@ -416,11 +333,11 @@ def get_region_and_country_names(conn, region_id):
cursor.execute(
f"""SELECT name,
(SELECT name
FROM osm_borders
WHERE osm_id<0 AND admin_level=2 AND ST_contains(way, b1.way)
FROM {osm_table}
WHERE admin_level = 2 AND ST_contains(way, b1.way)
) AS country_name
FROM osm_borders b1
WHERE osm_id={region_id}
WHERE osm_id = {region_id}
AND b1.osm_id NOT IN (-9086712) -- crunch, stub to exclude incorrect subregions
"""
)
@ -429,8 +346,8 @@ def get_region_and_country_names(conn, region_id):
conn.rollback()
cursor.execute(
f"""SELECT name
FROM osm_borders b1
WHERE osm_id={region_id}
FROM {osm_table} b1
WHERE osm_id = {region_id}
"""
)
region_name = cursor.fetchone()[0]
@ -465,7 +382,6 @@ def save_splitting(dcu: DisjointClusterUnion, conn,
save_splitting_to_file(conn, dcu, filename_prefix)
#PREFIX = ''
GENERATE_ALL_POLY=False
FOLDER='split_results'
#CITY_POPULATION_THR = 500000
@ -473,16 +389,6 @@ FOLDER='split_results'
if __name__ == '__main__':
conn = psycopg2.connect("dbname=az_gis3")
prepare_bulk_split()
import sys; sys.exit()
process_ready_to_split(conn)
#with open('splitting-162050.json') as f:
import sys; sys.exit()
# clusters = json.load(f)
#make_polys(clusters)
#import sys; sys.exit()
PREFIX = "UBavaria"
CITY_POPULATION_THR = 500000

View file

@ -19,10 +19,11 @@ from flask_compress import Compress
import psycopg2
import config
from auto_split import prepare_bulk_split, split_region
from auto_split import split_region
from countries_structure import (
CountryStructureException,
create_countries_initial_structure,
get_osm_border_name_by_osm_id,
)
try:
@ -358,7 +359,7 @@ def get_parent_region_id(region_id):
""", (region_id,)
)
rec = cursor.fetchone()
parent_id = int(rec[0]) if rec[0] is not None else None
parent_id = int(rec[0]) if rec and rec[0] is not None else None
return parent_id
def get_child_region_ids(region_id):
@ -381,7 +382,7 @@ def join_to_parent():
region_id = int(request.args.get('id'))
parent_id = get_parent_region_id(region_id)
if not parent_id:
return jsonify(status=f'Region {region_id} has no parent')
return jsonify(status=f'Region {region_id} does not exist or has no parent')
cursor = g.conn.cursor()
descendants = [[parent_id]] # regions ordered by hierarchical level
@ -686,13 +687,13 @@ def get_clusters_one(region_id, next_level, thresholds):
"""
splitting_sql_params = (region_id,) + thresholds
cursor.execute(f"""
SELECT id FROM {autosplit_table}
SELECT 1 FROM {autosplit_table}
WHERE {where_clause}
""", splitting_sql_params)
if cursor.rowcount == 0:
split_region(g.conn, region_id, next_level, thresholds)
cursor.execute(f"""
SELECT id, ST_AsGeoJSON(ST_SimplifyPreserveTopology(geom, 0.01)) as way
SELECT subregion_ids[1], ST_AsGeoJSON(ST_SimplifyPreserveTopology(geom, 0.01)) as way
FROM {autosplit_table}
WHERE {where_clause}
""", splitting_sql_params)
@ -781,7 +782,7 @@ def divide_into_clusters(region_ids, next_level, thresholds):
"""
splitting_sql_params = (region_id,) + thresholds
cursor.execute(f"""
SELECT id FROM {autosplit_table}
SELECT 1 FROM {autosplit_table}
WHERE {where_clause}
""", splitting_sql_params)
if cursor.rowcount == 0:
@ -790,19 +791,27 @@ def divide_into_clusters(region_ids, next_level, thresholds):
free_id = get_free_id()
counter = 0
cursor.execute(f"""
SELECT id
SELECT subregion_ids
FROM {autosplit_table} WHERE {where_clause}
""", splitting_sql_params)
if cursor.rowcount == 1:
continue
for rec in cursor:
cluster_id = rec[0]
counter += 1
name = f"{base_name}_{counter}"
subregion_ids = rec[0]
cluster_id = subregion_ids[0]
if len(subregion_ids) == 1:
subregion_id = cluster_id
name = get_osm_border_name_by_osm_id(g.conn, subregion_id)
else:
counter += 1
free_id -= 1
subregion_id = free_id
name = f"{base_name}_{counter}"
insert_cursor.execute(f"""
INSERT INTO {table} (id, name, parent_id, geom, modified, count_k)
SELECT {free_id}, '{name}', osm_border_id, geom, now(), -1
FROM {autosplit_table} WHERE id = %s AND {where_clause}
""", (cluster_id,) + splitting_sql_params)
free_id -= 1
SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1
FROM {autosplit_table} WHERE subregion_ids[1] = %s AND {where_clause}
""", (name, cluster_id,) + splitting_sql_params)
g.conn.commit()
return jsonify(status='ok')

View file

@ -314,7 +314,7 @@ def _make_country_structure(conn, country_osm_id):
names = {} # osm_id => osm name
parents = {} # osm_id => parent_osm_id
country_name = _get_osm_border_name_by_osm_id(conn, country_osm_id)
country_name = get_osm_border_name_by_osm_id(conn, country_osm_id)
names[country_osm_id] = country_name
parents[country_osm_id] = None
@ -353,7 +353,7 @@ def create_countries_initial_structure(conn):
_make_country_structure(conn, rec[0])
conn.commit()
def _get_osm_border_name_by_osm_id(conn, osm_id):
def get_osm_border_name_by_osm_id(conn, osm_id):
cursor = conn.cursor()
cursor.execute(f"""
SELECT name FROM {osm_table}