Some refactoring due to code review

This commit is contained in:
Alexey Zakharenkov 2020-10-27 09:52:28 +03:00
parent 097e158d69
commit e758c7991a
5 changed files with 192 additions and 213 deletions

View file

@ -1,9 +1,6 @@
import itertools
import json
from collections import defaultdict
import psycopg2
from config import (
AUTOSPLIT_TABLE as autosplit_table,
OSM_TABLE as osm_table,
@ -122,7 +119,7 @@ def get_best_cluster_to_join_with(small_cluster_id,
def calculate_common_border_matrix(conn, subregion_ids):
cursor = conn.cursor()
subregion_ids_str = ','.join(str(x) for x in subregion_ids)
# ST_Intersection returns 0 if its parameter is a geometry other than
# ST_Length returns 0 if its parameter is a geometry other than
# LINESTRING or MULTILINESTRING
cursor.execute(f"""
SELECT b1.osm_id AS osm_id1, b2.osm_id AS osm_id2,
@ -145,15 +142,13 @@ def calculate_common_border_matrix(conn, subregion_ids):
return common_border_matrix
def find_golden_splitting(conn, border_id, next_level,
country_region_name, mwm_size_thr):
def find_golden_splitting(conn, border_id, next_level, mwm_size_thr):
subregions = get_subregions_info(conn, border_id, osm_table,
next_level, need_cities=True)
if not subregions:
return
dcu = DisjointClusterUnion(border_id, subregions, mwm_size_thr)
#save_splitting_to_file(dcu, f'all_{country_region_name}')
all_subregion_ids = dcu.get_all_subregion_ids()
common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids)
@ -189,55 +184,9 @@ def get_union_sql(subregion_ids):
"""
def get_geojson(conn, union_sql):
    """Return the GeoJSON text for the geometry selected by ``union_sql``.

    ``union_sql`` is embedded as a scalar subquery inside ``ST_AsGeoJSON``;
    the first column of the first result row is returned.
    """
    query = f"""SELECT ST_AsGeoJSON(({union_sql}))"""
    cur = conn.cursor()
    cur.execute(query)
    first_row = cur.fetchone()
    return first_row[0]
def write_polygons_to_poly(file, polygons, name_prefix):
    """Write ``polygons`` to ``file`` as a sequence of .poly sections.

    Each ring becomes one section: a header line, one tab-separated
    coordinate line per point (exponent notation) and a closing ``END``.
    The first ring of a polygon is the outer ring; every further ring is
    a hole, marked with a leading ``!`` and a negated sequence number.
    The sequence number starts at 1 and runs across all rings of all
    polygons.
    """
    ring_number = 1
    for polygon in polygons:
        for ring_index, ring in enumerate(polygon):
            if ring_index == 0:
                header = f"{name_prefix}_{ring_number}\n"
            else:
                header = f"!{name_prefix}_{-ring_number}\n"
            file.write(header)
            ring_number += 1
            for point in ring:
                file.write('\t{:E}\t{:E}\n'.format(point[0], point[1]))
            file.write('END\n')
def save_splitting_to_file(conn, dcu: DisjointClusterUnion, filename_prefix=None):
    """Dump the clusters of ``dcu`` to files; may be used for debugging.

    Writes ``<FOLDER>/<filename_prefix>.poly`` containing the rings of
    every cluster and ``<FOLDER>/<filename_prefix>-splitting.json`` with
    ``dcu.clusters``. When ``GENERATE_ALL_POLY`` is enabled, a separate
    .poly file is also written for each cluster.

    :param conn: database connection used to compute cluster geometries
    :param dcu: the splitting whose ``clusters`` mapping is saved
    :param filename_prefix: base name of the generated files
    """
    import os  # local import: the module does not import os at file level

    GENERATE_ALL_POLY = False
    FOLDER = 'split_results'

    # Create the output folder on first use instead of failing in open().
    os.makedirs(FOLDER, exist_ok=True)

    poly_path = os.path.join(FOLDER, f"{filename_prefix}.poly")
    with open(poly_path, 'w', encoding='utf-8') as poly_file:
        poly_file.write(f"{filename_prefix}\n")
        for cluster_id, data in dcu.clusters.items():
            subregion_ids = data['subregion_ids']
            cluster_geometry_sql = get_union_sql(subregion_ids)
            geojson = get_geojson(conn, cluster_geometry_sql)
            geometry = json.loads(geojson)
            # Normalize a Polygon to a one-element list of polygons.
            polygons = ([geometry['coordinates']]
                        if geometry['type'] == 'Polygon'
                        else geometry['coordinates'])
            name_prefix = f"{filename_prefix}_{abs(cluster_id)}"
            write_polygons_to_poly(poly_file, polygons, name_prefix)
            if GENERATE_ALL_POLY:
                cluster_path = os.path.join(
                    FOLDER, f"{filename_prefix}{cluster_id}.poly")
                with open(cluster_path, 'w', encoding='utf-8') as f:
                    # The header must end with a newline, otherwise it
                    # runs into the name of the first section.
                    f.write(f"{filename_prefix}_{cluster_id}\n")
                    write_polygons_to_poly(f, polygons, name_prefix)
                    f.write('END\n')
        poly_file.write('END\n')

    json_path = os.path.join(FOLDER, f"{filename_prefix}-splitting.json")
    # ensure_ascii=False emits raw non-ASCII names, so pin the encoding.
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(dcu.clusters, f, ensure_ascii=False, indent=2)
def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
cursor = conn.cursor()
# remove previous splitting of the region
# Remove previous splitting of the region
cursor.execute(f"""
DELETE FROM {autosplit_table}
WHERE osm_border_id = {dcu.region_id}
@ -261,53 +210,12 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
conn.commit()
def get_region_and_country_names(conn, region_id):
    """Return ``(region_name, country_name)`` for the border ``region_id``.

    The country is the ``admin_level = 2`` border whose geometry contains
    the region. If the database reports a cardinality violation (more
    than one containing country), the transaction is rolled back, only
    the region name is fetched and ``country_name`` is ``None``.

    :param conn: database connection
    :param region_id: ``osm_id`` of the region
    """
    cursor = conn.cursor()
    try:
        # -9086712 is a stub to exclude an incorrect subregion.
        # The id is bound as a query parameter, and the same table is
        # used for the outer query as for the subquery and the fallback.
        cursor.execute(f"""
            SELECT name,
                   (SELECT name
                    FROM {osm_table}
                    WHERE admin_level = 2
                      AND ST_Contains(way, b1.way)) AS country_name
            FROM {osm_table} b1
            WHERE osm_id = %s
              AND b1.osm_id NOT IN (-9086712)
            """, (region_id,)
        )
        region_name, country_name = cursor.fetchone()
    except psycopg2.errors.CardinalityViolation:
        # The failed statement aborted the transaction; reset it first.
        conn.rollback()
        cursor.execute(f"""
            SELECT name
            FROM {osm_table} b1
            WHERE osm_id = %s
            """, (region_id,)
        )
        region_name = cursor.fetchone()[0]
        country_name = None
        print(f"Many countries for region '{region_name}' id={region_id}")
    return region_name, country_name
def split_region(conn, region_id, next_level,
mwm_size_thr,
save_to_files=False):
region_name, country_name = get_region_and_country_names(conn, region_id)
region_name = region_name.replace('/', '|')
country_region_name = f"{country_name}_{region_name}" if country_name else region_name
dcu = find_golden_splitting(conn, region_id, next_level,
country_region_name, mwm_size_thr)
def split_region(conn, region_id, next_level, mwm_size_thr):
dcu = find_golden_splitting(conn, region_id, next_level, mwm_size_thr)
if dcu is None:
return
save_splitting(conn, dcu, save_to_files, country_region_name)
def save_splitting(conn, dcu: DisjointClusterUnion,
save_to_files=None, country_region_name=None):
save_splitting_to_db(conn, dcu)
if save_to_files:
print(f"Saving {country_region_name}")
filename_prefix = f"{country_region_name}-{dcu.city_population_thr}"
save_splitting_to_file(conn, dcu, filename_prefix)
## May need to debug
#from auto_split_debug import save_splitting_to_file
#save_splitting_to_file(conn, dcu)

View file

@ -0,0 +1,63 @@
import json
import os
from auto_split import (
DisjointClusterUnion,
get_union_sql,
)
from countries_structure import (
get_region_full_name,
)
GENERATE_ALL_POLY = False
FOLDER = 'split_results'
def save_splitting_to_file(conn, dcu: DisjointClusterUnion):
    """Dump the clusters of ``dcu`` to files under ``FOLDER`` (debug aid).

    Writes ``<prefix>.poly`` containing the rings of every cluster and
    ``<prefix>-splitting.json`` with ``dcu.clusters``; when
    ``GENERATE_ALL_POLY`` is set, also one .poly file per cluster. The
    prefix is the full name of the region followed by ``dcu.mwm_size_thr``.

    :param conn: database connection used for names and geometries
    :param dcu: the splitting whose ``clusters`` mapping is saved
    """
    os.makedirs(FOLDER, exist_ok=True)

    # A '/' inside a region name would be taken as a path separator.
    region_full_name = get_region_full_name(conn, dcu.region_id).replace('/', '|')
    filename_prefix = f"{region_full_name}-{dcu.mwm_size_thr}"

    poly_path = os.path.join(FOLDER, f"{filename_prefix}.poly")
    with open(poly_path, 'w', encoding='utf-8') as poly_file:
        poly_file.write(f"{filename_prefix}\n")
        for cluster_id, data in dcu.clusters.items():
            subregion_ids = data['subregion_ids']
            cluster_geometry_sql = get_union_sql(subregion_ids)
            geojson = get_geojson(conn, cluster_geometry_sql)
            geometry = json.loads(geojson)
            # Normalize a Polygon to a one-element list of polygons.
            polygons = ([geometry['coordinates']]
                        if geometry['type'] == 'Polygon'
                        else geometry['coordinates'])
            name_prefix = f"{filename_prefix}_{abs(cluster_id)}"
            write_polygons_to_poly(poly_file, polygons, name_prefix)
            if GENERATE_ALL_POLY:
                cluster_path = os.path.join(
                    FOLDER, f"{filename_prefix}{cluster_id}.poly")
                with open(cluster_path, 'w', encoding='utf-8') as f:
                    # The header must end with a newline, otherwise it
                    # runs into the name of the first section.
                    f.write(f"{filename_prefix}_{cluster_id}\n")
                    write_polygons_to_poly(f, polygons, name_prefix)
                    f.write('END\n')
        poly_file.write('END\n')

    json_path = os.path.join(FOLDER, f"{filename_prefix}-splitting.json")
    # ensure_ascii=False emits raw non-ASCII names, so pin the encoding.
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(dcu.clusters, f, ensure_ascii=False, indent=2)
def get_geojson(conn, union_sql):
    """Fetch the GeoJSON representation of the geometry from ``union_sql``.

    The SQL is wrapped as a scalar subquery in ``ST_AsGeoJSON``; the value
    of the single selected column in the first row is returned.
    """
    sql = f"""SELECT ST_AsGeoJSON(({union_sql}))"""
    db_cursor = conn.cursor()
    db_cursor.execute(sql)
    return db_cursor.fetchone()[0]
def write_polygons_to_poly(file, polygons, name_prefix):
    """Emit every ring of ``polygons`` to ``file`` as a .poly section.

    A section is a header line, the ring's points as tab-separated
    values in exponent notation, and ``END``. Within a polygon the first
    ring is the outer one; the following rings are holes whose header
    starts with ``!`` and carries the negated counter. The counter is
    shared by all rings of all polygons and starts at 1.
    """
    counter = 1
    for rings in polygons:
        is_outer = True
        for ring in rings:
            if is_outer:
                section_name = f"{name_prefix}_{counter}\n"
            else:
                section_name = f"!{name_prefix}_{-counter}\n"
            file.write(section_name)
            counter += 1
            for xy in ring:
                file.write('\t{:E}\t{:E}\n'.format(xy[0], xy[1]))
            file.write('END\n')
            is_outer = False

View file

@ -5,7 +5,6 @@ import re
import sys, traceback
import zipfile
from unidecode import unidecode
from queue import Queue
from flask import (
Flask, g,
@ -24,6 +23,10 @@ from countries_structure import (
CountryStructureException,
create_countries_initial_structure,
get_osm_border_name_by_osm_id,
get_region_country,
get_region_full_name,
get_similar_regions,
is_administrative_region,
)
from subregions import (
get_subregions_info,
@ -116,7 +119,7 @@ def fetch_borders(**kwargs):
borders = []
for rec in cur:
region_id = rec[8]
country_id, country_name = get_region_country(region_id)
country_id, country_name = get_region_country(g.conn, region_id)
props = { 'name': rec[0] or '', 'nodes': rec[2], 'modified': rec[3],
'disabled': rec[4], 'count_k': rec[5],
'comment': rec[6],
@ -508,103 +511,6 @@ def update_comment():
g.conn.commit()
return jsonify(status='ok')
def is_administrative_region(region_id):
    """Return True if ``region_id`` occurs as ``osm_id`` in the OSM table."""
    osm_table = config.OSM_TABLE
    query = f"""
        SELECT count(1) FROM {osm_table} WHERE osm_id = %s
        """
    db_cursor = g.conn.cursor()
    db_cursor.execute(query, (region_id,))
    return db_cursor.fetchone()[0] > 0
def find_osm_child_regions(region_id):
    """List the children of ``region_id`` that are also in the OSM table.

    Each child is a dict with integer ``id`` and ``admin_level`` values.
    """
    table = config.TABLE
    osm_table = config.OSM_TABLE
    db_cursor = g.conn.cursor()
    db_cursor.execute(f"""
        SELECT c.id, oc.admin_level
        FROM {table} c, {table} p, {osm_table} oc
        WHERE p.id = c.parent_id AND c.id = oc.osm_id
            AND p.id = %s
        """, (region_id,)
    )
    return [
        {'id': int(child_id), 'admin_level': int(level)}
        for child_id, level in db_cursor
    ]
def is_leaf(region_id):
    """Return True if no row of the borders table has this parent_id."""
    db_cursor = g.conn.cursor()
    db_cursor.execute(f"""
        SELECT count(1)
        FROM {config.TABLE}
        WHERE parent_id = %s
        """, (region_id,)
    )
    children_count = db_cursor.fetchone()[0]
    return children_count == 0
def get_region_country(region_id):
    """Return the topmost ancestor of the region as an (id, name) tuple.

    The result is the region itself when it has no parent.
    """
    # get_predecessors() lists the chain from the region up to the root.
    *_lower_levels, topmost = get_predecessors(region_id)
    return topmost
def get_predecessors(region_id):
    """Return the list of (id, name)-tuples of all predecessors,
    starting from the very region_id.

    The chain is followed through ``parent_id`` until a row without a
    parent is reached.

    :raises Exception: if some id on the chain has no row in the table
    """
    predecessors = []
    table = config.TABLE
    cursor = g.conn.cursor()
    while True:
        # Bind the id as a parameter, like the other queries here do,
        # instead of formatting it into the SQL text.
        cursor.execute(f"""
            SELECT id, name, parent_id
            FROM {table} WHERE id = %s
            """, (region_id,)
        )
        rec = cursor.fetchone()
        if not rec:
            raise Exception(f"No record in '{table}' table with id = {region_id}")
        predecessors.append(rec[0:2])
        parent_id = rec[2]
        if not parent_id:
            break
        region_id = parent_id
    return predecessors
def get_region_full_name(region_id):
    """Join the names of the region's ancestry with ``_``, topmost first."""
    # The chain is returned bottom-up; [::-1] puts the topmost name first.
    names_bottom_up = [name for _id, name in get_predecessors(region_id)]
    return '_'.join(names_bottom_up[::-1])
def get_similar_regions(region_id, only_leaves=False):
    """Collect ids of regions with the same admin_level in the same country.

    Prerequisite: is_administrative_region(region_id) is True.
    The hierarchy is walked breadth-first from the country downwards;
    with ``only_leaves`` set, regions that have children are dropped.
    """
    db_cursor = g.conn.cursor()
    db_cursor.execute(f"""
        SELECT admin_level FROM {config.OSM_TABLE}
        WHERE osm_id = %s""", (region_id,)
    )
    target_level = int(db_cursor.fetchone()[0])

    country_id, _country_name = get_region_country(region_id)
    pending = Queue()
    pending.put({'id': country_id, 'admin_level': 2})

    found_ids = []
    while not pending.empty():
        node = pending.get()
        level = node['admin_level']
        if level == target_level:
            found_ids.append(node['id'])
            continue
        if level < target_level:
            # Not deep enough yet: descend into the children.
            for child in find_osm_child_regions(node['id']):
                pending.put(child)

    if not only_leaves:
        return found_ids
    return [r_id for r_id in found_ids if is_leaf(r_id)]
@app.route('/divpreview')
def divide_preview():
@ -613,13 +519,13 @@ def divide_preview():
next_level = int(request.args.get('next_level'))
except ValueError:
return jsonify(status="Not a number in next level")
is_admin_region = is_administrative_region(region_id)
is_admin_region = is_administrative_region(g.conn, region_id)
region_ids = [region_id]
apply_to_similar = (request.args.get('apply_to_similar') == 'true')
if apply_to_similar:
if not is_admin_region:
return jsonify(status="Could not use 'apply to similar' for non-administrative regions")
region_ids = get_similar_regions(region_id, only_leaves=True)
region_ids = get_similar_regions(g.conn, region_id, only_leaves=True)
auto_divide = (request.args.get('auto_divide') == 'true')
if auto_divide:
if not is_admin_region:
@ -730,13 +636,13 @@ def divide():
next_level = int(request.args.get('next_level'))
except ValueError:
return jsonify(status="Not a number in next level")
is_admin_region = is_administrative_region(region_id)
is_admin_region = is_administrative_region(g.conn, region_id)
apply_to_similar = (request.args.get('apply_to_similar') == 'true')
region_ids = [region_id]
if apply_to_similar:
if not is_admin_region:
return jsonify(status="Could not use 'apply to similar' for non-administrative regions")
region_ids = get_similar_regions(region_id, only_leaves=True)
region_ids = get_similar_regions(g.conn, region_id, only_leaves=True)
auto_divide = (request.args.get('auto_divide') == 'true')
if auto_divide:
if not is_admin_region:
@ -763,7 +669,7 @@ def divide_into_subregions_one(region_id, next_level):
subregions = get_subregions_info(g.conn, region_id, table,
next_level, need_cities=False)
cursor = g.conn.cursor()
is_admin_region = is_administrative_region(region_id)
is_admin_region = is_administrative_region(g.conn, region_id)
if is_admin_region:
for subregion_id, data in subregions.items():
cursor.execute(f"""
@ -1520,7 +1426,7 @@ def export_poly():
else geometry['coordinates'])
# sanitize name, src: http://stackoverflow.com/a/295466/1297601
name = border['properties']['name'] or str(-border['properties']['id'])
fullname = get_region_full_name(border['properties']['id'])
fullname = get_region_full_name(g.conn, border['properties']['id'])
filename = unidecode(fullname)
filename = re.sub('[^\w _-]', '', filename).strip()
filename = filename + '.poly'

View file

@ -1,18 +1,16 @@
import itertools
from queue import Queue
import config
from config import (
TABLE as table,
OSM_TABLE as osm_table
)
from subregions import (
get_subregions_info,
update_border_mwm_size_estimation,
)
table = config.TABLE
osm_table = config.OSM_TABLE
autosplit_table = config.AUTOSPLIT_TABLE
# admin_level => list of countries which should be initially divided at one admin level
unilevel_countries = {
2: [
@ -361,6 +359,7 @@ def create_countries_initial_structure(conn):
conn.commit()
return warnings
def get_osm_border_name_by_osm_id(conn, osm_id):
cursor = conn.cursor()
cursor.execute(f"""
@ -386,3 +385,104 @@ def _get_country_osm_id_by_name(conn, name):
if not rec:
raise CountryStructureException(f'Not found country "{name}"')
return int(rec[0])
def is_administrative_region(conn, region_id):
    """Return True if ``region_id`` occurs as ``osm_id`` in the OSM table."""
    query = f"""
        SELECT count(1) FROM {osm_table} WHERE osm_id = %s
        """
    db_cursor = conn.cursor()
    db_cursor.execute(query, (region_id,))
    return db_cursor.fetchone()[0] > 0
def find_osm_child_regions(conn, region_id):
    """List the children of ``region_id`` that are also in the OSM table.

    Each child is a dict with integer ``id`` and ``admin_level`` values.
    """
    db_cursor = conn.cursor()
    db_cursor.execute(f"""
        SELECT c.id, oc.admin_level
        FROM {table} c, {table} p, {osm_table} oc
        WHERE p.id = c.parent_id AND c.id = oc.osm_id
            AND p.id = %s
        """, (region_id,)
    )
    return [
        {'id': int(child_id), 'admin_level': int(level)}
        for child_id, level in db_cursor
    ]
def is_leaf(conn, region_id):
    """Return True if no row of the borders table has this parent_id."""
    db_cursor = conn.cursor()
    db_cursor.execute(f"""
        SELECT count(1)
        FROM {table}
        WHERE parent_id = %s
        """, (region_id,)
    )
    children_count = db_cursor.fetchone()[0]
    return children_count == 0
def get_region_country(conn, region_id):
    """Return the topmost ancestor of the region as an (id, name) tuple.

    The result is the region itself when it has no parent.
    """
    # get_predecessors() lists the chain from the region up to the root.
    *_lower_levels, topmost = get_predecessors(conn, region_id)
    return topmost
def get_predecessors(conn, region_id):
    """Return the list of (id, name)-tuples of all predecessors,
    starting from the very region_id.

    The chain is followed through ``parent_id`` until a row without a
    parent is reached.

    :param conn: database connection
    :param region_id: id of the region to start from
    :raises Exception: if some id on the chain has no row in the table
    """
    predecessors = []
    cursor = conn.cursor()
    while True:
        # Bind the id as a parameter, like the other queries in this
        # module do, instead of formatting it into the SQL text.
        cursor.execute(f"""
            SELECT id, name, parent_id
            FROM {table} WHERE id = %s
            """, (region_id,)
        )
        rec = cursor.fetchone()
        if not rec:
            raise Exception(f"No record in '{table}' table with id = {region_id}")
        predecessors.append(rec[0:2])
        parent_id = rec[2]
        if not parent_id:
            break
        region_id = parent_id
    return predecessors
def get_region_full_name(conn, region_id):
    """Join the names of the region's ancestry with ``_``, topmost first."""
    # The chain is returned bottom-up; [::-1] puts the topmost name first.
    names_bottom_up = [name for _id, name in get_predecessors(conn, region_id)]
    return '_'.join(names_bottom_up[::-1])
def get_similar_regions(conn, region_id, only_leaves=False):
    """Returns ids of regions of the same admin_level in the same country.
    Prerequisite: is_administrative_region(conn, region_id) is True.

    The hierarchy is walked breadth-first from the country downwards.

    :param conn: database connection
    :param region_id: ``osm_id`` of the reference region
    :param only_leaves: if True, keep only regions without children
    """
    cursor = conn.cursor()
    cursor.execute(f"""
        SELECT admin_level FROM {osm_table}
        WHERE osm_id = %s""", (region_id,)
    )
    admin_level = int(cursor.fetchone()[0])
    country_id, country_name = get_region_country(conn, region_id)
    q = Queue()
    q.put({'id': country_id, 'admin_level': 2})
    similar_region_ids = []
    while not q.empty():
        item = q.get()
        if item['admin_level'] == admin_level:
            similar_region_ids.append(item['id'])
        elif item['admin_level'] < admin_level:
            # find_osm_child_regions() takes the connection first;
            # calling it with the id alone raises TypeError.
            children = find_osm_child_regions(conn, item['id'])
            for ch in children:
                q.put(ch)
    if only_leaves:
        similar_region_ids = [r_id for r_id in similar_region_ids
                              if is_leaf(conn, r_id)]
    return similar_region_ids

View file

@ -60,6 +60,8 @@ def _get_subregions_basic_info(conn, region_id, region_table,
def _add_population_data(conn, subregions, need_cities):
if not subregions:
return
cursor = conn.cursor()
subregion_ids = ','.join(str(x) for x in subregions.keys())
cursor.execute(f"""