Merge pull request #8 from alexey-zakharenkov/improve-sustainability
Improve sustainability
This commit is contained in:
commit
360d52b7ac
10 changed files with 301 additions and 273 deletions
|
@ -38,6 +38,7 @@ CREATE TABLE splitting (
|
|||
subregion_ids BIGINT[] NOT NULL,
|
||||
mwm_size_est REAL NOT NULL,
|
||||
mwm_size_thr INTEGER NOT NULL, -- mwm size threshold in Kb, 4-bytes INTEGER is enough
|
||||
next_level INTEGER NOT NULL,
|
||||
geom geometry NOT NULL
|
||||
);
|
||||
CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr);
|
||||
CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr, next_level);
|
||||
|
|
|
@ -12,9 +12,12 @@ from subregions import get_subregions_info
|
|||
class DisjointClusterUnion:
|
||||
"""Disjoint set union implementation for administrative subregions."""
|
||||
|
||||
def __init__(self, region_id, subregions, mwm_size_thr=None):
|
||||
def __init__(self, region_id, subregions, next_level, mwm_size_thr=None):
|
||||
assert all(s_data['mwm_size_est'] is not None
|
||||
for s_data in subregions.values())
|
||||
self.region_id = region_id
|
||||
self.subregions = subregions
|
||||
self.next_level = next_level
|
||||
self.mwm_size_thr = mwm_size_thr or MWM_SIZE_THRESHOLD
|
||||
self.representatives = {sub_id: sub_id for sub_id in subregions}
|
||||
# A cluster is one or more subregions with common borders
|
||||
|
@ -84,7 +87,8 @@ def get_best_cluster_to_join_with(small_cluster_id,
|
|||
for subregion_id in subregion_ids:
|
||||
for other_subregion_id, length in common_border_matrix[subregion_id].items():
|
||||
other_cluster_id = dcu.find_cluster(other_subregion_id)
|
||||
if other_cluster_id != small_cluster_id:
|
||||
if (other_cluster_id != small_cluster_id and
|
||||
not dcu.clusters[other_cluster_id]['finished']):
|
||||
common_borders[other_cluster_id] += length
|
||||
if not common_borders:
|
||||
return None
|
||||
|
@ -144,8 +148,10 @@ def find_golden_splitting(conn, border_id, next_level, mwm_size_thr):
|
|||
next_level, need_cities=True)
|
||||
if not subregions:
|
||||
return
|
||||
if any(s_data['mwm_size_est'] is None for s_data in subregions.values()):
|
||||
return
|
||||
|
||||
dcu = DisjointClusterUnion(border_id, subregions, mwm_size_thr)
|
||||
dcu = DisjointClusterUnion(border_id, subregions, next_level, mwm_size_thr)
|
||||
all_subregion_ids = dcu.get_all_subregion_ids()
|
||||
common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids)
|
||||
|
||||
|
@ -188,6 +194,7 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
|
|||
DELETE FROM {autosplit_table}
|
||||
WHERE osm_border_id = {dcu.region_id}
|
||||
AND mwm_size_thr = {dcu.mwm_size_thr}
|
||||
AND next_level = {dcu.next_level}
|
||||
""")
|
||||
for cluster_id, data in dcu.clusters.items():
|
||||
subregion_ids = data['subregion_ids']
|
||||
|
@ -196,12 +203,13 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
|
|||
)
|
||||
cluster_geometry_sql = get_union_sql(subregion_ids)
|
||||
cursor.execute(f"""
|
||||
INSERT INTO {autosplit_table} (osm_border_id, subregion_ids,
|
||||
geom, mwm_size_thr, mwm_size_est)
|
||||
INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom,
|
||||
next_level, mwm_size_thr, mwm_size_est)
|
||||
VALUES (
|
||||
{dcu.region_id},
|
||||
'{subregion_ids_array_str}',
|
||||
({cluster_geometry_sql}),
|
||||
{dcu.next_level},
|
||||
{dcu.mwm_size_thr},
|
||||
{data['mwm_size_est']}
|
||||
)
|
||||
|
|
|
@ -218,16 +218,30 @@ def get_server_configuration():
|
|||
mwm_size_thr=config.MWM_SIZE_THRESHOLD)
|
||||
|
||||
|
||||
def prepare_sql_search_string(string):
    """Convert a user search query into a pattern for SQL LIKE/ILIKE.

    A leading '^' anchors the match at the start of the value and a trailing
    '$' anchors it at the end; each anchor character is removed from the
    pattern. Without an anchor a '%' wildcard is added on that side, so a
    plain query matches as a substring.

    :param string: raw search query; None is treated as an empty query
    :return: pattern string to be passed as a bound parameter of LIKE/ILIKE
    """
    # A missing query argument arrives as None; treat it as an empty search
    # instead of failing on .startswith().
    if string is None:
        string = ''

    if string.startswith('^'):
        string = string[1:]
    else:
        string = f"%{string}"

    if string.endswith('$'):
        string = string[:-1]
    else:
        string = f"{string}%"

    # NOTE(review): '%' and '_' typed by the user are still interpreted as
    # LIKE wildcards; they are intentionally left unescaped here to keep the
    # existing search behaviour.
    return string
|
||||
|
||||
|
||||
@app.route('/search')
|
||||
def search():
|
||||
query = request.args.get('q')
|
||||
sql_search_string = prepare_sql_search_string(query)
|
||||
|
||||
with g.conn.cursor() as cursor:
|
||||
cursor.execute(f"""
|
||||
SELECT ST_XMin(geom), ST_YMin(geom), ST_XMax(geom), ST_YMax(geom)
|
||||
FROM {config.BORDERS_TABLE}
|
||||
WHERE name ILIKE %s
|
||||
ORDER BY (ST_Area(geography(geom)))
|
||||
LIMIT 1""", (f'%{query}%',)
|
||||
LIMIT 1""", (sql_search_string,)
|
||||
)
|
||||
if cursor.rowcount > 0:
|
||||
rec = cursor.fetchone()
|
||||
|
@ -312,10 +326,10 @@ def join_borders():
|
|||
with g.conn.cursor() as cursor:
|
||||
try:
|
||||
borders_table = config.BORDERS_TABLE
|
||||
free_id = get_free_id()
|
||||
joint_id = get_free_id()
|
||||
cursor.execute(f"""
|
||||
UPDATE {borders_table}
|
||||
SET id = {free_id},
|
||||
SET id = {joint_id},
|
||||
geom = ST_Union({borders_table}.geom, b2.geom),
|
||||
mwm_size_est = {borders_table}.mwm_size_est + b2.mwm_size_est,
|
||||
count_k = -1
|
||||
|
@ -326,6 +340,26 @@ def join_borders():
|
|||
except psycopg2.Error as e:
|
||||
g.conn.rollback()
|
||||
return jsonify(status=str(e))
|
||||
|
||||
# If joint_id is the only child of its parent, then leave only parent
|
||||
parent_id = get_parent_region_id(g.conn, joint_id)
|
||||
if parent_id is not None:
|
||||
cursor.execute(f"""
|
||||
SELECT count(*) FROM {borders_table} WHERE parent_id = %s
|
||||
""", (parent_id,)
|
||||
)
|
||||
children_cnt = cursor.fetchone()[0]
|
||||
if children_cnt == 1:
|
||||
cursor.execute(f"""
|
||||
UPDATE {borders_table}
|
||||
SET mwm_size_est = (SELECT mwm_size_est
|
||||
FROM {borders_table}
|
||||
WHERE id = %s)
|
||||
WHERE id = %s
|
||||
""", (joint_id, parent_id)
|
||||
)
|
||||
cursor.execute(f"DELETE FROM {borders_table} WHERE id = %s",
|
||||
(joint_id,))
|
||||
g.conn.commit()
|
||||
return jsonify(status='ok')
|
||||
|
||||
|
@ -413,29 +447,9 @@ def find_osm_borders():
|
|||
def copy_from_osm():
|
||||
osm_id = int(request.args.get('id'))
|
||||
name = request.args.get('name')
|
||||
name_sql = f"'{name}'" if name else "'name'"
|
||||
borders_table = config.BORDERS_TABLE
|
||||
osm_table = config.OSM_TABLE
|
||||
with g.conn.cursor() as cursor:
|
||||
# Check if this id already in use
|
||||
cursor.execute(f"SELECT id FROM {borders_table} WHERE id = %s",
|
||||
(osm_id,))
|
||||
rec = cursor.fetchone()
|
||||
if rec and rec[0]:
|
||||
return jsonify(status=f"Region with id={osm_id} already exists")
|
||||
cursor.execute(f"""
|
||||
INSERT INTO {borders_table} (id, geom, name, modified, count_k)
|
||||
SELECT osm_id, way, {name_sql}, now(), -1
|
||||
FROM {osm_table}
|
||||
WHERE osm_id = %s
|
||||
""", (osm_id,)
|
||||
)
|
||||
assign_region_to_lowest_parent(osm_id)
|
||||
warnings = []
|
||||
try:
|
||||
update_border_mwm_size_estimation(g.conn, osm_id)
|
||||
except Exception as e:
|
||||
warnings.append(str(e))
|
||||
errors, warnings = copy_region_from_osm(g.conn, osm_id, name)
|
||||
if errors:
|
||||
return jsonify(status='\n'.join(errors))
|
||||
g.conn.commit()
|
||||
return jsonify(status='ok', warnings=warnings)
|
||||
|
||||
|
@ -983,7 +997,7 @@ def border():
|
|||
@app.route('/start_over')
|
||||
def start_over():
|
||||
try:
|
||||
warnings = create_countries_initial_structure(g.conn)
|
||||
create_countries_initial_structure(g.conn)
|
||||
except CountryStructureException as e:
|
||||
return jsonify(status=str(e))
|
||||
|
||||
|
@ -991,7 +1005,7 @@ def start_over():
|
|||
with g.conn.cursor() as cursor:
|
||||
cursor.execute(f"DELETE FROM {autosplit_table}")
|
||||
g.conn.commit()
|
||||
return jsonify(status='ok', warnings=warnings[:10])
|
||||
return jsonify(status='ok')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -3,13 +3,18 @@ import json
|
|||
|
||||
from flask import g, jsonify
|
||||
|
||||
import config
|
||||
from config import (
|
||||
AUTOSPLIT_TABLE as autosplit_table,
|
||||
BORDERS_TABLE as borders_table,
|
||||
OSM_TABLE as osm_table,
|
||||
)
|
||||
from auto_split import split_region
|
||||
from countries_structure import get_osm_border_name_by_osm_id
|
||||
from subregions import (
|
||||
get_parent_region_id,
|
||||
get_region_country,
|
||||
get_subregions_info,
|
||||
is_administrative_region,
|
||||
update_border_mwm_size_estimation,
|
||||
)
|
||||
|
||||
|
||||
|
@ -19,14 +24,13 @@ def geom_intersects_bbox_sql(xmin, ymin, xmax, ymax):
|
|||
|
||||
|
||||
def fetch_borders(**kwargs):
|
||||
borders_table = kwargs.get('table', config.BORDERS_TABLE)
|
||||
a_borders_table = kwargs.get('table', borders_table)
|
||||
simplify = kwargs.get('simplify', 0)
|
||||
where_clause = kwargs.get('where_clause', '1=1')
|
||||
only_leaves = kwargs.get('only_leaves', True)
|
||||
osm_table = config.OSM_TABLE
|
||||
geom = (f'ST_SimplifyPreserveTopology(geom, {simplify})'
|
||||
if simplify > 0 else 'geom')
|
||||
leaves_filter = (f""" AND id NOT IN (SELECT parent_id FROM {borders_table}
|
||||
leaves_filter = (f""" AND id NOT IN (SELECT parent_id FROM {a_borders_table}
|
||||
WHERE parent_id IS NOT NULL)"""
|
||||
if only_leaves else '')
|
||||
query = f"""
|
||||
|
@ -48,14 +52,14 @@ def fetch_borders(**kwargs):
|
|||
WHERE osm_id = t.id
|
||||
) AS admin_level,
|
||||
parent_id,
|
||||
( SELECT name FROM {borders_table}
|
||||
( SELECT name FROM {a_borders_table}
|
||||
WHERE id = t.parent_id
|
||||
) AS parent_name,
|
||||
( SELECT admin_level FROM {osm_table}
|
||||
WHERE osm_id = (SELECT parent_id FROM {borders_table} WHERE id = t.id)
|
||||
WHERE osm_id = (SELECT parent_id FROM {a_borders_table} WHERE id = t.id)
|
||||
) AS parent_admin_level,
|
||||
mwm_size_est
|
||||
FROM {borders_table} t
|
||||
FROM {a_borders_table} t
|
||||
WHERE ({where_clause}) {leaves_filter}
|
||||
) q
|
||||
ORDER BY area DESC
|
||||
|
@ -104,8 +108,6 @@ def get_subregions_for_preview(region_ids, next_level):
|
|||
|
||||
|
||||
def get_subregions_one_for_preview(region_id, next_level):
|
||||
osm_table = config.OSM_TABLE
|
||||
borders_table = config.BORDERS_TABLE
|
||||
with g.conn.cursor() as cursor:
|
||||
# We use ST_SimplifyPreserveTopology, since ST_Simplify would give NULL
|
||||
# for very little regions.
|
||||
|
@ -137,12 +139,12 @@ def get_clusters_for_preview(region_ids, next_level, thresholds):
|
|||
|
||||
|
||||
def get_clusters_for_preview_one(region_id, next_level, mwm_size_thr):
|
||||
autosplit_table = config.AUTOSPLIT_TABLE
|
||||
where_clause = f"""
|
||||
osm_border_id = %s
|
||||
AND mwm_size_thr = %s
|
||||
AND next_level = %s
|
||||
"""
|
||||
splitting_sql_params = (region_id, mwm_size_thr)
|
||||
splitting_sql_params = (region_id, mwm_size_thr, next_level)
|
||||
with g.conn.cursor() as cursor:
|
||||
cursor.execute(f"""
|
||||
SELECT 1 FROM {autosplit_table}
|
||||
|
@ -190,47 +192,54 @@ def divide_into_clusters_preview(region_ids, next_level, mwm_size_thr):
|
|||
|
||||
def divide_into_subregions(region_ids, next_level):
|
||||
for region_id in region_ids:
|
||||
divide_into_subregions_one(region_id, next_level)
|
||||
divide_region_into_subregions(g.conn, region_id, next_level)
|
||||
g.conn.commit()
|
||||
return jsonify(status='ok')
|
||||
|
||||
|
||||
def divide_into_subregions_one(region_id, next_level):
|
||||
borders_table = config.BORDERS_TABLE
|
||||
osm_table = config.OSM_TABLE
|
||||
subregions = get_subregions_info(g.conn, region_id, borders_table,
|
||||
def divide_region_into_subregions(conn, region_id, next_level):
|
||||
"""Divides a region into subregions of specified admin level.
|
||||
Returns the list of added subregion ids.
|
||||
"""
|
||||
subregions = get_subregions_info(conn, region_id, borders_table,
|
||||
next_level, need_cities=False)
|
||||
with g.conn.cursor() as cursor:
|
||||
is_admin_region = is_administrative_region(g.conn, region_id)
|
||||
if not subregions:
|
||||
return []
|
||||
with conn.cursor() as cursor:
|
||||
subregion_ids_str = ','.join(str(x) for x in subregions.keys())
|
||||
cursor.execute(f"""
|
||||
SELECT id
|
||||
FROM {borders_table}
|
||||
WHERE id IN ({subregion_ids_str})
|
||||
"""
|
||||
)
|
||||
occupied_ids = [rec[0] for rec in cursor]
|
||||
ids_to_insert = set(subregions.keys()) - set(occupied_ids)
|
||||
if not ids_to_insert:
|
||||
return []
|
||||
|
||||
is_admin_region = is_administrative_region(conn, region_id)
|
||||
|
||||
if is_admin_region:
|
||||
for subregion_id, data in subregions.items():
|
||||
cursor.execute(f"""
|
||||
INSERT INTO {borders_table}
|
||||
(id, geom, name, parent_id, modified, count_k, mwm_size_est)
|
||||
SELECT osm_id, way, name, %s, now(), -1, {data['mwm_size_est']}
|
||||
FROM {osm_table}
|
||||
WHERE osm_id = %s
|
||||
""", (region_id, subregion_id)
|
||||
)
|
||||
parent_id = region_id
|
||||
else:
|
||||
for subregion_id, data in subregions.items():
|
||||
cursor.execute(f"""
|
||||
INSERT INTO {borders_table}
|
||||
(id, geom, name, parent_id, modified, count_k, mwm_size_est)
|
||||
SELECT osm_id, way, name,
|
||||
(SELECT parent_id FROM {borders_table} WHERE id = %s),
|
||||
now(), -1, {data['mwm_size_est']}
|
||||
FROM {osm_table}
|
||||
WHERE osm_id = %s
|
||||
""", (region_id, subregion_id)
|
||||
)
|
||||
parent_id = get_parent_region_id(conn, region_id)
|
||||
|
||||
for subregion_id in ids_to_insert:
|
||||
mwm_size_est = subregions[subregion_id]['mwm_size_est']
|
||||
cursor.execute(f"""
|
||||
INSERT INTO {borders_table}
|
||||
(id, geom, name, parent_id, modified, count_k, mwm_size_est)
|
||||
SELECT osm_id, way, name, {parent_id}, now(), -1, %s
|
||||
FROM {osm_table}
|
||||
WHERE osm_id = %s""", (mwm_size_est, subregion_id,)
|
||||
)
|
||||
if not is_admin_region:
|
||||
cursor.execute(f"DELETE FROM {borders_table} WHERE id = %s", (region_id,))
|
||||
g.conn.commit()
|
||||
return ids_to_insert
|
||||
|
||||
|
||||
def divide_into_clusters(region_ids, next_level, mwm_size_thr):
|
||||
borders_table = config.BORDERS_TABLE
|
||||
autosplit_table = config.AUTOSPLIT_TABLE
|
||||
cursor = g.conn.cursor()
|
||||
insert_cursor = g.conn.cursor()
|
||||
for region_id in region_ids:
|
||||
|
@ -240,8 +249,9 @@ def divide_into_clusters(region_ids, next_level, mwm_size_thr):
|
|||
where_clause = f"""
|
||||
osm_border_id = %s
|
||||
AND mwm_size_thr = %s
|
||||
AND next_level = %s
|
||||
"""
|
||||
splitting_sql_params = (region_id, mwm_size_thr)
|
||||
splitting_sql_params = (region_id, mwm_size_thr, next_level)
|
||||
cursor.execute(f"""
|
||||
SELECT 1 FROM {autosplit_table}
|
||||
WHERE {where_clause}
|
||||
|
@ -258,46 +268,52 @@ def divide_into_clusters(region_ids, next_level, mwm_size_thr):
|
|||
""", splitting_sql_params
|
||||
)
|
||||
if cursor.rowcount == 1:
|
||||
continue
|
||||
for rec in cursor:
|
||||
subregion_ids = rec[0]
|
||||
cluster_id = subregion_ids[0]
|
||||
if len(subregion_ids) == 1:
|
||||
subregion_id = cluster_id
|
||||
name = get_osm_border_name_by_osm_id(g.conn, subregion_id)
|
||||
else:
|
||||
counter += 1
|
||||
free_id -= 1
|
||||
subregion_id = free_id
|
||||
name = f"{base_name}_{counter}"
|
||||
insert_cursor.execute(f"""
|
||||
INSERT INTO {borders_table} (id, name, parent_id, geom, modified, count_k, mwm_size_est)
|
||||
SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1, mwm_size_est
|
||||
FROM {autosplit_table} WHERE subregion_ids[1] = %s AND {where_clause}
|
||||
""", (name, cluster_id,) + splitting_sql_params
|
||||
)
|
||||
UPDATE {borders_table}
|
||||
SET modified = now(),
|
||||
mwm_size_est = (SELECT mwm_size_est
|
||||
FROM {autosplit_table}
|
||||
WHERE {where_clause})
|
||||
WHERE id = {region_id}
|
||||
""", splitting_sql_params)
|
||||
else:
|
||||
for rec in cursor:
|
||||
subregion_ids = rec[0]
|
||||
cluster_id = subregion_ids[0]
|
||||
if len(subregion_ids) == 1:
|
||||
subregion_id = cluster_id
|
||||
name = get_osm_border_name_by_osm_id(g.conn, subregion_id)
|
||||
else:
|
||||
counter += 1
|
||||
free_id -= 1
|
||||
subregion_id = free_id
|
||||
name = f"{base_name}_{counter}"
|
||||
insert_cursor.execute(f"""
|
||||
INSERT INTO {borders_table} (id, name, parent_id, geom, modified, count_k, mwm_size_est)
|
||||
SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1, mwm_size_est
|
||||
FROM {autosplit_table} WHERE subregion_ids[1] = %s AND {where_clause}
|
||||
""", (name, cluster_id,) + splitting_sql_params
|
||||
)
|
||||
g.conn.commit()
|
||||
return jsonify(status='ok')
|
||||
|
||||
|
||||
def get_free_id():
|
||||
with g.conn.cursor() as cursor:
|
||||
borders_table = config.BORDERS_TABLE
|
||||
cursor.execute(f"SELECT min(id) FROM {borders_table} WHERE id < -1000000000")
|
||||
min_id = cursor.fetchone()[0]
|
||||
free_id = min_id - 1 if min_id else -1_000_000_001
|
||||
return free_id
|
||||
|
||||
|
||||
def assign_region_to_lowest_parent(region_id):
|
||||
def assign_region_to_lowest_parent(conn, region_id):
|
||||
"""Lowest parent is the region with lowest (maximum by absolute value)
|
||||
admin_level containing given region."""
|
||||
pot_parents = find_potential_parents(region_id)
|
||||
if pot_parents:
|
||||
# potential_parents are sorted by area ascending
|
||||
parent_id = pot_parents[0]['properties']['id']
|
||||
borders_table = config.BORDERS_TABLE
|
||||
with g.conn.cursor() as cursor:
|
||||
with conn.cursor() as cursor:
|
||||
cursor.execute(f"""
|
||||
UPDATE {borders_table}
|
||||
SET parent_id = %s
|
||||
|
@ -309,7 +325,6 @@ def assign_region_to_lowest_parent(region_id):
|
|||
|
||||
|
||||
def create_or_update_region(region, free_id):
|
||||
borders_table = config.BORDERS_TABLE
|
||||
with g.conn.cursor() as cursor:
|
||||
if region['id'] < 0:
|
||||
if not free_id:
|
||||
|
@ -323,7 +338,7 @@ def create_or_update_region(region, free_id):
|
|||
""", (region_id, region['name'],
|
||||
region['disabled'], region['wkt'])
|
||||
)
|
||||
assign_region_to_lowest_parent(region_id)
|
||||
assign_region_to_lowest_parent(g.conn, region_id)
|
||||
return region_id
|
||||
else:
|
||||
cursor.execute(f"SELECT count(1) FROM {borders_table} WHERE id = %s",
|
||||
|
@ -346,8 +361,6 @@ def create_or_update_region(region, free_id):
|
|||
|
||||
|
||||
def find_potential_parents(region_id):
|
||||
borders_table = config.BORDERS_TABLE
|
||||
osm_table = config.OSM_TABLE
|
||||
p_geogr = "geography(p.geom)"
|
||||
c_geogr = "geography(c.geom)"
|
||||
query = f"""
|
||||
|
@ -380,3 +393,44 @@ def find_potential_parents(region_id):
|
|||
}
|
||||
parents.append(feature)
|
||||
return parents
|
||||
|
||||
|
||||
def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed'):
    """Copy a region from the OSM table into the borders table.

    :param conn: open DB connection; cursors are obtained via conn.cursor()
    :param region_id: osm_id of the region to copy; it becomes the id of
        the new border
    :param name: name for the new border; when empty or None, the name
        stored in the OSM table is used
    :param parent_id: an int is stored as parent_id of the new border; any
        other explicitly passed value (e.g. None) stores NULL. When the
        argument is omitted, the parent is chosen afterwards by
        assign_region_to_lowest_parent().
    :return: tuple (errors, warnings), both lists of strings. If errors is
        not empty, nothing has been inserted.
    """
    errors, warnings = [], []
    with conn.cursor() as cursor:
        # Check if this id is already in use
        cursor.execute(f"SELECT name FROM {borders_table} WHERE id = %s",
                       (region_id,))
        if cursor.rowcount > 0:
            existing_name = cursor.fetchone()[0]
            errors.append(
                f"Region with id={region_id} already exists "
                f"under name '{existing_name}'"
            )
            return errors, warnings

        # 'name' can originate from user input, so it is passed as a bound
        # parameter rather than spliced into the SQL text. COALESCE falls
        # back to the OSM name when no explicit name is given.
        name_param = name if name else None
        parent_id_param = parent_id if isinstance(parent_id, int) else None
        cursor.execute(f"""
            INSERT INTO {borders_table}
                    (id, geom, name, parent_id, modified, count_k)
              SELECT osm_id, way, COALESCE(%s, name), %s, now(), -1
              FROM {osm_table}
              WHERE osm_id = %s
            """, (name_param, parent_id_param, region_id)
        )
        # The string default is a sentinel meaning "argument omitted", which
        # is distinct from an explicit parent_id=None (no parent).
        if parent_id == 'not_passed':
            assign_region_to_lowest_parent(conn, region_id)
        try:
            update_border_mwm_size_estimation(conn, region_id)
        except Exception as e:
            # Size estimation is best-effort: report the problem to the
            # caller as a warning instead of failing the copy.
            warnings.append(str(e))
        return errors, warnings
|
||||
|
||||
|
||||
def get_osm_border_name_by_osm_id(conn, osm_id):
    """Look up the name of an OSM border by its osm_id.

    :param conn: open DB connection; a cursor is obtained via conn.cursor()
    :param osm_id: osm_id to search for in the OSM table
    :return: value of the 'name' column of the first matching row, or None
        when no row has this osm_id
    """
    params = (osm_id,)
    with conn.cursor() as cursor:
        cursor.execute(f"""
            SELECT name FROM {osm_table}
            WHERE osm_id = %s
            """, params)
        row = cursor.fetchone()
        if not row:
            return None
        return row[0]
|
||||
|
||||
|
|
|
@ -33,3 +33,9 @@ MWM_SIZE_THRESHOLD = 70*1024
|
|||
# Estimated mwm size is predicted by the 'model.pkl' with 'scaler.pkl' for X
|
||||
MWM_SIZE_PREDICTION_MODEL_PATH = '/app/data/model.pkl'
|
||||
MWM_SIZE_PREDICTION_MODEL_SCALER_PATH = '/app/data/scaler.pkl'
|
||||
MWM_SIZE_PREDICTION_MODEL_LIMITATIONS = {
|
||||
'area': 5500 * 1.5,
|
||||
'urban_pop': 3500000 * 1.5,
|
||||
'city_cnt': 32 * 1.5,
|
||||
'hamlet_cnt': 2120 * 1.5
|
||||
}
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
from borders_api_utils import (
|
||||
copy_region_from_osm,
|
||||
divide_region_into_subregions,
|
||||
get_osm_border_name_by_osm_id,
|
||||
)
|
||||
from config import (
|
||||
BORDERS_TABLE as borders_table,
|
||||
OSM_TABLE as osm_table
|
||||
)
|
||||
from countries_division import country_initial_levels
|
||||
from subregions import (
|
||||
get_subregions_info,
|
||||
update_border_mwm_size_estimation,
|
||||
)
|
||||
|
||||
|
||||
class CountryStructureException(Exception):
|
||||
|
@ -16,80 +17,25 @@ class CountryStructureException(Exception):
|
|||
def _clear_borders(conn):
|
||||
with conn.cursor() as cursor:
|
||||
cursor.execute(f"DELETE FROM {borders_table}")
|
||||
conn.commit()
|
||||
|
||||
|
||||
def _find_subregions(conn, osm_ids, next_level, regions):
|
||||
"""Return subregions of level 'next_level' for regions with osm_ids."""
|
||||
subregion_ids = []
|
||||
for osm_id in osm_ids:
|
||||
more_subregions = get_subregions_info(conn, osm_id, borders_table,
|
||||
next_level, need_cities=False)
|
||||
for subregion_id, subregion_data in more_subregions.items():
|
||||
region_data = regions.setdefault(subregion_id, {})
|
||||
region_data['name'] = subregion_data['name']
|
||||
region_data['mwm_size_est'] = subregion_data['mwm_size_est']
|
||||
region_data['parent_id'] = osm_id
|
||||
subregion_ids.append(subregion_id)
|
||||
return subregion_ids
|
||||
|
||||
|
||||
def _create_regions(conn, osm_ids, regions):
|
||||
if not osm_ids:
|
||||
return
|
||||
osm_ids = list(osm_ids) # to ensure order
|
||||
sql_values = ','.join(
|
||||
f'({osm_id},'
|
||||
'%s,'
|
||||
f"{regions[osm_id].get('parent_id', 'NULL')},"
|
||||
f"{regions[osm_id].get('mwm_size_est', 'NULL')},"
|
||||
f'(SELECT way FROM {osm_table} WHERE osm_id={osm_id}),'
|
||||
'now())'
|
||||
for osm_id in osm_ids
|
||||
)
|
||||
with conn.cursor() as cursor:
|
||||
cursor.execute(f"""
|
||||
INSERT INTO {borders_table} (id, name, parent_id, mwm_size_est,
|
||||
geom, modified)
|
||||
VALUES {sql_values}
|
||||
""", tuple(regions[osm_id]['name'] for osm_id in osm_ids)
|
||||
)
|
||||
|
||||
|
||||
def _make_country_structure(conn, country_osm_id):
|
||||
regions = {} # osm_id: { 'name': name,
|
||||
# 'mwm_size_est': size,
|
||||
# 'parent_id': parent_id }
|
||||
|
||||
country_name = get_osm_border_name_by_osm_id(conn, country_osm_id)
|
||||
country_data = regions.setdefault(country_osm_id, {})
|
||||
country_data['name'] = country_name
|
||||
# TODO: country_data['mwm_size_est'] = ...
|
||||
|
||||
_create_regions(conn, [country_osm_id], regions)
|
||||
copy_region_from_osm(conn, country_osm_id, parent_id=None)
|
||||
|
||||
if country_initial_levels.get(country_name):
|
||||
admin_levels = country_initial_levels[country_name]
|
||||
prev_admin_levels = [2] + admin_levels[:-1]
|
||||
prev_region_ids = [country_osm_id]
|
||||
prev_level_region_ids = [country_osm_id]
|
||||
|
||||
for admin_level, prev_level in zip(admin_levels, prev_admin_levels):
|
||||
if not prev_region_ids:
|
||||
raise CountryStructureException(
|
||||
f"Empty prev_region_ids at {country_name}, "
|
||||
f"AL={admin_level}, prev-AL={prev_level}"
|
||||
)
|
||||
subregion_ids = _find_subregions(conn, prev_region_ids,
|
||||
admin_level, regions)
|
||||
_create_regions(conn, subregion_ids, regions)
|
||||
prev_region_ids = subregion_ids
|
||||
warning = None
|
||||
if len(regions) == 1:
|
||||
try:
|
||||
update_border_mwm_size_estimation(conn, country_osm_id)
|
||||
except Exception as e:
|
||||
warning = str(e)
|
||||
return warning
|
||||
current_level_region_ids = []
|
||||
for region_id in prev_level_region_ids:
|
||||
subregion_ids = divide_region_into_subregions(
|
||||
conn, region_id, admin_level)
|
||||
current_level_region_ids.extend(subregion_ids)
|
||||
prev_level_region_ids = current_level_region_ids
|
||||
|
||||
|
||||
def create_countries_initial_structure(conn):
|
||||
|
@ -97,32 +43,15 @@ def create_countries_initial_structure(conn):
|
|||
with conn.cursor() as cursor:
|
||||
# TODO: process overlapping countries, like Ukraine and Russia with common Crimea
|
||||
cursor.execute(f"""
|
||||
SELECT osm_id, name
|
||||
SELECT osm_id
|
||||
FROM {osm_table}
|
||||
WHERE admin_level = 2 and name != 'Ukraine'
|
||||
WHERE admin_level = 2
|
||||
"""
|
||||
)
|
||||
warnings = []
|
||||
for rec in cursor:
|
||||
warning = _make_country_structure(conn, rec[0])
|
||||
if warning:
|
||||
warnings.append(warning)
|
||||
for country_osm_id, *_ in cursor:
|
||||
_make_country_structure(conn, country_osm_id)
|
||||
conn.commit()
|
||||
return warnings
|
||||
|
||||
|
||||
def get_osm_border_name_by_osm_id(conn, osm_id):
|
||||
with conn.cursor() as cursor:
|
||||
cursor.execute(f"""
|
||||
SELECT name FROM {osm_table}
|
||||
WHERE osm_id = %s
|
||||
""", (osm_id,))
|
||||
rec = cursor.fetchone()
|
||||
if not rec:
|
||||
raise CountryStructureException(
|
||||
f'Not found region with osm_id="{osm_id}"'
|
||||
)
|
||||
return rec[0]
|
||||
return
|
||||
|
||||
|
||||
def _get_country_osm_id_by_name(conn, name):
|
||||
|
@ -137,4 +66,5 @@ def _get_country_osm_id_by_name(conn, name):
|
|||
rec = cursor.fetchone()
|
||||
if not rec:
|
||||
raise CountryStructureException(f'Not found country "{name}"')
|
||||
return int(rec[0])
|
||||
return rec[0]
|
||||
|
||||
|
|
|
@ -6,6 +6,8 @@ import config
|
|||
|
||||
class MwmSizePredictor:
|
||||
|
||||
factors = ('urban_pop', 'area', 'city_cnt', 'hamlet_cnt',)
|
||||
|
||||
def __init__(self):
|
||||
with open(config.MWM_SIZE_PREDICTION_MODEL_PATH, 'rb') as f:
|
||||
self.model = pickle.load(f)
|
||||
|
@ -20,9 +22,9 @@ class MwmSizePredictor:
|
|||
|
||||
@classmethod
|
||||
def predict(cls, features_array):
|
||||
"""1D or 2D array of feature values for predictions. Features are
|
||||
'urban_pop', 'area', 'city_cnt', 'hamlet_cnt' as defined for the
|
||||
prediction model.
|
||||
"""1D or 2D array of feature values for predictions.
|
||||
Each feature is a list of values for factors
|
||||
defined by 'cls.factors' sequence.
|
||||
"""
|
||||
X = np.array(features_array)
|
||||
one_prediction = (X.ndim == 1)
|
||||
|
|
|
@ -316,8 +316,9 @@ function selectLayer(e) {
|
|||
$('#b_size').text(
|
||||
Math.round(props['count_k'] * BYTES_FOR_NODE / 1024 / 1024) + ' MB'
|
||||
);
|
||||
$('#pa_size').text(Math.round(props['mwm_size_est'] / 1024) + ' MB');
|
||||
//$('#b_nodes').text(borders[selectedId].layer.getLatLngs()[0].length);
|
||||
var mwm_size_est = props['mwm_size_est'];
|
||||
var mwm_size_est_text = mwm_size_est === null ? '-' : Math.round(props['mwm_size_est']/1024) + ' MB';
|
||||
$('#pa_size').text(mwm_size_est_text);
|
||||
$('#b_nodes').text(props['nodes']);
|
||||
$('#b_date').text(props['modified']);
|
||||
$('#b_area').text(L.Util.formatNum(props['area'] / 1000000, 2));
|
||||
|
@ -1114,7 +1115,7 @@ function bDivideDrawPreview(response) {
|
|||
var show_divide_button = (subregions.features.length > 1);
|
||||
if (clusters) {
|
||||
subregions_count_text += ', ' + clusters.features.length + ' кластеров';
|
||||
show_divide_button = (clusters.features.length > 1);
|
||||
show_divide_button = (clusters.features.length > 0);
|
||||
}
|
||||
$('#d_count').text(subregions_count_text).show();
|
||||
if (show_divide_button)
|
||||
|
|
|
@ -3,6 +3,7 @@ from queue import Queue
|
|||
|
||||
from config import (
|
||||
BORDERS_TABLE as borders_table,
|
||||
MWM_SIZE_PREDICTION_MODEL_LIMITATIONS,
|
||||
OSM_TABLE as osm_table,
|
||||
OSM_PLACES_TABLE as osm_places_table,
|
||||
)
|
||||
|
@ -19,19 +20,19 @@ def get_subregions_info(conn, region_id, region_table,
|
|||
:return: dict {subregion_id => subregion data} including area and population info
|
||||
"""
|
||||
subregions = _get_subregions_basic_info(conn, region_id, region_table,
|
||||
next_level, need_cities)
|
||||
_add_population_data(conn, subregions, need_cities)
|
||||
_add_mwm_size_estimation(subregions)
|
||||
next_level)
|
||||
_add_mwm_size_estimation(conn, subregions, need_cities)
|
||||
keys = ('name', 'mwm_size_est')
|
||||
if need_cities:
|
||||
keys = keys + ('cities',)
|
||||
return {subregion_id: {k: subregion_data[k] for k in keys}
|
||||
return {subregion_id: {k: subregion_data[k] for k in keys
|
||||
if k in subregion_data}
|
||||
for subregion_id, subregion_data in subregions.items()
|
||||
}
|
||||
|
||||
|
||||
def _get_subregions_basic_info(conn, region_id, region_table,
|
||||
next_level, need_cities):
|
||||
next_level):
|
||||
cursor = conn.cursor()
|
||||
region_id_column, region_geom_column = (
|
||||
('id', 'geom') if region_table == borders_table else
|
||||
|
@ -51,98 +52,109 @@ def _get_subregions_basic_info(conn, region_id, region_table,
|
|||
'osm_id': rec[0],
|
||||
'name': rec[1],
|
||||
'area': rec[2],
|
||||
'urban_pop': 0,
|
||||
'city_cnt': 0,
|
||||
'hamlet_cnt': 0
|
||||
}
|
||||
if need_cities:
|
||||
subregion_data['cities'] = []
|
||||
subregions[rec[0]] = subregion_data
|
||||
return subregions
|
||||
|
||||
|
||||
def _add_population_data(conn, subregions, need_cities):
|
||||
if not subregions:
|
||||
"""Adds population data only for subregions that are suitable
|
||||
for mwm size estimation.
|
||||
"""
|
||||
subregion_ids = [
|
||||
s_id for s_id, s_data in subregions.items()
|
||||
if s_data['area'] <= MWM_SIZE_PREDICTION_MODEL_LIMITATIONS['area']
|
||||
]
|
||||
if not subregion_ids:
|
||||
return
|
||||
cursor = conn.cursor()
|
||||
subregion_ids = ','.join(str(x) for x in subregions.keys())
|
||||
cursor.execute(f"""
|
||||
SELECT b.osm_id, p.name, coalesce(p.population, 0), p.place
|
||||
FROM {osm_table} b, {osm_places_table} p
|
||||
WHERE b.osm_id IN ({subregion_ids})
|
||||
AND ST_Contains(b.way, p.center)
|
||||
"""
|
||||
)
|
||||
for subregion_id, place_name, place_population, place_type in cursor:
|
||||
subregion_data = subregions[subregion_id]
|
||||
if place_type in ('city', 'town'):
|
||||
subregion_data['city_cnt'] += 1
|
||||
subregion_data['urban_pop'] += place_population
|
||||
if need_cities:
|
||||
subregion_data['cities'].append({
|
||||
'name': place_name,
|
||||
'population': place_population
|
||||
})
|
||||
else:
|
||||
subregion_data['hamlet_cnt'] += 1
|
||||
|
||||
for subregion_id, data in subregions.items():
|
||||
data.update({
|
||||
'urban_pop': 0,
|
||||
'city_cnt': 0,
|
||||
'hamlet_cnt': 0
|
||||
})
|
||||
if need_cities:
|
||||
data['cities'] = []
|
||||
|
||||
subregion_ids_str = ','.join(str(x) for x in subregion_ids)
|
||||
with conn.cursor() as cursor:
|
||||
cursor.execute(f"""
|
||||
SELECT b.osm_id, p.name, coalesce(p.population, 0), p.place
|
||||
FROM {osm_table} b, {osm_places_table} p
|
||||
WHERE b.osm_id IN ({subregion_ids_str})
|
||||
AND ST_Contains(b.way, p.center)
|
||||
"""
|
||||
)
|
||||
for subregion_id, place_name, place_population, place_type in cursor:
|
||||
subregion_data = subregions[subregion_id]
|
||||
if place_type in ('city', 'town'):
|
||||
subregion_data['city_cnt'] += 1
|
||||
subregion_data['urban_pop'] += place_population
|
||||
if need_cities:
|
||||
subregion_data['cities'].append({
|
||||
'name': place_name,
|
||||
'population': place_population
|
||||
})
|
||||
else:
|
||||
subregion_data['hamlet_cnt'] += 1
|
||||
|
||||
|
||||
def _add_mwm_size_estimation(subregions):
|
||||
subregions_sorted = [
|
||||
def _add_mwm_size_estimation(conn, subregions, need_cities):
|
||||
for subregion_data in subregions.values():
|
||||
subregion_data['mwm_size_est'] = None
|
||||
|
||||
_add_population_data(conn, subregions, need_cities)
|
||||
|
||||
subregions_to_predict = [
|
||||
(
|
||||
s_id,
|
||||
[subregions[s_id][f] for f in
|
||||
('urban_pop', 'area', 'city_cnt', 'hamlet_cnt')]
|
||||
[subregions[s_id][f] for f in MwmSizePredictor.factors]
|
||||
)
|
||||
for s_id in sorted(subregions.keys())
|
||||
if all(subregions[s_id].get(f) is not None and
|
||||
subregions[s_id][f] <=
|
||||
MWM_SIZE_PREDICTION_MODEL_LIMITATIONS[f]
|
||||
for f in MwmSizePredictor.factors)
|
||||
]
|
||||
|
||||
feature_array = [x[1] for x in subregions_sorted]
|
||||
if not subregions_to_predict:
|
||||
return
|
||||
|
||||
feature_array = [x[1] for x in subregions_to_predict]
|
||||
predictions = MwmSizePredictor.predict(feature_array)
|
||||
|
||||
for subregion_id, mwm_size_prediction in zip(
|
||||
(x[0] for x in subregions_sorted),
|
||||
(x[0] for x in subregions_to_predict),
|
||||
predictions
|
||||
):
|
||||
subregions[subregion_id]['mwm_size_est'] = mwm_size_prediction
|
||||
|
||||
|
||||
def update_border_mwm_size_estimation(conn, border_id):
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(f"""
|
||||
SELECT name, ST_Area(geography(geom))/1.0E+6 area
|
||||
FROM {borders_table}
|
||||
WHERE id = %s""", (border_id, ))
|
||||
name, area = cursor.fetchone()
|
||||
if math.isnan(area):
|
||||
raise Exception(f"Area is NaN for border '{name}' ({border_id})")
|
||||
border_data = {
|
||||
'area': area,
|
||||
'urban_pop': 0,
|
||||
'city_cnt': 0,
|
||||
'hamlet_cnt': 0
|
||||
}
|
||||
cursor.execute(f"""
|
||||
SELECT coalesce(p.population, 0), p.place
|
||||
FROM {borders_table} b, {osm_places_table} p
|
||||
WHERE b.id = %s
|
||||
AND ST_Contains(b.geom, p.center)
|
||||
""", (border_id, ))
|
||||
for place_population, place_type in cursor:
|
||||
if place_type in ('city', 'town'):
|
||||
border_data['city_cnt'] += 1
|
||||
border_data['urban_pop'] += place_population
|
||||
else:
|
||||
border_data['hamlet_cnt'] += 1
|
||||
|
||||
feature_array = [
|
||||
border_data[f] for f in
|
||||
('urban_pop', 'area', 'city_cnt', 'hamlet_cnt')
|
||||
]
|
||||
mwm_size_est = MwmSizePredictor.predict(feature_array)
|
||||
cursor.execute(f"UPDATE {borders_table} SET mwm_size_est = %s WHERE id = %s",
|
||||
(mwm_size_est, border_id))
|
||||
conn.commit()
|
||||
with conn.cursor() as cursor:
|
||||
cursor.execute(f"""
|
||||
SELECT name, ST_Area(geography(geom))/1.0E+6 area
|
||||
FROM {borders_table}
|
||||
WHERE id = %s""", (border_id,))
|
||||
name, area = cursor.fetchone()
|
||||
if math.isnan(area):
|
||||
e = Exception(f"Area is NaN for border '{name}' ({border_id})")
|
||||
raise e
|
||||
border_data = {
|
||||
'area': area,
|
||||
}
|
||||
regions = {border_id: border_data}
|
||||
_add_mwm_size_estimation(conn, regions, need_cities=False)
|
||||
mwm_size_est = border_data.get('mwm_size_est')
|
||||
# mwm_size_est may be None. Python's None is converted to NULL
|
||||
# during %s substitution in execute().
|
||||
cursor.execute(f"""
|
||||
UPDATE {borders_table}
|
||||
SET mwm_size_est = %s
|
||||
WHERE id = %s
|
||||
""", (mwm_size_est, border_id,))
|
||||
conn.commit()
|
||||
|
||||
|
||||
def is_administrative_region(conn, region_id):
|
||||
|
@ -245,7 +257,7 @@ def get_similar_regions(conn, region_id, only_leaves=False):
|
|||
if item['admin_level'] == admin_level:
|
||||
similar_region_ids.append(item['id'])
|
||||
elif item['admin_level'] < admin_level:
|
||||
children = find_osm_child_regions(item['id'])
|
||||
children = find_osm_child_regions(conn, item['id'])
|
||||
for ch in children:
|
||||
q.put(ch)
|
||||
if only_leaves:
|
||||
|
|
|
@ -210,7 +210,7 @@
|
|||
<span id="wait_start_over">ожидайте...</span>
|
||||
</div>
|
||||
<div id="search">
|
||||
Поиск <input type="text" id="fsearch">
|
||||
Поиск <input type="text" id="fsearch" placeholder="Use ^/$ for start/end">
|
||||
<button id="b_search" onclick="doSearch()">🔍</button>
|
||||
</div>
|
||||
</div>
|
||||
|
|
Loading…
Add table
Reference in a new issue