More robust work with mwm size predictions, including prediction model limitations
parent a944aee15c
commit c1ca4c68b1

8 changed files with 158 additions and 114 deletions
@@ -38,6 +38,7 @@ CREATE TABLE splitting (
     subregion_ids BIGINT[] NOT NULL,
     mwm_size_est REAL NOT NULL,
     mwm_size_thr INTEGER NOT NULL, -- mwm size threshold in Kb, 4-bytes INTEGER is enough
+    next_level INTEGER NOT NULL,
     geom geometry NOT NULL
 );
-CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr);
+CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr, next_level);
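
The widened index matches how the application hunks below query the table: saved splittings are now looked up by the full (osm_border_id, mwm_size_thr, next_level) key. A minimal lookup sketch, assuming a psycopg2 connection and the schema above (get_saved_splitting is a hypothetical helper, not part of the commit):

def get_saved_splitting(conn, osm_border_id, mwm_size_thr, next_level):
    # Fetch previously computed clusters for one region, one size
    # threshold and one target admin level.
    with conn.cursor() as cursor:
        cursor.execute(
            """
            SELECT subregion_ids, mwm_size_est
            FROM splitting
            WHERE osm_border_id = %s
              AND mwm_size_thr = %s
              AND next_level = %s
            """,
            (osm_border_id, mwm_size_thr, next_level),
        )
        return cursor.fetchall()
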
@@ -12,9 +12,12 @@ from subregions import get_subregions_info
 class DisjointClusterUnion:
     """Disjoint set union implementation for administrative subregions."""
 
-    def __init__(self, region_id, subregions, mwm_size_thr=None):
+    def __init__(self, region_id, subregions, next_level, mwm_size_thr=None):
+        assert all(s_data['mwm_size_est'] is not None
+                   for s_data in subregions.values())
         self.region_id = region_id
         self.subregions = subregions
+        self.next_level = next_level
         self.mwm_size_thr = mwm_size_thr or MWM_SIZE_THRESHOLD
         self.representatives = {sub_id: sub_id for sub_id in subregions}
         # A cluster is one or more subregions with common borders
@@ -84,7 +87,8 @@ def get_best_cluster_to_join_with(small_cluster_id,
     for subregion_id in subregion_ids:
         for other_subregion_id, length in common_border_matrix[subregion_id].items():
             other_cluster_id = dcu.find_cluster(other_subregion_id)
-            if other_cluster_id != small_cluster_id:
+            if (other_cluster_id != small_cluster_id and
+                    not dcu.clusters[other_cluster_id]['finished']):
                 common_borders[other_cluster_id] += length
     if not common_borders:
         return None
@@ -144,8 +148,10 @@ def find_golden_splitting(conn, border_id, next_level, mwm_size_thr):
                                      next_level, need_cities=True)
     if not subregions:
         return
+    if any(s_data['mwm_size_est'] is None for s_data in subregions.values()):
+        return
 
-    dcu = DisjointClusterUnion(border_id, subregions, mwm_size_thr)
+    dcu = DisjointClusterUnion(border_id, subregions, next_level, mwm_size_thr)
     all_subregion_ids = dcu.get_all_subregion_ids()
     common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids)
 
@@ -188,6 +194,7 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
            DELETE FROM {autosplit_table}
            WHERE osm_border_id = {dcu.region_id}
              AND mwm_size_thr = {dcu.mwm_size_thr}
+              AND next_level = {dcu.next_level}
            """)
        for cluster_id, data in dcu.clusters.items():
            subregion_ids = data['subregion_ids']
@@ -196,12 +203,13 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
            )
            cluster_geometry_sql = get_union_sql(subregion_ids)
            cursor.execute(f"""
-                INSERT INTO {autosplit_table} (osm_border_id, subregion_ids,
-                                               geom, mwm_size_thr, mwm_size_est)
+                INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom,
+                                               next_level, mwm_size_thr, mwm_size_est)
                VALUES (
                    {dcu.region_id},
                    '{subregion_ids_array_str}',
                    ({cluster_geometry_sql}),
+                    {dcu.next_level},
                    {dcu.mwm_size_thr},
                    {data['mwm_size_est']}
                )
@@ -413,11 +413,11 @@ def find_osm_borders():
 def copy_from_osm():
     osm_id = int(request.args.get('id'))
     name = request.args.get('name')
-    success = copy_region_from_osm(g.conn, osm_id, name)
-    if not success:
-        return jsonify(status=f"Region with id={osm_id} already exists")
+    errors, warnings = copy_region_from_osm(g.conn, osm_id, name)
+    if errors:
+        return jsonify(status='\n'.join(errors))
     g.conn.commit()
-    return jsonify(status='ok')
+    return jsonify(status='ok', warnings=warnings)
 
 
 @app.route('/rename')
@@ -143,8 +143,9 @@ def get_clusters_for_preview_one(region_id, next_level, mwm_size_thr):
     where_clause = f"""
         osm_border_id = %s
         AND mwm_size_thr = %s
+        AND next_level = %s
         """
-    splitting_sql_params = (region_id, mwm_size_thr)
+    splitting_sql_params = (region_id, mwm_size_thr, next_level)
     with g.conn.cursor() as cursor:
         cursor.execute(f"""
             SELECT 1 FROM {autosplit_table}
@@ -231,9 +232,9 @@ def divide_region_into_subregions(conn, region_id, next_level):
         cursor.execute(f"""
             INSERT INTO {borders_table}
                 (id, geom, name, parent_id, modified, count_k, mwm_size_est)
-            SELECT osm_id, way, name, {parent_id}, now(), -1, {mwm_size_est}
+            SELECT osm_id, way, name, {parent_id}, now(), -1, %s
             FROM {osm_table}
-            WHERE osm_id = %s""", (subregion_id,)
+            WHERE osm_id = %s""", (mwm_size_est, subregion_id,)
         )
     if not is_admin_region:
         cursor.execute(f"DELETE FROM {borders_table} WHERE id = %s", (region_id,))
@@ -251,8 +252,9 @@ def divide_into_clusters(region_ids, next_level, mwm_size_thr):
         where_clause = f"""
             osm_border_id = %s
             AND mwm_size_thr = %s
+            AND next_level = %s
             """
-        splitting_sql_params = (region_id, mwm_size_thr)
+        splitting_sql_params = (region_id, mwm_size_thr, next_level)
         cursor.execute(f"""
             SELECT 1 FROM {autosplit_table}
             WHERE {where_clause}
@@ -269,24 +271,32 @@ def divide_into_clusters(region_ids, next_level, mwm_size_thr):
             """, splitting_sql_params
         )
         if cursor.rowcount == 1:
-            continue
-        for rec in cursor:
-            subregion_ids = rec[0]
-            cluster_id = subregion_ids[0]
-            if len(subregion_ids) == 1:
-                subregion_id = cluster_id
-                name = get_osm_border_name_by_osm_id(g.conn, subregion_id)
-            else:
-                counter += 1
-                free_id -= 1
-                subregion_id = free_id
-                name = f"{base_name}_{counter}"
-            insert_cursor.execute(f"""
-                INSERT INTO {borders_table} (id, name, parent_id, geom, modified, count_k, mwm_size_est)
-                SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1, mwm_size_est
-                FROM {autosplit_table} WHERE subregion_ids[1] = %s AND {where_clause}
-                """, (name, cluster_id,) + splitting_sql_params
-            )
+            insert_cursor.execute(f"""
+                UPDATE {borders_table}
+                SET modified = now(),
+                    mwm_size_est = (SELECT mwm_size_est
+                                    FROM {autosplit_table}
+                                    WHERE {where_clause})
+                WHERE id = {region_id}
+                """, splitting_sql_params)
+        else:
+            for rec in cursor:
+                subregion_ids = rec[0]
+                cluster_id = subregion_ids[0]
+                if len(subregion_ids) == 1:
+                    subregion_id = cluster_id
+                    name = get_osm_border_name_by_osm_id(g.conn, subregion_id)
+                else:
+                    counter += 1
+                    free_id -= 1
+                    subregion_id = free_id
+                    name = f"{base_name}_{counter}"
+                insert_cursor.execute(f"""
+                    INSERT INTO {borders_table} (id, name, parent_id, geom, modified, count_k, mwm_size_est)
+                    SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1, mwm_size_est
+                    FROM {autosplit_table} WHERE subregion_ids[1] = %s AND {where_clause}
+                    """, (name, cluster_id,) + splitting_sql_params
+                )
     g.conn.commit()
     return jsonify(status='ok')
 
@@ -393,13 +403,16 @@ def find_potential_parents(region_id):
 
 
 def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed'):
+    errors, warnings = [], []
     borders_table = main_borders_table
     with conn.cursor() as cursor:
         # Check if this id already in use
-        cursor.execute(f"SELECT id FROM {borders_table} WHERE id = %s",
+        cursor.execute(f"SELECT name FROM {borders_table} WHERE id = %s",
                        (region_id,))
         if cursor.rowcount > 0:
-            return False
+            name = cursor.fetchone()[0]
+            errors.append(f"Region with id={region_id} already exists under name '{name}'")
+            return errors, warnings
 
         name_expr = f"'{name}'" if name else "name"
         parent_id_expr = f"{parent_id}" if isinstance(parent_id, int) else "NULL"
@@ -413,8 +426,11 @@ def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed'):
         )
     if parent_id == 'not_passed':
         assign_region_to_lowest_parent(conn, region_id)
-    update_border_mwm_size_estimation(conn, region_id)
-    return True
+    try:
+        update_border_mwm_size_estimation(conn, region_id)
+    except Exception as e:
+        warnings.append(str(e))
+    return errors, warnings
 
 
 def get_osm_border_name_by_osm_id(conn, osm_id):
@@ -33,3 +33,9 @@ MWM_SIZE_THRESHOLD = 70*1024
 # Estimated mwm size is predicted by the 'model.pkl' with 'scaler.pkl' for X
 MWM_SIZE_PREDICTION_MODEL_PATH = '/app/data/model.pkl'
 MWM_SIZE_PREDICTION_MODEL_SCALER_PATH = '/app/data/scaler.pkl'
+MWM_SIZE_PREDICTION_MODEL_LIMITATIONS = {
+    'area': 5500 * 1.5,
+    'urban_pop': 3500000 * 1.5,
+    'city_cnt': 32 * 1.5,
+    'hamlet_cnt': 2120 * 1.5
+}
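
These bounds are presumably the maxima seen by the regression model during training, padded by a 1.5x margin: the reworked _add_mwm_size_estimation further below asks the model only about regions whose every factor is known and within these limits, and leaves mwm_size_est unset (NULL) otherwise. A minimal standalone sketch of that gating check (is_predictable is a hypothetical helper, not part of the commit):

MWM_SIZE_PREDICTION_MODEL_LIMITATIONS = {
    'area': 5500 * 1.5,          # km^2, as computed by ST_Area(geography(geom))/1.0E+6
    'urban_pop': 3500000 * 1.5,
    'city_cnt': 32 * 1.5,
    'hamlet_cnt': 2120 * 1.5
}

def is_predictable(features):
    """True if every factor is known and within the model's assumed range."""
    return all(
        features.get(factor) is not None and features[factor] <= limit
        for factor, limit in MWM_SIZE_PREDICTION_MODEL_LIMITATIONS.items()
    )

# A modest region gets an estimate; an oversized one is left without one.
print(is_predictable({'area': 1200, 'urban_pop': 50000, 'city_cnt': 3, 'hamlet_cnt': 40}))  # True
print(is_predictable({'area': 9000, 'urban_pop': 50000, 'city_cnt': 3, 'hamlet_cnt': 40}))  # False
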
@@ -6,6 +6,8 @@ import config
 
 class MwmSizePredictor:
+
+    factors = ('urban_pop', 'area', 'city_cnt', 'hamlet_cnt')
 
     def __init__(self):
         with open(config.MWM_SIZE_PREDICTION_MODEL_PATH, 'rb') as f:
             self.model = pickle.load(f)
@@ -20,9 +22,9 @@
 
     @classmethod
     def predict(cls, features_array):
-        """1D or 2D array of feature values for predictions. Features are
-        'urban_pop', 'area', 'city_cnt', 'hamlet_cnt' as defined for the
-        prediction model.
+        """1D or 2D array of feature values for predictions.
+        Each feature is a list of values for factors
+        defined by 'cls.factors' sequence.
        """
        X = np.array(features_array)
        one_prediction = (X.ndim == 1)
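
The rewritten docstring ties the expected feature order to cls.factors instead of restating the names. A toy sketch of the 1D-vs-2D handling it describes, with a stand-in model in place of the pickled model.pkl/scaler.pkl; the reshape step is an assumption about the real implementation, which only shows one_prediction = (X.ndim == 1) here:

import numpy as np

factors = ('urban_pop', 'area', 'city_cnt', 'hamlet_cnt')

def predict(features_array, model=lambda X: X.sum(axis=1)):
    # Accepts one feature vector (1D) or a batch of them (2D), values
    # ordered as in `factors`; `model` stands in for the real regressor.
    X = np.array(features_array)
    one_prediction = (X.ndim == 1)
    if one_prediction:
        X = X.reshape(1, -1)
    predictions = model(X)
    return predictions[0] if one_prediction else predictions

print(predict([50000, 1200.0, 3, 40]))           # single region -> scalar
print(predict([[50000, 1200.0, 3, 40],
               [0, 300.0, 0, 15]]))              # batch -> array of predictions
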
@@ -316,8 +316,9 @@ function selectLayer(e) {
        $('#b_size').text(
            Math.round(props['count_k'] * BYTES_FOR_NODE / 1024 / 1024) + ' MB'
        );
-        $('#pa_size').text(Math.round(props['mwm_size_est'] / 1024) + ' MB');
-        //$('#b_nodes').text(borders[selectedId].layer.getLatLngs()[0].length);
+        var mwm_size_est = props['mwm_size_est'];
+        var mwm_size_est_text = mwm_size_est === null ? '-' : Math.round(props['mwm_size_est']/1024) + ' MB';
+        $('#pa_size').text(mwm_size_est_text);
        $('#b_nodes').text(props['nodes']);
        $('#b_date').text(props['modified']);
        $('#b_area').text(L.Util.formatNum(props['area'] / 1000000, 2));
@@ -1114,7 +1115,7 @@ function bDivideDrawPreview(response) {
    var show_divide_button = (subregions.features.length > 1);
    if (clusters) {
        subregions_count_text += ', ' + clusters.features.length + ' кластеров';
-        show_divide_button = (clusters.features.length > 1);
+        show_divide_button = (clusters.features.length > 0);
    }
    $('#d_count').text(subregions_count_text).show();
    if (show_divide_button)
@@ -5,6 +5,8 @@ from config import (
     BORDERS_TABLE as borders_table,
     OSM_TABLE as osm_table,
     OSM_PLACES_TABLE as osm_places_table,
+
+    MWM_SIZE_PREDICTION_MODEL_LIMITATIONS,
 )
 from mwm_size_predictor import MwmSizePredictor
 
@@ -20,12 +22,11 @@ def get_subregions_info(conn, region_id, region_table,
     """
     subregions = _get_subregions_basic_info(conn, region_id, region_table,
                                             next_level, need_cities)
-    _add_population_data(conn, subregions, need_cities)
-    _add_mwm_size_estimation(subregions)
+    _add_mwm_size_estimation(conn, subregions, need_cities)
     keys = ('name', 'mwm_size_est')
     if need_cities:
         keys = keys + ('cities',)
-    return {subregion_id: {k: subregion_data[k] for k in keys}
+    return {subregion_id: {k: subregion_data.get(k) for k in keys}
             for subregion_id, subregion_data in subregions.items()
            }
 
@@ -51,100 +52,109 @@ def _get_subregions_basic_info(conn, region_id, region_table,
             'osm_id': rec[0],
             'name': rec[1],
             'area': rec[2],
-            'urban_pop': 0,
-            'city_cnt': 0,
-            'hamlet_cnt': 0
         }
-        if need_cities:
-            subregion_data['cities'] = []
         subregions[rec[0]] = subregion_data
     return subregions
 
 
 def _add_population_data(conn, subregions, need_cities):
-    if not subregions:
+    """Adds population data only for subregions that are suitable
+    for mwm size estimation.
+    """
+    subregion_ids = [
+        s_id for s_id, s_data in subregions.items()
+        if s_data['area'] <= MWM_SIZE_PREDICTION_MODEL_LIMITATIONS['area']
+    ]
+    if not subregion_ids:
         return
-    cursor = conn.cursor()
-    subregion_ids = ','.join(str(x) for x in subregions.keys())
-    cursor.execute(f"""
-        SELECT b.osm_id, p.name, coalesce(p.population, 0), p.place
-        FROM {osm_table} b, {osm_places_table} p
-        WHERE b.osm_id IN ({subregion_ids})
-            AND ST_Contains(b.way, p.center)
-        """
-    )
-    for subregion_id, place_name, place_population, place_type in cursor:
-        subregion_data = subregions[subregion_id]
-        if place_type in ('city', 'town'):
-            subregion_data['city_cnt'] += 1
-            subregion_data['urban_pop'] += place_population
-            if need_cities:
-                subregion_data['cities'].append({
-                    'name': place_name,
-                    'population': place_population
-                })
-        else:
-            subregion_data['hamlet_cnt'] += 1
+
+    for subregion_id, data in subregions.items():
+        data.update({
+            'urban_pop': 0,
+            'city_cnt': 0,
+            'hamlet_cnt': 0
+        })
+        if need_cities:
+            data['cities'] = []
+
+    subregion_ids_str = ','.join(str(x) for x in subregion_ids)
+    with conn.cursor() as cursor:
+        cursor.execute(f"""
+            SELECT b.osm_id, p.name, coalesce(p.population, 0), p.place
+            FROM {osm_table} b, {osm_places_table} p
+            WHERE b.osm_id IN ({subregion_ids_str})
+                AND ST_Contains(b.way, p.center)
+            """
+        )
+        for subregion_id, place_name, place_population, place_type in cursor:
+            subregion_data = subregions[subregion_id]
+            if place_type in ('city', 'town'):
+                subregion_data['city_cnt'] += 1
+                subregion_data['urban_pop'] += place_population
+                if need_cities:
+                    subregion_data['cities'].append({
+                        'name': place_name,
+                        'population': place_population
+                    })
+            else:
+                subregion_data['hamlet_cnt'] += 1
 
 
-def _add_mwm_size_estimation(subregions):
-    if not subregions:
-        return
-    subregions_sorted = [
+def _add_mwm_size_estimation(conn, subregions, need_cities):
+    for subregion_data in subregions.values():
+        subregion_data['mwm_size_est'] = None
+
+    _add_population_data(conn, subregions, need_cities)
+
+    subregions_to_predict = [
         (
             s_id,
-            [subregions[s_id][f] for f in
-             ('urban_pop', 'area', 'city_cnt', 'hamlet_cnt')]
+            [subregions[s_id][f] for f in MwmSizePredictor.factors]
         )
         for s_id in sorted(subregions.keys())
+        if all(subregions[s_id].get(f) is not None and
+               subregions[s_id][f] <=
+                   MWM_SIZE_PREDICTION_MODEL_LIMITATIONS[f]
+               for f in MwmSizePredictor.factors)
     ]
 
-    feature_array = [x[1] for x in subregions_sorted]
+    if not subregions_to_predict:
+        return
+
+    feature_array = [x[1] for x in subregions_to_predict]
     predictions = MwmSizePredictor.predict(feature_array)
 
     for subregion_id, mwm_size_prediction in zip(
-            (x[0] for x in subregions_sorted),
+            (x[0] for x in subregions_to_predict),
             predictions
     ):
         subregions[subregion_id]['mwm_size_est'] = mwm_size_prediction
 
 
 def update_border_mwm_size_estimation(conn, border_id):
-    cursor = conn.cursor()
-    cursor.execute(f"""
-        SELECT name, ST_Area(geography(geom))/1.0E+6 area
-        FROM {borders_table}
-        WHERE id = %s""", (border_id, ))
-    name, area = cursor.fetchone()
-    if math.isnan(area):
-        raise Exception(f"Area is NaN for border '{name}' ({border_id})")
-    border_data = {
-        'area': area,
-        'urban_pop': 0,
-        'city_cnt': 0,
-        'hamlet_cnt': 0
-    }
-    cursor.execute(f"""
-        SELECT coalesce(p.population, 0), p.place
-        FROM {borders_table} b, {osm_places_table} p
-        WHERE b.id = %s
-            AND ST_Contains(b.geom, p.center)
-        """, (border_id, ))
-    for place_population, place_type in cursor:
-        if place_type in ('city', 'town'):
-            border_data['city_cnt'] += 1
-            border_data['urban_pop'] += place_population
-        else:
-            border_data['hamlet_cnt'] += 1
-
-    feature_array = [
-        border_data[f] for f in
-        ('urban_pop', 'area', 'city_cnt', 'hamlet_cnt')
-    ]
-    mwm_size_est = MwmSizePredictor.predict(feature_array)
-    cursor.execute(f"UPDATE {borders_table} SET mwm_size_est = %s WHERE id = %s",
-                   (mwm_size_est, border_id))
+    with conn.cursor() as cursor:
+        cursor.execute(f"""
+            SELECT name, ST_Area(geography(geom))/1.0E+6 area
+            FROM {borders_table}
+            WHERE id = %s""", (border_id,))
+        name, area = cursor.fetchone()
+        if math.isnan(area):
+            e = Exception(f"Area is NaN for border '{name}' ({border_id})")
+            raise e
+        border_data = {
+            'area': area,
+        }
+        regions = {border_id: border_data}
+        _add_mwm_size_estimation(conn, regions, need_cities=False)
+        mwm_size_est = border_data.get('mwm_size_est')
+        # mwm_size_est may be None. Python's None is converted to NULL
+        # during %s substitution in execute().
+        cursor.execute(f"""
+            UPDATE {borders_table}
+            SET mwm_size_est = %s
+            WHERE id = %s
+            """, (mwm_size_est, border_id,))
     conn.commit()
 
 
 def is_administrative_region(conn, region_id):