MWM size prediction model
This commit is contained in:
parent
b13e31bff1
commit
580a1ab9ac
17 changed files with 491 additions and 286 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -1,2 +1,4 @@
|
|||
__pycache__
|
||||
*.pyc
|
||||
.idea
|
||||
nohup.out
|
||||
|
|
|
@ -14,7 +14,7 @@ CREATE TABLE borders (
|
|||
count_k INTEGER,
|
||||
modified TIMESTAMP NOT NULL,
|
||||
cmnt VARCHAR(500),
|
||||
mwm_size_est double precision
|
||||
mwm_size_est REAL
|
||||
);
|
||||
CREATE INDEX borders_geom_gits_idx ON borders USING gist (geom);
|
||||
CREATE INDEX borders_parent_id_idx ON borders (parent_id);
|
||||
|
@ -29,15 +29,15 @@ CREATE TABLE borders_backup (
|
|||
count_k INTEGER,
|
||||
modified TIMESTAMP NOT NULL,
|
||||
cmnt VARCHAR(500),
|
||||
mwm_size_est double precision,
|
||||
mwm_size_est REAL,
|
||||
PRIMARY KEY (backup, id)
|
||||
);
|
||||
|
||||
CREATE TABLE splitting (
|
||||
osm_border_id BIGINT NOT NULL REFERENCES osm_borders(osm_id), -- reference to parent osm region
|
||||
subregion_ids BIGINT[] NOT NULL,
|
||||
mwm_size_est double precision NOT NULL,
|
||||
mwm_size_thr double precision NOT NULL,
|
||||
mwm_size_est REAL NOT NULL,
|
||||
mwm_size_thr INTEGER NOT NULL, -- mwm size threshold in Kb, 4-bytes INTEGER is enough
|
||||
geom geometry NOT NULL
|
||||
);
|
||||
CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr);
|
||||
|
|
|
@ -21,13 +21,14 @@ services:
|
|||
context: ./db
|
||||
dockerfile: Dockerfile.db
|
||||
args:
|
||||
PLANET_URL: http://generator.testdata.mapsme.cloud.devmail.ru/planet/planet-latest.o5m
|
||||
PLANET_URL_external: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf
|
||||
PLANET_URL_small: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf
|
||||
PLANET_URL_full: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf
|
||||
PLANET_URL: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf
|
||||
container_name: db
|
||||
restart: always
|
||||
environment:
|
||||
POSTGRES_HOST_AUTH_METHOD: password
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
|
||||
ports:
|
||||
- "55432:5432"
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
FROM tiangolo/uwsgi-nginx-flask:latest
|
||||
|
||||
RUN pip install flask_cors flask_compress psycopg2 unidecode
|
||||
RUN pip install flask_cors flask_compress psycopg2 unidecode numpy sklearn
|
||||
|
||||
COPY app /app
|
||||
COPY ./uwsgi.ini /app
|
||||
|
|
|
@ -6,20 +6,21 @@ from collections import defaultdict
|
|||
|
||||
from config import (
|
||||
AUTOSPLIT_TABLE as autosplit_table,
|
||||
TABLE as table,
|
||||
OSM_TABLE as osm_table
|
||||
OSM_TABLE as osm_table,
|
||||
MWM_SIZE_THRESHOLD,
|
||||
)
|
||||
from subregions import get_subregions_info
|
||||
|
||||
|
||||
class DisjointClusterUnion:
|
||||
"""Disjoint set union implementation for administrative subregions."""
|
||||
|
||||
def __init__(self, region_id, subregions, thresholds):
|
||||
def __init__(self, region_id, subregions, mwm_size_thr=None):
|
||||
self.region_id = region_id
|
||||
self.subregions = subregions
|
||||
self.city_population_thr, self.cluster_population_thr = thresholds
|
||||
self.mwm_size_thr = mwm_size_thr or MWM_SIZE_THRESHOLD
|
||||
self.representatives = {sub_id: sub_id for sub_id in subregions}
|
||||
# a cluster is one or more subregions with common borders
|
||||
# A cluster is one or more subregions with common borders
|
||||
self.clusters = {} # representative => cluster object
|
||||
|
||||
# At the beginning, each subregion forms a cluster.
|
||||
|
@ -28,27 +29,20 @@ class DisjointClusterUnion:
|
|||
self.clusters[subregion_id] = {
|
||||
'representative': subregion_id,
|
||||
'subregion_ids': [subregion_id],
|
||||
'population': data['population'],
|
||||
'big_cities_cnt': sum(1 for c in data['cities'] if self.is_city_big(c)),
|
||||
'mwm_size_est': data['mwm_size_est'],
|
||||
'finished': False, # True if the cluster cannot be merged with another
|
||||
}
|
||||
|
||||
|
||||
def is_city_big(self, city):
|
||||
return city['population'] >= self.city_population_thr
|
||||
|
||||
def get_smallest_cluster(self):
|
||||
"""Find minimal cluster without big cities."""
|
||||
smallest_cluster_id = min(
|
||||
filter(
|
||||
lambda cluster_id: (
|
||||
not self.clusters[cluster_id]['finished'] and
|
||||
self.clusters[cluster_id]['big_cities_cnt'] == 0)
|
||||
,
|
||||
lambda cluster_id:
|
||||
not self.clusters[cluster_id]['finished'],
|
||||
self.clusters.keys()
|
||||
),
|
||||
default=None,
|
||||
key=lambda cluster_id: self.clusters[cluster_id]['population']
|
||||
key=lambda cluster_id: self.clusters[cluster_id]['mwm_size_est']
|
||||
)
|
||||
return smallest_cluster_id
|
||||
|
||||
|
@ -63,9 +57,9 @@ class DisjointClusterUnion:
|
|||
self.representatives[subregion_id] = representative
|
||||
return representative
|
||||
|
||||
def get_cluster_population(self, subregion_id):
|
||||
def get_cluster_mwm_size_est(self, subregion_id):
|
||||
cluster_id = self.find_cluster(subregion_id)
|
||||
return self.clusters[cluster_id]['population']
|
||||
return self.clusters[cluster_id]['mwm_size_est']
|
||||
|
||||
def get_cluster_count(self):
|
||||
return len(self.clusters)
|
||||
|
@ -77,8 +71,7 @@ class DisjointClusterUnion:
|
|||
r_cluster = self.clusters[retained_cluster_id]
|
||||
d_cluster = self.clusters[dropped_cluster_id]
|
||||
r_cluster['subregion_ids'].extend(d_cluster['subregion_ids'])
|
||||
r_cluster['population'] += d_cluster['population']
|
||||
r_cluster['big_cities_cnt'] += d_cluster['big_cities_cnt']
|
||||
r_cluster['mwm_size_est'] += d_cluster['mwm_size_est']
|
||||
del self.clusters[dropped_cluster_id]
|
||||
self.representatives[dropped_cluster_id] = retained_cluster_id
|
||||
return retained_cluster_id
|
||||
|
@ -95,52 +88,13 @@ class DisjointClusterUnion:
|
|||
return subregion_ids
|
||||
|
||||
|
||||
def enrich_with_population_and_cities(conn, subregions):
|
||||
cursor = conn.cursor()
|
||||
ids = ','.join(str(x) for x in subregions.keys())
|
||||
cursor.execute(f"""
|
||||
SELECT b.osm_id, c.name, c.population
|
||||
FROM {osm_table} b, osm_cities c
|
||||
WHERE b.osm_id IN ({ids}) AND ST_CONTAINS(b.way, c.center)
|
||||
"""
|
||||
)
|
||||
for rec in cursor:
|
||||
sub_id = int(rec[0])
|
||||
subregions[sub_id]['cities'].append({
|
||||
'name': rec[1],
|
||||
'population': int(rec[2])
|
||||
})
|
||||
subregions[sub_id]['population'] += int(rec[2])
|
||||
|
||||
|
||||
def find_subregions(conn, region_id, next_level):
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(f"""
|
||||
SELECT subreg.osm_id, subreg.name
|
||||
FROM {osm_table} reg, {osm_table} subreg
|
||||
WHERE reg.osm_id = %s AND subreg.admin_level = %s AND
|
||||
ST_Contains(reg.way, subreg.way)
|
||||
""",
|
||||
(region_id, next_level)
|
||||
)
|
||||
subregions = {
|
||||
int(rec[0]):
|
||||
{
|
||||
'osm_id': int(rec[0]),
|
||||
'name': rec[1],
|
||||
'population': 0,
|
||||
'cities': []
|
||||
}
|
||||
for rec in cursor
|
||||
}
|
||||
if subregions:
|
||||
enrich_with_population_and_cities(conn, subregions)
|
||||
return subregions
|
||||
|
||||
|
||||
def get_best_cluster_to_join_with(small_cluster_id, dcu: DisjointClusterUnion, common_border_matrix):
|
||||
def get_best_cluster_to_join_with(small_cluster_id,
|
||||
dcu: DisjointClusterUnion,
|
||||
common_border_matrix):
|
||||
if small_cluster_id not in common_border_matrix:
|
||||
return None # this may be if a subregion is isolated, like Bezirk Lienz inside Tyrol, Austria
|
||||
# This may be if a subregion is isolated,
|
||||
# like Bezirk Lienz inside Tyrol, Austria
|
||||
return None
|
||||
common_borders = defaultdict(lambda: 0.0) # cluster representative => common border length
|
||||
subregion_ids = dcu.get_cluster_subregion_ids(small_cluster_id)
|
||||
for subregion_id in subregion_ids:
|
||||
|
@ -148,29 +102,26 @@ def get_best_cluster_to_join_with(small_cluster_id, dcu: DisjointClusterUnion, c
|
|||
other_cluster_id = dcu.find_cluster(other_subregion_id)
|
||||
if other_cluster_id != small_cluster_id:
|
||||
common_borders[other_cluster_id] += length
|
||||
#print(f"common_borders={json.dumps(common_borders)} of len {len(common_borders)}")
|
||||
#common_borders = {k:v for k,v in common_borders.items() if v > 0.0}
|
||||
if not common_borders:
|
||||
return None
|
||||
total_common_border_length = sum(common_borders.values())
|
||||
total_adjacent_population = sum(dcu.get_cluster_population(x) for x in common_borders)
|
||||
total_adjacent_mwm_size_est = sum(dcu.get_cluster_mwm_size_est(x) for x in common_borders)
|
||||
choice_criterion = (
|
||||
(
|
||||
lambda cluster_id: (
|
||||
common_borders[cluster_id]/total_common_border_length +
|
||||
-dcu.get_cluster_population(cluster_id)/total_adjacent_population
|
||||
common_borders[cluster_id]/total_common_border_length +
|
||||
-dcu.get_cluster_mwm_size_est(cluster_id)/total_adjacent_mwm_size_est
|
||||
)
|
||||
) if total_adjacent_population else
|
||||
) if total_adjacent_mwm_size_est else
|
||||
lambda cluster_id: (
|
||||
common_borders[cluster_id]/total_common_border_length
|
||||
)
|
||||
)
|
||||
small_cluster_population = dcu.get_cluster_population(small_cluster_id)
|
||||
best_cluster_id = max(
|
||||
filter(
|
||||
lambda cluster_id: (
|
||||
small_cluster_population + dcu.get_cluster_population(cluster_id)
|
||||
<= dcu.cluster_population_thr
|
||||
dcu.clusters[small_cluster_id]['mwm_size_est'] +
|
||||
dcu.clusters[cluster_id]['mwm_size_est'] <= dcu.mwm_size_thr
|
||||
),
|
||||
common_borders.keys()
|
||||
),
|
||||
|
@ -207,39 +158,31 @@ def calculate_common_border_matrix(conn, subregion_ids):
|
|||
|
||||
|
||||
def find_golden_splitting(conn, border_id, next_level,
|
||||
country_region_name, thresholds):
|
||||
subregions = find_subregions(conn, border_id, next_level)
|
||||
country_region_name, mwm_size_thr):
|
||||
subregions = get_subregions_info(conn, border_id, osm_table,
|
||||
next_level, need_cities=True)
|
||||
if not subregions:
|
||||
print(f"No subregions for {border_id} {country_region_name}")
|
||||
return
|
||||
|
||||
dcu = DisjointClusterUnion(border_id, subregions, thresholds)
|
||||
dcu = DisjointClusterUnion(border_id, subregions, mwm_size_thr)
|
||||
#save_splitting_to_file(dcu, f'all_{country_region_name}')
|
||||
all_subregion_ids = dcu.get_all_subregion_ids()
|
||||
common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids)
|
||||
|
||||
i = 0
|
||||
while True:
|
||||
with open(f"clusters-{i:02d}.json", 'w') as f:
|
||||
json.dump(dcu.clusters, f, ensure_ascii=False, indent=2)
|
||||
if dcu.get_cluster_count() == 1:
|
||||
return dcu
|
||||
i += 1
|
||||
#print(f"i = {i}")
|
||||
smallest_cluster_id = dcu.get_smallest_cluster()
|
||||
if not smallest_cluster_id:
|
||||
return dcu # TODO: return target splitting
|
||||
#print(f"smallest cluster = {json.dumps(dcu.clusters[smallest_cluster_id])}")
|
||||
return dcu
|
||||
best_cluster_id = get_best_cluster_to_join_with(smallest_cluster_id, dcu, common_border_matrix)
|
||||
if not best_cluster_id: # !!! a case for South West England and popul 500000
|
||||
dcu.mark_cluster_finished(smallest_cluster_id)
|
||||
if not best_cluster_id:
|
||||
dcu.mark_cluster_finished(smallest_cluster_id)
|
||||
continue
|
||||
assert (smallest_cluster_id != best_cluster_id), f"{smallest_cluster_id}"
|
||||
#print(f"best cluster = {json.dumps(dcu.clusters[best_cluster_id])}")
|
||||
new_cluster_id = dcu.union(smallest_cluster_id, best_cluster_id)
|
||||
#print(f"{json.dumps(dcu.clusters[new_cluster_id])}")
|
||||
#print()
|
||||
#import sys; sys.exit()
|
||||
dcu.union(smallest_cluster_id, best_cluster_id)
|
||||
return dcu
|
||||
|
||||
|
||||
|
@ -279,6 +222,9 @@ def write_polygons_to_poly(file, polygons, name_prefix):
|
|||
|
||||
|
||||
def save_splitting_to_file(conn, dcu: DisjointClusterUnion, filename_prefix=None):
|
||||
"""May be used for debugging"""
|
||||
GENERATE_ALL_POLY=False
|
||||
FOLDER='split_results'
|
||||
with open(f"{FOLDER}/{filename_prefix}.poly", 'w') as poly_file:
|
||||
poly_file.write(f"{filename_prefix}\n")
|
||||
for cluster_id, data in dcu.clusters.items():
|
||||
|
@ -297,7 +243,7 @@ def save_splitting_to_file(conn, dcu: DisjointClusterUnion, filename_prefix=None
|
|||
poly_file.write('END\n')
|
||||
with open(f"{FOLDER}/{filename_prefix}-splitting.json", 'w') as f:
|
||||
json.dump(dcu.clusters, f, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
|
||||
def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
|
||||
cursor = conn.cursor()
|
||||
|
@ -305,8 +251,7 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
|
|||
cursor.execute(f"""
|
||||
DELETE FROM {autosplit_table}
|
||||
WHERE osm_border_id = {dcu.region_id}
|
||||
AND city_population_thr = {dcu.city_population_thr}
|
||||
AND cluster_population_thr = {dcu.cluster_population_thr}
|
||||
AND mwm_size_thr = {dcu.mwm_size_thr}
|
||||
""")
|
||||
for cluster_id, data in dcu.clusters.items():
|
||||
subregion_ids = data['subregion_ids']
|
||||
|
@ -314,20 +259,19 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
|
|||
cluster_geometry_sql = get_union_sql(subregion_ids)
|
||||
cursor.execute(f"""
|
||||
INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom,
|
||||
city_population_thr, cluster_population_thr)
|
||||
mwm_size_thr, mwm_size_est)
|
||||
VALUES (
|
||||
{dcu.region_id},
|
||||
'{{{','.join(str(x) for x in subregion_ids)}}}',
|
||||
({cluster_geometry_sql}),
|
||||
{dcu.city_population_thr},
|
||||
{dcu.cluster_population_thr}
|
||||
{dcu.mwm_size_thr},
|
||||
{data['mwm_size_est']}
|
||||
)
|
||||
""")
|
||||
""")
|
||||
conn.commit()
|
||||
|
||||
|
||||
|
||||
def get_region_and_country_names(conn, region_id):
|
||||
#if region_id != -1574364: return
|
||||
cursor = conn.cursor()
|
||||
try:
|
||||
cursor.execute(
|
||||
|
@ -355,18 +299,15 @@ def get_region_and_country_names(conn, region_id):
|
|||
print(f"Many countries for region '{region_name}' id={region_id}")
|
||||
return region_name, country_name
|
||||
|
||||
DEFAULT_CITY_POPULATION_THRESHOLD = 500000
|
||||
DEFAULT_CLUSTER_POPULATION_THR = 500000
|
||||
|
||||
def split_region(conn, region_id, next_level,
|
||||
thresholds=(DEFAULT_CITY_POPULATION_THRESHOLD,
|
||||
DEFAULT_CLUSTER_POPULATION_THR),
|
||||
mwm_size_thr,
|
||||
save_to_files=False):
|
||||
region_name, country_name = get_region_and_country_names(conn, region_id)
|
||||
region_name = region_name.replace('/', '|')
|
||||
country_region_name = f"{country_name}_{region_name}" if country_name else region_name
|
||||
dcu = find_golden_splitting(conn, region_id, next_level,
|
||||
country_region_name, thresholds)
|
||||
country_region_name, mwm_size_thr)
|
||||
if dcu is None:
|
||||
return
|
||||
|
||||
|
@ -378,30 +319,6 @@ def save_splitting(dcu: DisjointClusterUnion, conn,
|
|||
save_splitting_to_db(conn, dcu)
|
||||
if save_to_files:
|
||||
print(f"Saving {country_region_name}")
|
||||
filename_prefix = f"{country_region_name}-{dcu.city_population_thrR}"
|
||||
filename_prefix = f"{country_region_name}-{dcu.city_population_thr}"
|
||||
save_splitting_to_file(conn, dcu, filename_prefix)
|
||||
|
||||
|
||||
GENERATE_ALL_POLY=False
|
||||
FOLDER='split_results'
|
||||
#CITY_POPULATION_THR = 500000
|
||||
#CLUSTER_POPULATION_THR = 500000
|
||||
|
||||
if __name__ == '__main__':
|
||||
conn = psycopg2.connect("dbname=az_gis3")
|
||||
|
||||
PREFIX = "UBavaria"
|
||||
CITY_POPULATION_THR = 500000
|
||||
CLUSTER_POPULATION_THR = 500000
|
||||
|
||||
region_id = -162050 # -165475 # California ## -162050 # Florida
|
||||
region_id = -2145274 # Upper Bavaria
|
||||
#region_id = -151339 # South West England
|
||||
#region_id = -58446 # Scotland
|
||||
dcu = find_golden_splitting(region_id)
|
||||
make_polys(dcu.clusters)
|
||||
with open(f"{PREFIX}_{CITY_POPULATION_THR}_splitting{region_id}-poplen.json", 'w') as f:
|
||||
json.dump(dcu.clusters, f, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ from countries_structure import (
|
|||
create_countries_initial_structure,
|
||||
get_osm_border_name_by_osm_id,
|
||||
)
|
||||
from subregions import get_subregions_info
|
||||
|
||||
try:
|
||||
from lxml import etree
|
||||
|
@ -78,7 +79,8 @@ def fetch_borders(**kwargs):
|
|||
query = f"""
|
||||
SELECT name, geometry, nodes, modified, disabled, count_k, cmnt,
|
||||
(CASE WHEN area = 'NaN' THEN 0 ELSE area END) AS area,
|
||||
id, admin_level, parent_id, parent_name
|
||||
id, admin_level, parent_id, parent_name,
|
||||
mwm_size_est
|
||||
FROM (
|
||||
SELECT name,
|
||||
ST_AsGeoJSON({geom}, 7) as geometry,
|
||||
|
@ -95,7 +97,8 @@ def fetch_borders(**kwargs):
|
|||
parent_id,
|
||||
( SELECT name FROM {table}
|
||||
WHERE id = t.parent_id
|
||||
) AS parent_name
|
||||
) AS parent_name,
|
||||
mwm_size_est
|
||||
FROM {table} t
|
||||
WHERE ({where_clause}) {leaves_filter}
|
||||
) q
|
||||
|
@ -112,18 +115,19 @@ def fetch_borders(**kwargs):
|
|||
'disabled': rec[4], 'count_k': rec[5],
|
||||
'comment': rec[6],
|
||||
'area': rec[7],
|
||||
'id': region_id, 'country_id': country_id,
|
||||
'id': region_id,
|
||||
'admin_level': rec[9],
|
||||
'parent_id': rec[10],
|
||||
'parent_name': rec[11] or '',
|
||||
'country_name': country_name
|
||||
'country_id': country_id,
|
||||
'country_name': country_name,
|
||||
'mwm_size_est': rec[12]
|
||||
}
|
||||
feature = {'type': 'Feature',
|
||||
'geometry': json.loads(rec[1]),
|
||||
'properties': props
|
||||
}
|
||||
borders.append(feature)
|
||||
#print([x['properties'] for x in borders])
|
||||
return borders
|
||||
|
||||
def simplify_level_to_postgis_value(simplify_level):
|
||||
|
@ -228,8 +232,8 @@ def query_crossing():
|
|||
pass
|
||||
return jsonify(type='FeatureCollection', features=result)
|
||||
|
||||
@app.route('/tables')
|
||||
def check_osm_table():
|
||||
@app.route('/config')
|
||||
def get_server_configuration():
|
||||
osm = False
|
||||
backup = False
|
||||
old = []
|
||||
|
@ -260,7 +264,9 @@ def check_osm_table():
|
|||
crossing = True
|
||||
except psycopg2.Error as e:
|
||||
pass
|
||||
return jsonify(osm=osm, tables=old, readonly=config.READONLY, backup=backup, crossing=crossing)
|
||||
return jsonify(osm=osm, tables=old, readonly=config.READONLY,
|
||||
backup=backup, crossing=crossing,
|
||||
mwm_size_thr=config.MWM_SIZE_THRESHOLD)
|
||||
|
||||
@app.route('/search')
|
||||
def search():
|
||||
|
@ -341,9 +347,10 @@ def join_borders():
|
|||
cur.execute(f"""
|
||||
UPDATE {table}
|
||||
SET id = {free_id},
|
||||
geom = ST_Union(geom, b2.g),
|
||||
geom = ST_Union({table}.geom, b2.geom),
|
||||
mwm_size_est = {table}.mwm_size_est + b2.mwm_size_est,
|
||||
count_k = -1
|
||||
FROM (SELECT geom AS g FROM {table} WHERE id = %s) AS b2
|
||||
FROM (SELECT geom, mwm_size_est FROM {table} WHERE id = %s) AS b2
|
||||
WHERE id = %s""", (region_id2, region_id1))
|
||||
cur.execute(f"DELETE FROM {table} WHERE id = %s", (region_id2,))
|
||||
except psycopg2.Error as e:
|
||||
|
@ -630,24 +637,23 @@ def divide_preview():
|
|||
if not is_admin:
|
||||
return jsonify(status="Could not apply auto-division to non-administrative regions")
|
||||
try:
|
||||
city_population_thr = int(request.args.get('city_population_thr'))
|
||||
cluster_population_thr = int(request.args.get('cluster_population_thr'))
|
||||
mwm_size_thr = int(request.args.get('mwm_size_thr'))
|
||||
except ValueError:
|
||||
return jsonify(status='Not a number in thresholds.')
|
||||
return divide_into_clusters_preview(
|
||||
region_ids, next_level,
|
||||
(city_population_thr, cluster_population_thr))
|
||||
mwm_size_thr)
|
||||
else:
|
||||
return divide_into_subregions_preview(region_ids, next_level)
|
||||
|
||||
def get_subregions(region_ids, next_level):
|
||||
def get_subregions_for_preview(region_ids, next_level):
|
||||
subregions = list(itertools.chain.from_iterable(
|
||||
get_subregions_one(region_id, next_level)
|
||||
get_subregions_one_for_preview(region_id, next_level)
|
||||
for region_id in region_ids
|
||||
))
|
||||
return subregions
|
||||
|
||||
def get_subregions_one(region_id, next_level):
|
||||
def get_subregions_one_for_preview(region_id, next_level):
|
||||
osm_table = config.OSM_TABLE
|
||||
table = config.TABLE
|
||||
cur = g.conn.cursor()
|
||||
|
@ -671,28 +677,28 @@ def get_subregions_one(region_id, next_level):
|
|||
subregions.append(feature)
|
||||
return subregions
|
||||
|
||||
def get_clusters(region_ids, next_level, thresholds):
|
||||
def get_clusters_for_preview(region_ids, next_level, thresholds):
|
||||
clusters = list(itertools.chain.from_iterable(
|
||||
get_clusters_one(region_id, next_level, thresholds)
|
||||
get_clusters_for_preview_one(region_id, next_level, thresholds)
|
||||
for region_id in region_ids
|
||||
))
|
||||
return clusters
|
||||
|
||||
def get_clusters_one(region_id, next_level, thresholds):
|
||||
def get_clusters_for_preview_one(region_id, next_level, mwm_size_thr):
|
||||
autosplit_table = config.AUTOSPLIT_TABLE
|
||||
cursor = g.conn.cursor()
|
||||
where_clause = f"""
|
||||
osm_border_id = %s
|
||||
AND city_population_thr = %s
|
||||
AND cluster_population_thr = %s
|
||||
AND mwm_size_thr = %s
|
||||
"""
|
||||
splitting_sql_params = (region_id,) + thresholds
|
||||
splitting_sql_params = (region_id, mwm_size_thr)
|
||||
cursor.execute(f"""
|
||||
SELECT 1 FROM {autosplit_table}
|
||||
WHERE {where_clause}
|
||||
""", splitting_sql_params)
|
||||
if cursor.rowcount == 0:
|
||||
split_region(g.conn, region_id, next_level, thresholds)
|
||||
split_region(g.conn, region_id, next_level, mwm_size_thr)
|
||||
|
||||
cursor.execute(f"""
|
||||
SELECT subregion_ids[1], ST_AsGeoJSON(ST_SimplifyPreserveTopology(geom, 0.01)) as way
|
||||
FROM {autosplit_table}
|
||||
|
@ -700,23 +706,24 @@ def get_clusters_one(region_id, next_level, thresholds):
|
|||
""", splitting_sql_params)
|
||||
clusters = []
|
||||
for rec in cursor:
|
||||
cluster = { 'type': 'Feature',
|
||||
'geometry': json.loads(rec[1]),
|
||||
'properties': {'osm_id': int(rec[0])}
|
||||
cluster = {
|
||||
'type': 'Feature',
|
||||
'geometry': json.loads(rec[1]),
|
||||
'properties': {'osm_id': int(rec[0])}
|
||||
}
|
||||
clusters.append(cluster)
|
||||
return clusters
|
||||
|
||||
def divide_into_subregions_preview(region_ids, next_level):
|
||||
subregions = get_subregions(region_ids, next_level)
|
||||
subregions = get_subregions_for_preview(region_ids, next_level)
|
||||
return jsonify(
|
||||
status='ok',
|
||||
subregions={'type': 'FeatureCollection', 'features': subregions}
|
||||
)
|
||||
|
||||
def divide_into_clusters_preview(region_ids, next_level, thresholds):
|
||||
subregions = get_subregions(region_ids, next_level)
|
||||
clusters = get_clusters(region_ids, next_level, thresholds)
|
||||
def divide_into_clusters_preview(region_ids, next_level, mwm_size_thr):
|
||||
subregions = get_subregions_for_preview(region_ids, next_level)
|
||||
clusters = get_clusters_for_preview(region_ids, next_level, mwm_size_thr)
|
||||
return jsonify(
|
||||
status='ok',
|
||||
subregions={'type': 'FeatureCollection', 'features': subregions},
|
||||
|
@ -744,51 +751,53 @@ def divide():
|
|||
if not is_admin:
|
||||
return jsonify(status="Could not apply auto-division to non-administrative regions")
|
||||
try:
|
||||
city_population_thr = int(request.args.get('city_population_thr'))
|
||||
cluster_population_thr = int(request.args.get('cluster_population_thr'))
|
||||
mwm_size_thr = int(request.args.get('mwm_size_thr'))
|
||||
except ValueError:
|
||||
return jsonify(status='Not a number in thresholds.')
|
||||
return divide_into_clusters(
|
||||
region_ids, next_level,
|
||||
(city_population_thr, cluster_population_thr))
|
||||
mwm_size_thr)
|
||||
else:
|
||||
return divide_into_subregions(region_ids, next_level)
|
||||
|
||||
def divide_into_subregions(region_ids, next_level):
|
||||
table = config.TABLE
|
||||
osm_table = config.OSM_TABLE
|
||||
cur = g.conn.cursor()
|
||||
for region_id in region_ids:
|
||||
is_admin = is_administrative_region(region_id)
|
||||
if is_admin:
|
||||
# TODO: rewrite SELECT into join rather than subquery to enable gist index
|
||||
cur.execute(f"""
|
||||
INSERT INTO {table} (id, geom, name, parent_id, modified, count_k)
|
||||
SELECT osm_id, way, name, %s, now(), -1
|
||||
FROM {osm_table}
|
||||
WHERE ST_Contains(
|
||||
(SELECT geom FROM {table} WHERE id = %s), way
|
||||
)
|
||||
AND admin_level = {next_level}
|
||||
""", (region_id, region_id,)
|
||||
)
|
||||
else:
|
||||
cur.execute(f"""
|
||||
INSERT INTO {table} (id, geom, name, parent_id, modified, count_k)
|
||||
SELECT osm_id, way, name, (SELECT parent_id FROM {table} WHERE id = %s), now(), -1
|
||||
FROM {osm_table}
|
||||
WHERE ST_Contains(
|
||||
(SELECT geom FROM {table} WHERE id = %s), way
|
||||
)
|
||||
AND admin_level = {next_level}
|
||||
""", (region_id, region_id,)
|
||||
)
|
||||
cur.execute(f"DELETE FROM {table} WHERE id = %s", (region_id,))
|
||||
|
||||
divide_into_subregions_one(region_id, next_level)
|
||||
g.conn.commit()
|
||||
return jsonify(status='ok')
|
||||
|
||||
def divide_into_clusters(region_ids, next_level, thresholds):
|
||||
def divide_into_subregions_one(region_id, next_level):
|
||||
table = config.TABLE
|
||||
osm_table = config.OSM_TABLE
|
||||
subregions = get_subregions_info(g.conn, region_id, table,
|
||||
next_level, need_cities=False)
|
||||
cursor = g.conn.cursor()
|
||||
is_admin = is_administrative_region(region_id)
|
||||
if is_admin:
|
||||
for subregion_id, data in subregions.items():
|
||||
cursor.execute(f"""
|
||||
INSERT INTO {table}
|
||||
(id, geom, name, parent_id, modified, count_k, mwm_size_est)
|
||||
SELECT osm_id, way, name, %s, now(), -1, {data['mwm_size_est']}
|
||||
FROM {osm_table}
|
||||
WHERE osm_id = %s
|
||||
""", (region_id, subregion_id)
|
||||
)
|
||||
else:
|
||||
for subregion_id, data in subregions.items():
|
||||
cursor.execute(f"""
|
||||
INSERT INTO {table}
|
||||
(id, geom, name, parent_id, modified, count_k, mwm_size_est)
|
||||
SELECT osm_id, way, name,
|
||||
(SELECT parent_id FROM {table} WHERE id = %s),
|
||||
now(), -1, {data['mwm_size_est']}
|
||||
FROM {osm_table}
|
||||
WHERE osm_id = %s
|
||||
""", (region_id, subregion_id)
|
||||
)
|
||||
cursor.execute(f"DELETE FROM {table} WHERE id = %s", (region_id,))
|
||||
|
||||
def divide_into_clusters(region_ids, next_level, mwm_size_thr):
|
||||
table = config.TABLE
|
||||
autosplit_table = config.AUTOSPLIT_TABLE
|
||||
cursor = g.conn.cursor()
|
||||
|
@ -799,16 +808,15 @@ def divide_into_clusters(region_ids, next_level, thresholds):
|
|||
|
||||
where_clause = f"""
|
||||
osm_border_id = %s
|
||||
AND city_population_thr = %s
|
||||
AND cluster_population_thr = %s
|
||||
AND mwm_size_thr = %s
|
||||
"""
|
||||
splitting_sql_params = (region_id,) + thresholds
|
||||
splitting_sql_params = (region_id, mwm_size_thr)
|
||||
cursor.execute(f"""
|
||||
SELECT 1 FROM {autosplit_table}
|
||||
WHERE {where_clause}
|
||||
""", splitting_sql_params)
|
||||
if cursor.rowcount == 0:
|
||||
split_region(g.conn, region_id, next_level, thresholds)
|
||||
split_region(g.conn, region_id, next_level, mwm_size_thr)
|
||||
|
||||
free_id = get_free_id()
|
||||
counter = 0
|
||||
|
@ -830,8 +838,8 @@ def divide_into_clusters(region_ids, next_level, thresholds):
|
|||
subregion_id = free_id
|
||||
name = f"{base_name}_{counter}"
|
||||
insert_cursor.execute(f"""
|
||||
INSERT INTO {table} (id, name, parent_id, geom, modified, count_k)
|
||||
SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1
|
||||
INSERT INTO {table} (id, name, parent_id, geom, modified, count_k, mwm_size_est)
|
||||
SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1, mwm_size_est
|
||||
FROM {autosplit_table} WHERE subregion_ids[1] = %s AND {where_clause}
|
||||
""", (name, cluster_id,) + splitting_sql_params)
|
||||
g.conn.commit()
|
||||
|
|
|
@ -8,11 +8,13 @@ READONLY = False
|
|||
TABLE = 'borders'
|
||||
# from where OSM borders are imported
|
||||
OSM_TABLE = 'osm_borders'
|
||||
# All populated places in OSM
|
||||
OSM_PLACES_TABLE = 'osm_places'
|
||||
# transit table for autosplitting results
|
||||
AUTOSPLIT_TABLE = 'splitting'
|
||||
## tables with borders for reference
|
||||
# tables with borders for reference
|
||||
OTHER_TABLES = {
|
||||
#'old': 'old_borders'
|
||||
#'old': 'old_borders'
|
||||
}
|
||||
# backup table
|
||||
BACKUP = 'borders_backup'
|
||||
|
@ -28,3 +30,8 @@ IMPORT_ERROR_ALERT = False
|
|||
DAEMON_STATUS_PATH = '/tmp/borders-daemon-status.txt'
|
||||
DAEMON_PID_PATH = '/tmp/borders-daemon.pid'
|
||||
DAEMON_LOG_PATH = '/var/log/borders-daemon.log'
|
||||
# mwm size threshold in Kb
|
||||
MWM_SIZE_THRESHOLD = 70*1024
|
||||
# Estimated mwm size is predicted by the 'model.pkl' with 'scaler.pkl' for X
|
||||
MWM_SIZE_PREDICTION_MODEL_PATH = '/app/data/model.pkl'
|
||||
MWM_SIZE_PREDICTION_MODEL_SCALER_PATH = '/app/data/scaler.pkl'
|
||||
|
|
|
@ -2,6 +2,8 @@ import itertools
|
|||
|
||||
import config
|
||||
|
||||
from subregions import get_subregions_info
|
||||
|
||||
|
||||
table = config.TABLE
|
||||
osm_table = config.OSM_TABLE
|
||||
|
@ -260,43 +262,32 @@ def _clear_borders(conn):
|
|||
conn.commit()
|
||||
|
||||
|
||||
def _find_subregions(conn, osm_ids, next_level, parents, names):
|
||||
def _find_subregions(conn, osm_ids, next_level, regions):
|
||||
"""Return subregions of level 'next_level' for regions with osm_ids."""
|
||||
cursor = conn.cursor()
|
||||
parent_osm_ids = ','.join(str(x) for x in osm_ids)
|
||||
cursor.execute(f"""
|
||||
SELECT b.osm_id, b.name, subb.osm_id, subb.name
|
||||
FROM {osm_table} b, {osm_table} subb
|
||||
WHERE subb.admin_level=%s
|
||||
AND b.osm_id IN ({parent_osm_ids})
|
||||
AND ST_Contains(b.way, subb.way)
|
||||
""",
|
||||
(next_level,)
|
||||
)
|
||||
|
||||
# parent_osm_id => [(osm_id, name), (osm_id, name), ...]
|
||||
subregion_ids = []
|
||||
|
||||
for rec in cursor:
|
||||
parent_osm_id = rec[0]
|
||||
osm_id = rec[2]
|
||||
parents[osm_id] = parent_osm_id
|
||||
name = rec[3]
|
||||
names[osm_id] = name
|
||||
subregion_ids.append(osm_id)
|
||||
for osm_id in osm_ids:
|
||||
more_subregions = get_subregions_info(conn, osm_id, table,
|
||||
next_level, need_cities=False)
|
||||
for subregion_id, subregion_data in more_subregions.items():
|
||||
region_data = regions.setdefault(subregion_id, {})
|
||||
region_data['name'] = subregion_data['name']
|
||||
region_data['mwm_size_est'] = subregion_data['mwm_size_est']
|
||||
region_data['parent_id'] = osm_id
|
||||
subregion_ids.append(subregion_id)
|
||||
return subregion_ids
|
||||
|
||||
|
||||
def _create_regions(conn, osm_ids, parents, names):
|
||||
def _create_regions(conn, osm_ids, regions):
|
||||
if not osm_ids:
|
||||
return
|
||||
osm_ids = list(osm_ids) # to ensure order
|
||||
cursor = conn.cursor()
|
||||
sql_values = ','.join(
|
||||
f'({osm_id},'
|
||||
'%s,'
|
||||
'%s,'
|
||||
f"{regions[osm_id].get('parent_id', 'NULL')},"
|
||||
f"{regions[osm_id].get('mwm_size_est', 'NULL')},"
|
||||
f'(SELECT way FROM {osm_table} WHERE osm_id={osm_id}),'
|
||||
f'{parents[osm_id] or "NULL"},'
|
||||
'now())'
|
||||
for osm_id in osm_ids
|
||||
)
|
||||
|
@ -304,21 +295,23 @@ def _create_regions(conn, osm_ids, parents, names):
|
|||
#print(f"names={tuple(names[osm_id] for osm_id in osm_ids)}")
|
||||
#print(f"all parents={parents}")
|
||||
cursor.execute(f"""
|
||||
INSERT INTO {table} (id, name, geom, parent_id, modified)
|
||||
INSERT INTO {table} (id, name, parent_id, mwm_size_est, geom, modified)
|
||||
VALUES {sql_values}
|
||||
""", tuple(names[osm_id] for osm_id in osm_ids)
|
||||
""", tuple(regions[osm_id]['name'] for osm_id in osm_ids)
|
||||
)
|
||||
|
||||
|
||||
def _make_country_structure(conn, country_osm_id):
|
||||
names = {} # osm_id => osm name
|
||||
parents = {} # osm_id => parent_osm_id
|
||||
regions = {} # osm_id: { 'name': name,
|
||||
# 'mwm_size_est': size,
|
||||
# 'parent_id': parent_id }
|
||||
|
||||
country_name = get_osm_border_name_by_osm_id(conn, country_osm_id)
|
||||
names[country_osm_id] = country_name
|
||||
parents[country_osm_id] = None
|
||||
country_data = regions.setdefault(country_osm_id, {})
|
||||
country_data['name'] = country_name
|
||||
# TODO: country_data['mwm_size_est'] = ...
|
||||
|
||||
_create_regions(conn, [country_osm_id], parents, names)
|
||||
_create_regions(conn, [country_osm_id], regions)
|
||||
|
||||
if country_initial_levels.get(country_name):
|
||||
admin_levels = country_initial_levels[country_name]
|
||||
|
@ -332,18 +325,19 @@ def _make_country_structure(conn, country_osm_id):
|
|||
f"AL={admin_level}, prev-AL={prev_level}"
|
||||
)
|
||||
subregion_ids = _find_subregions(conn, prev_region_ids,
|
||||
admin_level, parents, names)
|
||||
_create_regions(conn, subregion_ids, parents, names)
|
||||
admin_level, regions)
|
||||
_create_regions(conn, subregion_ids, regions)
|
||||
prev_region_ids = subregion_ids
|
||||
|
||||
|
||||
def create_countries_initial_structure(conn):
|
||||
_clear_borders(conn)
|
||||
cursor = conn.cursor()
|
||||
# TODO: process overlapping countries, like Ukraine and Russia with common Crimea
|
||||
cursor.execute(f"""
|
||||
SELECT osm_id, name
|
||||
FROM {osm_table}
|
||||
WHERE admin_level = 2
|
||||
WHERE admin_level = 2 and name != 'Ukraine'
|
||||
"""
|
||||
# and name in --('Germany', 'Luxembourg', 'Austria')
|
||||
# ({','.join(f"'{c}'" for c in country_initial_levels.keys())})
|
||||
|
|
BIN
web/app/data/model.pkl
Normal file
BIN
web/app/data/model.pkl
Normal file
Binary file not shown.
BIN
web/app/data/mwm_data.xlsx
Normal file
BIN
web/app/data/mwm_data.xlsx
Normal file
Binary file not shown.
119
web/app/data/prediction_model.py
Normal file
119
web/app/data/prediction_model.py
Normal file
|
@ -0,0 +1,119 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.model_selection import (
|
||||
cross_val_score,
|
||||
KFold,
|
||||
GridSearchCV,
|
||||
)
|
||||
from sklearn.svm import SVR
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
|
||||
# Training data: one row per mwm; drop rows explicitly marked for exclusion.
data = pd.read_excel('mwm_data.xlsx', sheet_name='mwms_all', header=1)
data = data[data['exclude'] == 0]

# Population feature: options are 'population' and 'urban_pop'
# (the latter counts population of cities and towns only).
popul_column = 'urban_pop'
feature_names = [popul_column, 'area', 'city_cnt', 'hamlet_cnt']
target_name = 'size'

# Fill missing feature values with 0 — except 'area': if area is None
# it's a data error and should surface as NaN rather than be zeroed.
for feature in set(feature_names) - {'area'}:
    data[feature] = data[feature].fillna(0)


scoring = 'neg_mean_squared_error'  # another option is 'r2'
|
||||
|
||||
|
||||
def my_cross_validation(sample):
    """Compare baseline regressors on *sample* via 5-fold cross-validation.

    Prints, for each estimator, the per-fold scores and their mean,
    using the module-level `scoring` metric.
    """
    features = StandardScaler().fit_transform(sample[feature_names])
    target = sample[target_name]

    estimators = {
        'LinRegression': LinearRegression(fit_intercept=False),
        'SVR_linear': SVR(kernel='linear'),
        'SVR_rbf': SVR(kernel='rbf'),
    }

    folds = KFold(5, shuffle=True, random_state=1)
    for estimator_name, estimator in estimators.items():
        cv_scores = cross_val_score(estimator, features, target,
                                    cv=folds, scoring=scoring)
        print(f"{estimator_name:15}", cv_scores, np.mean(cv_scores))
|
||||
|
||||
|
||||
def my_grid_search(sample):
    """Tune SVR hyperparameters on *sample* with an exhaustive grid search.

    Splits off 20% as a hold-out set, fits GridSearchCV on the rest, and
    prints the searched parameter ranges together with the best combination.
    """
    X = StandardScaler().fit_transform(sample[feature_names])
    y = sample[target_name]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)

    # Searched ranges (narrowed down by earlier, coarser runs).
    C_array = [10 ** n for n in range(6, 7)]
    gamma_array = [0.009 + i * 0.001 for i in range(-7, 11, 2)] + ['auto', 'scale']
    epsilon_array = [0.5 * i for i in range(0, 15)]
    coef0_array = [-0.1, -0.01, 0, 0.01, 0.1]

    common = {'C': C_array, 'epsilon': epsilon_array}
    param_grid = [
        dict(common, kernel=['linear']),
        dict(common, kernel=['rbf'], gamma=gamma_array),
        dict(common, kernel=['poly', 'sigmoid'],
             gamma=gamma_array, coef0=coef0_array),
    ]

    grid_search = GridSearchCV(SVR(), param_grid, scoring=scoring)
    grid_search.fit(X_train, y_train)

    print("C", C_array)
    print("gamma", gamma_array)
    print("epsilon", epsilon_array)
    print("coef0", coef0_array)
    print("Best_params:", grid_search.best_params_, grid_search.best_score_)
|
||||
|
||||
|
||||
def train_and_serialize_model(sample):
    """Fit the production SVR size model on *sample* and pickle it to disk.

    Writes 'model.pkl' (the regressor) and 'scaler.pkl' (the fitted
    StandardScaler), then reloads both and prints predictions for the
    first rows as a serialization sanity check.
    """
    import pickle

    features = sample[feature_names]
    target = sample[target_name]

    head = features[0:4]  # kept unscaled for the round-trip check below

    scaler = StandardScaler()
    scaled = scaler.fit_transform(features)

    # Hyperparameters tuned with GridSearch (see my_grid_search above).
    regressor = SVR(kernel='rbf', C=10 ** 6, epsilon=0.0, gamma=0.012)
    regressor.fit(scaled, target)

    print(regressor.predict(scaled[0:4]))

    # Serialize model and scaler side by side.
    with open('model.pkl', 'wb') as f:
        pickle.dump(regressor, f)
    with open('scaler.pkl', 'wb') as f:
        pickle.dump(scaler, f)

    # Deserialize and predict the same head rows — output should match.
    with open('model.pkl', 'rb') as f:
        restored_model = pickle.load(f)
    with open('scaler.pkl', 'rb') as f:
        restored_scaler = pickle.load(f)
    print(restored_model.predict(restored_scaler.transform(head)))
|
||||
|
||||
|
||||
# Script entry point: (re)train the mwm size model on the spreadsheet data
# and serialize it next to this file as model.pkl / scaler.pkl.
if __name__ == '__main__':
    train_and_serialize_model(data)
|
||||
|
BIN
web/app/data/scaler.pkl
Normal file
BIN
web/app/data/scaler.pkl
Normal file
Binary file not shown.
29
web/app/mwm_size_predictor.py
Normal file
29
web/app/mwm_size_predictor.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
import numpy as np
|
||||
import pickle
|
||||
|
||||
import config
|
||||
|
||||
|
||||
class MwmSizePredictor:
    """Estimates mwm file size from region features using a pickled
    scikit-learn regressor and its companion feature scaler.
    """

    def __init__(self):
        # Artifact paths come from the application config.
        with open(config.MWM_SIZE_PREDICTION_MODEL_PATH, 'rb') as f:
            self.model = pickle.load(f)
        with open(config.MWM_SIZE_PREDICTION_MODEL_SCALER_PATH, 'rb') as f:
            self.scaler = pickle.load(f)

    def predict(self, features_array):
        """1D or 2D array of feature values for predictions. Features are
        'urban_pop', 'area', 'city_cnt', 'hamlet_cnt' as defined for the
        prediction model.
        """
        sample = np.array(features_array)
        single = (sample.ndim == 1)
        batch = sample.reshape(1, -1) if single else sample
        estimates = self.model.predict(self.scaler.transform(batch))
        # A single feature vector yields a scalar, a batch yields a list.
        return estimates[0] if single else estimates.tolist()
|
|
@ -3,11 +3,14 @@ var STYLE_SELECTED = { stroke: true, color: '#ff3', weight: 3, fill: true, fillO
|
|||
var FILL_TOO_SMALL = '#0f0';
|
||||
var FILL_TOO_BIG = '#800';
|
||||
var FILL_ZERO = 'black';
|
||||
var OLD_BORDERS_NAME; // filled in checkHasOSM()
|
||||
var IMPORT_ENABLED = true;
|
||||
|
||||
var map, borders = {}, bordersLayer, selectedId, editing = false, readonly = false;
|
||||
var size_good = 50, size_bad = 70;
|
||||
var map, borders = {}, bordersLayer, selectedId, editing = false;
|
||||
var config = { // server config
|
||||
READONLY: false,
|
||||
MWM_SIZE_THR: 70,
|
||||
OLD_BORDERS_NAME: undefined // may be filled in getServerConfiguration()
|
||||
};
|
||||
var size_good, size_bad;
|
||||
var maxRank = 1;
|
||||
var tooSmallLayer = null;
|
||||
var oldBordersLayer = null;
|
||||
|
@ -68,17 +71,17 @@ function init() {
|
|||
else
|
||||
$('#population_thresholds').hide();
|
||||
});
|
||||
checkHasOSM();
|
||||
getServerConfiguration();
|
||||
filterSelect(true);
|
||||
}
|
||||
|
||||
function checkHasOSM() {
|
||||
$.ajax(getServer('tables'), {
|
||||
function getServerConfiguration() {
|
||||
$.ajax(getServer('config'), {
|
||||
success: function(res) {
|
||||
if( res.osm )
|
||||
$('#osm_actions').css('display', 'block');
|
||||
if( res.tables && res.tables.length > 0 ) {
|
||||
OLD_BORDERS_NAME = res.tables[0];
|
||||
config.OLD_BORDERS_NAME = res.tables[0];
|
||||
$('#old_action').css('display', 'block');
|
||||
$('#josm_old').css('display', 'inline');
|
||||
}
|
||||
|
@ -91,7 +94,7 @@ function checkHasOSM() {
|
|||
$('#action_buttons').css('display', 'none');
|
||||
$('#import_link').css('display', 'none');
|
||||
$('#backups').css('display', 'none');
|
||||
readonly = true;
|
||||
config.READONLY = true;
|
||||
}
|
||||
if( !res.readonly && IMPORT_ENABLED ) {
|
||||
$('#import_link').css('display', 'none');
|
||||
|
@ -100,6 +103,11 @@ function checkHasOSM() {
|
|||
var iframe = '<iframe name="import_frame" class="h_iframe" src="about:blank"></iframe>';
|
||||
// $('#filefm').after(iframe);
|
||||
}
|
||||
size_bad = config.MWM_SIZE_THR = Math.round(parseInt(res.mwm_size_thr)/1024);
|
||||
size_good = Math.round(size_bad * 0.7 / 10) * 10;
|
||||
$('#r_green').val(size_good);
|
||||
$('#r_red').val(size_bad);
|
||||
$('#mwm_size_thr').val(config.MWM_SIZE_THR);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -148,11 +156,11 @@ function updateBorders() {
|
|||
crossingLayer.clearLayers();
|
||||
}
|
||||
|
||||
if( oldBordersLayer != null && OLD_BORDERS_NAME ) {
|
||||
if( oldBordersLayer != null && config.OLD_BORDERS_NAME ) {
|
||||
oldBordersLayer.clearLayers();
|
||||
$.ajax(getServer('bbox'), {
|
||||
data: {
|
||||
'table': OLD_BORDERS_NAME,
|
||||
'table': config.OLD_BORDERS_NAME,
|
||||
'simplify': simplified,
|
||||
'xmin': b.getWest(),
|
||||
'xmax': b.getEast(),
|
||||
|
@ -302,6 +310,7 @@ function selectLayer(e) {
|
|||
$('#b_al').text(props['admin_level'] ? '('+props['admin_level']+')' : '');
|
||||
$('#b_parent_name').text(props['parent_name']);
|
||||
$('#b_size').text(Math.round(props['count_k'] * BYTES_FOR_NODE / 1024 / 1024) + ' MB');
|
||||
$('#pa_size').text(Math.round(props['mwm_size_est']/1024) + ' MB');
|
||||
//$('#b_nodes').text(borders[selectedId].layer.getLatLngs()[0].length);
|
||||
$('#b_nodes').text(props['nodes']);
|
||||
$('#b_date').text(props['modified']);
|
||||
|
@ -317,7 +326,7 @@ function selectLayer(e) {
|
|||
|
||||
function filterSelect(noRefresh) {
|
||||
var value = $('#f_type').val();
|
||||
$('#f_size').css('display', value == 'size' ? 'block' : 'none');
|
||||
$('#f_size').css('display', value.endsWith('size') ? 'block' : 'none');
|
||||
$('#f_chars').css('display', value == 'chars' ? 'block' : 'none');
|
||||
$('#f_comments').css('display', value == 'comments' ? 'block' : 'none');
|
||||
$('#f_topo').css('display', value == 'topo' ? 'block' : 'none');
|
||||
|
@ -336,29 +345,47 @@ var colors = ['red', 'orange', 'yellow', 'lime', 'green', 'olive', 'cyan', 'dark
|
|||
'blue', 'navy', 'magenta', 'purple', 'deeppink', 'brown'] //'black';
|
||||
var alphabet = 'abcdefghijklmnopqrstuvwxyz';
|
||||
|
||||
function getStringHash(str) {
|
||||
var hash = 0, i, chr;
|
||||
/*
|
||||
for (i = 0; i < str.length; i++) {
|
||||
chr = str.charCodeAt(i);
|
||||
hash = ((hash << 5) - hash) + chr;
|
||||
hash |= 0; // Convert to 32bit integer
|
||||
}
|
||||
*/
|
||||
hash = str.charCodeAt(0) + str.charCodeAt(1);
|
||||
return hash;
|
||||
}
|
||||
|
||||
function getCountryColor(props) {
    // Map a country name to a stable color so all regions of one country
    // are drawn alike; missing country names fall back to black.
    var country_name = props.country_name;
    if (!country_name)
        return 'black';
    var hash = getStringHash(country_name);
    var indexInColors = Math.abs(hash) % colors.length;
    return colors[indexInColors];
}
|
||||
|
||||
function getColor(props) {
|
||||
var color = STYLE_BORDER.color;
|
||||
var fType = $('#f_type').val();
|
||||
if( fType == 'size' ) {
|
||||
if( fType == 'nodes_size' ) {
|
||||
if( props['count_k'] <= 0 )
|
||||
color = FILL_ZERO;
|
||||
else if( props['count_k'] * BYTES_FOR_NODE < size_good * 1024 * 1024 )
|
||||
color = FILL_TOO_SMALL;
|
||||
else if( props['count_k'] * BYTES_FOR_NODE > size_bad * 1024 * 1024 )
|
||||
color = FILL_TOO_BIG;
|
||||
} else if( fType == 'topo' ) {
|
||||
} else if( fType == 'predict_size' ) {
|
||||
if( props['mwm_size_est'] <= 0 )
|
||||
color = FILL_ZERO;
|
||||
else if( props['mwm_size_est'] < size_good * 1024 )
|
||||
color = FILL_TOO_SMALL;
|
||||
else if( props['mwm_size_est'] > size_bad * 1024 )
|
||||
color = FILL_TOO_BIG;
|
||||
}
|
||||
else if( fType == 'topo' ) {
|
||||
var rings = countRings([0, 0], props.layer);
|
||||
if( rings[1] > 0 )
|
||||
color = FILL_TOO_BIG;
|
||||
|
@ -471,7 +498,7 @@ function bJOSM() {
|
|||
function bJosmOld() {
|
||||
var b = map.getBounds();
|
||||
importInJOSM('josm', {
|
||||
'table': OLD_BORDERS_NAME,
|
||||
'table': config.OLD_BORDERS_NAME,
|
||||
'xmin': b.getWest(),
|
||||
'xmax': b.getEast(),
|
||||
'ymin': b.getSouth(),
|
||||
|
@ -508,7 +535,7 @@ function finishRename() {
|
|||
}
|
||||
|
||||
function bToggleRename() {
|
||||
if( !selectedId || !(selectedId in borders) || readonly )
|
||||
if( !selectedId || !(selectedId in borders) || config.READONLY )
|
||||
return;
|
||||
var rename_el = $('#rename');
|
||||
if (rename_el.is(':hidden')) {
|
||||
|
@ -952,11 +979,8 @@ function clearDivideLayers() {
|
|||
|
||||
function bDividePreview() {
|
||||
var auto_divide = $('#auto_divide').prop('checked');
|
||||
if (auto_divide && (
|
||||
!$('#city_population_thr').val() ||
|
||||
!$('#cluster_population_thr').val())
|
||||
) {
|
||||
alert('Fill population thresholds');
|
||||
if (auto_divide && !$('#mwm_size_thr').val()) {
|
||||
alert('Fill mmw size threshold');
|
||||
return;
|
||||
}
|
||||
clearDivideLayers();
|
||||
|
@ -970,8 +994,7 @@ function bDividePreview() {
|
|||
'apply_to_similar': apply_to_similar
|
||||
};
|
||||
if (auto_divide) {
|
||||
params['city_population_thr'] = $('#city_population_thr').val();
|
||||
params['cluster_population_thr'] = $('#cluster_population_thr').val();
|
||||
params['mwm_size_thr'] = parseInt($('#mwm_size_thr').val()) * 1024;
|
||||
}
|
||||
$.ajax(getServer('divpreview'), {
|
||||
data: params,
|
||||
|
@ -1025,8 +1048,7 @@ function bDivideDo() {
|
|||
'apply_to_similar': apply_to_similar
|
||||
};
|
||||
if (auto_divide) {
|
||||
params['city_population_thr'] = $('#city_population_thr').val();
|
||||
params['cluster_population_thr'] = $('#cluster_population_thr').val();
|
||||
params['mwm_size_thr'] = parseInt($('#mwm_size_thr').val()) * 1024;
|
||||
}
|
||||
$.ajax(getServer('divide'), {
|
||||
data: params,
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
const BYTES_FOR_NODE = 8;
|
||||
const IMPORT_ENABLED = true;
|
||||
|
||||
const SELF_URL = document.location.origin;
|
||||
|
||||
|
|
102
web/app/subregions.py
Normal file
102
web/app/subregions.py
Normal file
|
@ -0,0 +1,102 @@
|
|||
import config
|
||||
from mwm_size_predictor import MwmSizePredictor
|
||||
|
||||
|
||||
# Table name shortcuts from the application config.
osm_table = config.OSM_TABLE
osm_places_table = config.OSM_PLACES_TABLE
# Shared predictor: loads the pickled model/scaler once at import time.
size_predictor = MwmSizePredictor()
|
||||
|
||||
|
||||
def get_subregions_info(conn, region_id, region_table,
                        next_level, need_cities=False):
    """Collect subregions of a region with their estimated mwm sizes.

    :param conn: psycopg2 connection
    :param region_id: id of the parent region
    :param region_table: either TABLE or OSM_TABLE from config.py
    :param next_level: admin level of subregions to find
    :param need_cities: when True, include the list of cities per subregion
    :return: dict {subregion_id => subregion data} including area and population info
    """
    subregions = _get_subregions_basic_info(conn, region_id, region_table,
                                            next_level, need_cities)
    _add_population_data(conn, subregions, need_cities)
    _add_mwm_size_estimation(subregions)

    # Expose only the public part of the collected data.
    keys = ['name', 'mwm_size_est']
    if need_cities:
        keys.append('cities')
    result = {}
    for subregion_id, subregion_data in subregions.items():
        result[subregion_id] = {k: subregion_data[k] for k in keys}
    return result
|
||||
|
||||
|
||||
def _get_subregions_basic_info(conn, region_id, region_table,
                               next_level, need_cities):
    """Query name and area (km²) of admin-level subregions of a region.

    Population-related counters are initialized to zero here and filled
    later by _add_population_data().
    """
    # The parent region may live either in our borders table or in the raw
    # OSM table; the two use different id/geometry column names.
    if region_table == config.TABLE:
        id_column, geom_column = 'id', 'geom'
    else:
        id_column, geom_column = 'osm_id', 'way'

    cursor = conn.cursor()
    cursor.execute(f"""
        SELECT subreg.osm_id, subreg.name, ST_Area(geography(subreg.way))/1.0E+6 area
        FROM {region_table} reg, {osm_table} subreg
        WHERE reg.{id_column} = %s AND subreg.admin_level = %s AND
              ST_Contains(reg.{geom_column}, subreg.way)
        """, (region_id, next_level)
    )

    subregions = {}
    for osm_id, name, area in cursor:
        entry = {
            'osm_id': osm_id,
            'name': name,
            'area': area,
            'urban_pop': 0,
            'city_cnt': 0,
            'hamlet_cnt': 0,
        }
        if need_cities:
            entry['cities'] = []
        subregions[osm_id] = entry
    return subregions
|
||||
|
||||
|
||||
def _add_population_data(conn, subregions, need_cities):
|
||||
cursor = conn.cursor()
|
||||
subregion_ids = ','.join(str(x) for x in subregions.keys())
|
||||
cursor.execute(f"""
|
||||
SELECT b.osm_id, p.name, COALESCE(p.population, 0), p.place
|
||||
FROM {osm_table} b, {osm_places_table} p
|
||||
WHERE b.osm_id IN ({subregion_ids})
|
||||
AND ST_CONTAINS(b.way, p.center)
|
||||
"""
|
||||
)
|
||||
for subregion_id, place_name, place_population, place_type in cursor:
|
||||
subregion_data = subregions[subregion_id]
|
||||
if place_type in ('city', 'town'):
|
||||
subregion_data['city_cnt'] += 1
|
||||
subregion_data['urban_pop'] += place_population
|
||||
if need_cities:
|
||||
subregion_data['cities'].append({
|
||||
'name': place_name,
|
||||
'population': place_population
|
||||
})
|
||||
else:
|
||||
subregion_data['hamlet_cnt'] += 1
|
||||
|
||||
|
||||
def _add_mwm_size_estimation(subregions):
|
||||
subregions_sorted = [
|
||||
(
|
||||
s_id,
|
||||
[subregions[s_id][f] for f in
|
||||
('urban_pop', 'area', 'city_cnt', 'hamlet_cnt')]
|
||||
)
|
||||
for s_id in sorted(subregions.keys())
|
||||
]
|
||||
|
||||
feature_array = [x[1] for x in subregions_sorted]
|
||||
predictions = size_predictor.predict(feature_array)
|
||||
|
||||
for subregion_id, mwm_size_prediction in zip(
|
||||
(x[0] for x in subregions_sorted),
|
||||
predictions
|
||||
):
|
||||
subregions[subregion_id]['mwm_size_est'] = mwm_size_prediction
|
|
@ -30,8 +30,11 @@
|
|||
#backup_saving, #backup_restoring { margin-bottom: 1em; }
|
||||
#filefm, #old_action, #josm_old, #cross_actions { display: none; }
|
||||
#h_iframe { display: block; width: 100%; height: 80px; }
|
||||
a, a:hover, a:visited { color: blue; }
|
||||
#start_over, #start_over:hover, #start_over:visited { color: red; }
|
||||
#city_population_thr, #cluster_population_thr { max-width: 80px;}
|
||||
#population_thresholds { padding-left: 1.5em; }
|
||||
#mwm_size_thr { max-width: 50px;}
|
||||
#r_green, #r_red { width: 40px; }
|
||||
#b_import { max-width: 180px; }
|
||||
#import_div { position: relative; display: none; }
|
||||
#hide_import_button {
|
||||
|
@ -44,7 +47,7 @@
|
|||
align-items: center;
|
||||
justify-content: center;
|
||||
cursor: pointer;
|
||||
}
|
||||
}
|
||||
|
||||
</style>
|
||||
</head>
|
||||
|
@ -54,7 +57,8 @@
|
|||
<div id="filter">
|
||||
Раскраска по <select size="1" id="f_type" value="size" onchange="filterSelect()">
|
||||
<option value="country">стране</option>
|
||||
<option value="size">размеру</option>
|
||||
<option value="nodes_size">размеру по точкам</option>
|
||||
<option value="predict_size">предсказ. размеру</option>
|
||||
<option value="topo">топологии</option>
|
||||
<option value="chars">буквам в назв.</option>
|
||||
<option value="comments">комментариям</option>
|
||||
|
@ -138,7 +142,8 @@
|
|||
</div>
|
||||
<div id="potential_parents">
|
||||
</div>
|
||||
<b>Оценка размера:</b> <span id="b_size"></span><br>
|
||||
<b>Оценка размера по точкам:</b> <span id="b_size"></span><br>
|
||||
<b>Оценка размера по нас+пл:</b> <span id="pa_size"></span><br>
|
||||
<b>Последняя правка:</b> <span id="b_date"></span><br>
|
||||
<b>Количество точек:</b> <span id="b_nodes"></span><br>
|
||||
<b>Площадь:</b> <span id="b_area"></span> км²<br>
|
||||
|
@ -188,20 +193,18 @@
|
|||
<button onclick="bPointCancel()">Вернуться</button>
|
||||
</div>
|
||||
<div id="divide" class="actions">
|
||||
Выбранная область <span id="region_to_divide"></span>
|
||||
Выбранная область <span id="region_to_divide"></span>
|
||||
будет заменена вложенными областями уровня
|
||||
<input type="number" id="next_level" min="2" max="12">.<br>
|
||||
<br>
|
||||
<div>
|
||||
<input type="checkbox" id="auto_divide" checked>
|
||||
<label for="auto_divide">Автослияние по населению</label>
|
||||
</div>
|
||||
<div id="population_thresholds">
|
||||
Порог населения города, региона<br>
|
||||
<input id="city_population_thr" type="number"
|
||||
min="0" max="8000000000" value="500000" step="50000">,
|
||||
<input id="cluster_population_thr" type="number"
|
||||
min="0" max="8000000000" value="500000" step="50000">
|
||||
<div id="population_thresholds">
|
||||
Верхняя граница размера mwm:
|
||||
<input id="mwm_size_thr" type="number"
|
||||
min="1" value="70" step="1"> Мб
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<input type="checkbox" id="apply_to_similar">
|
||||
|
|
Loading…
Add table
Reference in a new issue