MWM size prediction model

This commit is contained in:
Alexey Zakharenkov 2020-10-02 12:30:52 +03:00
parent b13e31bff1
commit 580a1ab9ac
17 changed files with 491 additions and 286 deletions

2
.gitignore vendored
View file

@ -1,2 +1,4 @@
__pycache__ __pycache__
*.pyc *.pyc
.idea
nohup.out

View file

@ -14,7 +14,7 @@ CREATE TABLE borders (
count_k INTEGER, count_k INTEGER,
modified TIMESTAMP NOT NULL, modified TIMESTAMP NOT NULL,
cmnt VARCHAR(500), cmnt VARCHAR(500),
mwm_size_est double precision mwm_size_est REAL
); );
CREATE INDEX borders_geom_gits_idx ON borders USING gist (geom); CREATE INDEX borders_geom_gits_idx ON borders USING gist (geom);
CREATE INDEX borders_parent_id_idx ON borders (parent_id); CREATE INDEX borders_parent_id_idx ON borders (parent_id);
@ -29,15 +29,15 @@ CREATE TABLE borders_backup (
count_k INTEGER, count_k INTEGER,
modified TIMESTAMP NOT NULL, modified TIMESTAMP NOT NULL,
cmnt VARCHAR(500), cmnt VARCHAR(500),
mwm_size_est double precision, mwm_size_est REAL,
PRIMARY KEY (backup, id) PRIMARY KEY (backup, id)
); );
CREATE TABLE splitting ( CREATE TABLE splitting (
osm_border_id BIGINT NOT NULL REFERENCES osm_borders(osm_id), -- reference to parent osm region osm_border_id BIGINT NOT NULL REFERENCES osm_borders(osm_id), -- reference to parent osm region
subregion_ids BIGINT[] NOT NULL, subregion_ids BIGINT[] NOT NULL,
mwm_size_est double precision NOT NULL, mwm_size_est REAL NOT NULL,
mwm_size_thr double precision NOT NULL, mwm_size_thr INTEGER NOT NULL, -- mwm size threshold in Kb, 4-bytes INTEGER is enough
geom geometry NOT NULL geom geometry NOT NULL
); );
CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr); CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr);

View file

@ -21,13 +21,14 @@ services:
context: ./db context: ./db
dockerfile: Dockerfile.db dockerfile: Dockerfile.db
args: args:
PLANET_URL: http://generator.testdata.mapsme.cloud.devmail.ru/planet/planet-latest.o5m PLANET_URL_full: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf
PLANET_URL_external: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf PLANET_URL: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf
PLANET_URL_small: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf
container_name: db container_name: db
restart: always restart: always
environment: environment:
POSTGRES_HOST_AUTH_METHOD: password POSTGRES_HOST_AUTH_METHOD: password
POSTGRES_USER: postgres POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres POSTGRES_PASSWORD: postgres
ports:
- "55432:5432"

View file

@ -1,6 +1,6 @@
FROM tiangolo/uwsgi-nginx-flask:latest FROM tiangolo/uwsgi-nginx-flask:latest
RUN pip install flask_cors flask_compress psycopg2 unidecode RUN pip install flask_cors flask_compress psycopg2 unidecode numpy sklearn
COPY app /app COPY app /app
COPY ./uwsgi.ini /app COPY ./uwsgi.ini /app

View file

@ -6,20 +6,21 @@ from collections import defaultdict
from config import ( from config import (
AUTOSPLIT_TABLE as autosplit_table, AUTOSPLIT_TABLE as autosplit_table,
TABLE as table, OSM_TABLE as osm_table,
OSM_TABLE as osm_table MWM_SIZE_THRESHOLD,
) )
from subregions import get_subregions_info
class DisjointClusterUnion: class DisjointClusterUnion:
"""Disjoint set union implementation for administrative subregions.""" """Disjoint set union implementation for administrative subregions."""
def __init__(self, region_id, subregions, thresholds): def __init__(self, region_id, subregions, mwm_size_thr=None):
self.region_id = region_id self.region_id = region_id
self.subregions = subregions self.subregions = subregions
self.city_population_thr, self.cluster_population_thr = thresholds self.mwm_size_thr = mwm_size_thr or MWM_SIZE_THRESHOLD
self.representatives = {sub_id: sub_id for sub_id in subregions} self.representatives = {sub_id: sub_id for sub_id in subregions}
# a cluster is one or more subregions with common borders # A cluster is one or more subregions with common borders
self.clusters = {} # representative => cluster object self.clusters = {} # representative => cluster object
# At the beginning, each subregion forms a cluster. # At the beginning, each subregion forms a cluster.
@ -28,27 +29,20 @@ class DisjointClusterUnion:
self.clusters[subregion_id] = { self.clusters[subregion_id] = {
'representative': subregion_id, 'representative': subregion_id,
'subregion_ids': [subregion_id], 'subregion_ids': [subregion_id],
'population': data['population'], 'mwm_size_est': data['mwm_size_est'],
'big_cities_cnt': sum(1 for c in data['cities'] if self.is_city_big(c)),
'finished': False, # True if the cluster cannot be merged with another 'finished': False, # True if the cluster cannot be merged with another
} }
def is_city_big(self, city):
return city['population'] >= self.city_population_thr
def get_smallest_cluster(self): def get_smallest_cluster(self):
"""Find minimal cluster without big cities.""" """Find minimal cluster without big cities."""
smallest_cluster_id = min( smallest_cluster_id = min(
filter( filter(
lambda cluster_id: ( lambda cluster_id:
not self.clusters[cluster_id]['finished'] and not self.clusters[cluster_id]['finished'],
self.clusters[cluster_id]['big_cities_cnt'] == 0)
,
self.clusters.keys() self.clusters.keys()
), ),
default=None, default=None,
key=lambda cluster_id: self.clusters[cluster_id]['population'] key=lambda cluster_id: self.clusters[cluster_id]['mwm_size_est']
) )
return smallest_cluster_id return smallest_cluster_id
@ -63,9 +57,9 @@ class DisjointClusterUnion:
self.representatives[subregion_id] = representative self.representatives[subregion_id] = representative
return representative return representative
def get_cluster_population(self, subregion_id): def get_cluster_mwm_size_est(self, subregion_id):
cluster_id = self.find_cluster(subregion_id) cluster_id = self.find_cluster(subregion_id)
return self.clusters[cluster_id]['population'] return self.clusters[cluster_id]['mwm_size_est']
def get_cluster_count(self): def get_cluster_count(self):
return len(self.clusters) return len(self.clusters)
@ -77,8 +71,7 @@ class DisjointClusterUnion:
r_cluster = self.clusters[retained_cluster_id] r_cluster = self.clusters[retained_cluster_id]
d_cluster = self.clusters[dropped_cluster_id] d_cluster = self.clusters[dropped_cluster_id]
r_cluster['subregion_ids'].extend(d_cluster['subregion_ids']) r_cluster['subregion_ids'].extend(d_cluster['subregion_ids'])
r_cluster['population'] += d_cluster['population'] r_cluster['mwm_size_est'] += d_cluster['mwm_size_est']
r_cluster['big_cities_cnt'] += d_cluster['big_cities_cnt']
del self.clusters[dropped_cluster_id] del self.clusters[dropped_cluster_id]
self.representatives[dropped_cluster_id] = retained_cluster_id self.representatives[dropped_cluster_id] = retained_cluster_id
return retained_cluster_id return retained_cluster_id
@ -95,52 +88,13 @@ class DisjointClusterUnion:
return subregion_ids return subregion_ids
def enrich_with_population_and_cities(conn, subregions): def get_best_cluster_to_join_with(small_cluster_id,
cursor = conn.cursor() dcu: DisjointClusterUnion,
ids = ','.join(str(x) for x in subregions.keys()) common_border_matrix):
cursor.execute(f"""
SELECT b.osm_id, c.name, c.population
FROM {osm_table} b, osm_cities c
WHERE b.osm_id IN ({ids}) AND ST_CONTAINS(b.way, c.center)
"""
)
for rec in cursor:
sub_id = int(rec[0])
subregions[sub_id]['cities'].append({
'name': rec[1],
'population': int(rec[2])
})
subregions[sub_id]['population'] += int(rec[2])
def find_subregions(conn, region_id, next_level):
cursor = conn.cursor()
cursor.execute(f"""
SELECT subreg.osm_id, subreg.name
FROM {osm_table} reg, {osm_table} subreg
WHERE reg.osm_id = %s AND subreg.admin_level = %s AND
ST_Contains(reg.way, subreg.way)
""",
(region_id, next_level)
)
subregions = {
int(rec[0]):
{
'osm_id': int(rec[0]),
'name': rec[1],
'population': 0,
'cities': []
}
for rec in cursor
}
if subregions:
enrich_with_population_and_cities(conn, subregions)
return subregions
def get_best_cluster_to_join_with(small_cluster_id, dcu: DisjointClusterUnion, common_border_matrix):
if small_cluster_id not in common_border_matrix: if small_cluster_id not in common_border_matrix:
return None # this may be if a subregion is isolated, like Bezirk Lienz inside Tyrol, Austria # This may be if a subregion is isolated,
# like Bezirk Lienz inside Tyrol, Austria
return None
common_borders = defaultdict(lambda: 0.0) # cluster representative => common border length common_borders = defaultdict(lambda: 0.0) # cluster representative => common border length
subregion_ids = dcu.get_cluster_subregion_ids(small_cluster_id) subregion_ids = dcu.get_cluster_subregion_ids(small_cluster_id)
for subregion_id in subregion_ids: for subregion_id in subregion_ids:
@ -148,29 +102,26 @@ def get_best_cluster_to_join_with(small_cluster_id, dcu: DisjointClusterUnion, c
other_cluster_id = dcu.find_cluster(other_subregion_id) other_cluster_id = dcu.find_cluster(other_subregion_id)
if other_cluster_id != small_cluster_id: if other_cluster_id != small_cluster_id:
common_borders[other_cluster_id] += length common_borders[other_cluster_id] += length
#print(f"common_borders={json.dumps(common_borders)} of len {len(common_borders)}")
#common_borders = {k:v for k,v in common_borders.items() if v > 0.0}
if not common_borders: if not common_borders:
return None return None
total_common_border_length = sum(common_borders.values()) total_common_border_length = sum(common_borders.values())
total_adjacent_population = sum(dcu.get_cluster_population(x) for x in common_borders) total_adjacent_mwm_size_est = sum(dcu.get_cluster_mwm_size_est(x) for x in common_borders)
choice_criterion = ( choice_criterion = (
( (
lambda cluster_id: ( lambda cluster_id: (
common_borders[cluster_id]/total_common_border_length + common_borders[cluster_id]/total_common_border_length +
-dcu.get_cluster_population(cluster_id)/total_adjacent_population -dcu.get_cluster_mwm_size_est(cluster_id)/total_adjacent_mwm_size_est
) )
) if total_adjacent_population else ) if total_adjacent_mwm_size_est else
lambda cluster_id: ( lambda cluster_id: (
common_borders[cluster_id]/total_common_border_length common_borders[cluster_id]/total_common_border_length
) )
) )
small_cluster_population = dcu.get_cluster_population(small_cluster_id)
best_cluster_id = max( best_cluster_id = max(
filter( filter(
lambda cluster_id: ( lambda cluster_id: (
small_cluster_population + dcu.get_cluster_population(cluster_id) dcu.clusters[small_cluster_id]['mwm_size_est'] +
<= dcu.cluster_population_thr dcu.clusters[cluster_id]['mwm_size_est'] <= dcu.mwm_size_thr
), ),
common_borders.keys() common_borders.keys()
), ),
@ -207,39 +158,31 @@ def calculate_common_border_matrix(conn, subregion_ids):
def find_golden_splitting(conn, border_id, next_level, def find_golden_splitting(conn, border_id, next_level,
country_region_name, thresholds): country_region_name, mwm_size_thr):
subregions = find_subregions(conn, border_id, next_level) subregions = get_subregions_info(conn, border_id, osm_table,
next_level, need_cities=True)
if not subregions: if not subregions:
print(f"No subregions for {border_id} {country_region_name}")
return return
dcu = DisjointClusterUnion(border_id, subregions, thresholds) dcu = DisjointClusterUnion(border_id, subregions, mwm_size_thr)
#save_splitting_to_file(dcu, f'all_{country_region_name}') #save_splitting_to_file(dcu, f'all_{country_region_name}')
all_subregion_ids = dcu.get_all_subregion_ids() all_subregion_ids = dcu.get_all_subregion_ids()
common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids) common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids)
i = 0 i = 0
while True: while True:
with open(f"clusters-{i:02d}.json", 'w') as f:
json.dump(dcu.clusters, f, ensure_ascii=False, indent=2)
if dcu.get_cluster_count() == 1: if dcu.get_cluster_count() == 1:
return dcu return dcu
i += 1 i += 1
#print(f"i = {i}")
smallest_cluster_id = dcu.get_smallest_cluster() smallest_cluster_id = dcu.get_smallest_cluster()
if not smallest_cluster_id: if not smallest_cluster_id:
return dcu # TODO: return target splitting return dcu
#print(f"smallest cluster = {json.dumps(dcu.clusters[smallest_cluster_id])}")
best_cluster_id = get_best_cluster_to_join_with(smallest_cluster_id, dcu, common_border_matrix) best_cluster_id = get_best_cluster_to_join_with(smallest_cluster_id, dcu, common_border_matrix)
if not best_cluster_id: # !!! a case for South West England and popul 500000 if not best_cluster_id:
dcu.mark_cluster_finished(smallest_cluster_id) dcu.mark_cluster_finished(smallest_cluster_id)
continue continue
assert (smallest_cluster_id != best_cluster_id), f"{smallest_cluster_id}" assert (smallest_cluster_id != best_cluster_id), f"{smallest_cluster_id}"
#print(f"best cluster = {json.dumps(dcu.clusters[best_cluster_id])}") dcu.union(smallest_cluster_id, best_cluster_id)
new_cluster_id = dcu.union(smallest_cluster_id, best_cluster_id)
#print(f"{json.dumps(dcu.clusters[new_cluster_id])}")
#print()
#import sys; sys.exit()
return dcu return dcu
@ -279,6 +222,9 @@ def write_polygons_to_poly(file, polygons, name_prefix):
def save_splitting_to_file(conn, dcu: DisjointClusterUnion, filename_prefix=None): def save_splitting_to_file(conn, dcu: DisjointClusterUnion, filename_prefix=None):
"""May be used for debugging"""
GENERATE_ALL_POLY=False
FOLDER='split_results'
with open(f"{FOLDER}/{filename_prefix}.poly", 'w') as poly_file: with open(f"{FOLDER}/{filename_prefix}.poly", 'w') as poly_file:
poly_file.write(f"{filename_prefix}\n") poly_file.write(f"{filename_prefix}\n")
for cluster_id, data in dcu.clusters.items(): for cluster_id, data in dcu.clusters.items():
@ -297,7 +243,7 @@ def save_splitting_to_file(conn, dcu: DisjointClusterUnion, filename_prefix=None
poly_file.write('END\n') poly_file.write('END\n')
with open(f"{FOLDER}/{filename_prefix}-splitting.json", 'w') as f: with open(f"{FOLDER}/{filename_prefix}-splitting.json", 'w') as f:
json.dump(dcu.clusters, f, ensure_ascii=False, indent=2) json.dump(dcu.clusters, f, ensure_ascii=False, indent=2)
def save_splitting_to_db(conn, dcu: DisjointClusterUnion): def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
cursor = conn.cursor() cursor = conn.cursor()
@ -305,8 +251,7 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
cursor.execute(f""" cursor.execute(f"""
DELETE FROM {autosplit_table} DELETE FROM {autosplit_table}
WHERE osm_border_id = {dcu.region_id} WHERE osm_border_id = {dcu.region_id}
AND city_population_thr = {dcu.city_population_thr} AND mwm_size_thr = {dcu.mwm_size_thr}
AND cluster_population_thr = {dcu.cluster_population_thr}
""") """)
for cluster_id, data in dcu.clusters.items(): for cluster_id, data in dcu.clusters.items():
subregion_ids = data['subregion_ids'] subregion_ids = data['subregion_ids']
@ -314,20 +259,19 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
cluster_geometry_sql = get_union_sql(subregion_ids) cluster_geometry_sql = get_union_sql(subregion_ids)
cursor.execute(f""" cursor.execute(f"""
INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom, INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom,
city_population_thr, cluster_population_thr) mwm_size_thr, mwm_size_est)
VALUES ( VALUES (
{dcu.region_id}, {dcu.region_id},
'{{{','.join(str(x) for x in subregion_ids)}}}', '{{{','.join(str(x) for x in subregion_ids)}}}',
({cluster_geometry_sql}), ({cluster_geometry_sql}),
{dcu.city_population_thr}, {dcu.mwm_size_thr},
{dcu.cluster_population_thr} {data['mwm_size_est']}
) )
""") """)
conn.commit() conn.commit()
def get_region_and_country_names(conn, region_id): def get_region_and_country_names(conn, region_id):
#if region_id != -1574364: return
cursor = conn.cursor() cursor = conn.cursor()
try: try:
cursor.execute( cursor.execute(
@ -355,18 +299,15 @@ def get_region_and_country_names(conn, region_id):
print(f"Many countries for region '{region_name}' id={region_id}") print(f"Many countries for region '{region_name}' id={region_id}")
return region_name, country_name return region_name, country_name
DEFAULT_CITY_POPULATION_THRESHOLD = 500000
DEFAULT_CLUSTER_POPULATION_THR = 500000
def split_region(conn, region_id, next_level, def split_region(conn, region_id, next_level,
thresholds=(DEFAULT_CITY_POPULATION_THRESHOLD, mwm_size_thr,
DEFAULT_CLUSTER_POPULATION_THR),
save_to_files=False): save_to_files=False):
region_name, country_name = get_region_and_country_names(conn, region_id) region_name, country_name = get_region_and_country_names(conn, region_id)
region_name = region_name.replace('/', '|') region_name = region_name.replace('/', '|')
country_region_name = f"{country_name}_{region_name}" if country_name else region_name country_region_name = f"{country_name}_{region_name}" if country_name else region_name
dcu = find_golden_splitting(conn, region_id, next_level, dcu = find_golden_splitting(conn, region_id, next_level,
country_region_name, thresholds) country_region_name, mwm_size_thr)
if dcu is None: if dcu is None:
return return
@ -378,30 +319,6 @@ def save_splitting(dcu: DisjointClusterUnion, conn,
save_splitting_to_db(conn, dcu) save_splitting_to_db(conn, dcu)
if save_to_files: if save_to_files:
print(f"Saving {country_region_name}") print(f"Saving {country_region_name}")
filename_prefix = f"{country_region_name}-{dcu.city_population_thrR}" filename_prefix = f"{country_region_name}-{dcu.city_population_thr}"
save_splitting_to_file(conn, dcu, filename_prefix) save_splitting_to_file(conn, dcu, filename_prefix)
GENERATE_ALL_POLY=False
FOLDER='split_results'
#CITY_POPULATION_THR = 500000
#CLUSTER_POPULATION_THR = 500000
if __name__ == '__main__':
conn = psycopg2.connect("dbname=az_gis3")
PREFIX = "UBavaria"
CITY_POPULATION_THR = 500000
CLUSTER_POPULATION_THR = 500000
region_id = -162050 # -165475 # California ## -162050 # Florida
region_id = -2145274 # Upper Bavaria
#region_id = -151339 # South West England
#region_id = -58446 # Scotland
dcu = find_golden_splitting(region_id)
make_polys(dcu.clusters)
with open(f"{PREFIX}_{CITY_POPULATION_THR}_splitting{region_id}-poplen.json", 'w') as f:
json.dump(dcu.clusters, f, ensure_ascii=False, indent=2)

View file

@ -25,6 +25,7 @@ from countries_structure import (
create_countries_initial_structure, create_countries_initial_structure,
get_osm_border_name_by_osm_id, get_osm_border_name_by_osm_id,
) )
from subregions import get_subregions_info
try: try:
from lxml import etree from lxml import etree
@ -78,7 +79,8 @@ def fetch_borders(**kwargs):
query = f""" query = f"""
SELECT name, geometry, nodes, modified, disabled, count_k, cmnt, SELECT name, geometry, nodes, modified, disabled, count_k, cmnt,
(CASE WHEN area = 'NaN' THEN 0 ELSE area END) AS area, (CASE WHEN area = 'NaN' THEN 0 ELSE area END) AS area,
id, admin_level, parent_id, parent_name id, admin_level, parent_id, parent_name,
mwm_size_est
FROM ( FROM (
SELECT name, SELECT name,
ST_AsGeoJSON({geom}, 7) as geometry, ST_AsGeoJSON({geom}, 7) as geometry,
@ -95,7 +97,8 @@ def fetch_borders(**kwargs):
parent_id, parent_id,
( SELECT name FROM {table} ( SELECT name FROM {table}
WHERE id = t.parent_id WHERE id = t.parent_id
) AS parent_name ) AS parent_name,
mwm_size_est
FROM {table} t FROM {table} t
WHERE ({where_clause}) {leaves_filter} WHERE ({where_clause}) {leaves_filter}
) q ) q
@ -112,18 +115,19 @@ def fetch_borders(**kwargs):
'disabled': rec[4], 'count_k': rec[5], 'disabled': rec[4], 'count_k': rec[5],
'comment': rec[6], 'comment': rec[6],
'area': rec[7], 'area': rec[7],
'id': region_id, 'country_id': country_id, 'id': region_id,
'admin_level': rec[9], 'admin_level': rec[9],
'parent_id': rec[10], 'parent_id': rec[10],
'parent_name': rec[11] or '', 'parent_name': rec[11] or '',
'country_name': country_name 'country_id': country_id,
'country_name': country_name,
'mwm_size_est': rec[12]
} }
feature = {'type': 'Feature', feature = {'type': 'Feature',
'geometry': json.loads(rec[1]), 'geometry': json.loads(rec[1]),
'properties': props 'properties': props
} }
borders.append(feature) borders.append(feature)
#print([x['properties'] for x in borders])
return borders return borders
def simplify_level_to_postgis_value(simplify_level): def simplify_level_to_postgis_value(simplify_level):
@ -228,8 +232,8 @@ def query_crossing():
pass pass
return jsonify(type='FeatureCollection', features=result) return jsonify(type='FeatureCollection', features=result)
@app.route('/tables') @app.route('/config')
def check_osm_table(): def get_server_configuration():
osm = False osm = False
backup = False backup = False
old = [] old = []
@ -260,7 +264,9 @@ def check_osm_table():
crossing = True crossing = True
except psycopg2.Error as e: except psycopg2.Error as e:
pass pass
return jsonify(osm=osm, tables=old, readonly=config.READONLY, backup=backup, crossing=crossing) return jsonify(osm=osm, tables=old, readonly=config.READONLY,
backup=backup, crossing=crossing,
mwm_size_thr=config.MWM_SIZE_THRESHOLD)
@app.route('/search') @app.route('/search')
def search(): def search():
@ -341,9 +347,10 @@ def join_borders():
cur.execute(f""" cur.execute(f"""
UPDATE {table} UPDATE {table}
SET id = {free_id}, SET id = {free_id},
geom = ST_Union(geom, b2.g), geom = ST_Union({table}.geom, b2.geom),
mwm_size_est = {table}.mwm_size_est + b2.mwm_size_est,
count_k = -1 count_k = -1
FROM (SELECT geom AS g FROM {table} WHERE id = %s) AS b2 FROM (SELECT geom, mwm_size_est FROM {table} WHERE id = %s) AS b2
WHERE id = %s""", (region_id2, region_id1)) WHERE id = %s""", (region_id2, region_id1))
cur.execute(f"DELETE FROM {table} WHERE id = %s", (region_id2,)) cur.execute(f"DELETE FROM {table} WHERE id = %s", (region_id2,))
except psycopg2.Error as e: except psycopg2.Error as e:
@ -630,24 +637,23 @@ def divide_preview():
if not is_admin: if not is_admin:
return jsonify(status="Could not apply auto-division to non-administrative regions") return jsonify(status="Could not apply auto-division to non-administrative regions")
try: try:
city_population_thr = int(request.args.get('city_population_thr')) mwm_size_thr = int(request.args.get('mwm_size_thr'))
cluster_population_thr = int(request.args.get('cluster_population_thr'))
except ValueError: except ValueError:
return jsonify(status='Not a number in thresholds.') return jsonify(status='Not a number in thresholds.')
return divide_into_clusters_preview( return divide_into_clusters_preview(
region_ids, next_level, region_ids, next_level,
(city_population_thr, cluster_population_thr)) mwm_size_thr)
else: else:
return divide_into_subregions_preview(region_ids, next_level) return divide_into_subregions_preview(region_ids, next_level)
def get_subregions(region_ids, next_level): def get_subregions_for_preview(region_ids, next_level):
subregions = list(itertools.chain.from_iterable( subregions = list(itertools.chain.from_iterable(
get_subregions_one(region_id, next_level) get_subregions_one_for_preview(region_id, next_level)
for region_id in region_ids for region_id in region_ids
)) ))
return subregions return subregions
def get_subregions_one(region_id, next_level): def get_subregions_one_for_preview(region_id, next_level):
osm_table = config.OSM_TABLE osm_table = config.OSM_TABLE
table = config.TABLE table = config.TABLE
cur = g.conn.cursor() cur = g.conn.cursor()
@ -671,28 +677,28 @@ def get_subregions_one(region_id, next_level):
subregions.append(feature) subregions.append(feature)
return subregions return subregions
def get_clusters(region_ids, next_level, thresholds): def get_clusters_for_preview(region_ids, next_level, thresholds):
clusters = list(itertools.chain.from_iterable( clusters = list(itertools.chain.from_iterable(
get_clusters_one(region_id, next_level, thresholds) get_clusters_for_preview_one(region_id, next_level, thresholds)
for region_id in region_ids for region_id in region_ids
)) ))
return clusters return clusters
def get_clusters_one(region_id, next_level, thresholds): def get_clusters_for_preview_one(region_id, next_level, mwm_size_thr):
autosplit_table = config.AUTOSPLIT_TABLE autosplit_table = config.AUTOSPLIT_TABLE
cursor = g.conn.cursor() cursor = g.conn.cursor()
where_clause = f""" where_clause = f"""
osm_border_id = %s osm_border_id = %s
AND city_population_thr = %s AND mwm_size_thr = %s
AND cluster_population_thr = %s
""" """
splitting_sql_params = (region_id,) + thresholds splitting_sql_params = (region_id, mwm_size_thr)
cursor.execute(f""" cursor.execute(f"""
SELECT 1 FROM {autosplit_table} SELECT 1 FROM {autosplit_table}
WHERE {where_clause} WHERE {where_clause}
""", splitting_sql_params) """, splitting_sql_params)
if cursor.rowcount == 0: if cursor.rowcount == 0:
split_region(g.conn, region_id, next_level, thresholds) split_region(g.conn, region_id, next_level, mwm_size_thr)
cursor.execute(f""" cursor.execute(f"""
SELECT subregion_ids[1], ST_AsGeoJSON(ST_SimplifyPreserveTopology(geom, 0.01)) as way SELECT subregion_ids[1], ST_AsGeoJSON(ST_SimplifyPreserveTopology(geom, 0.01)) as way
FROM {autosplit_table} FROM {autosplit_table}
@ -700,23 +706,24 @@ def get_clusters_one(region_id, next_level, thresholds):
""", splitting_sql_params) """, splitting_sql_params)
clusters = [] clusters = []
for rec in cursor: for rec in cursor:
cluster = { 'type': 'Feature', cluster = {
'geometry': json.loads(rec[1]), 'type': 'Feature',
'properties': {'osm_id': int(rec[0])} 'geometry': json.loads(rec[1]),
'properties': {'osm_id': int(rec[0])}
} }
clusters.append(cluster) clusters.append(cluster)
return clusters return clusters
def divide_into_subregions_preview(region_ids, next_level): def divide_into_subregions_preview(region_ids, next_level):
subregions = get_subregions(region_ids, next_level) subregions = get_subregions_for_preview(region_ids, next_level)
return jsonify( return jsonify(
status='ok', status='ok',
subregions={'type': 'FeatureCollection', 'features': subregions} subregions={'type': 'FeatureCollection', 'features': subregions}
) )
def divide_into_clusters_preview(region_ids, next_level, thresholds): def divide_into_clusters_preview(region_ids, next_level, mwm_size_thr):
subregions = get_subregions(region_ids, next_level) subregions = get_subregions_for_preview(region_ids, next_level)
clusters = get_clusters(region_ids, next_level, thresholds) clusters = get_clusters_for_preview(region_ids, next_level, mwm_size_thr)
return jsonify( return jsonify(
status='ok', status='ok',
subregions={'type': 'FeatureCollection', 'features': subregions}, subregions={'type': 'FeatureCollection', 'features': subregions},
@ -744,51 +751,53 @@ def divide():
if not is_admin: if not is_admin:
return jsonify(status="Could not apply auto-division to non-administrative regions") return jsonify(status="Could not apply auto-division to non-administrative regions")
try: try:
city_population_thr = int(request.args.get('city_population_thr')) mwm_size_thr = int(request.args.get('mwm_size_thr'))
cluster_population_thr = int(request.args.get('cluster_population_thr'))
except ValueError: except ValueError:
return jsonify(status='Not a number in thresholds.') return jsonify(status='Not a number in thresholds.')
return divide_into_clusters( return divide_into_clusters(
region_ids, next_level, region_ids, next_level,
(city_population_thr, cluster_population_thr)) mwm_size_thr)
else: else:
return divide_into_subregions(region_ids, next_level) return divide_into_subregions(region_ids, next_level)
def divide_into_subregions(region_ids, next_level): def divide_into_subregions(region_ids, next_level):
table = config.TABLE
osm_table = config.OSM_TABLE
cur = g.conn.cursor()
for region_id in region_ids: for region_id in region_ids:
is_admin = is_administrative_region(region_id) divide_into_subregions_one(region_id, next_level)
if is_admin:
# TODO: rewrite SELECT into join rather than subquery to enable gist index
cur.execute(f"""
INSERT INTO {table} (id, geom, name, parent_id, modified, count_k)
SELECT osm_id, way, name, %s, now(), -1
FROM {osm_table}
WHERE ST_Contains(
(SELECT geom FROM {table} WHERE id = %s), way
)
AND admin_level = {next_level}
""", (region_id, region_id,)
)
else:
cur.execute(f"""
INSERT INTO {table} (id, geom, name, parent_id, modified, count_k)
SELECT osm_id, way, name, (SELECT parent_id FROM {table} WHERE id = %s), now(), -1
FROM {osm_table}
WHERE ST_Contains(
(SELECT geom FROM {table} WHERE id = %s), way
)
AND admin_level = {next_level}
""", (region_id, region_id,)
)
cur.execute(f"DELETE FROM {table} WHERE id = %s", (region_id,))
g.conn.commit() g.conn.commit()
return jsonify(status='ok') return jsonify(status='ok')
def divide_into_clusters(region_ids, next_level, thresholds): def divide_into_subregions_one(region_id, next_level):
table = config.TABLE
osm_table = config.OSM_TABLE
subregions = get_subregions_info(g.conn, region_id, table,
next_level, need_cities=False)
cursor = g.conn.cursor()
is_admin = is_administrative_region(region_id)
if is_admin:
for subregion_id, data in subregions.items():
cursor.execute(f"""
INSERT INTO {table}
(id, geom, name, parent_id, modified, count_k, mwm_size_est)
SELECT osm_id, way, name, %s, now(), -1, {data['mwm_size_est']}
FROM {osm_table}
WHERE osm_id = %s
""", (region_id, subregion_id)
)
else:
for subregion_id, data in subregions.items():
cursor.execute(f"""
INSERT INTO {table}
(id, geom, name, parent_id, modified, count_k, mwm_size_est)
SELECT osm_id, way, name,
(SELECT parent_id FROM {table} WHERE id = %s),
now(), -1, {data['mwm_size_est']}
FROM {osm_table}
WHERE osm_id = %s
""", (region_id, subregion_id)
)
cursor.execute(f"DELETE FROM {table} WHERE id = %s", (region_id,))
def divide_into_clusters(region_ids, next_level, mwm_size_thr):
table = config.TABLE table = config.TABLE
autosplit_table = config.AUTOSPLIT_TABLE autosplit_table = config.AUTOSPLIT_TABLE
cursor = g.conn.cursor() cursor = g.conn.cursor()
@ -799,16 +808,15 @@ def divide_into_clusters(region_ids, next_level, thresholds):
where_clause = f""" where_clause = f"""
osm_border_id = %s osm_border_id = %s
AND city_population_thr = %s AND mwm_size_thr = %s
AND cluster_population_thr = %s
""" """
splitting_sql_params = (region_id,) + thresholds splitting_sql_params = (region_id, mwm_size_thr)
cursor.execute(f""" cursor.execute(f"""
SELECT 1 FROM {autosplit_table} SELECT 1 FROM {autosplit_table}
WHERE {where_clause} WHERE {where_clause}
""", splitting_sql_params) """, splitting_sql_params)
if cursor.rowcount == 0: if cursor.rowcount == 0:
split_region(g.conn, region_id, next_level, thresholds) split_region(g.conn, region_id, next_level, mwm_size_thr)
free_id = get_free_id() free_id = get_free_id()
counter = 0 counter = 0
@ -830,8 +838,8 @@ def divide_into_clusters(region_ids, next_level, thresholds):
subregion_id = free_id subregion_id = free_id
name = f"{base_name}_{counter}" name = f"{base_name}_{counter}"
insert_cursor.execute(f""" insert_cursor.execute(f"""
INSERT INTO {table} (id, name, parent_id, geom, modified, count_k) INSERT INTO {table} (id, name, parent_id, geom, modified, count_k, mwm_size_est)
SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1 SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1, mwm_size_est
FROM {autosplit_table} WHERE subregion_ids[1] = %s AND {where_clause} FROM {autosplit_table} WHERE subregion_ids[1] = %s AND {where_clause}
""", (name, cluster_id,) + splitting_sql_params) """, (name, cluster_id,) + splitting_sql_params)
g.conn.commit() g.conn.commit()

View file

@ -8,11 +8,13 @@ READONLY = False
TABLE = 'borders' TABLE = 'borders'
# from where OSM borders are imported # from where OSM borders are imported
OSM_TABLE = 'osm_borders' OSM_TABLE = 'osm_borders'
# All populated places in OSM
OSM_PLACES_TABLE = 'osm_places'
# transit table for autosplitting results # transit table for autosplitting results
AUTOSPLIT_TABLE = 'splitting' AUTOSPLIT_TABLE = 'splitting'
## tables with borders for reference # tables with borders for reference
OTHER_TABLES = { OTHER_TABLES = {
#'old': 'old_borders' #'old': 'old_borders'
} }
# backup table # backup table
BACKUP = 'borders_backup' BACKUP = 'borders_backup'
@ -28,3 +30,8 @@ IMPORT_ERROR_ALERT = False
DAEMON_STATUS_PATH = '/tmp/borders-daemon-status.txt' DAEMON_STATUS_PATH = '/tmp/borders-daemon-status.txt'
DAEMON_PID_PATH = '/tmp/borders-daemon.pid' DAEMON_PID_PATH = '/tmp/borders-daemon.pid'
DAEMON_LOG_PATH = '/var/log/borders-daemon.log' DAEMON_LOG_PATH = '/var/log/borders-daemon.log'
# mwm size threshold in Kb
MWM_SIZE_THRESHOLD = 70*1024
# Estimated mwm size is predicted by the 'model.pkl' with 'scaler.pkl' for X
MWM_SIZE_PREDICTION_MODEL_PATH = '/app/data/model.pkl'
MWM_SIZE_PREDICTION_MODEL_SCALER_PATH = '/app/data/scaler.pkl'

View file

@ -2,6 +2,8 @@ import itertools
import config import config
from subregions import get_subregions_info
table = config.TABLE table = config.TABLE
osm_table = config.OSM_TABLE osm_table = config.OSM_TABLE
@ -260,43 +262,32 @@ def _clear_borders(conn):
conn.commit() conn.commit()
def _find_subregions(conn, osm_ids, next_level, parents, names): def _find_subregions(conn, osm_ids, next_level, regions):
"""Return subregions of level 'next_level' for regions with osm_ids.""" """Return subregions of level 'next_level' for regions with osm_ids."""
cursor = conn.cursor()
parent_osm_ids = ','.join(str(x) for x in osm_ids)
cursor.execute(f"""
SELECT b.osm_id, b.name, subb.osm_id, subb.name
FROM {osm_table} b, {osm_table} subb
WHERE subb.admin_level=%s
AND b.osm_id IN ({parent_osm_ids})
AND ST_Contains(b.way, subb.way)
""",
(next_level,)
)
# parent_osm_id => [(osm_id, name), (osm_id, name), ...]
subregion_ids = [] subregion_ids = []
for osm_id in osm_ids:
for rec in cursor: more_subregions = get_subregions_info(conn, osm_id, table,
parent_osm_id = rec[0] next_level, need_cities=False)
osm_id = rec[2] for subregion_id, subregion_data in more_subregions.items():
parents[osm_id] = parent_osm_id region_data = regions.setdefault(subregion_id, {})
name = rec[3] region_data['name'] = subregion_data['name']
names[osm_id] = name region_data['mwm_size_est'] = subregion_data['mwm_size_est']
subregion_ids.append(osm_id) region_data['parent_id'] = osm_id
subregion_ids.append(subregion_id)
return subregion_ids return subregion_ids
def _create_regions(conn, osm_ids, parents, names): def _create_regions(conn, osm_ids, regions):
if not osm_ids: if not osm_ids:
return return
osm_ids = list(osm_ids) # to ensure order osm_ids = list(osm_ids) # to ensure order
cursor = conn.cursor() cursor = conn.cursor()
sql_values = ','.join( sql_values = ','.join(
f'({osm_id},' f'({osm_id},'
'%s,' '%s,'
f"{regions[osm_id].get('parent_id', 'NULL')},"
f"{regions[osm_id].get('mwm_size_est', 'NULL')},"
f'(SELECT way FROM {osm_table} WHERE osm_id={osm_id}),' f'(SELECT way FROM {osm_table} WHERE osm_id={osm_id}),'
f'{parents[osm_id] or "NULL"},'
'now())' 'now())'
for osm_id in osm_ids for osm_id in osm_ids
) )
@ -304,21 +295,23 @@ def _create_regions(conn, osm_ids, parents, names):
#print(f"names={tuple(names[osm_id] for osm_id in osm_ids)}") #print(f"names={tuple(names[osm_id] for osm_id in osm_ids)}")
#print(f"all parents={parents}") #print(f"all parents={parents}")
cursor.execute(f""" cursor.execute(f"""
INSERT INTO {table} (id, name, geom, parent_id, modified) INSERT INTO {table} (id, name, parent_id, mwm_size_est, geom, modified)
VALUES {sql_values} VALUES {sql_values}
""", tuple(names[osm_id] for osm_id in osm_ids) """, tuple(regions[osm_id]['name'] for osm_id in osm_ids)
) )
def _make_country_structure(conn, country_osm_id): def _make_country_structure(conn, country_osm_id):
names = {} # osm_id => osm name regions = {} # osm_id: { 'name': name,
parents = {} # osm_id => parent_osm_id # 'mwm_size_est': size,
# 'parent_id': parent_id }
country_name = get_osm_border_name_by_osm_id(conn, country_osm_id) country_name = get_osm_border_name_by_osm_id(conn, country_osm_id)
names[country_osm_id] = country_name country_data = regions.setdefault(country_osm_id, {})
parents[country_osm_id] = None country_data['name'] = country_name
# TODO: country_data['mwm_size_est'] = ...
_create_regions(conn, [country_osm_id], parents, names) _create_regions(conn, [country_osm_id], regions)
if country_initial_levels.get(country_name): if country_initial_levels.get(country_name):
admin_levels = country_initial_levels[country_name] admin_levels = country_initial_levels[country_name]
@ -332,18 +325,19 @@ def _make_country_structure(conn, country_osm_id):
f"AL={admin_level}, prev-AL={prev_level}" f"AL={admin_level}, prev-AL={prev_level}"
) )
subregion_ids = _find_subregions(conn, prev_region_ids, subregion_ids = _find_subregions(conn, prev_region_ids,
admin_level, parents, names) admin_level, regions)
_create_regions(conn, subregion_ids, parents, names) _create_regions(conn, subregion_ids, regions)
prev_region_ids = subregion_ids prev_region_ids = subregion_ids
def create_countries_initial_structure(conn): def create_countries_initial_structure(conn):
_clear_borders(conn) _clear_borders(conn)
cursor = conn.cursor() cursor = conn.cursor()
# TODO: process overlapping countries, like Ukraine and Russia with common Crimea
cursor.execute(f""" cursor.execute(f"""
SELECT osm_id, name SELECT osm_id, name
FROM {osm_table} FROM {osm_table}
WHERE admin_level = 2 WHERE admin_level = 2 and name != 'Ukraine'
""" """
# and name in --('Germany', 'Luxembourg', 'Austria') # and name in --('Germany', 'Luxembourg', 'Austria')
# ({','.join(f"'{c}'" for c in country_initial_levels.keys())}) # ({','.join(f"'{c}'" for c in country_initial_levels.keys())})

BIN
web/app/data/model.pkl Normal file

Binary file not shown.

BIN
web/app/data/mwm_data.xlsx Normal file

Binary file not shown.

View file

@ -0,0 +1,119 @@
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import (
cross_val_score,
KFold,
GridSearchCV,
)
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
# Training sample: one row per mwm with its features and resulting file size.
# Rows flagged with a non-zero 'exclude' column are dropped from the sample.
data = pd.read_excel('mwm_data.xlsx', sheet_name='mwms_all', header=1)
data = data[data['exclude'] == 0]
#data['is_urban2'] = data.apply(lambda row: row['pop_density'] > 260, axis=1) # 260 - median of pop_density
popul_column = 'urban_pop' # options are 'population' and 'urban_pop' (for population of cities and towns only)
feature_names = [popul_column, 'area', 'city_cnt', 'hamlet_cnt']
target_name = 'size'
# Missing counts/population mean "none", so fill with 0; a missing 'area' is a
# data error and is deliberately left as NaN so it surfaces during training.
for feature in set(feature_names) - set(['area']): # if area is None it's an error!
    data[feature] = data[feature].fillna(0)
scoring = 'neg_mean_squared_error' # another option is 'r2'
def my_cross_validation(sample):
    """Compare several regression estimators on *sample* with 5-fold
    cross-validation and print per-fold scores and their mean for each.

    Features are standardized before fitting; the metric is the module-level
    'scoring' setting.
    """
    features = sample[feature_names]
    target = sample[target_name]
    features = StandardScaler().fit_transform(features)

    candidates = (
        ('LinRegression', LinearRegression(fit_intercept=False)),
        ('SVR_linear', SVR(kernel='linear')),
        ('SVR_rbf', SVR(kernel='rbf')),
    )
    for estimator_name, estimator in candidates:
        cv_scores = cross_val_score(estimator, features, target,
                                    cv=KFold(5, shuffle=True, random_state=1),
                                    scoring=scoring)
        print(f"{estimator_name:15}", cv_scores, np.mean(cv_scores))
def my_grid_search(sample):
    """Grid-search SVR hyperparameters over several kernels and print the
    searched value ranges plus the best parameter combination found.

    Uses a fixed 80/20 train/test split and the module-level 'scoring'.
    """
    features = sample[feature_names]
    target = sample[target_name]
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=0)

    # Candidate hyperparameter values
    c_values = [10 ** n for n in range(6, 7)]
    gamma_values = [0.009 + i * 0.001 for i in range(-7, 11, 2)] + ['auto', 'scale']
    epsilon_values = [0.5 * i for i in range(0, 15)]
    coef0_values = [-0.1, -0.01, 0, 0.01, 0.1]

    # Only parameters meaningful for each kernel are combined
    param_grid = [
        {'kernel': ['linear'], 'C': c_values, 'epsilon': epsilon_values},
        {'kernel': ['rbf'], 'C': c_values, 'gamma': gamma_values, 'epsilon': epsilon_values},
        {'kernel': ['poly', 'sigmoid'],
         'C': c_values, 'gamma': gamma_values, 'epsilon': epsilon_values, 'coef0': coef0_values},
    ]
    grid_search = GridSearchCV(SVR(), param_grid, scoring=scoring)
    grid_search.fit(X_train, y_train)

    print("C", c_values)
    print("gamma", gamma_values)
    print("epsilon", epsilon_values)
    print("coef0", coef0_values)
    print("Best_params:", grid_search.best_params_, grid_search.best_score_)
def train_and_serialize_model(sample, model_path='model.pkl',
                              scaler_path='scaler.pkl'):
    """Train the mwm-size SVR regressor on *sample* and pickle it to disk.

    :param sample: DataFrame with the module-level feature_names columns
                   and the target_name column
    :param model_path: where to dump the pickled regressor
    :param scaler_path: where to dump the pickled StandardScaler fitted
                        on the training features
    Prints predictions for the first rows before and after a pickle
    round-trip as a sanity check that (de)serialization is lossless.
    """
    import pickle

    X = sample[feature_names]
    y = sample[target_name]
    # Keep a few unscaled rows to verify the deserialized scaler+model chain
    X_head = X[0:4]

    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Hyperparameters tuned with my_grid_search()
    regressor = SVR(kernel='rbf', C=10**6, epsilon=0.0, gamma=0.012)
    regressor.fit(X, y)
    print(regressor.predict(X[0:4]))

    # Serialize model and scaler
    with open(model_path, 'wb') as f:
        pickle.dump(regressor, f)
    with open(scaler_path, 'wb') as f:
        pickle.dump(scaler, f)

    # Deserialize them back and test on the kept X_head samples:
    # the printed values should match the ones printed above
    with open(model_path, 'rb') as f:
        regressor2 = pickle.load(f)
    with open(scaler_path, 'rb') as f:
        scaler2 = pickle.load(f)
    print(regressor2.predict(scaler2.transform(X_head)))


if __name__ == '__main__':
    train_and_serialize_model(data)

BIN
web/app/data/scaler.pkl Normal file

Binary file not shown.

View file

@ -0,0 +1,29 @@
import numpy as np
import pickle
import config
class MwmSizePredictor:
    """Estimates mwm file size from region features.

    Wraps a pickled scikit-learn regressor together with the StandardScaler
    that was fitted on its training features; both are loaded from paths
    configured in config.py unless overridden.
    """

    def __init__(self, model_path=None, scaler_path=None):
        """Load the regressor and the feature scaler from pickle files.

        :param model_path: path to the pickled model; defaults to
            config.MWM_SIZE_PREDICTION_MODEL_PATH
        :param scaler_path: path to the pickled scaler; defaults to
            config.MWM_SIZE_PREDICTION_MODEL_SCALER_PATH
        """
        if model_path is None:
            model_path = config.MWM_SIZE_PREDICTION_MODEL_PATH
        if scaler_path is None:
            scaler_path = config.MWM_SIZE_PREDICTION_MODEL_SCALER_PATH
        with open(model_path, 'rb') as f:
            self.model = pickle.load(f)
        with open(scaler_path, 'rb') as f:
            self.scaler = pickle.load(f)

    def predict(self, features_array):
        """Predict mwm size(s) for one or many feature vectors.

        :param features_array: 1D or 2D array of feature values. Features are
            'urban_pop', 'area', 'city_cnt', 'hamlet_cnt' as defined for the
            prediction model. Units presumably Kb, as elsewhere in the
            project — TODO confirm against training data.
        :return: a single value for 1D input, a list of values for 2D input
        """
        X = np.array(features_array)
        one_prediction = (X.ndim == 1)
        if one_prediction:
            # Scaler/model expect a 2D batch even for a single sample
            X = X.reshape(1, -1)
        predictions = self.model.predict(self.scaler.transform(X))
        return predictions[0] if one_prediction else predictions.tolist()

View file

@ -3,11 +3,14 @@ var STYLE_SELECTED = { stroke: true, color: '#ff3', weight: 3, fill: true, fillO
var FILL_TOO_SMALL = '#0f0'; var FILL_TOO_SMALL = '#0f0';
var FILL_TOO_BIG = '#800'; var FILL_TOO_BIG = '#800';
var FILL_ZERO = 'black'; var FILL_ZERO = 'black';
var OLD_BORDERS_NAME; // filled in checkHasOSM()
var IMPORT_ENABLED = true;
var map, borders = {}, bordersLayer, selectedId, editing = false, readonly = false; var map, borders = {}, bordersLayer, selectedId, editing = false;
var size_good = 50, size_bad = 70; var config = { // server config
READONLY: false,
MWM_SIZE_THR: 70,
OLD_BORDERS_NAME: undefined // may be filled in getServerConfiguration()
};
var size_good, size_bad;
var maxRank = 1; var maxRank = 1;
var tooSmallLayer = null; var tooSmallLayer = null;
var oldBordersLayer = null; var oldBordersLayer = null;
@ -68,17 +71,17 @@ function init() {
else else
$('#population_thresholds').hide(); $('#population_thresholds').hide();
}); });
checkHasOSM(); getServerConfiguration();
filterSelect(true); filterSelect(true);
} }
function checkHasOSM() { function getServerConfiguration() {
$.ajax(getServer('tables'), { $.ajax(getServer('config'), {
success: function(res) { success: function(res) {
if( res.osm ) if( res.osm )
$('#osm_actions').css('display', 'block'); $('#osm_actions').css('display', 'block');
if( res.tables && res.tables.length > 0 ) { if( res.tables && res.tables.length > 0 ) {
OLD_BORDERS_NAME = res.tables[0]; config.OLD_BORDERS_NAME = res.tables[0];
$('#old_action').css('display', 'block'); $('#old_action').css('display', 'block');
$('#josm_old').css('display', 'inline'); $('#josm_old').css('display', 'inline');
} }
@ -91,7 +94,7 @@ function checkHasOSM() {
$('#action_buttons').css('display', 'none'); $('#action_buttons').css('display', 'none');
$('#import_link').css('display', 'none'); $('#import_link').css('display', 'none');
$('#backups').css('display', 'none'); $('#backups').css('display', 'none');
readonly = true; config.READONLY = true;
} }
if( !res.readonly && IMPORT_ENABLED ) { if( !res.readonly && IMPORT_ENABLED ) {
$('#import_link').css('display', 'none'); $('#import_link').css('display', 'none');
@ -100,6 +103,11 @@ function checkHasOSM() {
var iframe = '<iframe name="import_frame" class="h_iframe" src="about:blank"></iframe>'; var iframe = '<iframe name="import_frame" class="h_iframe" src="about:blank"></iframe>';
// $('#filefm').after(iframe); // $('#filefm').after(iframe);
} }
size_bad = config.MWM_SIZE_THR = Math.round(parseInt(res.mwm_size_thr)/1024);
size_good = Math.round(size_bad * 0.7 / 10) * 10;
$('#r_green').val(size_good);
$('#r_red').val(size_bad);
$('#mwm_size_thr').val(config.MWM_SIZE_THR);
} }
}); });
} }
@ -148,11 +156,11 @@ function updateBorders() {
crossingLayer.clearLayers(); crossingLayer.clearLayers();
} }
if( oldBordersLayer != null && OLD_BORDERS_NAME ) { if( oldBordersLayer != null && config.OLD_BORDERS_NAME ) {
oldBordersLayer.clearLayers(); oldBordersLayer.clearLayers();
$.ajax(getServer('bbox'), { $.ajax(getServer('bbox'), {
data: { data: {
'table': OLD_BORDERS_NAME, 'table': config.OLD_BORDERS_NAME,
'simplify': simplified, 'simplify': simplified,
'xmin': b.getWest(), 'xmin': b.getWest(),
'xmax': b.getEast(), 'xmax': b.getEast(),
@ -302,6 +310,7 @@ function selectLayer(e) {
$('#b_al').text(props['admin_level'] ? '('+props['admin_level']+')' : ''); $('#b_al').text(props['admin_level'] ? '('+props['admin_level']+')' : '');
$('#b_parent_name').text(props['parent_name']); $('#b_parent_name').text(props['parent_name']);
$('#b_size').text(Math.round(props['count_k'] * BYTES_FOR_NODE / 1024 / 1024) + ' MB'); $('#b_size').text(Math.round(props['count_k'] * BYTES_FOR_NODE / 1024 / 1024) + ' MB');
$('#pa_size').text(Math.round(props['mwm_size_est']/1024) + ' MB');
//$('#b_nodes').text(borders[selectedId].layer.getLatLngs()[0].length); //$('#b_nodes').text(borders[selectedId].layer.getLatLngs()[0].length);
$('#b_nodes').text(props['nodes']); $('#b_nodes').text(props['nodes']);
$('#b_date').text(props['modified']); $('#b_date').text(props['modified']);
@ -317,7 +326,7 @@ function selectLayer(e) {
function filterSelect(noRefresh) { function filterSelect(noRefresh) {
var value = $('#f_type').val(); var value = $('#f_type').val();
$('#f_size').css('display', value == 'size' ? 'block' : 'none'); $('#f_size').css('display', value.endsWith('size') ? 'block' : 'none');
$('#f_chars').css('display', value == 'chars' ? 'block' : 'none'); $('#f_chars').css('display', value == 'chars' ? 'block' : 'none');
$('#f_comments').css('display', value == 'comments' ? 'block' : 'none'); $('#f_comments').css('display', value == 'comments' ? 'block' : 'none');
$('#f_topo').css('display', value == 'topo' ? 'block' : 'none'); $('#f_topo').css('display', value == 'topo' ? 'block' : 'none');
@ -336,29 +345,47 @@ var colors = ['red', 'orange', 'yellow', 'lime', 'green', 'olive', 'cyan', 'dark
'blue', 'navy', 'magenta', 'purple', 'deeppink', 'brown'] //'black'; 'blue', 'navy', 'magenta', 'purple', 'deeppink', 'brown'] //'black';
var alphabet = 'abcdefghijklmnopqrstuvwxyz'; var alphabet = 'abcdefghijklmnopqrstuvwxyz';
function getStringHash(str) {
var hash = 0, i, chr;
/*
for (i = 0; i < str.length; i++) {
chr = str.charCodeAt(i);
hash = ((hash << 5) - hash) + chr;
hash |= 0; // Convert to 32bit integer
}
*/
hash = str.charCodeAt(0) + str.charCodeAt(1);
return hash;
}
function getCountryColor(props) { function getCountryColor(props) {
var country_name = props.country_name; var country_name = props.country_name;
if (!country_name) if (!country_name)
return 'black'; return 'black';
var firstLetter = country_name[0].toLowerCase(); var hash = getStringHash(country_name);
var index = alphabet.indexOf(firstLetter); var indexInColors = Math.abs(hash) % colors.length;
if (index === -1)
return 'black';
var indexInColors = index % colors.length;
return colors[indexInColors]; return colors[indexInColors];
} }
function getColor(props) { function getColor(props) {
var color = STYLE_BORDER.color; var color = STYLE_BORDER.color;
var fType = $('#f_type').val(); var fType = $('#f_type').val();
if( fType == 'size' ) { if( fType == 'nodes_size' ) {
if( props['count_k'] <= 0 ) if( props['count_k'] <= 0 )
color = FILL_ZERO; color = FILL_ZERO;
else if( props['count_k'] * BYTES_FOR_NODE < size_good * 1024 * 1024 ) else if( props['count_k'] * BYTES_FOR_NODE < size_good * 1024 * 1024 )
color = FILL_TOO_SMALL; color = FILL_TOO_SMALL;
else if( props['count_k'] * BYTES_FOR_NODE > size_bad * 1024 * 1024 ) else if( props['count_k'] * BYTES_FOR_NODE > size_bad * 1024 * 1024 )
color = FILL_TOO_BIG; color = FILL_TOO_BIG;
} else if( fType == 'topo' ) { } else if( fType == 'predict_size' ) {
if( props['mwm_size_est'] <= 0 )
color = FILL_ZERO;
else if( props['mwm_size_est'] < size_good * 1024 )
color = FILL_TOO_SMALL;
else if( props['mwm_size_est'] > size_bad * 1024 )
color = FILL_TOO_BIG;
}
else if( fType == 'topo' ) {
var rings = countRings([0, 0], props.layer); var rings = countRings([0, 0], props.layer);
if( rings[1] > 0 ) if( rings[1] > 0 )
color = FILL_TOO_BIG; color = FILL_TOO_BIG;
@ -471,7 +498,7 @@ function bJOSM() {
function bJosmOld() { function bJosmOld() {
var b = map.getBounds(); var b = map.getBounds();
importInJOSM('josm', { importInJOSM('josm', {
'table': OLD_BORDERS_NAME, 'table': config.OLD_BORDERS_NAME,
'xmin': b.getWest(), 'xmin': b.getWest(),
'xmax': b.getEast(), 'xmax': b.getEast(),
'ymin': b.getSouth(), 'ymin': b.getSouth(),
@ -508,7 +535,7 @@ function finishRename() {
} }
function bToggleRename() { function bToggleRename() {
if( !selectedId || !(selectedId in borders) || readonly ) if( !selectedId || !(selectedId in borders) || config.READONLY )
return; return;
var rename_el = $('#rename'); var rename_el = $('#rename');
if (rename_el.is(':hidden')) { if (rename_el.is(':hidden')) {
@ -952,11 +979,8 @@ function clearDivideLayers() {
function bDividePreview() { function bDividePreview() {
var auto_divide = $('#auto_divide').prop('checked'); var auto_divide = $('#auto_divide').prop('checked');
if (auto_divide && ( if (auto_divide && !$('#mwm_size_thr').val()) {
!$('#city_population_thr').val() || alert('Fill mmw size threshold');
!$('#cluster_population_thr').val())
) {
alert('Fill population thresholds');
return; return;
} }
clearDivideLayers(); clearDivideLayers();
@ -970,8 +994,7 @@ function bDividePreview() {
'apply_to_similar': apply_to_similar 'apply_to_similar': apply_to_similar
}; };
if (auto_divide) { if (auto_divide) {
params['city_population_thr'] = $('#city_population_thr').val(); params['mwm_size_thr'] = parseInt($('#mwm_size_thr').val()) * 1024;
params['cluster_population_thr'] = $('#cluster_population_thr').val();
} }
$.ajax(getServer('divpreview'), { $.ajax(getServer('divpreview'), {
data: params, data: params,
@ -1025,8 +1048,7 @@ function bDivideDo() {
'apply_to_similar': apply_to_similar 'apply_to_similar': apply_to_similar
}; };
if (auto_divide) { if (auto_divide) {
params['city_population_thr'] = $('#city_population_thr').val(); params['mwm_size_thr'] = parseInt($('#mwm_size_thr').val()) * 1024;
params['cluster_population_thr'] = $('#cluster_population_thr').val();
} }
$.ajax(getServer('divide'), { $.ajax(getServer('divide'), {
data: params, data: params,

View file

@ -1,4 +1,5 @@
const BYTES_FOR_NODE = 8; const BYTES_FOR_NODE = 8;
const IMPORT_ENABLED = true;
const SELF_URL = document.location.origin; const SELF_URL = document.location.origin;

102
web/app/subregions.py Normal file
View file

@ -0,0 +1,102 @@
import config
from mwm_size_predictor import MwmSizePredictor
osm_table = config.OSM_TABLE
osm_places_table = config.OSM_PLACES_TABLE
size_predictor = MwmSizePredictor()
def get_subregions_info(conn, region_id, region_table,
                        next_level, need_cities=False):
    """Collect name and mwm size estimation for each subregion of a region.

    :param conn: psycopg2 connection
    :param region_id: id of the containing region in region_table
    :param region_table: maybe TABLE or OSM_TABLE from config.py
    :param next_level: admin level of subregions to find
    :param need_cities: when True, each subregion also carries its city list
    :return: dict {subregion_id => subregion data} including area and
             population info
    """
    full_info = _get_subregions_basic_info(conn, region_id, region_table,
                                           next_level, need_cities)
    _add_population_data(conn, full_info, need_cities)
    _add_mwm_size_estimation(full_info)

    exported_keys = ['name', 'mwm_size_est']
    if need_cities:
        exported_keys.append('cities')
    result = {}
    for sub_id, sub_data in full_info.items():
        result[sub_id] = {key: sub_data[key] for key in exported_keys}
    return result
def _get_subregions_basic_info(conn, region_id, region_table,
                               next_level, need_cities):
    """Find admin_level=next_level OSM regions contained in the given region.

    Returns {osm_id: data} with name and area filled in and population
    counters zeroed (they are filled later by _add_population_data).
    """
    # The borders table and the raw OSM table name their key/geometry
    # columns differently.
    if region_table == config.TABLE:
        region_id_column, region_geom_column = 'id', 'geom'
    else:
        region_id_column, region_geom_column = 'osm_id', 'way'
    cursor = conn.cursor()
    cursor.execute(f"""
        SELECT subreg.osm_id, subreg.name, ST_Area(geography(subreg.way))/1.0E+6 area
        FROM {region_table} reg, {osm_table} subreg
        WHERE reg.{region_id_column} = %s AND subreg.admin_level = %s AND
              ST_Contains(reg.{region_geom_column}, subreg.way)
        """, (region_id, next_level)
    )
    subregions = {}
    for sub_id, sub_name, sub_area in cursor:
        entry = {
            'osm_id': sub_id,
            'name': sub_name,
            'area': sub_area,
            'urban_pop': 0,
            'city_cnt': 0,
            'hamlet_cnt': 0
        }
        if need_cities:
            entry['cities'] = []
        subregions[sub_id] = entry
    return subregions
def _add_population_data(conn, subregions, need_cities):
cursor = conn.cursor()
subregion_ids = ','.join(str(x) for x in subregions.keys())
cursor.execute(f"""
SELECT b.osm_id, p.name, COALESCE(p.population, 0), p.place
FROM {osm_table} b, {osm_places_table} p
WHERE b.osm_id IN ({subregion_ids})
AND ST_CONTAINS(b.way, p.center)
"""
)
for subregion_id, place_name, place_population, place_type in cursor:
subregion_data = subregions[subregion_id]
if place_type in ('city', 'town'):
subregion_data['city_cnt'] += 1
subregion_data['urban_pop'] += place_population
if need_cities:
subregion_data['cities'].append({
'name': place_name,
'population': place_population
})
else:
subregion_data['hamlet_cnt'] += 1
def _add_mwm_size_estimation(subregions):
subregions_sorted = [
(
s_id,
[subregions[s_id][f] for f in
('urban_pop', 'area', 'city_cnt', 'hamlet_cnt')]
)
for s_id in sorted(subregions.keys())
]
feature_array = [x[1] for x in subregions_sorted]
predictions = size_predictor.predict(feature_array)
for subregion_id, mwm_size_prediction in zip(
(x[0] for x in subregions_sorted),
predictions
):
subregions[subregion_id]['mwm_size_est'] = mwm_size_prediction

View file

@ -30,8 +30,11 @@
#backup_saving, #backup_restoring { margin-bottom: 1em; } #backup_saving, #backup_restoring { margin-bottom: 1em; }
#filefm, #old_action, #josm_old, #cross_actions { display: none; } #filefm, #old_action, #josm_old, #cross_actions { display: none; }
#h_iframe { display: block; width: 100%; height: 80px; } #h_iframe { display: block; width: 100%; height: 80px; }
a, a:hover, a:visited { color: blue; }
#start_over, #start_over:hover, #start_over:visited { color: red; } #start_over, #start_over:hover, #start_over:visited { color: red; }
#city_population_thr, #cluster_population_thr { max-width: 80px;} #population_thresholds { padding-left: 1.5em; }
#mwm_size_thr { max-width: 50px;}
#r_green, #r_red { width: 40px; }
#b_import { max-width: 180px; } #b_import { max-width: 180px; }
#import_div { position: relative; display: none; } #import_div { position: relative; display: none; }
#hide_import_button { #hide_import_button {
@ -44,7 +47,7 @@
align-items: center; align-items: center;
justify-content: center; justify-content: center;
cursor: pointer; cursor: pointer;
} }
</style> </style>
</head> </head>
@ -54,7 +57,8 @@
<div id="filter"> <div id="filter">
Раскраска по <select size="1" id="f_type" value="size" onchange="filterSelect()"> Раскраска по <select size="1" id="f_type" value="size" onchange="filterSelect()">
<option value="country">стране</option> <option value="country">стране</option>
<option value="size">размеру</option> <option value="nodes_size">размеру по точкам</option>
<option value="predict_size">предсказ. размеру</option>
<option value="topo">топологии</option> <option value="topo">топологии</option>
<option value="chars">буквам в назв.</option> <option value="chars">буквам в назв.</option>
<option value="comments">комментариям</option> <option value="comments">комментариям</option>
@ -138,7 +142,8 @@
</div> </div>
<div id="potential_parents"> <div id="potential_parents">
</div> </div>
<b>Оценка размера:</b> <span id="b_size"></span><br> <b>Оценка размера по точкам:</b> <span id="b_size"></span><br>
<b>Оценка размера по нас+пл:</b> <span id="pa_size"></span><br>
<b>Последняя правка:</b> <span id="b_date"></span><br> <b>Последняя правка:</b> <span id="b_date"></span><br>
<b>Количество точек:</b> <span id="b_nodes"></span><br> <b>Количество точек:</b> <span id="b_nodes"></span><br>
<b>Площадь:</b> <span id="b_area"></span> км²<br> <b>Площадь:</b> <span id="b_area"></span> км²<br>
@ -188,20 +193,18 @@
<button onclick="bPointCancel()">Вернуться</button> <button onclick="bPointCancel()">Вернуться</button>
</div> </div>
<div id="divide" class="actions"> <div id="divide" class="actions">
Выбранная область <span id="region_to_divide"></span> Выбранная область <span id="region_to_divide"></span>
будет заменена вложенными областями уровня будет заменена вложенными областями уровня
<input type="number" id="next_level" min="2" max="12">.<br> <input type="number" id="next_level" min="2" max="12">.<br>
<br> <br>
<div> <div>
<input type="checkbox" id="auto_divide" checked> <input type="checkbox" id="auto_divide" checked>
<label for="auto_divide">Автослияние по населению</label> <label for="auto_divide">Автослияние по населению</label>
</div> <div id="population_thresholds">
<div id="population_thresholds"> Верхняя граница размера mwm:
Порог населения города, региона<br> <input id="mwm_size_thr" type="number"
<input id="city_population_thr" type="number" min="1" value="70" step="1"> Мб
min="0" max="8000000000" value="500000" step="50000">, </div>
<input id="cluster_population_thr" type="number"
min="0" max="8000000000" value="500000" step="50000">
</div> </div>
<div> <div>
<input type="checkbox" id="apply_to_similar"> <input type="checkbox" id="apply_to_similar">