Commit: MWM size prediction model
Commit hash: 580a1ab9ac (parent: b13e31bff1)
17 changed files with 491 additions and 286 deletions
.gitignore (vendored, 2 lines added)

@@ -1,2 +1,4 @@
 __pycache__
 *.pyc
+.idea
+nohup.out
@@ -14,7 +14,7 @@ CREATE TABLE borders (
     count_k INTEGER,
     modified TIMESTAMP NOT NULL,
     cmnt VARCHAR(500),
-    mwm_size_est double precision
+    mwm_size_est REAL
 );
 CREATE INDEX borders_geom_gits_idx ON borders USING gist (geom);
 CREATE INDEX borders_parent_id_idx ON borders (parent_id);

@@ -29,15 +29,15 @@ CREATE TABLE borders_backup (
     count_k INTEGER,
     modified TIMESTAMP NOT NULL,
     cmnt VARCHAR(500),
-    mwm_size_est double precision,
+    mwm_size_est REAL,
     PRIMARY KEY (backup, id)
 );

 CREATE TABLE splitting (
     osm_border_id BIGINT NOT NULL REFERENCES osm_borders(osm_id), -- reference to parent osm region
     subregion_ids BIGINT[] NOT NULL,
-    mwm_size_est double precision NOT NULL,
-    mwm_size_thr double precision NOT NULL,
+    mwm_size_est REAL NOT NULL,
+    mwm_size_thr INTEGER NOT NULL, -- mwm size threshold in Kb, 4-bytes INTEGER is enough
     geom geometry NOT NULL
 );
 CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr);
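Note: mwm_size_thr becomes a plain INTEGER because thresholds are stored in kilobytes. A quick sanity check of that convention (the 70 MB figure mirrors the MWM_SIZE_THRESHOLD default added to config.py below; everything else is illustrative):

    # Sizes and thresholds are kept in Kb, per the schema comment above.
    threshold_mb = 70
    mwm_size_thr = threshold_mb * 1024   # 71680 Kb
    assert mwm_size_thr < 2**31 - 1      # comfortably fits a 4-byte INTEGER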
@@ -21,13 +21,14 @@ services:
       context: ./db
       dockerfile: Dockerfile.db
       args:
-        PLANET_URL: http://generator.testdata.mapsme.cloud.devmail.ru/planet/planet-latest.o5m
-        PLANET_URL_external: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf
+        PLANET_URL_full: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf
+        PLANET_URL: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf
+        PLANET_URL_small: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf
     container_name: db
     restart: always
     environment:
       POSTGRES_HOST_AUTH_METHOD: password
       POSTGRES_USER: postgres
       POSTGRES_PASSWORD: postgres
+    ports:
+      - "55432:5432"
@@ -1,6 +1,6 @@
 FROM tiangolo/uwsgi-nginx-flask:latest

-RUN pip install flask_cors flask_compress psycopg2 unidecode
+RUN pip install flask_cors flask_compress psycopg2 unidecode numpy sklearn

 COPY app /app
 COPY ./uwsgi.ini /app
@@ -6,20 +6,21 @@ from collections import defaultdict

 from config import (
     AUTOSPLIT_TABLE as autosplit_table,
-    TABLE as table,
-    OSM_TABLE as osm_table
+    OSM_TABLE as osm_table,
+    MWM_SIZE_THRESHOLD,
 )
+from subregions import get_subregions_info


 class DisjointClusterUnion:
     """Disjoint set union implementation for administrative subregions."""

-    def __init__(self, region_id, subregions, thresholds):
+    def __init__(self, region_id, subregions, mwm_size_thr=None):
         self.region_id = region_id
         self.subregions = subregions
-        self.city_population_thr, self.cluster_population_thr = thresholds
+        self.mwm_size_thr = mwm_size_thr or MWM_SIZE_THRESHOLD
         self.representatives = {sub_id: sub_id for sub_id in subregions}
-        # a cluster is one or more subregions with common borders
+        # A cluster is one or more subregions with common borders
         self.clusters = {}  # representative => cluster object

         # At the beginning, each subregion forms a cluster.
@@ -28,27 +29,20 @@ class DisjointClusterUnion:
             self.clusters[subregion_id] = {
                 'representative': subregion_id,
                 'subregion_ids': [subregion_id],
-                'population': data['population'],
-                'big_cities_cnt': sum(1 for c in data['cities'] if self.is_city_big(c)),
+                'mwm_size_est': data['mwm_size_est'],
                 'finished': False,  # True if the cluster cannot be merged with another
             }

-    def is_city_big(self, city):
-        return city['population'] >= self.city_population_thr
-
     def get_smallest_cluster(self):
         """Find minimal cluster without big cities."""
         smallest_cluster_id = min(
             filter(
-                lambda cluster_id: (
-                    not self.clusters[cluster_id]['finished'] and
-                    self.clusters[cluster_id]['big_cities_cnt'] == 0)
-                ,
+                lambda cluster_id:
+                    not self.clusters[cluster_id]['finished'],
                 self.clusters.keys()
             ),
             default=None,
-            key=lambda cluster_id: self.clusters[cluster_id]['population']
+            key=lambda cluster_id: self.clusters[cluster_id]['mwm_size_est']
         )
         return smallest_cluster_id
@@ -63,9 +57,9 @@ class DisjointClusterUnion:
             self.representatives[subregion_id] = representative
         return representative

-    def get_cluster_population(self, subregion_id):
+    def get_cluster_mwm_size_est(self, subregion_id):
         cluster_id = self.find_cluster(subregion_id)
-        return self.clusters[cluster_id]['population']
+        return self.clusters[cluster_id]['mwm_size_est']

     def get_cluster_count(self):
         return len(self.clusters)
@@ -77,8 +71,7 @@ class DisjointClusterUnion:
         r_cluster = self.clusters[retained_cluster_id]
         d_cluster = self.clusters[dropped_cluster_id]
         r_cluster['subregion_ids'].extend(d_cluster['subregion_ids'])
-        r_cluster['population'] += d_cluster['population']
-        r_cluster['big_cities_cnt'] += d_cluster['big_cities_cnt']
+        r_cluster['mwm_size_est'] += d_cluster['mwm_size_est']
         del self.clusters[dropped_cluster_id]
         self.representatives[dropped_cluster_id] = retained_cluster_id
         return retained_cluster_id
@@ -95,52 +88,13 @@ class DisjointClusterUnion:
         return subregion_ids


-def enrich_with_population_and_cities(conn, subregions):
-    cursor = conn.cursor()
-    ids = ','.join(str(x) for x in subregions.keys())
-    cursor.execute(f"""
-        SELECT b.osm_id, c.name, c.population
-        FROM {osm_table} b, osm_cities c
-        WHERE b.osm_id IN ({ids}) AND ST_CONTAINS(b.way, c.center)
-        """
-    )
-    for rec in cursor:
-        sub_id = int(rec[0])
-        subregions[sub_id]['cities'].append({
-            'name': rec[1],
-            'population': int(rec[2])
-        })
-        subregions[sub_id]['population'] += int(rec[2])
-
-
-def find_subregions(conn, region_id, next_level):
-    cursor = conn.cursor()
-    cursor.execute(f"""
-        SELECT subreg.osm_id, subreg.name
-        FROM {osm_table} reg, {osm_table} subreg
-        WHERE reg.osm_id = %s AND subreg.admin_level = %s AND
-              ST_Contains(reg.way, subreg.way)
-        """,
-        (region_id, next_level)
-    )
-    subregions = {
-        int(rec[0]):
-        {
-            'osm_id': int(rec[0]),
-            'name': rec[1],
-            'population': 0,
-            'cities': []
-        }
-        for rec in cursor
-    }
-    if subregions:
-        enrich_with_population_and_cities(conn, subregions)
-    return subregions
-
-
-def get_best_cluster_to_join_with(small_cluster_id, dcu: DisjointClusterUnion, common_border_matrix):
+def get_best_cluster_to_join_with(small_cluster_id,
+                                  dcu: DisjointClusterUnion,
+                                  common_border_matrix):
     if small_cluster_id not in common_border_matrix:
-        return None  # this may be if a subregion is isolated, like Bezirk Lienz inside Tyrol, Austria
+        # This may be if a subregion is isolated,
+        # like Bezirk Lienz inside Tyrol, Austria
+        return None
     common_borders = defaultdict(lambda: 0.0)  # cluster representative => common border length
     subregion_ids = dcu.get_cluster_subregion_ids(small_cluster_id)
     for subregion_id in subregion_ids:
@@ -148,29 +102,26 @@ def get_best_cluster_to_join_with(small_cluster_id, dcu: DisjointClusterUnion, common_border_matrix):
             other_cluster_id = dcu.find_cluster(other_subregion_id)
             if other_cluster_id != small_cluster_id:
                 common_borders[other_cluster_id] += length
-    #print(f"common_borders={json.dumps(common_borders)} of len {len(common_borders)}")
-    #common_borders = {k:v for k,v in common_borders.items() if v > 0.0}
     if not common_borders:
         return None
     total_common_border_length = sum(common_borders.values())
-    total_adjacent_population = sum(dcu.get_cluster_population(x) for x in common_borders)
+    total_adjacent_mwm_size_est = sum(dcu.get_cluster_mwm_size_est(x) for x in common_borders)
     choice_criterion = (
         (
             lambda cluster_id: (
                 common_borders[cluster_id]/total_common_border_length +
-                -dcu.get_cluster_population(cluster_id)/total_adjacent_population
+                -dcu.get_cluster_mwm_size_est(cluster_id)/total_adjacent_mwm_size_est
             )
-        ) if total_adjacent_population else
+        ) if total_adjacent_mwm_size_est else
         lambda cluster_id: (
             common_borders[cluster_id]/total_common_border_length
         )
     )
-    small_cluster_population = dcu.get_cluster_population(small_cluster_id)
     best_cluster_id = max(
         filter(
             lambda cluster_id: (
-                small_cluster_population + dcu.get_cluster_population(cluster_id)
-                <= dcu.cluster_population_thr
+                dcu.clusters[small_cluster_id]['mwm_size_est'] +
+                dcu.clusters[cluster_id]['mwm_size_est'] <= dcu.mwm_size_thr
             ),
             common_borders.keys()
         ),
@@ -207,39 +158,31 @@ def calculate_common_border_matrix(conn, subregion_ids):


 def find_golden_splitting(conn, border_id, next_level,
-                          country_region_name, thresholds):
-    subregions = find_subregions(conn, border_id, next_level)
+                          country_region_name, mwm_size_thr):
+    subregions = get_subregions_info(conn, border_id, osm_table,
+                                     next_level, need_cities=True)
     if not subregions:
-        print(f"No subregions for {border_id} {country_region_name}")
         return

-    dcu = DisjointClusterUnion(border_id, subregions, thresholds)
+    dcu = DisjointClusterUnion(border_id, subregions, mwm_size_thr)
     #save_splitting_to_file(dcu, f'all_{country_region_name}')
     all_subregion_ids = dcu.get_all_subregion_ids()
     common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids)

     i = 0
     while True:
-        with open(f"clusters-{i:02d}.json", 'w') as f:
-            json.dump(dcu.clusters, f, ensure_ascii=False, indent=2)
         if dcu.get_cluster_count() == 1:
             return dcu
         i += 1
-        #print(f"i = {i}")
         smallest_cluster_id = dcu.get_smallest_cluster()
         if not smallest_cluster_id:
-            return dcu  # TODO: return target splitting
+            return dcu
-        #print(f"smallest cluster = {json.dumps(dcu.clusters[smallest_cluster_id])}")
         best_cluster_id = get_best_cluster_to_join_with(smallest_cluster_id, dcu, common_border_matrix)
-        if not best_cluster_id:  # !!! a case for South West England and popul 500000
+        if not best_cluster_id:
             dcu.mark_cluster_finished(smallest_cluster_id)
             continue
         assert (smallest_cluster_id != best_cluster_id), f"{smallest_cluster_id}"
-        #print(f"best cluster = {json.dumps(dcu.clusters[best_cluster_id])}")
-        new_cluster_id = dcu.union(smallest_cluster_id, best_cluster_id)
-        #print(f"{json.dumps(dcu.clusters[new_cluster_id])}")
-        #print()
-        #import sys; sys.exit()
+        dcu.union(smallest_cluster_id, best_cluster_id)
     return dcu
@@ -279,6 +222,9 @@ def write_polygons_to_poly(file, polygons, name_prefix):


 def save_splitting_to_file(conn, dcu: DisjointClusterUnion, filename_prefix=None):
+    """May be used for debugging"""
+    GENERATE_ALL_POLY=False
+    FOLDER='split_results'
     with open(f"{FOLDER}/{filename_prefix}.poly", 'w') as poly_file:
         poly_file.write(f"{filename_prefix}\n")
         for cluster_id, data in dcu.clusters.items():
@@ -297,7 +243,7 @@ def save_splitting_to_file(conn, dcu: DisjointClusterUnion, filename_prefix=None):
             poly_file.write('END\n')
     with open(f"{FOLDER}/{filename_prefix}-splitting.json", 'w') as f:
         json.dump(dcu.clusters, f, ensure_ascii=False, indent=2)


 def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
     cursor = conn.cursor()
@@ -305,8 +251,7 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
     cursor.execute(f"""
         DELETE FROM {autosplit_table}
         WHERE osm_border_id = {dcu.region_id}
-          AND city_population_thr = {dcu.city_population_thr}
-          AND cluster_population_thr = {dcu.cluster_population_thr}
+          AND mwm_size_thr = {dcu.mwm_size_thr}
         """)
     for cluster_id, data in dcu.clusters.items():
         subregion_ids = data['subregion_ids']
@@ -314,20 +259,19 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
         cluster_geometry_sql = get_union_sql(subregion_ids)
         cursor.execute(f"""
             INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom,
-                                           city_population_thr, cluster_population_thr)
+                                           mwm_size_thr, mwm_size_est)
             VALUES (
                 {dcu.region_id},
                 '{{{','.join(str(x) for x in subregion_ids)}}}',
                 ({cluster_geometry_sql}),
-                {dcu.city_population_thr},
-                {dcu.cluster_population_thr}
+                {dcu.mwm_size_thr},
+                {data['mwm_size_est']}
             )
             """)
     conn.commit()


 def get_region_and_country_names(conn, region_id):
-    #if region_id != -1574364: return
     cursor = conn.cursor()
     try:
         cursor.execute(
|
@ -355,18 +299,15 @@ def get_region_and_country_names(conn, region_id):
|
||||||
print(f"Many countries for region '{region_name}' id={region_id}")
|
print(f"Many countries for region '{region_name}' id={region_id}")
|
||||||
return region_name, country_name
|
return region_name, country_name
|
||||||
|
|
||||||
DEFAULT_CITY_POPULATION_THRESHOLD = 500000
|
|
||||||
DEFAULT_CLUSTER_POPULATION_THR = 500000
|
|
||||||
|
|
||||||
def split_region(conn, region_id, next_level,
|
def split_region(conn, region_id, next_level,
|
||||||
thresholds=(DEFAULT_CITY_POPULATION_THRESHOLD,
|
mwm_size_thr,
|
||||||
DEFAULT_CLUSTER_POPULATION_THR),
|
|
||||||
save_to_files=False):
|
save_to_files=False):
|
||||||
region_name, country_name = get_region_and_country_names(conn, region_id)
|
region_name, country_name = get_region_and_country_names(conn, region_id)
|
||||||
region_name = region_name.replace('/', '|')
|
region_name = region_name.replace('/', '|')
|
||||||
country_region_name = f"{country_name}_{region_name}" if country_name else region_name
|
country_region_name = f"{country_name}_{region_name}" if country_name else region_name
|
||||||
dcu = find_golden_splitting(conn, region_id, next_level,
|
dcu = find_golden_splitting(conn, region_id, next_level,
|
||||||
country_region_name, thresholds)
|
country_region_name, mwm_size_thr)
|
||||||
if dcu is None:
|
if dcu is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@@ -378,30 +319,6 @@ def save_splitting(dcu: DisjointClusterUnion, conn,
     save_splitting_to_db(conn, dcu)
     if save_to_files:
         print(f"Saving {country_region_name}")
-        filename_prefix = f"{country_region_name}-{dcu.city_population_thrR}"
+        filename_prefix = f"{country_region_name}-{dcu.city_population_thr}"
         save_splitting_to_file(conn, dcu, filename_prefix)
-
-
-GENERATE_ALL_POLY=False
-FOLDER='split_results'
-#CITY_POPULATION_THR = 500000
-#CLUSTER_POPULATION_THR = 500000
-
-if __name__ == '__main__':
-    conn = psycopg2.connect("dbname=az_gis3")
-
-    PREFIX = "UBavaria"
-    CITY_POPULATION_THR = 500000
-    CLUSTER_POPULATION_THR = 500000
-
-    region_id = -162050  # -165475 # California ## -162050 # Florida
-    region_id = -2145274  # Upper Bavaria
-    #region_id = -151339  # South West England
-    #region_id = -58446   # Scotland
-    dcu = find_golden_splitting(region_id)
-    make_polys(dcu.clusters)
-    with open(f"{PREFIX}_{CITY_POPULATION_THR}_splitting{region_id}-poplen.json", 'w') as f:
-        json.dump(dcu.clusters, f, ensure_ascii=False, indent=2)
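Note: with population thresholds gone, the auto-splitting above reduces to a greedy, size-bounded agglomerative clustering: repeatedly take the smallest unfinished cluster and merge it into the neighbour sharing the longest common border, as long as the combined mwm_size_est stays under mwm_size_thr. A self-contained toy sketch of that loop (all data invented; the real code takes sizes from the prediction model and border lengths from PostGIS):

    # Toy model of find_golden_splitting's merge loop; parallel borders between
    # the same pair of clusters are not summed here, unlike the real matrix.
    sizes = {1: 30.0, 2: 25.0, 3: 50.0, 4: 10.0}   # cluster id -> mwm_size_est, Kb
    adj = {(1, 2): 7.0, (2, 3): 4.0, (3, 4): 9.0}  # common border lengths
    THRESHOLD = 70.0                               # mwm_size_thr, Kb
    finished = set()

    def shared(a, b):
        return adj.get((a, b)) or adj.get((b, a)) or 0.0

    while len(sizes) - len(finished) > 1:
        small = min((c for c in sizes if c not in finished), key=sizes.get)
        candidates = [c for c in sizes
                      if c != small and shared(small, c) > 0
                      and sizes[small] + sizes[c] <= THRESHOLD]
        if not candidates:
            finished.add(small)  # cannot grow further without bursting the budget
            continue
        best = max(candidates, key=lambda c: shared(small, c))
        sizes[best] += sizes.pop(small)  # merge 'small' into 'best'
        adj = {(a if a != small else best, b if b != small else best): ln
               for (a, b), ln in adj.items() if {a, b} != {small, best}}

    print(sizes)  # {1: 55.0, 3: 60.0}: two clusters, each within the 70 Kb budget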
@@ -25,6 +25,7 @@ from countries_structure import (
     create_countries_initial_structure,
     get_osm_border_name_by_osm_id,
 )
+from subregions import get_subregions_info

 try:
     from lxml import etree
@@ -78,7 +79,8 @@ def fetch_borders(**kwargs):
     query = f"""
         SELECT name, geometry, nodes, modified, disabled, count_k, cmnt,
                (CASE WHEN area = 'NaN' THEN 0 ELSE area END) AS area,
-               id, admin_level, parent_id, parent_name
+               id, admin_level, parent_id, parent_name,
+               mwm_size_est
         FROM (
             SELECT name,
                    ST_AsGeoJSON({geom}, 7) as geometry,

@@ -95,7 +97,8 @@ def fetch_borders(**kwargs):
                    parent_id,
                    ( SELECT name FROM {table}
                      WHERE id = t.parent_id
-                   ) AS parent_name
+                   ) AS parent_name,
+                   mwm_size_est
             FROM {table} t
             WHERE ({where_clause}) {leaves_filter}
         ) q
@@ -112,18 +115,19 @@ def fetch_borders(**kwargs):
                  'disabled': rec[4], 'count_k': rec[5],
                  'comment': rec[6],
                  'area': rec[7],
-                 'id': region_id, 'country_id': country_id,
+                 'id': region_id,
                  'admin_level': rec[9],
                  'parent_id': rec[10],
                  'parent_name': rec[11] or '',
-                 'country_name': country_name
+                 'country_id': country_id,
+                 'country_name': country_name,
+                 'mwm_size_est': rec[12]
                 }
         feature = {'type': 'Feature',
                    'geometry': json.loads(rec[1]),
                    'properties': props
                   }
         borders.append(feature)
-    #print([x['properties'] for x in borders])
     return borders

 def simplify_level_to_postgis_value(simplify_level):
@@ -228,8 +232,8 @@ def query_crossing():
         pass
     return jsonify(type='FeatureCollection', features=result)

-@app.route('/tables')
-def check_osm_table():
+@app.route('/config')
+def get_server_configuration():
     osm = False
     backup = False
     old = []

@@ -260,7 +264,9 @@ def check_osm_table():
         crossing = True
     except psycopg2.Error as e:
         pass
-    return jsonify(osm=osm, tables=old, readonly=config.READONLY, backup=backup, crossing=crossing)
+    return jsonify(osm=osm, tables=old, readonly=config.READONLY,
+                   backup=backup, crossing=crossing,
+                   mwm_size_thr=config.MWM_SIZE_THRESHOLD)

 @app.route('/search')
 def search():
@@ -341,9 +347,10 @@ def join_borders():
         cur.execute(f"""
             UPDATE {table}
             SET id = {free_id},
-                geom = ST_Union(geom, b2.g),
+                geom = ST_Union({table}.geom, b2.geom),
+                mwm_size_est = {table}.mwm_size_est + b2.mwm_size_est,
                 count_k = -1
-            FROM (SELECT geom AS g FROM {table} WHERE id = %s) AS b2
+            FROM (SELECT geom, mwm_size_est FROM {table} WHERE id = %s) AS b2
             WHERE id = %s""", (region_id2, region_id1))
         cur.execute(f"DELETE FROM {table} WHERE id = %s", (region_id2,))
     except psycopg2.Error as e:
@@ -630,24 +637,23 @@ def divide_preview():
         if not is_admin:
             return jsonify(status="Could not apply auto-division to non-administrative regions")
         try:
-            city_population_thr = int(request.args.get('city_population_thr'))
-            cluster_population_thr = int(request.args.get('cluster_population_thr'))
+            mwm_size_thr = int(request.args.get('mwm_size_thr'))
         except ValueError:
             return jsonify(status='Not a number in thresholds.')
         return divide_into_clusters_preview(
                 region_ids, next_level,
-                (city_population_thr, cluster_population_thr))
+                mwm_size_thr)
     else:
         return divide_into_subregions_preview(region_ids, next_level)

-def get_subregions(region_ids, next_level):
+def get_subregions_for_preview(region_ids, next_level):
     subregions = list(itertools.chain.from_iterable(
-        get_subregions_one(region_id, next_level)
+        get_subregions_one_for_preview(region_id, next_level)
         for region_id in region_ids
     ))
     return subregions

-def get_subregions_one(region_id, next_level):
+def get_subregions_one_for_preview(region_id, next_level):
     osm_table = config.OSM_TABLE
     table = config.TABLE
     cur = g.conn.cursor()
@@ -671,28 +677,28 @@ def get_subregions_one(region_id, next_level):
         subregions.append(feature)
     return subregions

-def get_clusters(region_ids, next_level, thresholds):
+def get_clusters_for_preview(region_ids, next_level, thresholds):
     clusters = list(itertools.chain.from_iterable(
-        get_clusters_one(region_id, next_level, thresholds)
+        get_clusters_for_preview_one(region_id, next_level, thresholds)
         for region_id in region_ids
     ))
     return clusters

-def get_clusters_one(region_id, next_level, thresholds):
+def get_clusters_for_preview_one(region_id, next_level, mwm_size_thr):
     autosplit_table = config.AUTOSPLIT_TABLE
     cursor = g.conn.cursor()
     where_clause = f"""
         osm_border_id = %s
-        AND city_population_thr = %s
-        AND cluster_population_thr = %s
+        AND mwm_size_thr = %s
         """
-    splitting_sql_params = (region_id,) + thresholds
+    splitting_sql_params = (region_id, mwm_size_thr)
     cursor.execute(f"""
         SELECT 1 FROM {autosplit_table}
         WHERE {where_clause}
         """, splitting_sql_params)
     if cursor.rowcount == 0:
-        split_region(g.conn, region_id, next_level, thresholds)
+        split_region(g.conn, region_id, next_level, mwm_size_thr)

     cursor.execute(f"""
         SELECT subregion_ids[1], ST_AsGeoJSON(ST_SimplifyPreserveTopology(geom, 0.01)) as way
         FROM {autosplit_table}
@@ -700,23 +706,24 @@ def get_clusters_one(region_id, next_level, thresholds):
         """, splitting_sql_params)
     clusters = []
     for rec in cursor:
-        cluster = { 'type': 'Feature',
-                    'geometry': json.loads(rec[1]),
-                    'properties': {'osm_id': int(rec[0])}
+        cluster = {
+            'type': 'Feature',
+            'geometry': json.loads(rec[1]),
+            'properties': {'osm_id': int(rec[0])}
         }
         clusters.append(cluster)
     return clusters

 def divide_into_subregions_preview(region_ids, next_level):
-    subregions = get_subregions(region_ids, next_level)
+    subregions = get_subregions_for_preview(region_ids, next_level)
     return jsonify(
         status='ok',
         subregions={'type': 'FeatureCollection', 'features': subregions}
     )

-def divide_into_clusters_preview(region_ids, next_level, thresholds):
-    subregions = get_subregions(region_ids, next_level)
-    clusters = get_clusters(region_ids, next_level, thresholds)
+def divide_into_clusters_preview(region_ids, next_level, mwm_size_thr):
+    subregions = get_subregions_for_preview(region_ids, next_level)
+    clusters = get_clusters_for_preview(region_ids, next_level, mwm_size_thr)
     return jsonify(
         status='ok',
         subregions={'type': 'FeatureCollection', 'features': subregions},
@@ -744,51 +751,53 @@ def divide():
         if not is_admin:
             return jsonify(status="Could not apply auto-division to non-administrative regions")
         try:
-            city_population_thr = int(request.args.get('city_population_thr'))
-            cluster_population_thr = int(request.args.get('cluster_population_thr'))
+            mwm_size_thr = int(request.args.get('mwm_size_thr'))
         except ValueError:
             return jsonify(status='Not a number in thresholds.')
         return divide_into_clusters(
                 region_ids, next_level,
-                (city_population_thr, cluster_population_thr))
+                mwm_size_thr)
     else:
         return divide_into_subregions(region_ids, next_level)

 def divide_into_subregions(region_ids, next_level):
-    table = config.TABLE
-    osm_table = config.OSM_TABLE
-    cur = g.conn.cursor()
     for region_id in region_ids:
-        is_admin = is_administrative_region(region_id)
-        if is_admin:
-            # TODO: rewrite SELECT into join rather than subquery to enable gist index
-            cur.execute(f"""
-                INSERT INTO {table} (id, geom, name, parent_id, modified, count_k)
-                SELECT osm_id, way, name, %s, now(), -1
-                FROM {osm_table}
-                WHERE ST_Contains(
-                        (SELECT geom FROM {table} WHERE id = %s), way
-                      )
-                      AND admin_level = {next_level}
-                """, (region_id, region_id,)
-            )
-        else:
-            cur.execute(f"""
-                INSERT INTO {table} (id, geom, name, parent_id, modified, count_k)
-                SELECT osm_id, way, name, (SELECT parent_id FROM {table} WHERE id = %s), now(), -1
-                FROM {osm_table}
-                WHERE ST_Contains(
-                        (SELECT geom FROM {table} WHERE id = %s), way
-                      )
-                      AND admin_level = {next_level}
-                """, (region_id, region_id,)
-            )
-        cur.execute(f"DELETE FROM {table} WHERE id = %s", (region_id,))
-
+        divide_into_subregions_one(region_id, next_level)
     g.conn.commit()
     return jsonify(status='ok')

-def divide_into_clusters(region_ids, next_level, thresholds):
+def divide_into_subregions_one(region_id, next_level):
+    table = config.TABLE
+    osm_table = config.OSM_TABLE
+    subregions = get_subregions_info(g.conn, region_id, table,
+                                     next_level, need_cities=False)
+    cursor = g.conn.cursor()
+    is_admin = is_administrative_region(region_id)
+    if is_admin:
+        for subregion_id, data in subregions.items():
+            cursor.execute(f"""
+                INSERT INTO {table}
+                    (id, geom, name, parent_id, modified, count_k, mwm_size_est)
+                SELECT osm_id, way, name, %s, now(), -1, {data['mwm_size_est']}
+                FROM {osm_table}
+                WHERE osm_id = %s
+                """, (region_id, subregion_id)
+            )
+    else:
+        for subregion_id, data in subregions.items():
+            cursor.execute(f"""
+                INSERT INTO {table}
+                    (id, geom, name, parent_id, modified, count_k, mwm_size_est)
+                SELECT osm_id, way, name,
+                       (SELECT parent_id FROM {table} WHERE id = %s),
+                       now(), -1, {data['mwm_size_est']}
+                FROM {osm_table}
+                WHERE osm_id = %s
+                """, (region_id, subregion_id)
+            )
+    cursor.execute(f"DELETE FROM {table} WHERE id = %s", (region_id,))

+def divide_into_clusters(region_ids, next_level, mwm_size_thr):
     table = config.TABLE
     autosplit_table = config.AUTOSPLIT_TABLE
     cursor = g.conn.cursor()
@@ -799,16 +808,15 @@ def divide_into_clusters(region_ids, next_level, thresholds):

         where_clause = f"""
             osm_border_id = %s
-            AND city_population_thr = %s
-            AND cluster_population_thr = %s
+            AND mwm_size_thr = %s
             """
-        splitting_sql_params = (region_id,) + thresholds
+        splitting_sql_params = (region_id, mwm_size_thr)
         cursor.execute(f"""
             SELECT 1 FROM {autosplit_table}
             WHERE {where_clause}
             """, splitting_sql_params)
         if cursor.rowcount == 0:
-            split_region(g.conn, region_id, next_level, thresholds)
+            split_region(g.conn, region_id, next_level, mwm_size_thr)

         free_id = get_free_id()
         counter = 0

@@ -830,8 +838,8 @@ def divide_into_clusters(region_ids, next_level, thresholds):
                 subregion_id = free_id
                 name = f"{base_name}_{counter}"
             insert_cursor.execute(f"""
                 INSERT INTO {table} (id, name, parent_id, geom, modified, count_k, mwm_size_est)
-                SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1
+                SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1, mwm_size_est
                 FROM {autosplit_table} WHERE subregion_ids[1] = %s AND {where_clause}
                 """, (name, cluster_id,) + splitting_sql_params)
     g.conn.commit()
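Note: the client-facing contract changes with the code above: /tables becomes /config and now reports the server's size threshold, and the divide endpoints take a single mwm_size_thr instead of two population thresholds. A hypothetical client session (the base URL, region id, and every parameter except mwm_size_thr are illustrative assumptions, not taken from this commit):

    import requests

    BASE = 'http://localhost:5000'  # assumed dev address

    cfg = requests.get(f'{BASE}/config').json()
    print(cfg['mwm_size_thr'])      # e.g. 71680, i.e. 70 MB expressed in Kb

    # Preview an auto-division constrained by the size budget; parameter names
    # other than 'mwm_size_thr' are guesses at divide_preview()'s contract.
    preview = requests.get(f'{BASE}/divide_preview', params={
        'id': -2145274,             # Upper Bavaria, an example id from the old code
        'next_level': 6,
        'auto_divide': 'true',
        'mwm_size_thr': cfg['mwm_size_thr'],
    }).json()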
@@ -8,11 +8,13 @@ READONLY = False
 TABLE = 'borders'
 # from where OSM borders are imported
 OSM_TABLE = 'osm_borders'
+# All populated places in OSM
+OSM_PLACES_TABLE = 'osm_places'
 # transit table for autosplitting results
 AUTOSPLIT_TABLE = 'splitting'
-## tables with borders for reference
+# tables with borders for reference
 OTHER_TABLES = {
     #'old': 'old_borders'
 }
 # backup table
 BACKUP = 'borders_backup'

@@ -28,3 +30,8 @@ IMPORT_ERROR_ALERT = False
 DAEMON_STATUS_PATH = '/tmp/borders-daemon-status.txt'
 DAEMON_PID_PATH = '/tmp/borders-daemon.pid'
 DAEMON_LOG_PATH = '/var/log/borders-daemon.log'
+# mwm size threshold in Kb
+MWM_SIZE_THRESHOLD = 70*1024
+# Estimated mwm size is predicted by the 'model.pkl' with 'scaler.pkl' for X
+MWM_SIZE_PREDICTION_MODEL_PATH = '/app/data/model.pkl'
+MWM_SIZE_PREDICTION_MODEL_SCALER_PATH = '/app/data/scaler.pkl'
@@ -2,6 +2,8 @@ import itertools

 import config

+from subregions import get_subregions_info
+
 table = config.TABLE
 osm_table = config.OSM_TABLE
@@ -260,43 +262,32 @@ def _clear_borders(conn):
     conn.commit()


-def _find_subregions(conn, osm_ids, next_level, parents, names):
+def _find_subregions(conn, osm_ids, next_level, regions):
     """Return subregions of level 'next_level' for regions with osm_ids."""
-    cursor = conn.cursor()
-    parent_osm_ids = ','.join(str(x) for x in osm_ids)
-    cursor.execute(f"""
-        SELECT b.osm_id, b.name, subb.osm_id, subb.name
-        FROM {osm_table} b, {osm_table} subb
-        WHERE subb.admin_level=%s
-          AND b.osm_id IN ({parent_osm_ids})
-          AND ST_Contains(b.way, subb.way)
-        """,
-        (next_level,)
-    )
-
-    # parent_osm_id => [(osm_id, name), (osm_id, name), ...]
     subregion_ids = []
-    for rec in cursor:
-        parent_osm_id = rec[0]
-        osm_id = rec[2]
-        parents[osm_id] = parent_osm_id
-        name = rec[3]
-        names[osm_id] = name
-        subregion_ids.append(osm_id)
+    for osm_id in osm_ids:
+        more_subregions = get_subregions_info(conn, osm_id, table,
+                                              next_level, need_cities=False)
+        for subregion_id, subregion_data in more_subregions.items():
+            region_data = regions.setdefault(subregion_id, {})
+            region_data['name'] = subregion_data['name']
+            region_data['mwm_size_est'] = subregion_data['mwm_size_est']
+            region_data['parent_id'] = osm_id
+            subregion_ids.append(subregion_id)
     return subregion_ids


-def _create_regions(conn, osm_ids, parents, names):
+def _create_regions(conn, osm_ids, regions):
     if not osm_ids:
         return
     osm_ids = list(osm_ids)  # to ensure order
     cursor = conn.cursor()
     sql_values = ','.join(
         f'({osm_id},'
         '%s,'
+        f"{regions[osm_id].get('parent_id', 'NULL')},"
+        f"{regions[osm_id].get('mwm_size_est', 'NULL')},"
         f'(SELECT way FROM {osm_table} WHERE osm_id={osm_id}),'
-        f'{parents[osm_id] or "NULL"},'
         'now())'
         for osm_id in osm_ids
     )
@@ -304,21 +295,23 @@ def _create_regions(conn, osm_ids, parents, names):
     #print(f"names={tuple(names[osm_id] for osm_id in osm_ids)}")
     #print(f"all parents={parents}")
     cursor.execute(f"""
-        INSERT INTO {table} (id, name, geom, parent_id, modified)
+        INSERT INTO {table} (id, name, parent_id, mwm_size_est, geom, modified)
         VALUES {sql_values}
-        """, tuple(names[osm_id] for osm_id in osm_ids)
+        """, tuple(regions[osm_id]['name'] for osm_id in osm_ids)
     )


 def _make_country_structure(conn, country_osm_id):
-    names = {}    # osm_id => osm name
-    parents = {}  # osm_id => parent_osm_id
+    regions = {}  # osm_id: { 'name': name,
+                  #           'mwm_size_est': size,
+                  #           'parent_id': parent_id }

     country_name = get_osm_border_name_by_osm_id(conn, country_osm_id)
-    names[country_osm_id] = country_name
-    parents[country_osm_id] = None
+    country_data = regions.setdefault(country_osm_id, {})
+    country_data['name'] = country_name
+    # TODO: country_data['mwm_size_est'] = ...

-    _create_regions(conn, [country_osm_id], parents, names)
+    _create_regions(conn, [country_osm_id], regions)

     if country_initial_levels.get(country_name):
         admin_levels = country_initial_levels[country_name]
@@ -332,18 +325,19 @@ def _make_country_structure(conn, country_osm_id):
                 f"AL={admin_level}, prev-AL={prev_level}"
             )
         subregion_ids = _find_subregions(conn, prev_region_ids,
-                                         admin_level, parents, names)
-        _create_regions(conn, subregion_ids, parents, names)
+                                         admin_level, regions)
+        _create_regions(conn, subregion_ids, regions)
         prev_region_ids = subregion_ids


 def create_countries_initial_structure(conn):
     _clear_borders(conn)
     cursor = conn.cursor()
+    # TODO: process overlapping countries, like Ukraine and Russia with common Crimea
     cursor.execute(f"""
         SELECT osm_id, name
         FROM {osm_table}
-        WHERE admin_level = 2
+        WHERE admin_level = 2 and name != 'Ukraine'
         """
         # and name in --('Germany', 'Luxembourg', 'Austria')
         # ({','.join(f"'{c}'" for c in country_initial_levels.keys())})
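Note: several call sites above rely on subregions.get_subregions_info, which is not part of this diff. Inferred from its usage, it returns a dict keyed by subregion osm_id; a sketch of the assumed shape:

    # Assumed return value of get_subregions_info(conn, region_id, table,
    # next_level, need_cities=...), reconstructed from the call sites; the real
    # definition lives in subregions.py, which this commit does not touch.
    subregions = {
        -1234567: {                  # hypothetical subregion osm_id
            'name': 'Example County',
            'mwm_size_est': 512.0,   # predicted mwm size in Kb
            # 'cities': [...],       # expected only when need_cities=True
        },
    }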
BIN  web/app/data/model.pkl (new file; binary file not shown)
BIN  web/app/data/mwm_data.xlsx (new file; binary file not shown)

web/app/data/prediction_model.py (new file, 119 lines)
@@ -0,0 +1,119 @@
+import pandas as pd
+import numpy as np
+
+from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import (
+    cross_val_score,
+    KFold,
+    GridSearchCV,
+)
+from sklearn.svm import SVR
+from sklearn.preprocessing import StandardScaler
+
+
+data = pd.read_excel('mwm_data.xlsx', sheet_name='mwms_all', header=1)
+data = data[data['exclude'] == 0]
+#data['is_urban2'] = data.apply(lambda row: row['pop_density'] > 260, axis=1)  # 260 - median of pop_density
+
+popul_column = 'urban_pop'  # options are 'population' and 'urban_pop' (for population of cities and towns only)
+feature_names = [popul_column, 'area', 'city_cnt', 'hamlet_cnt']
+target_name = 'size'
+
+for feature in set(feature_names) - set(['area']):  # if area is None it's an error!
+    data[feature] = data[feature].fillna(0)
+
+scoring = 'neg_mean_squared_error'  # another option is 'r2'
+
+
+def my_cross_validation(sample):
+    X = sample[feature_names]
+    y = sample[target_name]
+
+    sc_X = StandardScaler()
+    X = sc_X.fit_transform(X)
+
+    lin_regression = LinearRegression(fit_intercept=False)
+    svr_linear = SVR(kernel='linear')
+    svr_rbf = SVR(kernel='rbf')
+
+    for estimator_name, estimator in zip(
+            ('LinRegression', 'SVR_linear', 'SVR_rbf'),
+            (lin_regression, svr_linear, svr_rbf)):
+        cv_scores = cross_val_score(estimator, X, y,
+                                    cv=KFold(5, shuffle=True, random_state=1),
+                                    scoring=scoring)
+        mean_score = np.mean(cv_scores)
+        print(f"{estimator_name:15}", cv_scores, mean_score)
+
+
+def my_grid_search(sample):
+    X = sample[feature_names]
+    y = sample[target_name]
+
+    sc_X = StandardScaler()
+    X = sc_X.fit_transform(X)
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=0)
+
+    C_array = [10 ** n for n in range(6, 7)]
+    gamma_array = [0.009 + i * 0.001 for i in range(-7, 11, 2)] + ['auto', 'scale']
+    epsilon_array = [0.5 * i for i in range(0, 15)]
+    coef0_array = [-0.1, -0.01, 0, 0.01, 0.1]
+    param_grid = [
+        {'kernel': ['linear'], 'C': C_array, 'epsilon': epsilon_array},
+        {'kernel': ['rbf'], 'C': C_array, 'gamma': gamma_array, 'epsilon': epsilon_array},
+        {'kernel': ['poly', 'sigmoid'],
+         'C': C_array, 'gamma': gamma_array, 'epsilon': epsilon_array, 'coef0': coef0_array},
+    ]
+
+    svr = SVR()
+    grid_search = GridSearchCV(svr, param_grid, scoring=scoring)
+    grid_search.fit(X_train, y_train)
+    #means = grid_search.cv_results_['mean_test_score']
+    #stds = grid_search.cv_results_['std_test_score']
+    #print("Grid scores on development set:")
+    #for mean, std, params in zip(means, stds, grid_search.cv_results_['params']):
+    #    print("%0.3f (+/-%0.03f) for %r" % (mean, std, params))
+
+    print("C", C_array)
+    print("gamma", gamma_array)
+    print("epsilon", epsilon_array)
+    print("coef0", coef0_array)
+    print("Best_params:", grid_search.best_params_, grid_search.best_score_)
+
+
+def train_and_serialize_model(sample):
+    X = sample[feature_names]
+    y = sample[target_name]
+
+    X_head = X[0:4]
+    scaler = StandardScaler()
+    X = scaler.fit_transform(X)
+
+    # Parameters tuned with GridSearch
+    regressor = SVR(kernel='rbf', C=10**6, epsilon=0.0, gamma=0.012)
+    regressor.fit(X, y)
+
+    print(regressor.predict(X[0:4]))
+
+    # Serialize model
+    import pickle
+    with open('model.pkl', 'wb') as f:
+        pickle.dump(regressor, f)
+    with open('scaler.pkl', 'wb') as f:
+        pickle.dump(scaler, f)
+
+    # Deserialize model and test it on X_head samples
+    with open('model.pkl', 'rb') as f:
+        regressor2 = pickle.load(f)
+    with open('scaler.pkl', 'rb') as f:
+        scaler2 = pickle.load(f)
+    print(regressor2.predict(scaler2.transform(X_head)))
+
+
+if __name__ == '__main__':
+    train_and_serialize_model(data)
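Note: this script is both the experiment log and the artifact producer: my_cross_validation and my_grid_search motivated the RBF-kernel SVR and its hyperparameters, while train_and_serialize_model writes the model.pkl and scaler.pkl shipped in this commit. A sketch of an exploratory run, assuming mwm_data.xlsx sits in the working directory as the module expects:

    # Importing the module triggers the read_excel() call at its top level.
    from prediction_model import (data, my_cross_validation, my_grid_search,
                                  train_and_serialize_model)

    my_cross_validation(data)        # compare LinearRegression vs. linear/RBF SVR
    my_grid_search(data)             # tune C, gamma, epsilon on a train/test split
    train_and_serialize_model(data)  # fit the tuned SVR, write model.pkl + scaler.pkl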
BIN  web/app/data/scaler.pkl (new file; binary file not shown)

web/app/mwm_size_predictor.py (new file, 29 lines)
@@ -0,0 +1,29 @@
+import numpy as np
+import pickle
+
+import config
+
+
+class MwmSizePredictor:
+
+    def __init__(self):
+        with open(config.MWM_SIZE_PREDICTION_MODEL_PATH, 'rb') as f:
+            self.model = pickle.load(f)
+        with open(config.MWM_SIZE_PREDICTION_MODEL_SCALER_PATH, 'rb') as f:
+            self.scaler = pickle.load(f)
+
+    def predict(self, features_array):
+        """1D or 2D array of feature values for predictions. Features are
+        'urban_pop', 'area', 'city_cnt', 'hamlet_cnt' as defined for the
+        prediction model.
+        """
+        X = np.array(features_array)
+        one_prediction = (X.ndim == 1)
+        if one_prediction:
+            X = X.reshape(1, -1)
+        X_scaled = self.scaler.transform(X)
+        predictions = self.model.predict(X_scaled)
+        if one_prediction:
+            return predictions[0]
+        else:
+            return predictions.tolist()
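Note: a minimal usage sketch of the predictor; the feature values below are invented, but the feature order must match the training script ('urban_pop', 'area', 'city_cnt', 'hamlet_cnt'):

    predictor = MwmSizePredictor()  # loads model.pkl and scaler.pkl via config paths

    # One region -> a single float (estimated mwm size in Kb)
    one = predictor.predict([250000, 1350.0, 3, 40])

    # Several regions at once -> a list of floats
    many = predictor.predict([[250000, 1350.0, 3, 40],
                              [12000, 800.0, 0, 15]])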
@@ -3,11 +3,14 @@ var STYLE_SELECTED = { stroke: true, color: '#ff3', weight: 3, fill: true, fillO
 var FILL_TOO_SMALL = '#0f0';
 var FILL_TOO_BIG = '#800';
 var FILL_ZERO = 'black';
-var OLD_BORDERS_NAME; // filled in checkHasOSM()
-var IMPORT_ENABLED = true;

-var map, borders = {}, bordersLayer, selectedId, editing = false, readonly = false;
-var size_good = 50, size_bad = 70;
+var map, borders = {}, bordersLayer, selectedId, editing = false;
+var config = { // server config
+  READONLY: false,
+  MWM_SIZE_THR: 70,
+  OLD_BORDERS_NAME: undefined // may be filled in getServerConfiguration()
+};
+var size_good, size_bad;
 var maxRank = 1;
 var tooSmallLayer = null;
 var oldBordersLayer = null;
@@ -68,17 +71,17 @@ function init() {
     else
       $('#population_thresholds').hide();
   });
-  checkHasOSM();
+  getServerConfiguration();
   filterSelect(true);
 }

-function checkHasOSM() {
-  $.ajax(getServer('tables'), {
+function getServerConfiguration() {
+  $.ajax(getServer('config'), {
     success: function(res) {
       if( res.osm )
         $('#osm_actions').css('display', 'block');
       if( res.tables && res.tables.length > 0 ) {
-        OLD_BORDERS_NAME = res.tables[0];
+        config.OLD_BORDERS_NAME = res.tables[0];
         $('#old_action').css('display', 'block');
         $('#josm_old').css('display', 'inline');
       }
@@ -91,7 +94,7 @@ function checkHasOSM() {
         $('#action_buttons').css('display', 'none');
         $('#import_link').css('display', 'none');
         $('#backups').css('display', 'none');
-        readonly = true;
+        config.READONLY = true;
       }
       if( !res.readonly && IMPORT_ENABLED ) {
         $('#import_link').css('display', 'none');

@@ -100,6 +103,11 @@ function checkHasOSM() {
         var iframe = '<iframe name="import_frame" class="h_iframe" src="about:blank"></iframe>';
         // $('#filefm').after(iframe);
       }
+      size_bad = config.MWM_SIZE_THR = Math.round(parseInt(res.mwm_size_thr)/1024);
+      size_good = Math.round(size_bad * 0.7 / 10) * 10;
+      $('#r_green').val(size_good);
+      $('#r_red').val(size_bad);
+      $('#mwm_size_thr').val(config.MWM_SIZE_THR);
     }
   });
 }
@@ -148,11 +156,11 @@ function updateBorders() {
     crossingLayer.clearLayers();
   }

-  if( oldBordersLayer != null && OLD_BORDERS_NAME ) {
+  if( oldBordersLayer != null && config.OLD_BORDERS_NAME ) {
     oldBordersLayer.clearLayers();
     $.ajax(getServer('bbox'), {
       data: {
-        'table': OLD_BORDERS_NAME,
+        'table': config.OLD_BORDERS_NAME,
         'simplify': simplified,
         'xmin': b.getWest(),
         'xmax': b.getEast(),
@ -302,6 +310,7 @@ function selectLayer(e) {
|
||||||
$('#b_al').text(props['admin_level'] ? '('+props['admin_level']+')' : '');
|
$('#b_al').text(props['admin_level'] ? '('+props['admin_level']+')' : '');
|
||||||
$('#b_parent_name').text(props['parent_name']);
|
$('#b_parent_name').text(props['parent_name']);
|
||||||
$('#b_size').text(Math.round(props['count_k'] * BYTES_FOR_NODE / 1024 / 1024) + ' MB');
|
$('#b_size').text(Math.round(props['count_k'] * BYTES_FOR_NODE / 1024 / 1024) + ' MB');
|
||||||
|
$('#pa_size').text(Math.round(props['mwm_size_est']/1024) + ' MB');
|
||||||
//$('#b_nodes').text(borders[selectedId].layer.getLatLngs()[0].length);
|
//$('#b_nodes').text(borders[selectedId].layer.getLatLngs()[0].length);
|
||||||
$('#b_nodes').text(props['nodes']);
|
$('#b_nodes').text(props['nodes']);
|
||||||
$('#b_date').text(props['modified']);
|
$('#b_date').text(props['modified']);
|
||||||
|
@@ -317,7 +326,7 @@ function selectLayer(e) {

 function filterSelect(noRefresh) {
   var value = $('#f_type').val();
-  $('#f_size').css('display', value == 'size' ? 'block' : 'none');
+  $('#f_size').css('display', value.endsWith('size') ? 'block' : 'none');
   $('#f_chars').css('display', value == 'chars' ? 'block' : 'none');
   $('#f_comments').css('display', value == 'comments' ? 'block' : 'none');
   $('#f_topo').css('display', value == 'topo' ? 'block' : 'none');
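Note: filterSelect() now shows the size inputs for any filter value ending in 'size', which covers both of the new options introduced in index.html below. A quick check of the condition, sketched in Python:

    assert 'nodes_size'.endswith('size') and 'predict_size'.endswith('size')
    assert not 'topo'.endswith('size')   # other filters keep #f_size hidden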
@@ -336,29 +345,47 @@ var colors = ['red', 'orange', 'yellow', 'lime', 'green', 'olive', 'cyan', 'dark
         'blue', 'navy', 'magenta', 'purple', 'deeppink', 'brown'] //'black';
 var alphabet = 'abcdefghijklmnopqrstuvwxyz';

+function getStringHash(str) {
+  var hash = 0, i, chr;
+  /*
+  for (i = 0; i < str.length; i++) {
+    chr = str.charCodeAt(i);
+    hash = ((hash << 5) - hash) + chr;
+    hash |= 0; // Convert to 32bit integer
+  }
+  */
+  hash = str.charCodeAt(0) + str.charCodeAt(1);
+  return hash;
+}
+
 function getCountryColor(props) {
   var country_name = props.country_name;
   if (!country_name)
     return 'black';
-  var firstLetter = country_name[0].toLowerCase();
-  var index = alphabet.indexOf(firstLetter);
-  if (index === -1)
-    return 'black';
-  var indexInColors = index % colors.length;
+  var hash = getStringHash(country_name);
+  var indexInColors = Math.abs(hash) % colors.length;
   return colors[indexInColors];
 }
 function getColor(props) {
   var color = STYLE_BORDER.color;
   var fType = $('#f_type').val();
-  if( fType == 'size' ) {
+  if( fType == 'nodes_size' ) {
     if( props['count_k'] <= 0 )
       color = FILL_ZERO;
     else if( props['count_k'] * BYTES_FOR_NODE < size_good * 1024 * 1024 )
       color = FILL_TOO_SMALL;
     else if( props['count_k'] * BYTES_FOR_NODE > size_bad * 1024 * 1024 )
       color = FILL_TOO_BIG;
-  } else if( fType == 'topo' ) {
+  } else if( fType == 'predict_size' ) {
+    if( props['mwm_size_est'] <= 0 )
+      color = FILL_ZERO;
+    else if( props['mwm_size_est'] < size_good * 1024 )
+      color = FILL_TOO_SMALL;
+    else if( props['mwm_size_est'] > size_bad * 1024 )
+      color = FILL_TOO_BIG;
+  }
+  else if( fType == 'topo' ) {
     var rings = countRings([0, 0], props.layer);
     if( rings[1] > 0 )
       color = FILL_TOO_BIG;
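Note: two details of the hunks above, illustrated with a hedged Python sketch (the names and sizes are hypothetical, not commit code). First, getStringHash() keeps only the first two character codes, so country names sharing their first two letters collide and receive the same color. Second, the predict_size branch compares mwm_size_est, stored in kilobytes, against size_good/size_bad, which are megabytes, hence the * 1024:

    def get_string_hash(name):
        # Python transcription of the simplified getStringHash()
        return ord(name[0]) + ord(name[1])

    # 'Au' collision: both hash to 65 + 117 = 182 and share a color
    assert get_string_hash('Austria') == get_string_hash('Australia')

    size_good, size_bad = 50, 70             # MB, as derived in checkHasOSM()
    mwm_size_est = 80000                     # KB (~78 MB), hypothetical region
    assert mwm_size_est > size_bad * 1024    # colored FILL_TOO_BIG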
@@ -471,7 +498,7 @@ function bJOSM() {
 function bJosmOld() {
   var b = map.getBounds();
   importInJOSM('josm', {
-    'table': OLD_BORDERS_NAME,
+    'table': config.OLD_BORDERS_NAME,
     'xmin': b.getWest(),
     'xmax': b.getEast(),
     'ymin': b.getSouth(),
@@ -508,7 +535,7 @@ function finishRename() {
 }

 function bToggleRename() {
-  if( !selectedId || !(selectedId in borders) || readonly )
+  if( !selectedId || !(selectedId in borders) || config.READONLY )
     return;
   var rename_el = $('#rename');
   if (rename_el.is(':hidden')) {
@@ -952,11 +979,8 @@ function clearDivideLayers() {

 function bDividePreview() {
   var auto_divide = $('#auto_divide').prop('checked');
-  if (auto_divide && (
-        !$('#city_population_thr').val() ||
-        !$('#cluster_population_thr').val())
-  ) {
-    alert('Fill population thresholds');
+  if (auto_divide && !$('#mwm_size_thr').val()) {
+    alert('Fill mwm size threshold');
     return;
   }
   clearDivideLayers();
@@ -970,8 +994,7 @@ function bDividePreview() {
     'apply_to_similar': apply_to_similar
   };
   if (auto_divide) {
-    params['city_population_thr'] = $('#city_population_thr').val();
-    params['cluster_population_thr'] = $('#cluster_population_thr').val();
+    params['mwm_size_thr'] = parseInt($('#mwm_size_thr').val()) * 1024;
   }
   $.ajax(getServer('divpreview'), {
     data: params,
@@ -1025,8 +1048,7 @@ function bDivideDo() {
     'apply_to_similar': apply_to_similar
   };
   if (auto_divide) {
-    params['city_population_thr'] = $('#city_population_thr').val();
-    params['cluster_population_thr'] = $('#cluster_population_thr').val();
+    params['mwm_size_thr'] = parseInt($('#mwm_size_thr').val()) * 1024;
   }
   $.ajax(getServer('divide'), {
     data: params,
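Note: both bDividePreview() and bDivideDo() now send a single mwm_size_thr parameter in place of the two population thresholds; the dialog field holds megabytes and is converted to kilobytes for the server. A worked example in Python, assuming the dialog's default of 70 MB:

    mwm_size_thr_mb = int('70')                    # value of the #mwm_size_thr input
    params_mwm_size_thr = mwm_size_thr_mb * 1024   # 71680 KB, sent to divpreview/divide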
@@ -1,4 +1,5 @@
 const BYTES_FOR_NODE = 8;
+const IMPORT_ENABLED = true;

 const SELF_URL = document.location.origin;

102
web/app/subregions.py
Normal file
@@ -0,0 +1,102 @@
+import config
+from mwm_size_predictor import MwmSizePredictor
+
+
+osm_table = config.OSM_TABLE
+osm_places_table = config.OSM_PLACES_TABLE
+size_predictor = MwmSizePredictor()
+
+
+def get_subregions_info(conn, region_id, region_table,
+                        next_level, need_cities=False):
+    """
+    :param conn: psycopg2 connection
+    :param region_id: id of the region in region_table
+    :param region_table: either TABLE or OSM_TABLE from config.py
+    :param next_level: admin level of subregions to find
+    :param need_cities: whether to include the city list in the result
+    :return: dict {subregion_id => subregion data} including area and population info
+    """
+    subregions = _get_subregions_basic_info(conn, region_id, region_table,
+                                            next_level, need_cities)
+    _add_population_data(conn, subregions, need_cities)
+    _add_mwm_size_estimation(subregions)
+    keys = ('name', 'mwm_size_est')
+    if need_cities:
+        keys = keys + ('cities',)
+    return {subregion_id: {k: subregion_data[k] for k in keys}
+            for subregion_id, subregion_data in subregions.items()
+           }
+
+
+def _get_subregions_basic_info(conn, region_id, region_table,
+                               next_level, need_cities):
+    cursor = conn.cursor()
+    region_id_column, region_geom_column = (
+        ('id', 'geom') if region_table == config.TABLE else
+        ('osm_id', 'way')
+    )
+    cursor.execute(f"""
+        SELECT subreg.osm_id, subreg.name, ST_Area(geography(subreg.way))/1.0E+6 area
+        FROM {region_table} reg, {osm_table} subreg
+        WHERE reg.{region_id_column} = %s AND subreg.admin_level = %s AND
+              ST_Contains(reg.{region_geom_column}, subreg.way)
+        """, (region_id, next_level)
+    )
+    subregions = {}
+    for rec in cursor:
+        subregion_data = {
+            'osm_id': rec[0],
+            'name': rec[1],
+            'area': rec[2],
+            'urban_pop': 0,
+            'city_cnt': 0,
+            'hamlet_cnt': 0
+        }
+        if need_cities:
+            subregion_data['cities'] = []
+        subregions[rec[0]] = subregion_data
+    return subregions
+
+
+def _add_population_data(conn, subregions, need_cities):
+    # Assumes subregions is non-empty; otherwise the IN (...) list below is invalid.
+    cursor = conn.cursor()
+    subregion_ids = ','.join(str(x) for x in subregions.keys())
+    cursor.execute(f"""
+        SELECT b.osm_id, p.name, COALESCE(p.population, 0), p.place
+        FROM {osm_table} b, {osm_places_table} p
+        WHERE b.osm_id IN ({subregion_ids})
+            AND ST_CONTAINS(b.way, p.center)
+        """
+    )
+    for subregion_id, place_name, place_population, place_type in cursor:
+        subregion_data = subregions[subregion_id]
+        if place_type in ('city', 'town'):
+            subregion_data['city_cnt'] += 1
+            subregion_data['urban_pop'] += place_population
+            if need_cities:
+                subregion_data['cities'].append({
+                    'name': place_name,
+                    'population': place_population
+                })
+        else:
+            subregion_data['hamlet_cnt'] += 1
+
+
+def _add_mwm_size_estimation(subregions):
+    # Feature order must match the order MwmSizePredictor expects.
+    subregions_sorted = [
+        (
+            s_id,
+            [subregions[s_id][f] for f in
+                ('urban_pop', 'area', 'city_cnt', 'hamlet_cnt')]
+        )
+        for s_id in sorted(subregions.keys())
+    ]
+
+    feature_array = [x[1] for x in subregions_sorted]
+    predictions = size_predictor.predict(feature_array)
+
+    for subregion_id, mwm_size_prediction in zip(
+            (x[0] for x in subregions_sorted),
+            predictions
+    ):
+        subregions[subregion_id]['mwm_size_est'] = mwm_size_prediction
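Note: get_subregions_info() is the module's public entry point: it collects per-subregion area, urban population, and city/hamlet counts, then feeds the four-feature vectors to MwmSizePredictor.predict(), which returns one estimate per subregion (in KB, judging by the /1024 conversions in the UI above). A minimal usage sketch, not part of the commit; the DSN and region id are hypothetical, and the table name assumes config.OSM_TABLE points at the osm_borders table from the schema:

    import psycopg2
    import subregions

    conn = psycopg2.connect('dbname=borders user=postgres')  # hypothetical DSN
    # Estimate sizes of the admin_level=4 subregions of a region
    # identified by its osm_id in the OSM borders table:
    info = subregions.get_subregions_info(conn, 60189, 'osm_borders',
                                          next_level=4, need_cities=True)
    for osm_id, data in info.items():
        print(osm_id, data['name'], round(data['mwm_size_est']), 'KB',
              len(data['cities']), 'cities')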
@@ -30,8 +30,11 @@
 #backup_saving, #backup_restoring { margin-bottom: 1em; }
 #filefm, #old_action, #josm_old, #cross_actions { display: none; }
 #h_iframe { display: block; width: 100%; height: 80px; }
+a, a:hover, a:visited { color: blue; }
 #start_over, #start_over:hover, #start_over:visited { color: red; }
-#city_population_thr, #cluster_population_thr { max-width: 80px;}
+#population_thresholds { padding-left: 1.5em; }
+#mwm_size_thr { max-width: 50px;}
+#r_green, #r_red { width: 40px; }
 #b_import { max-width: 180px; }
 #import_div { position: relative; display: none; }
 #hide_import_button {
@@ -44,7 +47,7 @@
   align-items: center;
   justify-content: center;
   cursor: pointer;
 }

 </style>
 </head>
@@ -54,7 +57,8 @@
 <div id="filter">
 Раскраска по <select size="1" id="f_type" value="size" onchange="filterSelect()">
   <option value="country">стране</option>
-  <option value="size">размеру</option>
+  <option value="nodes_size">размеру по точкам</option>
+  <option value="predict_size">предсказ. размеру</option>
   <option value="topo">топологии</option>
   <option value="chars">буквам в назв.</option>
   <option value="comments">комментариям</option>
@@ -138,7 +142,8 @@
 </div>
 <div id="potential_parents">
 </div>
-<b>Оценка размера:</b> <span id="b_size"></span><br>
+<b>Оценка размера по точкам:</b> <span id="b_size"></span><br>
+<b>Оценка размера по нас+пл:</b> <span id="pa_size"></span><br>
 <b>Последняя правка:</b> <span id="b_date"></span><br>
 <b>Количество точек:</b> <span id="b_nodes"></span><br>
 <b>Площадь:</b> <span id="b_area"></span> км²<br>
@@ -188,20 +193,18 @@
   <button onclick="bPointCancel()">Вернуться</button>
 </div>
 <div id="divide" class="actions">
   Выбранная область <span id="region_to_divide"></span>
   будет заменена вложенными областями уровня
   <input type="number" id="next_level" min="2" max="12">.<br>
   <br>
   <div>
     <input type="checkbox" id="auto_divide" checked>
     <label for="auto_divide">Автослияние по населению</label>
-  </div>
-  <div id="population_thresholds">
-    Порог населения города, региона<br>
-    <input id="city_population_thr" type="number"
-           min="0" max="8000000000" value="500000" step="50000">,
-    <input id="cluster_population_thr" type="number"
-           min="0" max="8000000000" value="500000" step="50000">
+    <div id="population_thresholds">
+      Верхняя граница размера mwm:
+      <input id="mwm_size_thr" type="number"
+             min="1" value="70" step="1"> Мб
+    </div>
   </div>
   <div>
     <input type="checkbox" id="apply_to_similar">