diff --git a/.gitignore b/.gitignore index 8d35cb3..c266415 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ __pycache__ *.pyc +.idea +nohup.out diff --git a/db/create_tables.sql b/db/create_tables.sql index ac098b7..b214ae7 100644 --- a/db/create_tables.sql +++ b/db/create_tables.sql @@ -14,7 +14,7 @@ CREATE TABLE borders ( count_k INTEGER, modified TIMESTAMP NOT NULL, cmnt VARCHAR(500), - mwm_size_est double precision + mwm_size_est REAL ); CREATE INDEX borders_geom_gits_idx ON borders USING gist (geom); CREATE INDEX borders_parent_id_idx ON borders (parent_id); @@ -29,15 +29,15 @@ CREATE TABLE borders_backup ( count_k INTEGER, modified TIMESTAMP NOT NULL, cmnt VARCHAR(500), - mwm_size_est double precision, + mwm_size_est REAL, PRIMARY KEY (backup, id) ); CREATE TABLE splitting ( osm_border_id BIGINT NOT NULL REFERENCES osm_borders(osm_id), -- reference to parent osm region subregion_ids BIGINT[] NOT NULL, - mwm_size_est double precision NOT NULL, - mwm_size_thr double precision NOT NULL, + mwm_size_est REAL NOT NULL, + mwm_size_thr INTEGER NOT NULL, -- mwm size threshold in Kb, 4-bytes INTEGER is enough geom geometry NOT NULL ); CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr); diff --git a/docker-compose.yaml b/docker-compose.yaml index 2854dc1..57a9140 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -21,13 +21,14 @@ services: context: ./db dockerfile: Dockerfile.db args: - PLANET_URL: http://generator.testdata.mapsme.cloud.devmail.ru/planet/planet-latest.o5m - PLANET_URL_external: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf - PLANET_URL_small: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf + PLANET_URL_full: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf + PLANET_URL: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf container_name: db restart: always environment: POSTGRES_HOST_AUTH_METHOD: password POSTGRES_USER: postgres POSTGRES_PASSWORD: postgres - + ports: + - 
"55432:5432" + diff --git a/web/Dockerfile.web b/web/Dockerfile.web index 5778d0c..9f12b71 100644 --- a/web/Dockerfile.web +++ b/web/Dockerfile.web @@ -1,6 +1,6 @@ FROM tiangolo/uwsgi-nginx-flask:latest -RUN pip install flask_cors flask_compress psycopg2 unidecode +RUN pip install flask_cors flask_compress psycopg2 unidecode numpy scikit-learn COPY app /app COPY ./uwsgi.ini /app diff --git a/web/app/auto_split.py b/web/app/auto_split.py index 44cbb65..0c0e538 100644 --- a/web/app/auto_split.py +++ b/web/app/auto_split.py @@ -6,20 +6,21 @@ from collections import defaultdict from config import ( AUTOSPLIT_TABLE as autosplit_table, - TABLE as table, - OSM_TABLE as osm_table + OSM_TABLE as osm_table, + MWM_SIZE_THRESHOLD, ) +from subregions import get_subregions_info class DisjointClusterUnion: """Disjoint set union implementation for administrative subregions.""" - def __init__(self, region_id, subregions, thresholds): + def __init__(self, region_id, subregions, mwm_size_thr=None): self.region_id = region_id self.subregions = subregions - self.city_population_thr, self.cluster_population_thr = thresholds + self.mwm_size_thr = mwm_size_thr or MWM_SIZE_THRESHOLD self.representatives = {sub_id: sub_id for sub_id in subregions} - # a cluster is one or more subregions with common borders + # A cluster is one or more subregions with common borders self.clusters = {} # representative => cluster object # At the beginning, each subregion forms a cluster. 
@@ -28,27 +29,20 @@ class DisjointClusterUnion: self.clusters[subregion_id] = { 'representative': subregion_id, 'subregion_ids': [subregion_id], - 'population': data['population'], - 'big_cities_cnt': sum(1 for c in data['cities'] if self.is_city_big(c)), + 'mwm_size_est': data['mwm_size_est'], 'finished': False, # True if the cluster cannot be merged with another } - - def is_city_big(self, city): - return city['population'] >= self.city_population_thr - def get_smallest_cluster(self): """Find minimal cluster without big cities.""" smallest_cluster_id = min( filter( - lambda cluster_id: ( - not self.clusters[cluster_id]['finished'] and - self.clusters[cluster_id]['big_cities_cnt'] == 0) - , + lambda cluster_id: + not self.clusters[cluster_id]['finished'], self.clusters.keys() ), default=None, - key=lambda cluster_id: self.clusters[cluster_id]['population'] + key=lambda cluster_id: self.clusters[cluster_id]['mwm_size_est'] ) return smallest_cluster_id @@ -63,9 +57,9 @@ class DisjointClusterUnion: self.representatives[subregion_id] = representative return representative - def get_cluster_population(self, subregion_id): + def get_cluster_mwm_size_est(self, subregion_id): cluster_id = self.find_cluster(subregion_id) - return self.clusters[cluster_id]['population'] + return self.clusters[cluster_id]['mwm_size_est'] def get_cluster_count(self): return len(self.clusters) @@ -77,8 +71,7 @@ class DisjointClusterUnion: r_cluster = self.clusters[retained_cluster_id] d_cluster = self.clusters[dropped_cluster_id] r_cluster['subregion_ids'].extend(d_cluster['subregion_ids']) - r_cluster['population'] += d_cluster['population'] - r_cluster['big_cities_cnt'] += d_cluster['big_cities_cnt'] + r_cluster['mwm_size_est'] += d_cluster['mwm_size_est'] del self.clusters[dropped_cluster_id] self.representatives[dropped_cluster_id] = retained_cluster_id return retained_cluster_id @@ -95,52 +88,13 @@ class DisjointClusterUnion: return subregion_ids -def 
enrich_with_population_and_cities(conn, subregions): - cursor = conn.cursor() - ids = ','.join(str(x) for x in subregions.keys()) - cursor.execute(f""" - SELECT b.osm_id, c.name, c.population - FROM {osm_table} b, osm_cities c - WHERE b.osm_id IN ({ids}) AND ST_CONTAINS(b.way, c.center) - """ - ) - for rec in cursor: - sub_id = int(rec[0]) - subregions[sub_id]['cities'].append({ - 'name': rec[1], - 'population': int(rec[2]) - }) - subregions[sub_id]['population'] += int(rec[2]) - - -def find_subregions(conn, region_id, next_level): - cursor = conn.cursor() - cursor.execute(f""" - SELECT subreg.osm_id, subreg.name - FROM {osm_table} reg, {osm_table} subreg - WHERE reg.osm_id = %s AND subreg.admin_level = %s AND - ST_Contains(reg.way, subreg.way) - """, - (region_id, next_level) - ) - subregions = { - int(rec[0]): - { - 'osm_id': int(rec[0]), - 'name': rec[1], - 'population': 0, - 'cities': [] - } - for rec in cursor - } - if subregions: - enrich_with_population_and_cities(conn, subregions) - return subregions - - -def get_best_cluster_to_join_with(small_cluster_id, dcu: DisjointClusterUnion, common_border_matrix): +def get_best_cluster_to_join_with(small_cluster_id, + dcu: DisjointClusterUnion, + common_border_matrix): if small_cluster_id not in common_border_matrix: - return None # this may be if a subregion is isolated, like Bezirk Lienz inside Tyrol, Austria + # This may be if a subregion is isolated, + # like Bezirk Lienz inside Tyrol, Austria + return None common_borders = defaultdict(lambda: 0.0) # cluster representative => common border length subregion_ids = dcu.get_cluster_subregion_ids(small_cluster_id) for subregion_id in subregion_ids: @@ -148,29 +102,26 @@ def get_best_cluster_to_join_with(small_cluster_id, dcu: DisjointClusterUnion, c other_cluster_id = dcu.find_cluster(other_subregion_id) if other_cluster_id != small_cluster_id: common_borders[other_cluster_id] += length - #print(f"common_borders={json.dumps(common_borders)} of len 
{len(common_borders)}") - #common_borders = {k:v for k,v in common_borders.items() if v > 0.0} if not common_borders: return None total_common_border_length = sum(common_borders.values()) - total_adjacent_population = sum(dcu.get_cluster_population(x) for x in common_borders) + total_adjacent_mwm_size_est = sum(dcu.get_cluster_mwm_size_est(x) for x in common_borders) choice_criterion = ( ( lambda cluster_id: ( - common_borders[cluster_id]/total_common_border_length + - -dcu.get_cluster_population(cluster_id)/total_adjacent_population + common_borders[cluster_id]/total_common_border_length + + -dcu.get_cluster_mwm_size_est(cluster_id)/total_adjacent_mwm_size_est ) - ) if total_adjacent_population else + ) if total_adjacent_mwm_size_est else lambda cluster_id: ( common_borders[cluster_id]/total_common_border_length ) ) - small_cluster_population = dcu.get_cluster_population(small_cluster_id) best_cluster_id = max( filter( lambda cluster_id: ( - small_cluster_population + dcu.get_cluster_population(cluster_id) - <= dcu.cluster_population_thr + dcu.clusters[small_cluster_id]['mwm_size_est'] + + dcu.clusters[cluster_id]['mwm_size_est'] <= dcu.mwm_size_thr ), common_borders.keys() ), @@ -207,39 +158,31 @@ def calculate_common_border_matrix(conn, subregion_ids): def find_golden_splitting(conn, border_id, next_level, - country_region_name, thresholds): - subregions = find_subregions(conn, border_id, next_level) + country_region_name, mwm_size_thr): + subregions = get_subregions_info(conn, border_id, osm_table, + next_level, need_cities=True) if not subregions: - print(f"No subregions for {border_id} {country_region_name}") return - dcu = DisjointClusterUnion(border_id, subregions, thresholds) + dcu = DisjointClusterUnion(border_id, subregions, mwm_size_thr) #save_splitting_to_file(dcu, f'all_{country_region_name}') all_subregion_ids = dcu.get_all_subregion_ids() common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids) i = 0 while True: - with 
open(f"clusters-{i:02d}.json", 'w') as f: - json.dump(dcu.clusters, f, ensure_ascii=False, indent=2) if dcu.get_cluster_count() == 1: return dcu i += 1 - #print(f"i = {i}") smallest_cluster_id = dcu.get_smallest_cluster() if not smallest_cluster_id: - return dcu # TODO: return target splitting - #print(f"smallest cluster = {json.dumps(dcu.clusters[smallest_cluster_id])}") + return dcu best_cluster_id = get_best_cluster_to_join_with(smallest_cluster_id, dcu, common_border_matrix) - if not best_cluster_id: # !!! a case for South West England and popul 500000 - dcu.mark_cluster_finished(smallest_cluster_id) + if not best_cluster_id: + dcu.mark_cluster_finished(smallest_cluster_id) continue assert (smallest_cluster_id != best_cluster_id), f"{smallest_cluster_id}" - #print(f"best cluster = {json.dumps(dcu.clusters[best_cluster_id])}") - new_cluster_id = dcu.union(smallest_cluster_id, best_cluster_id) - #print(f"{json.dumps(dcu.clusters[new_cluster_id])}") - #print() - #import sys; sys.exit() + dcu.union(smallest_cluster_id, best_cluster_id) return dcu @@ -279,6 +222,9 @@ def write_polygons_to_poly(file, polygons, name_prefix): def save_splitting_to_file(conn, dcu: DisjointClusterUnion, filename_prefix=None): + """May be used for debugging""" + GENERATE_ALL_POLY=False + FOLDER='split_results' with open(f"{FOLDER}/{filename_prefix}.poly", 'w') as poly_file: poly_file.write(f"{filename_prefix}\n") for cluster_id, data in dcu.clusters.items(): @@ -297,7 +243,7 @@ def save_splitting_to_file(conn, dcu: DisjointClusterUnion, filename_prefix=None poly_file.write('END\n') with open(f"{FOLDER}/{filename_prefix}-splitting.json", 'w') as f: json.dump(dcu.clusters, f, ensure_ascii=False, indent=2) - + def save_splitting_to_db(conn, dcu: DisjointClusterUnion): cursor = conn.cursor() @@ -305,8 +251,7 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion): cursor.execute(f""" DELETE FROM {autosplit_table} WHERE osm_border_id = {dcu.region_id} - AND city_population_thr = 
{dcu.city_population_thr} - AND cluster_population_thr = {dcu.cluster_population_thr} + AND mwm_size_thr = {dcu.mwm_size_thr} """) for cluster_id, data in dcu.clusters.items(): subregion_ids = data['subregion_ids'] @@ -314,20 +259,19 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion): cluster_geometry_sql = get_union_sql(subregion_ids) cursor.execute(f""" INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom, - city_population_thr, cluster_population_thr) + mwm_size_thr, mwm_size_est) VALUES ( {dcu.region_id}, '{{{','.join(str(x) for x in subregion_ids)}}}', ({cluster_geometry_sql}), - {dcu.city_population_thr}, - {dcu.cluster_population_thr} + {dcu.mwm_size_thr}, + {data['mwm_size_est']} ) - """) + """) conn.commit() - + def get_region_and_country_names(conn, region_id): - #if region_id != -1574364: return cursor = conn.cursor() try: cursor.execute( @@ -355,18 +299,15 @@ def get_region_and_country_names(conn, region_id): print(f"Many countries for region '{region_name}' id={region_id}") return region_name, country_name -DEFAULT_CITY_POPULATION_THRESHOLD = 500000 -DEFAULT_CLUSTER_POPULATION_THR = 500000 def split_region(conn, region_id, next_level, - thresholds=(DEFAULT_CITY_POPULATION_THRESHOLD, - DEFAULT_CLUSTER_POPULATION_THR), + mwm_size_thr, save_to_files=False): region_name, country_name = get_region_and_country_names(conn, region_id) region_name = region_name.replace('/', '|') country_region_name = f"{country_name}_{region_name}" if country_name else region_name dcu = find_golden_splitting(conn, region_id, next_level, - country_region_name, thresholds) + country_region_name, mwm_size_thr) if dcu is None: return @@ -378,30 +319,6 @@ def save_splitting(dcu: DisjointClusterUnion, conn, save_splitting_to_db(conn, dcu) if save_to_files: print(f"Saving {country_region_name}") - filename_prefix = f"{country_region_name}-{dcu.city_population_thrR}" + filename_prefix = f"{country_region_name}-{dcu.mwm_size_thr}" 
save_splitting_to_file(conn, dcu, filename_prefix) - -GENERATE_ALL_POLY=False -FOLDER='split_results' -#CITY_POPULATION_THR = 500000 -#CLUSTER_POPULATION_THR = 500000 - -if __name__ == '__main__': - conn = psycopg2.connect("dbname=az_gis3") - - PREFIX = "UBavaria" - CITY_POPULATION_THR = 500000 - CLUSTER_POPULATION_THR = 500000 - - region_id = -162050 # -165475 # California ## -162050 # Florida - region_id = -2145274 # Upper Bavaria - #region_id = -151339 # South West England - #region_id = -58446 # Scotland - dcu = find_golden_splitting(region_id) - make_polys(dcu.clusters) - with open(f"{PREFIX}_{CITY_POPULATION_THR}_splitting{region_id}-poplen.json", 'w') as f: - json.dump(dcu.clusters, f, ensure_ascii=False, indent=2) - - - diff --git a/web/app/borders_api.py b/web/app/borders_api.py index 422ce65..b725ad0 100755 --- a/web/app/borders_api.py +++ b/web/app/borders_api.py @@ -25,6 +25,7 @@ from countries_structure import ( create_countries_initial_structure, get_osm_border_name_by_osm_id, ) +from subregions import get_subregions_info try: from lxml import etree @@ -78,7 +79,8 @@ def fetch_borders(**kwargs): query = f""" SELECT name, geometry, nodes, modified, disabled, count_k, cmnt, (CASE WHEN area = 'NaN' THEN 0 ELSE area END) AS area, - id, admin_level, parent_id, parent_name + id, admin_level, parent_id, parent_name, + mwm_size_est FROM ( SELECT name, ST_AsGeoJSON({geom}, 7) as geometry, @@ -95,7 +97,8 @@ def fetch_borders(**kwargs): parent_id, ( SELECT name FROM {table} WHERE id = t.parent_id - ) AS parent_name + ) AS parent_name, + mwm_size_est FROM {table} t WHERE ({where_clause}) {leaves_filter} ) q @@ -112,18 +115,19 @@ def fetch_borders(**kwargs): 'disabled': rec[4], 'count_k': rec[5], 'comment': rec[6], 'area': rec[7], - 'id': region_id, 'country_id': country_id, + 'id': region_id, 'admin_level': rec[9], 'parent_id': rec[10], 'parent_name': rec[11] or '', - 'country_name': country_name + 'country_id': country_id, + 'country_name': country_name, + 
'mwm_size_est': rec[12] } feature = {'type': 'Feature', 'geometry': json.loads(rec[1]), 'properties': props } borders.append(feature) - #print([x['properties'] for x in borders]) return borders def simplify_level_to_postgis_value(simplify_level): @@ -228,8 +232,8 @@ def query_crossing(): pass return jsonify(type='FeatureCollection', features=result) -@app.route('/tables') -def check_osm_table(): +@app.route('/config') +def get_server_configuration(): osm = False backup = False old = [] @@ -260,7 +264,9 @@ def check_osm_table(): crossing = True except psycopg2.Error as e: pass - return jsonify(osm=osm, tables=old, readonly=config.READONLY, backup=backup, crossing=crossing) + return jsonify(osm=osm, tables=old, readonly=config.READONLY, + backup=backup, crossing=crossing, + mwm_size_thr=config.MWM_SIZE_THRESHOLD) @app.route('/search') def search(): @@ -341,9 +347,10 @@ def join_borders(): cur.execute(f""" UPDATE {table} SET id = {free_id}, - geom = ST_Union(geom, b2.g), + geom = ST_Union({table}.geom, b2.geom), + mwm_size_est = {table}.mwm_size_est + b2.mwm_size_est, count_k = -1 - FROM (SELECT geom AS g FROM {table} WHERE id = %s) AS b2 + FROM (SELECT geom, mwm_size_est FROM {table} WHERE id = %s) AS b2 WHERE id = %s""", (region_id2, region_id1)) cur.execute(f"DELETE FROM {table} WHERE id = %s", (region_id2,)) except psycopg2.Error as e: @@ -630,24 +637,23 @@ def divide_preview(): if not is_admin: return jsonify(status="Could not apply auto-division to non-administrative regions") try: - city_population_thr = int(request.args.get('city_population_thr')) - cluster_population_thr = int(request.args.get('cluster_population_thr')) + mwm_size_thr = int(request.args.get('mwm_size_thr')) except ValueError: return jsonify(status='Not a number in thresholds.') return divide_into_clusters_preview( region_ids, next_level, - (city_population_thr, cluster_population_thr)) + mwm_size_thr) else: return divide_into_subregions_preview(region_ids, next_level) -def 
get_subregions(region_ids, next_level): +def get_subregions_for_preview(region_ids, next_level): subregions = list(itertools.chain.from_iterable( - get_subregions_one(region_id, next_level) + get_subregions_one_for_preview(region_id, next_level) for region_id in region_ids )) return subregions -def get_subregions_one(region_id, next_level): +def get_subregions_one_for_preview(region_id, next_level): osm_table = config.OSM_TABLE table = config.TABLE cur = g.conn.cursor() @@ -671,28 +677,28 @@ def get_subregions_one(region_id, next_level): subregions.append(feature) return subregions -def get_clusters(region_ids, next_level, thresholds): +def get_clusters_for_preview(region_ids, next_level, thresholds): clusters = list(itertools.chain.from_iterable( - get_clusters_one(region_id, next_level, thresholds) + get_clusters_for_preview_one(region_id, next_level, thresholds) for region_id in region_ids )) return clusters -def get_clusters_one(region_id, next_level, thresholds): +def get_clusters_for_preview_one(region_id, next_level, mwm_size_thr): autosplit_table = config.AUTOSPLIT_TABLE cursor = g.conn.cursor() where_clause = f""" osm_border_id = %s - AND city_population_thr = %s - AND cluster_population_thr = %s + AND mwm_size_thr = %s """ - splitting_sql_params = (region_id,) + thresholds + splitting_sql_params = (region_id, mwm_size_thr) cursor.execute(f""" SELECT 1 FROM {autosplit_table} WHERE {where_clause} """, splitting_sql_params) if cursor.rowcount == 0: - split_region(g.conn, region_id, next_level, thresholds) + split_region(g.conn, region_id, next_level, mwm_size_thr) + cursor.execute(f""" SELECT subregion_ids[1], ST_AsGeoJSON(ST_SimplifyPreserveTopology(geom, 0.01)) as way FROM {autosplit_table} @@ -700,23 +706,24 @@ def get_clusters_one(region_id, next_level, thresholds): """, splitting_sql_params) clusters = [] for rec in cursor: - cluster = { 'type': 'Feature', - 'geometry': json.loads(rec[1]), - 'properties': {'osm_id': int(rec[0])} + cluster = { + 'type': 
'Feature', + 'geometry': json.loads(rec[1]), + 'properties': {'osm_id': int(rec[0])} } clusters.append(cluster) return clusters def divide_into_subregions_preview(region_ids, next_level): - subregions = get_subregions(region_ids, next_level) + subregions = get_subregions_for_preview(region_ids, next_level) return jsonify( status='ok', subregions={'type': 'FeatureCollection', 'features': subregions} ) -def divide_into_clusters_preview(region_ids, next_level, thresholds): - subregions = get_subregions(region_ids, next_level) - clusters = get_clusters(region_ids, next_level, thresholds) +def divide_into_clusters_preview(region_ids, next_level, mwm_size_thr): + subregions = get_subregions_for_preview(region_ids, next_level) + clusters = get_clusters_for_preview(region_ids, next_level, mwm_size_thr) return jsonify( status='ok', subregions={'type': 'FeatureCollection', 'features': subregions}, @@ -744,51 +751,53 @@ def divide(): if not is_admin: return jsonify(status="Could not apply auto-division to non-administrative regions") try: - city_population_thr = int(request.args.get('city_population_thr')) - cluster_population_thr = int(request.args.get('cluster_population_thr')) + mwm_size_thr = int(request.args.get('mwm_size_thr')) except ValueError: return jsonify(status='Not a number in thresholds.') return divide_into_clusters( region_ids, next_level, - (city_population_thr, cluster_population_thr)) + mwm_size_thr) else: return divide_into_subregions(region_ids, next_level) def divide_into_subregions(region_ids, next_level): - table = config.TABLE - osm_table = config.OSM_TABLE - cur = g.conn.cursor() for region_id in region_ids: - is_admin = is_administrative_region(region_id) - if is_admin: - # TODO: rewrite SELECT into join rather than subquery to enable gist index - cur.execute(f""" - INSERT INTO {table} (id, geom, name, parent_id, modified, count_k) - SELECT osm_id, way, name, %s, now(), -1 - FROM {osm_table} - WHERE ST_Contains( - (SELECT geom FROM {table} WHERE id 
= %s), way - ) - AND admin_level = {next_level} - """, (region_id, region_id,) - ) - else: - cur.execute(f""" - INSERT INTO {table} (id, geom, name, parent_id, modified, count_k) - SELECT osm_id, way, name, (SELECT parent_id FROM {table} WHERE id = %s), now(), -1 - FROM {osm_table} - WHERE ST_Contains( - (SELECT geom FROM {table} WHERE id = %s), way - ) - AND admin_level = {next_level} - """, (region_id, region_id,) - ) - cur.execute(f"DELETE FROM {table} WHERE id = %s", (region_id,)) - + divide_into_subregions_one(region_id, next_level) g.conn.commit() return jsonify(status='ok') -def divide_into_clusters(region_ids, next_level, thresholds): +def divide_into_subregions_one(region_id, next_level): + table = config.TABLE + osm_table = config.OSM_TABLE + subregions = get_subregions_info(g.conn, region_id, table, + next_level, need_cities=False) + cursor = g.conn.cursor() + is_admin = is_administrative_region(region_id) + if is_admin: + for subregion_id, data in subregions.items(): + cursor.execute(f""" + INSERT INTO {table} + (id, geom, name, parent_id, modified, count_k, mwm_size_est) + SELECT osm_id, way, name, %s, now(), -1, {data['mwm_size_est']} + FROM {osm_table} + WHERE osm_id = %s + """, (region_id, subregion_id) + ) + else: + for subregion_id, data in subregions.items(): + cursor.execute(f""" + INSERT INTO {table} + (id, geom, name, parent_id, modified, count_k, mwm_size_est) + SELECT osm_id, way, name, + (SELECT parent_id FROM {table} WHERE id = %s), + now(), -1, {data['mwm_size_est']} + FROM {osm_table} + WHERE osm_id = %s + """, (region_id, subregion_id) + ) + cursor.execute(f"DELETE FROM {table} WHERE id = %s", (region_id,)) + +def divide_into_clusters(region_ids, next_level, mwm_size_thr): table = config.TABLE autosplit_table = config.AUTOSPLIT_TABLE cursor = g.conn.cursor() @@ -799,16 +808,15 @@ def divide_into_clusters(region_ids, next_level, thresholds): where_clause = f""" osm_border_id = %s - AND city_population_thr = %s - AND 
cluster_population_thr = %s + AND mwm_size_thr = %s """ - splitting_sql_params = (region_id,) + thresholds + splitting_sql_params = (region_id, mwm_size_thr) cursor.execute(f""" SELECT 1 FROM {autosplit_table} WHERE {where_clause} """, splitting_sql_params) if cursor.rowcount == 0: - split_region(g.conn, region_id, next_level, thresholds) + split_region(g.conn, region_id, next_level, mwm_size_thr) free_id = get_free_id() counter = 0 @@ -830,8 +838,8 @@ def divide_into_clusters(region_ids, next_level, thresholds): subregion_id = free_id name = f"{base_name}_{counter}" insert_cursor.execute(f""" - INSERT INTO {table} (id, name, parent_id, geom, modified, count_k) - SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1 + INSERT INTO {table} (id, name, parent_id, geom, modified, count_k, mwm_size_est) + SELECT {subregion_id}, %s, osm_border_id, geom, now(), -1, mwm_size_est FROM {autosplit_table} WHERE subregion_ids[1] = %s AND {where_clause} """, (name, cluster_id,) + splitting_sql_params) g.conn.commit() diff --git a/web/app/config.py b/web/app/config.py index 7b8d2e2..a6b04d2 100644 --- a/web/app/config.py +++ b/web/app/config.py @@ -8,11 +8,13 @@ READONLY = False TABLE = 'borders' # from where OSM borders are imported OSM_TABLE = 'osm_borders' +# All populated places in OSM +OSM_PLACES_TABLE = 'osm_places' # transit table for autosplitting results AUTOSPLIT_TABLE = 'splitting' -## tables with borders for reference +# tables with borders for reference OTHER_TABLES = { - #'old': 'old_borders' + #'old': 'old_borders' } # backup table BACKUP = 'borders_backup' @@ -28,3 +30,8 @@ IMPORT_ERROR_ALERT = False DAEMON_STATUS_PATH = '/tmp/borders-daemon-status.txt' DAEMON_PID_PATH = '/tmp/borders-daemon.pid' DAEMON_LOG_PATH = '/var/log/borders-daemon.log' +# mwm size threshold in Kb +MWM_SIZE_THRESHOLD = 70*1024 +# Estimated mwm size is predicted by the 'model.pkl' with 'scaler.pkl' for X +MWM_SIZE_PREDICTION_MODEL_PATH = '/app/data/model.pkl' 
+MWM_SIZE_PREDICTION_MODEL_SCALER_PATH = '/app/data/scaler.pkl' diff --git a/web/app/countries_structure.py b/web/app/countries_structure.py index cbee332..f230716 100644 --- a/web/app/countries_structure.py +++ b/web/app/countries_structure.py @@ -2,6 +2,8 @@ import itertools import config +from subregions import get_subregions_info + table = config.TABLE osm_table = config.OSM_TABLE @@ -260,43 +262,32 @@ def _clear_borders(conn): conn.commit() -def _find_subregions(conn, osm_ids, next_level, parents, names): +def _find_subregions(conn, osm_ids, next_level, regions): """Return subregions of level 'next_level' for regions with osm_ids.""" - cursor = conn.cursor() - parent_osm_ids = ','.join(str(x) for x in osm_ids) - cursor.execute(f""" - SELECT b.osm_id, b.name, subb.osm_id, subb.name - FROM {osm_table} b, {osm_table} subb - WHERE subb.admin_level=%s - AND b.osm_id IN ({parent_osm_ids}) - AND ST_Contains(b.way, subb.way) - """, - (next_level,) - ) - - # parent_osm_id => [(osm_id, name), (osm_id, name), ...] 
subregion_ids = [] - - for rec in cursor: - parent_osm_id = rec[0] - osm_id = rec[2] - parents[osm_id] = parent_osm_id - name = rec[3] - names[osm_id] = name - subregion_ids.append(osm_id) + for osm_id in osm_ids: + more_subregions = get_subregions_info(conn, osm_id, table, + next_level, need_cities=False) + for subregion_id, subregion_data in more_subregions.items(): + region_data = regions.setdefault(subregion_id, {}) + region_data['name'] = subregion_data['name'] + region_data['mwm_size_est'] = subregion_data['mwm_size_est'] + region_data['parent_id'] = osm_id + subregion_ids.append(subregion_id) return subregion_ids -def _create_regions(conn, osm_ids, parents, names): +def _create_regions(conn, osm_ids, regions): if not osm_ids: return osm_ids = list(osm_ids) # to ensure order cursor = conn.cursor() sql_values = ','.join( f'({osm_id},' - '%s,' + '%s,' + f"{regions[osm_id].get('parent_id', 'NULL')}," + f"{regions[osm_id].get('mwm_size_est', 'NULL')}," f'(SELECT way FROM {osm_table} WHERE osm_id={osm_id}),' - f'{parents[osm_id] or "NULL"},' 'now())' for osm_id in osm_ids ) @@ -304,21 +295,23 @@ def _create_regions(conn, osm_ids, parents, names): #print(f"names={tuple(names[osm_id] for osm_id in osm_ids)}") #print(f"all parents={parents}") cursor.execute(f""" - INSERT INTO {table} (id, name, geom, parent_id, modified) + INSERT INTO {table} (id, name, parent_id, mwm_size_est, geom, modified) VALUES {sql_values} - """, tuple(names[osm_id] for osm_id in osm_ids) + """, tuple(regions[osm_id]['name'] for osm_id in osm_ids) ) def _make_country_structure(conn, country_osm_id): - names = {} # osm_id => osm name - parents = {} # osm_id => parent_osm_id + regions = {} # osm_id: { 'name': name, + # 'mwm_size_est': size, + # 'parent_id': parent_id } country_name = get_osm_border_name_by_osm_id(conn, country_osm_id) - names[country_osm_id] = country_name - parents[country_osm_id] = None + country_data = regions.setdefault(country_osm_id, {}) + country_data['name'] = 
country_name + # TODO: country_data['mwm_size_est'] = ... - _create_regions(conn, [country_osm_id], parents, names) + _create_regions(conn, [country_osm_id], regions) if country_initial_levels.get(country_name): admin_levels = country_initial_levels[country_name] @@ -332,18 +325,19 @@ def _make_country_structure(conn, country_osm_id): f"AL={admin_level}, prev-AL={prev_level}" ) subregion_ids = _find_subregions(conn, prev_region_ids, - admin_level, parents, names) - _create_regions(conn, subregion_ids, parents, names) + admin_level, regions) + _create_regions(conn, subregion_ids, regions) prev_region_ids = subregion_ids def create_countries_initial_structure(conn): _clear_borders(conn) cursor = conn.cursor() + # TODO: process overlapping countries, like Ukraine and Russia with common Crimea cursor.execute(f""" SELECT osm_id, name FROM {osm_table} - WHERE admin_level = 2 + WHERE admin_level = 2 and name != 'Ukraine' """ # and name in --('Germany', 'Luxembourg', 'Austria') # ({','.join(f"'{c}'" for c in country_initial_levels.keys())}) diff --git a/web/app/data/model.pkl b/web/app/data/model.pkl new file mode 100644 index 0000000..412a7a8 Binary files /dev/null and b/web/app/data/model.pkl differ diff --git a/web/app/data/mwm_data.xlsx b/web/app/data/mwm_data.xlsx new file mode 100644 index 0000000..bc2b513 Binary files /dev/null and b/web/app/data/mwm_data.xlsx differ diff --git a/web/app/data/prediction_model.py b/web/app/data/prediction_model.py new file mode 100644 index 0000000..5a0de5f --- /dev/null +++ b/web/app/data/prediction_model.py @@ -0,0 +1,119 @@ +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import train_test_split +from sklearn.model_selection import ( + cross_val_score, + KFold, + GridSearchCV, +) +from sklearn.svm import SVR +from sklearn.preprocessing import StandardScaler + + +data = pd.read_excel('mwm_data.xlsx', sheet_name='mwms_all', header=1) +data = 
data[data['exclude'] == 0] +#data['is_urban2'] = data.apply(lambda row: row['pop_density'] > 260, axis=1) # 260 - median of pop_density + +popul_column = 'urban_pop' # options are 'population and 'urban_pop' (for population of cities and towns only) +feature_names = [popul_column, 'area', 'city_cnt', 'hamlet_cnt'] +target_name = 'size' + +for feature in set(feature_names) - set(['area']): # if area is None it's an error! + data[feature] = data[feature].fillna(0) + + +scoring = 'neg_mean_squared_error' # another option is 'r2' + + +def my_cross_validation(sample): + X = sample[feature_names] + y = sample[target_name] + + sc_X = StandardScaler() + X = sc_X.fit_transform(X) + + lin_regression = LinearRegression(fit_intercept=False) + svr_linear = SVR(kernel='linear') + svr_rbf = SVR(kernel='rbf') + + for estimator_name, estimator in zip( + ('LinRegression', 'SVR_linear', 'SVR_rbf'), + (lin_regression, svr_linear, svr_rbf)): + cv_scores = cross_val_score(estimator, X, y, + cv=KFold(5, shuffle=True, random_state=1), + scoring=scoring) + mean_score = np.mean(cv_scores) + print(f"{estimator_name:15}", cv_scores, mean_score) + + +def my_grid_search(sample): + X = sample[feature_names] + y = sample[target_name] + + sc_X = StandardScaler() + X = sc_X.fit_transform(X) + + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=0) + + C_array = [10 ** n for n in range(6, 7)] + gamma_array = [0.009 + i * 0.001 for i in range(-7, 11, 2)] + ['auto', 'scale'] + epsilon_array = [0.5 * i for i in range(0, 15)] + coef0_array = [-0.1, -0.01, 0, 0.01, 0.1] + param_grid = [ + {'kernel': ['linear'], 'C': C_array, 'epsilon': epsilon_array}, + {'kernel': ['rbf'], 'C': C_array, 'gamma': gamma_array, 'epsilon': epsilon_array}, + {'kernel': ['poly', 'sigmoid'], + 'C': C_array, 'gamma': gamma_array, 'epsilon': epsilon_array, 'coef0': coef0_array}, + ] + + svr = SVR() + grid_search = GridSearchCV(svr, param_grid, scoring=scoring) + grid_search.fit(X_train, 
y_train) + #means = grid_search.cv_results_['mean_test_score'] + #stds = grid_search.cv_results_['std_test_score'] + #print("Grid scores on development set:") + #for mean, std, params in zip(means, stds, grid_search.cv_results_['params']): + # print("%0.3f (+/-%0.03f) for %r" % (mean, std, params)) + + print("C", C_array) + print("gamma", gamma_array) + print("epsilon", epsilon_array) + print("coef0", coef0_array) + print("Best_params:", grid_search.best_params_, grid_search.best_score_) + + +def train_and_serialize_model(sample): + X = sample[feature_names] + y = sample[target_name] + + X_head = X[0:4] + scaler = StandardScaler() + X = scaler.fit_transform(X) + + # Parameters tuned with GridSearch + regressor = SVR(kernel='rbf', C=10**6, epsilon=0.0, gamma=0.012) + regressor.fit(X, y) + + print(regressor.predict(X[0:4])) + + # Serialize model + import pickle + with open('model.pkl', 'wb') as f: + pickle.dump(regressor, f) + with open('scaler.pkl', 'wb') as f: + pickle.dump(scaler, f) + + # Deserialize model and test it on X_head samples + with open('model.pkl', 'rb') as f: + regressor2 = pickle.load(f) + with open('scaler.pkl', 'rb') as f: + scaler2 = pickle.load(f) + print(regressor2.predict(scaler2.transform(X_head))) + + +if __name__ == '__main__': + train_and_serialize_model(data) + diff --git a/web/app/data/scaler.pkl b/web/app/data/scaler.pkl new file mode 100644 index 0000000..4274cfb Binary files /dev/null and b/web/app/data/scaler.pkl differ diff --git a/web/app/mwm_size_predictor.py b/web/app/mwm_size_predictor.py new file mode 100644 index 0000000..112ff78 --- /dev/null +++ b/web/app/mwm_size_predictor.py @@ -0,0 +1,29 @@ +import numpy as np +import pickle + +import config + + +class MwmSizePredictor: + + def __init__(self): + with open(config.MWM_SIZE_PREDICTION_MODEL_PATH, 'rb') as f: + self.model = pickle.load(f) + with open(config.MWM_SIZE_PREDICTION_MODEL_SCALER_PATH, 'rb') as f: + self.scaler = pickle.load(f) + + def predict(self, features_array): 
+ """1D or 2D array of feature values for predictions. Features are + 'urban_pop', 'area', 'city_cnt', 'hamlet_cnt' as defined for the + prediction model. + """ + X = np.array(features_array) + one_prediction = (X.ndim == 1) + if one_prediction: + X = X.reshape(1, -1) + X_scaled = self.scaler.transform(X) + predictions = self.model.predict(X_scaled) + if one_prediction: + return predictions[0] + else: + return predictions.tolist() diff --git a/web/app/static/borders.js b/web/app/static/borders.js index b0bc1d1..b0329e5 100644 --- a/web/app/static/borders.js +++ b/web/app/static/borders.js @@ -3,11 +3,14 @@ var STYLE_SELECTED = { stroke: true, color: '#ff3', weight: 3, fill: true, fillO var FILL_TOO_SMALL = '#0f0'; var FILL_TOO_BIG = '#800'; var FILL_ZERO = 'black'; -var OLD_BORDERS_NAME; // filled in checkHasOSM() -var IMPORT_ENABLED = true; -var map, borders = {}, bordersLayer, selectedId, editing = false, readonly = false; -var size_good = 50, size_bad = 70; +var map, borders = {}, bordersLayer, selectedId, editing = false; +var config = { // server config + READONLY: false, + MWM_SIZE_THR: 70, + OLD_BORDERS_NAME: undefined // may be filled in getServerConfiguration() +}; +var size_good, size_bad; var maxRank = 1; var tooSmallLayer = null; var oldBordersLayer = null; @@ -68,17 +71,17 @@ function init() { else $('#population_thresholds').hide(); }); - checkHasOSM(); + getServerConfiguration(); filterSelect(true); } -function checkHasOSM() { - $.ajax(getServer('tables'), { +function getServerConfiguration() { + $.ajax(getServer('config'), { success: function(res) { if( res.osm ) $('#osm_actions').css('display', 'block'); if( res.tables && res.tables.length > 0 ) { - OLD_BORDERS_NAME = res.tables[0]; + config.OLD_BORDERS_NAME = res.tables[0]; $('#old_action').css('display', 'block'); $('#josm_old').css('display', 'inline'); } @@ -91,7 +94,7 @@ function checkHasOSM() { $('#action_buttons').css('display', 'none'); $('#import_link').css('display', 'none'); 
$('#backups').css('display', 'none'); - readonly = true; + config.READONLY = true; } if( !res.readonly && IMPORT_ENABLED ) { $('#import_link').css('display', 'none'); @@ -100,6 +103,11 @@ function checkHasOSM() { var iframe = ''; // $('#filefm').after(iframe); } + size_bad = config.MWM_SIZE_THR = Math.round(parseInt(res.mwm_size_thr)/1024); + size_good = Math.round(size_bad * 0.7 / 10) * 10; + $('#r_green').val(size_good); + $('#r_red').val(size_bad); + $('#mwm_size_thr').val(config.MWM_SIZE_THR); } }); } @@ -148,11 +156,11 @@ function updateBorders() { crossingLayer.clearLayers(); } - if( oldBordersLayer != null && OLD_BORDERS_NAME ) { + if( oldBordersLayer != null && config.OLD_BORDERS_NAME ) { oldBordersLayer.clearLayers(); $.ajax(getServer('bbox'), { data: { - 'table': OLD_BORDERS_NAME, + 'table': config.OLD_BORDERS_NAME, 'simplify': simplified, 'xmin': b.getWest(), 'xmax': b.getEast(), @@ -302,6 +310,7 @@ function selectLayer(e) { $('#b_al').text(props['admin_level'] ? '('+props['admin_level']+')' : ''); $('#b_parent_name').text(props['parent_name']); $('#b_size').text(Math.round(props['count_k'] * BYTES_FOR_NODE / 1024 / 1024) + ' MB'); + $('#pa_size').text(Math.round(props['mwm_size_est']/1024) + ' MB'); //$('#b_nodes').text(borders[selectedId].layer.getLatLngs()[0].length); $('#b_nodes').text(props['nodes']); $('#b_date').text(props['modified']); @@ -317,7 +326,7 @@ function selectLayer(e) { function filterSelect(noRefresh) { var value = $('#f_type').val(); - $('#f_size').css('display', value == 'size' ? 'block' : 'none'); + $('#f_size').css('display', value.endsWith('size') ? 'block' : 'none'); $('#f_chars').css('display', value == 'chars' ? 'block' : 'none'); $('#f_comments').css('display', value == 'comments' ? 'block' : 'none'); $('#f_topo').css('display', value == 'topo' ? 
'block' : 'none'); @@ -336,29 +345,47 @@ var colors = ['red', 'orange', 'yellow', 'lime', 'green', 'olive', 'cyan', 'dark 'blue', 'navy', 'magenta', 'purple', 'deeppink', 'brown'] //'black'; var alphabet = 'abcdefghijklmnopqrstuvwxyz'; +function getStringHash(str) { + var hash = 0, i, chr; + /* + for (i = 0; i < str.length; i++) { + chr = str.charCodeAt(i); + hash = ((hash << 5) - hash) + chr; + hash |= 0; // Convert to 32bit integer + } + */ + hash = str.charCodeAt(0) + str.charCodeAt(1); + return hash; +} + function getCountryColor(props) { var country_name = props.country_name; if (!country_name) return 'black'; - var firstLetter = country_name[0].toLowerCase(); - var index = alphabet.indexOf(firstLetter); - if (index === -1) - return 'black'; - var indexInColors = index % colors.length; + var hash = getStringHash(country_name); + var indexInColors = Math.abs(hash) % colors.length; return colors[indexInColors]; } function getColor(props) { var color = STYLE_BORDER.color; var fType = $('#f_type').val(); - if( fType == 'size' ) { + if( fType == 'nodes_size' ) { if( props['count_k'] <= 0 ) color = FILL_ZERO; else if( props['count_k'] * BYTES_FOR_NODE < size_good * 1024 * 1024 ) color = FILL_TOO_SMALL; else if( props['count_k'] * BYTES_FOR_NODE > size_bad * 1024 * 1024 ) color = FILL_TOO_BIG; - } else if( fType == 'topo' ) { + } else if( fType == 'predict_size' ) { + if( props['mwm_size_est'] <= 0 ) + color = FILL_ZERO; + else if( props['mwm_size_est'] < size_good * 1024 ) + color = FILL_TOO_SMALL; + else if( props['mwm_size_est'] > size_bad * 1024 ) + color = FILL_TOO_BIG; + } + else if( fType == 'topo' ) { var rings = countRings([0, 0], props.layer); if( rings[1] > 0 ) color = FILL_TOO_BIG; @@ -471,7 +498,7 @@ function bJOSM() { function bJosmOld() { var b = map.getBounds(); importInJOSM('josm', { - 'table': OLD_BORDERS_NAME, + 'table': config.OLD_BORDERS_NAME, 'xmin': b.getWest(), 'xmax': b.getEast(), 'ymin': b.getSouth(), @@ -508,7 +535,7 @@ function 
finishRename() { } function bToggleRename() { - if( !selectedId || !(selectedId in borders) || readonly ) + if( !selectedId || !(selectedId in borders) || config.READONLY ) return; var rename_el = $('#rename'); if (rename_el.is(':hidden')) { @@ -952,11 +979,8 @@ function clearDivideLayers() { function bDividePreview() { var auto_divide = $('#auto_divide').prop('checked'); - if (auto_divide && ( - !$('#city_population_thr').val() || - !$('#cluster_population_thr').val()) - ) { - alert('Fill population thresholds'); + if (auto_divide && !$('#mwm_size_thr').val()) { + alert('Fill mmw size threshold'); return; } clearDivideLayers(); @@ -970,8 +994,7 @@ function bDividePreview() { 'apply_to_similar': apply_to_similar }; if (auto_divide) { - params['city_population_thr'] = $('#city_population_thr').val(); - params['cluster_population_thr'] = $('#cluster_population_thr').val(); + params['mwm_size_thr'] = parseInt($('#mwm_size_thr').val()) * 1024; } $.ajax(getServer('divpreview'), { data: params, @@ -1025,8 +1048,7 @@ function bDivideDo() { 'apply_to_similar': apply_to_similar }; if (auto_divide) { - params['city_population_thr'] = $('#city_population_thr').val(); - params['cluster_population_thr'] = $('#cluster_population_thr').val(); + params['mwm_size_thr'] = parseInt($('#mwm_size_thr').val()) * 1024; } $.ajax(getServer('divide'), { data: params, diff --git a/web/app/static/config.js b/web/app/static/config.js index d788be1..2d7f45e 100644 --- a/web/app/static/config.js +++ b/web/app/static/config.js @@ -1,4 +1,5 @@ const BYTES_FOR_NODE = 8; +const IMPORT_ENABLED = true; const SELF_URL = document.location.origin; diff --git a/web/app/subregions.py b/web/app/subregions.py new file mode 100644 index 0000000..d5ffff4 --- /dev/null +++ b/web/app/subregions.py @@ -0,0 +1,102 @@ +import config +from mwm_size_predictor import MwmSizePredictor + + +osm_table = config.OSM_TABLE +osm_places_table = config.OSM_PLACES_TABLE +size_predictor = MwmSizePredictor() + + +def 
get_subregions_info(conn, region_id, region_table, + next_level, need_cities=False): + """ + :param conn: psycopg2 connection + :param region_id: + :param region_table: maybe TABLE or OSM_TABLE from config.py + :param next_level: admin level of subregions to find + :return: dict {subregion_id => subregion data} including area and population info + """ + subregions = _get_subregions_basic_info(conn, region_id, region_table, + next_level, need_cities) + _add_population_data(conn, subregions, need_cities) + _add_mwm_size_estimation(subregions) + keys = ('name', 'mwm_size_est') + if need_cities: + keys = keys + ('cities',) + return {subregion_id: {k: subregion_data[k] for k in keys} + for subregion_id, subregion_data in subregions.items() + } + + +def _get_subregions_basic_info(conn, region_id, region_table, + next_level, need_cities): + cursor = conn.cursor() + region_id_column, region_geom_column = ( + ('id', 'geom') if region_table == config.TABLE else + ('osm_id', 'way') + ) + cursor.execute(f""" + SELECT subreg.osm_id, subreg.name, ST_Area(geography(subreg.way))/1.0E+6 area + FROM {region_table} reg, {osm_table} subreg + WHERE reg.{region_id_column} = %s AND subreg.admin_level = %s AND + ST_Contains(reg.{region_geom_column}, subreg.way) + """, (region_id, next_level) + ) + subregions = {} + for rec in cursor: + subregion_data = { + 'osm_id': rec[0], + 'name': rec[1], + 'area': rec[2], + 'urban_pop': 0, + 'city_cnt': 0, + 'hamlet_cnt': 0 + } + if need_cities: + subregion_data['cities'] = [] + subregions[rec[0]] = subregion_data + return subregions + + +def _add_population_data(conn, subregions, need_cities): + cursor = conn.cursor() + subregion_ids = ','.join(str(x) for x in subregions.keys()) + cursor.execute(f""" + SELECT b.osm_id, p.name, COALESCE(p.population, 0), p.place + FROM {osm_table} b, {osm_places_table} p + WHERE b.osm_id IN ({subregion_ids}) + AND ST_CONTAINS(b.way, p.center) + """ + ) + for subregion_id, place_name, place_population, place_type in 
cursor: + subregion_data = subregions[subregion_id] + if place_type in ('city', 'town'): + subregion_data['city_cnt'] += 1 + subregion_data['urban_pop'] += place_population + if need_cities: + subregion_data['cities'].append({ + 'name': place_name, + 'population': place_population + }) + else: + subregion_data['hamlet_cnt'] += 1 + + +def _add_mwm_size_estimation(subregions): + subregions_sorted = [ + ( + s_id, + [subregions[s_id][f] for f in + ('urban_pop', 'area', 'city_cnt', 'hamlet_cnt')] + ) + for s_id in sorted(subregions.keys()) + ] + + feature_array = [x[1] for x in subregions_sorted] + predictions = size_predictor.predict(feature_array) + + for subregion_id, mwm_size_prediction in zip( + (x[0] for x in subregions_sorted), + predictions + ): + subregions[subregion_id]['mwm_size_est'] = mwm_size_prediction diff --git a/web/app/templates/index.html b/web/app/templates/index.html index a75465a..7cfff93 100644 --- a/web/app/templates/index.html +++ b/web/app/templates/index.html @@ -30,8 +30,11 @@ #backup_saving, #backup_restoring { margin-bottom: 1em; } #filefm, #old_action, #josm_old, #cross_actions { display: none; } #h_iframe { display: block; width: 100%; height: 80px; } + a, a:hover, a:visited { color: blue; } #start_over, #start_over:hover, #start_over:visited { color: red; } - #city_population_thr, #cluster_population_thr { max-width: 80px;} + #population_thresholds { padding-left: 1.5em; } + #mwm_size_thr { max-width: 50px;} + #r_green, #r_red { width: 40px; } #b_import { max-width: 180px; } #import_div { position: relative; display: none; } #hide_import_button { @@ -44,7 +47,7 @@ align-items: center; justify-content: center; cursor: pointer; - } + } @@ -54,7 +57,8 @@