From c31a2caf0a4ab2bba5929ea66ae399583e808705 Mon Sep 17 00:00:00 2001 From: Alexey Zakharenkov <35913079+alexey-zakharenkov@users.noreply.github.com> Date: Fri, 23 Oct 2020 01:13:45 +0300 Subject: [PATCH] SQL formatting --- db/Dockerfile.db | 4 +- db/create_osm_places_table.sql | 72 ++++++++++++++++------------------ db/init_databases.sh | 2 - db/load_borders.sh | 16 ++++---- db/prepare_borders.sh | 6 ++- db/tiles2pg.py | 13 ++---- docker-compose.yaml | 4 +- scripts/import_points.py | 60 ---------------------------- scripts/poly2postgis.py | 9 ++++- web/app/auto_split.py | 42 ++++++++++---------- web/app/borders_daemon.py | 15 +++---- web/app/subregions.py | 8 ++-- 12 files changed, 93 insertions(+), 158 deletions(-) delete mode 100755 scripts/import_points.py diff --git a/db/Dockerfile.db b/db/Dockerfile.db index e9bde1c..96d155b 100644 --- a/db/Dockerfile.db +++ b/db/Dockerfile.db @@ -11,9 +11,7 @@ RUN apt-get update && apt-get install -y \ python3 \ python3-psycopg2 -##git clone https://github.com/mapsme/borders.git mapsme_borders - -ARG PLANET_URL=http://download.geofabrik.de/europe/andorra-latest.osm.pbf +ARG PLANET_URL=${PLANET_URL} ENV PLANET=planet-file diff --git a/db/create_osm_places_table.sql b/db/create_osm_places_table.sql index 46b84d3..2f2d34d 100644 --- a/db/create_osm_places_table.sql +++ b/db/create_osm_places_table.sql @@ -34,57 +34,53 @@ INSERT INTO osm_places FROM planet_osm_point WHERE place IN ('city', 'town', 'village', 'hamlet', 'isolated_dwelling'); - -create index osm_places_gist_idx on osm_places using gist(way); +CREATE INDEX osm_places_gist_idx ON osm_places USING gist (way); -- Update node population with polygon population where -- the polygon duplicates the node and node has no population - -select count(*) from osm_places where g_type='point' and population is null; - UPDATE osm_places SET population = q.max_population -FROM -( - SELECT n.osm_id node_id, greatest(p.population, n.population) max_population - FROM osm_places n, osm_places p - WHERE p.g_type='polygon' AND n.g_type='point' - AND ST_Contains(p.way, n.way) - AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0) -) q -WHERE g_type='point' and osm_id = q.node_id; +FROM ( + SELECT n.osm_id node_id, + greatest(p.population, n.population) max_population + FROM osm_places n, osm_places p + WHERE p.g_type='polygon' + AND n.g_type='point' + AND ST_Contains(p.way, n.way) + AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0) + ) q +WHERE g_type='point' AND osm_id = q.node_id; -- Delete polygons where exists a node within it with the same name - -DELETE from osm_places WHERE g_type='polygon' and osm_id IN - ( - SELECT p.osm_id - FROM osm_places n, osm_places p - WHERE p.g_type='polygon' AND n.g_type='point' - AND ST_Contains(p.way, n.way) - AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0) - ); +DELETE FROM osm_places +WHERE g_type='polygon' + AND osm_id IN (SELECT p.osm_id + FROM osm_places n, osm_places p + WHERE p.g_type='polygon' + AND n.g_type='point' + AND ST_Contains(p.way, n.way) + AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0)); -- Convert [multi]polygons to points - for further faster requests "is city in region" ALTER TABLE osm_places ADD COLUMN center geometry; -UPDATE osm_places c SET center = - ( - CASE WHEN ST_Contains(way, ST_Centroid(way)) --true for 42972 out of 42999 - THEN ST_Centroid(way) - -- for the rest 27 cities choose arbitrary point as a center - ELSE ( - SELECT (ST_DumpPoints(way)).geom - FROM osm_places - WHERE osm_id = c.osm_id - LIMIT 1 - ) - END - ); +UPDATE osm_places p +SET center = ( + CASE + WHEN ST_Contains(way, ST_Centroid(way)) -- true for 99% of polygons + THEN ST_Centroid(way) + -- for the rest 1% of city polygons choose arbitrary point as a center + ELSE ( + SELECT (ST_DumpPoints(way)).geom + FROM osm_places + WHERE osm_id = p.osm_id + LIMIT 1 + ) + END); -CREATE INDEX osm_places_center_gist_idx ON osm_places USING gist(center); +CREATE INDEX osm_places_center_gist_idx ON osm_places USING gist (center); DROP INDEX osm_places_gist_idx; -ALTER TABLE osm_places DROP column way; +ALTER TABLE osm_places DROP COLUMN way; diff --git a/db/init_databases.sh b/db/init_databases.sh index 741ee77..e48248c 100644 --- a/db/init_databases.sh +++ b/db/init_databases.sh @@ -6,6 +6,4 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-E CREATE DATABASE gis; CREATE DATABASE borders; GRANT ALL PRIVILEGES ON DATABASE borders TO borders; - - -- GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO borders; EOSQL diff --git a/db/load_borders.sh b/db/load_borders.sh index f24ca42..146ca30 100755 --- a/db/load_borders.sh +++ b/db/load_borders.sh @@ -38,7 +38,8 @@ node,way population text linear EOSTYLE fi -$OSM2PGSQL --slim --drop --hstore --style $OSM2PGSQL_STYLE -d $DATABASE -r o5m $OSM2PGSQL_KEYS $FILTERED_PLANET +$OSM2PGSQL --slim --drop --hstore --style $OSM2PGSQL_STYLE -d $DATABASE \ + -r o5m $OSM2PGSQL_KEYS $FILTERED_PLANET RET=$? rm -f $FILTERED_PLANET if [ "$OSM2PGSQL_STYLE_TMP" == "1" ]; then @@ -52,17 +53,16 @@ echo Creating osm_borders table psql $DATABASE -c " DROP TABLE IF EXISTS osm_borders; CREATE TABLE osm_borders AS - SELECT - osm_id, - ST_Buffer(ST_Transform(ST_Collect(way),4326), 0) AS way, - admin_level::INT AS admin_level, - coalesce(max(\"name:en\"), max(name)) AS name + SELECT osm_id, + ST_Buffer(ST_Transform(ST_Collect(way),4326), 0) AS way, + admin_level::INT AS admin_level, + coalesce(max(\"name:en\"), max(name)) AS name FROM planet_osm_polygon WHERE boundary='administrative' AND osm_id < 0 AND admin_level IN ('2', '3', '4', '5', '6', '7') GROUP BY osm_id, admin_level HAVING coalesce(max(\"name:en\"), max(name)) IS NOT NULL; - ALTER TABLE osm_borders ADD PRIMARY KEY (osm_id); -;" || exit 3 +ALTER TABLE osm_borders ADD PRIMARY KEY (osm_id); +" || exit 3 # Copy it to the borders database echo Copying osm_borders table to the borders database diff --git a/db/prepare_borders.sh b/db/prepare_borders.sh index 97925ae..4a32d75 100644 --- a/db/prepare_borders.sh +++ b/db/prepare_borders.sh @@ -27,7 +27,9 @@ else CONVERTED_PLANET=$PLANET fi -$OSMFILTER $CONVERTED_PLANET --keep="boundary=administrative or ( place=city =town =hamlet =village =isolated_dwelling )" --out-o5m -o=$FILTERED_PLANET || exit 3 +$OSMFILTER $CONVERTED_PLANET\ + --keep="boundary=administrative or ( place=city =town =hamlet =village =isolated_dwelling )"\ + --out-o5m -o=$FILTERED_PLANET\ + || exit 3 chmod +r $FILTERED_PLANET - diff --git a/db/tiles2pg.py b/db/tiles2pg.py index d31b6aa..5baf92c 100755 --- a/db/tiles2pg.py +++ b/db/tiles2pg.py @@ -21,15 +21,10 @@ with psycopg2.connect(f'dbname={options.database}') as conn: (count, lat, lon) = (int(m.group(1)), float(m.group(2))/100, float(m.group(3))/100) cur.execute(f''' INSERT INTO {options.table} (count, tile) - VALUES ( - %s, - ST_SetSRID( - ST_MakeBox2d( - ST_Point(%s, %s), - ST_Point(%s, %s) - ), - 4326 - ) + VALUES (%s, + ST_SetSRID(ST_MakeBox2d(ST_Point(%s, %s), + ST_Point(%s, %s)), + 4326) ) ''', (count, lon, lat, lon + 0.01, lat + 0.01) ) diff --git a/docker-compose.yaml b/docker-compose.yaml index 57a9140..aea7cd8 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -21,8 +21,8 @@ services: context: ./db dockerfile: Dockerfile.db args: - PLANET_URL_full: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf - PLANET_URL: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf + PLANET_URL: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf + PLANET_URL_small: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf container_name: db restart: always environment: diff --git a/scripts/import_points.py b/scripts/import_points.py deleted file mode 100755 index 27e4f97..0000000 --- a/scripts/import_points.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -import psycopg2 -import os, argparse - -def parse_double_points(line): - if "Double" in line: - words = line.split() - lat = words[9].split("(")[1][:-1] - lon = words[10].split(")")[0] - return float(lon), float(lat), 1 - -def parse_unknown_outgoing(line): - if "Unknowing" in line: - words = line.split() - lat = words[9] - lon = words[10] - return float(lon), float(lat), 2 - -filters = (parse_double_points, parse_unknown_outgoing) - -parser = argparse.ArgumentParser(description='Extract borders warning points from generator log files to databse.') -parser.add_argument('-s', '--source', help='Generator log file path.') -parser.add_argument('-c', '--connection', help='Database connection string.') -parser.add_argument('-t', '--truncate', action='store_true', help='Truncate old data. WARINIG old data will be lost!') -parser.add_argument('-v', dest='verbose', action='store_true', help='Print status messages.') -options = parser.parse_args() - -# Check log file for existance. -if not os.path.exists(options.source): - print "Generator log file", options.source, "does not exists." - exit(1) - -# Process the log. -points = [] -with open(options.source) as logfile: - for line in logfile.readlines(): - for f in filters: - result = f(line) - if result: - points.append(result) - break - -# Print stats. -print "Found {0} points".format(len(points)) -print "Found {0} ways that do not lead to the external mwm and {1} roads that crossing the border several times.". format( - len(filter(lambda a: a[2] == 2, points)), len(filter(lambda a: a[2] == 1, points)) - ) - -# Commit to the database -conn = psycopg2.connect(options.connection) -cursor = conn.cursor() - -if options.truncate: - print "Truncating old data..." - cursor.execute("TRUNCATE TABLE points") - -for p in points: - cursor.execute("INSERT into points (geom, type) VALUES (ST_GeomFromText('POINT(%s %s)', 4326), %s)", p) -conn.commit() diff --git a/scripts/poly2postgis.py b/scripts/poly2postgis.py index d0ba817..a5edc47 100755 --- a/scripts/poly2postgis.py +++ b/scripts/poly2postgis.py @@ -1,7 +1,9 @@ #!/usr/bin/python -import psycopg2 import glob +import psycopg2 + + def read_polygon(f): """Reads an array of coordinates with the final 'END' line.""" coords = [] @@ -26,6 +28,7 @@ def read_polygon(f): coords.append(coords[0]) return '({})'.format(','.join(coords)) + def read_multipolygon(f): """Read the entire poly file and parse in into a WKT.""" polygons = [] @@ -53,6 +56,7 @@ def read_multipolygon(f): else: return "MULTIPOLYGON({})".format(','.join(polygons)) + def convert_poly(input_file, cur): """Reads a multipolygon from input_file and inserts it into borders table.""" with open(input_file, 'r') as f: @@ -60,11 +64,12 @@ def convert_poly(input_file, cur): wkt = read_multipolygon(f) print ' ', name try: - cur.execute('insert into borders (name, geom, modified) values (%s, ST_GeomFromText(%s), now())', (name, wkt)) + cur.execute('INSERT INTO borders (name, geom, modified) VALUES (%s, ST_GeomFromText(%s), now())', (name, wkt)) except psycopg2.Error as e: print wkt raise e + if __name__ == "__main__": conn = psycopg2.connect('dbname=borders') cur = conn.cursor() diff --git a/web/app/auto_split.py b/web/app/auto_split.py index 0c0e538..b199661 100644 --- a/web/app/auto_split.py +++ b/web/app/auto_split.py @@ -1,9 +1,9 @@ import itertools import json -import psycopg2 - from collections import defaultdict +import psycopg2 + from config import ( AUTOSPLIT_TABLE as autosplit_table, OSM_TABLE as osm_table, @@ -34,7 +34,7 @@ class DisjointClusterUnion: } def get_smallest_cluster(self): - """Find minimal cluster without big cities.""" + """Find minimal cluster.""" smallest_cluster_id = min( filter( lambda cluster_id: @@ -140,9 +140,9 @@ def calculate_common_border_matrix(conn, subregion_ids): SELECT b1.osm_id AS osm_id1, b2.osm_id AS osm_id2, ST_Length(geography(ST_Intersection(b1.way, b2.way))) AS intersection FROM {osm_table} b1, {osm_table} b2 - WHERE b1.osm_id IN ({subregion_ids_str}) AND - b2.osm_id IN ({subregion_ids_str}) - AND b1.osm_id < b2.osm_id + WHERE b1.osm_id IN ({subregion_ids_str}) + AND b2.osm_id IN ({subregion_ids_str}) + AND b1.osm_id < b2.osm_id """ ) common_border_matrix = {} # {subregion_id: { subregion_id: float} } where len > 0 @@ -258,16 +258,16 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion): #subregion_ids_array_str = f"{{','.join(str(x) for x in subregion_ids)}}" cluster_geometry_sql = get_union_sql(subregion_ids) cursor.execute(f""" - INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom, - mwm_size_thr, mwm_size_est) - VALUES ( - {dcu.region_id}, - '{{{','.join(str(x) for x in subregion_ids)}}}', - ({cluster_geometry_sql}), - {dcu.mwm_size_thr}, - {data['mwm_size_est']} - ) - """) + INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom, + mwm_size_thr, mwm_size_est) + VALUES ( + {dcu.region_id}, + '{{{','.join(str(x) for x in subregion_ids)}}}', + ({cluster_geometry_sql}), + {dcu.mwm_size_thr}, + {data['mwm_size_est']} + ) + """) conn.commit() @@ -275,11 +275,11 @@ def get_region_and_country_names(conn, region_id): cursor = conn.cursor() try: cursor.execute( - f"""SELECT name, - (SELECT name - FROM {osm_table} - WHERE admin_level = 2 AND ST_contains(way, b1.way) - ) AS country_name + f"""SELECT name, + (SELECT name + FROM {osm_table} + WHERE admin_level = 2 AND ST_Contains(way, b1.way) + ) AS country_name FROM osm_borders b1 WHERE osm_id = {region_id} AND b1.osm_id NOT IN (-9086712) -- crunch, stub to exclude incorrect subregions diff --git a/web/app/borders_daemon.py b/web/app/borders_daemon.py index f0d6c2a..7f7c3c8 100755 --- a/web/app/borders_daemon.py +++ b/web/app/borders_daemon.py @@ -1,9 +1,12 @@ #!/usr/bin/python3 -import os, sys -import time import logging +import sys +import time + import psycopg2 + import config + try: from daemon import runner HAS_DAEMON = True @@ -25,13 +28,12 @@ CHECK_BORDERS_INTERVAL = 10 no_count_queries = [ f""" SELECT id, name - FROM - ( SELECT id, name, + FROM ( + SELECT id, name, ST_Area(geography(geom))/1000000.0 area, ST_Area(geography(ST_Envelope(geom)))/1000000.0 env_area FROM {table} - WHERE {condition} - ) q + WHERE {condition}) q WHERE area != 'NaN'::double precision AND area <= env_area AND env_area < 5000000 @@ -121,7 +123,6 @@ def init_logger(): logger.setLevel(logging.INFO) formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s") handler = logging.FileHandler(config.DAEMON_LOG_PATH) - #handler = logging.StreamHandler() handler.setFormatter(formatter) logger.addHandler(handler) return logger diff --git a/web/app/subregions.py b/web/app/subregions.py index 7051b8f..d359b23 100644 --- a/web/app/subregions.py +++ b/web/app/subregions.py @@ -61,10 +61,10 @@ def _add_population_data(conn, subregions, need_cities): cursor = conn.cursor() subregion_ids = ','.join(str(x) for x in subregions.keys()) cursor.execute(f""" - SELECT b.osm_id, p.name, COALESCE(p.population, 0), p.place + SELECT b.osm_id, p.name, coalesce(p.population, 0), p.place FROM {osm_table} b, {osm_places_table} p WHERE b.osm_id IN ({subregion_ids}) - AND ST_CONTAINS(b.way, p.center) + AND ST_Contains(b.way, p.center) """ ) for subregion_id, place_name, place_population, place_type in cursor: @@ -116,10 +116,10 @@ def update_border_mwm_size_estimation(conn, border_id): 'hamlet_cnt': 0 } cursor.execute(f""" - SELECT COALESCE(p.population, 0), p.place + SELECT coalesce(p.population, 0), p.place FROM {table} b, {config.OSM_PLACES_TABLE} p WHERE b.id = %s - AND ST_CONTAINS(b.geom, p.center) + AND ST_Contains(b.geom, p.center) """, (border_id, )) for place_population, place_type in cursor: if place_type in ('city', 'town'):