SQL formatting

This commit is contained in:
Alexey Zakharenkov 2020-10-23 01:13:45 +03:00
parent fd2541a36b
commit c31a2caf0a
12 changed files with 93 additions and 158 deletions

View file

@ -11,9 +11,7 @@ RUN apt-get update && apt-get install -y \
python3 \ python3 \
python3-psycopg2 python3-psycopg2
##git clone https://github.com/mapsme/borders.git mapsme_borders ARG PLANET_URL=${PLANET_URL}
ARG PLANET_URL=http://download.geofabrik.de/europe/andorra-latest.osm.pbf
ENV PLANET=planet-file ENV PLANET=planet-file

View file

@ -34,57 +34,53 @@ INSERT INTO osm_places
FROM planet_osm_point FROM planet_osm_point
WHERE place IN ('city', 'town', 'village', 'hamlet', 'isolated_dwelling'); WHERE place IN ('city', 'town', 'village', 'hamlet', 'isolated_dwelling');
CREATE INDEX osm_places_gist_idx ON osm_places USING gist (way);
create index osm_places_gist_idx on osm_places using gist(way);
-- Update node population with polygon population where -- Update node population with polygon population where
-- the polygon duplicates the node and node has no population -- the polygon duplicates the node and node has no population
select count(*) from osm_places where g_type='point' and population is null;
UPDATE osm_places UPDATE osm_places
SET population = q.max_population SET population = q.max_population
FROM FROM (
( SELECT n.osm_id node_id,
SELECT n.osm_id node_id, greatest(p.population, n.population) max_population greatest(p.population, n.population) max_population
FROM osm_places n, osm_places p FROM osm_places n, osm_places p
WHERE p.g_type='polygon' AND n.g_type='point' WHERE p.g_type='polygon'
AND ST_Contains(p.way, n.way) AND n.g_type='point'
AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0) AND ST_Contains(p.way, n.way)
) q AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0)
WHERE g_type='point' and osm_id = q.node_id; ) q
WHERE g_type='point' AND osm_id = q.node_id;
-- Delete polygons where exists a node within it with the same name -- Delete polygons where exists a node within it with the same name
DELETE FROM osm_places
DELETE from osm_places WHERE g_type='polygon' and osm_id IN WHERE g_type='polygon'
( AND osm_id IN (SELECT p.osm_id
SELECT p.osm_id FROM osm_places n, osm_places p
FROM osm_places n, osm_places p WHERE p.g_type='polygon'
WHERE p.g_type='polygon' AND n.g_type='point' AND n.g_type='point'
AND ST_Contains(p.way, n.way) AND ST_Contains(p.way, n.way)
AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0) AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0));
);
-- Convert [multi]polygons to points - for further faster requests "is city in region" -- Convert [multi]polygons to points - for further faster requests "is city in region"
ALTER TABLE osm_places ADD COLUMN center geometry; ALTER TABLE osm_places ADD COLUMN center geometry;
UPDATE osm_places c SET center = UPDATE osm_places p
( SET center = (
CASE WHEN ST_Contains(way, ST_Centroid(way)) --true for 42972 out of 42999 CASE
THEN ST_Centroid(way) WHEN ST_Contains(way, ST_Centroid(way)) -- true for 99% of polygons
-- for the rest 27 cities choose arbitrary point as a center THEN ST_Centroid(way)
ELSE ( -- for the rest 1% of city polygons choose arbitrary point as a center
SELECT (ST_DumpPoints(way)).geom ELSE (
FROM osm_places SELECT (ST_DumpPoints(way)).geom
WHERE osm_id = c.osm_id FROM osm_places
LIMIT 1 WHERE osm_id = p.osm_id
) LIMIT 1
END )
); END);
CREATE INDEX osm_places_center_gist_idx ON osm_places USING gist(center); CREATE INDEX osm_places_center_gist_idx ON osm_places USING gist (center);
DROP INDEX osm_places_gist_idx; DROP INDEX osm_places_gist_idx;
ALTER TABLE osm_places DROP column way; ALTER TABLE osm_places DROP COLUMN way;

View file

@ -6,6 +6,4 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-E
CREATE DATABASE gis; CREATE DATABASE gis;
CREATE DATABASE borders; CREATE DATABASE borders;
GRANT ALL PRIVILEGES ON DATABASE borders TO borders; GRANT ALL PRIVILEGES ON DATABASE borders TO borders;
-- GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO borders;
EOSQL EOSQL

View file

@ -38,7 +38,8 @@ node,way population text linear
EOSTYLE EOSTYLE
fi fi
$OSM2PGSQL --slim --drop --hstore --style $OSM2PGSQL_STYLE -d $DATABASE -r o5m $OSM2PGSQL_KEYS $FILTERED_PLANET $OSM2PGSQL --slim --drop --hstore --style $OSM2PGSQL_STYLE -d $DATABASE \
-r o5m $OSM2PGSQL_KEYS $FILTERED_PLANET
RET=$? RET=$?
rm -f $FILTERED_PLANET rm -f $FILTERED_PLANET
if [ "$OSM2PGSQL_STYLE_TMP" == "1" ]; then if [ "$OSM2PGSQL_STYLE_TMP" == "1" ]; then
@ -52,17 +53,16 @@ echo Creating osm_borders table
psql $DATABASE -c " psql $DATABASE -c "
DROP TABLE IF EXISTS osm_borders; DROP TABLE IF EXISTS osm_borders;
CREATE TABLE osm_borders AS CREATE TABLE osm_borders AS
SELECT SELECT osm_id,
osm_id, ST_Buffer(ST_Transform(ST_Collect(way),4326), 0) AS way,
ST_Buffer(ST_Transform(ST_Collect(way),4326), 0) AS way, admin_level::INT AS admin_level,
admin_level::INT AS admin_level, coalesce(max(\"name:en\"), max(name)) AS name
coalesce(max(\"name:en\"), max(name)) AS name
FROM planet_osm_polygon FROM planet_osm_polygon
WHERE boundary='administrative' AND osm_id < 0 AND admin_level IN ('2', '3', '4', '5', '6', '7') WHERE boundary='administrative' AND osm_id < 0 AND admin_level IN ('2', '3', '4', '5', '6', '7')
GROUP BY osm_id, admin_level GROUP BY osm_id, admin_level
HAVING coalesce(max(\"name:en\"), max(name)) IS NOT NULL; HAVING coalesce(max(\"name:en\"), max(name)) IS NOT NULL;
ALTER TABLE osm_borders ADD PRIMARY KEY (osm_id); ALTER TABLE osm_borders ADD PRIMARY KEY (osm_id);
;" || exit 3 " || exit 3
# Copy it to the borders database # Copy it to the borders database
echo Copying osm_borders table to the borders database echo Copying osm_borders table to the borders database

View file

@ -27,7 +27,9 @@ else
CONVERTED_PLANET=$PLANET CONVERTED_PLANET=$PLANET
fi fi
$OSMFILTER $CONVERTED_PLANET --keep="boundary=administrative or ( place=city =town =hamlet =village =isolated_dwelling )" --out-o5m -o=$FILTERED_PLANET || exit 3 $OSMFILTER $CONVERTED_PLANET\
--keep="boundary=administrative or ( place=city =town =hamlet =village =isolated_dwelling )"\
--out-o5m -o=$FILTERED_PLANET\
|| exit 3
chmod +r $FILTERED_PLANET chmod +r $FILTERED_PLANET

View file

@ -21,15 +21,10 @@ with psycopg2.connect(f'dbname={options.database}') as conn:
(count, lat, lon) = (int(m.group(1)), float(m.group(2))/100, float(m.group(3))/100) (count, lat, lon) = (int(m.group(1)), float(m.group(2))/100, float(m.group(3))/100)
cur.execute(f''' cur.execute(f'''
INSERT INTO {options.table} (count, tile) INSERT INTO {options.table} (count, tile)
VALUES ( VALUES (%s,
%s, ST_SetSRID(ST_MakeBox2d(ST_Point(%s, %s),
ST_SetSRID( ST_Point(%s, %s)),
ST_MakeBox2d( 4326)
ST_Point(%s, %s),
ST_Point(%s, %s)
),
4326
)
) )
''', (count, lon, lat, lon + 0.01, lat + 0.01) ''', (count, lon, lat, lon + 0.01, lat + 0.01)
) )

View file

@ -21,8 +21,8 @@ services:
context: ./db context: ./db
dockerfile: Dockerfile.db dockerfile: Dockerfile.db
args: args:
PLANET_URL_full: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf PLANET_URL: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf
PLANET_URL: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf PLANET_URL_small: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf
container_name: db container_name: db
restart: always restart: always
environment: environment:

View file

@ -1,60 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import psycopg2
import os, argparse
def parse_double_points(line):
    """Extract a warning point from a log line that contains "Double".

    The line is split on whitespace. The latitude is read from word 9 (the
    text after "(" with its last character dropped) and the longitude from
    word 10 (the text before ")").

    Returns a ``(lon, lat, 1)`` tuple - two floats followed by the point type
    code 1 - or None when the line does not contain "Double".

    Raises IndexError/ValueError if a matching line does not have the
    expected word layout.
    """
    if "Double" not in line:
        return None
    words = line.split()
    # Word 9 presumably looks like "(<lat>," - the [:-1] drops the trailing
    # separator character; confirm against real generator logs.
    lat = words[9].split("(")[1][:-1]
    lon = words[10].split(")")[0]
    return float(lon), float(lat), 1
def parse_unknown_outgoing(line):
    """Extract a warning point from a log line that contains "Unknowing".

    The line is split on whitespace; word 9 is read as the latitude and
    word 10 as the longitude, both as plain numbers.

    Returns a ``(lon, lat, 2)`` tuple - two floats followed by the point type
    code 2 - or None when the line does not contain "Unknowing".

    Raises IndexError/ValueError if a matching line does not have the
    expected word layout.
    """
    if "Unknowing" not in line:
        return None
    words = line.split()
    lat = words[9]
    lon = words[10]
    return float(lon), float(lat), 2
# Line parsers, tried in order on every log line; the first one that returns
# a point wins and the remaining parsers are skipped for that line.
filters = (parse_double_points, parse_unknown_outgoing)

parser = argparse.ArgumentParser(description='Extract borders warning points from generator log files to databse.')
parser.add_argument('-s', '--source', help='Generator log file path.')
parser.add_argument('-c', '--connection', help='Database connection string.')
parser.add_argument('-t', '--truncate', action='store_true', help='Truncate old data. WARINIG old data will be lost!')
parser.add_argument('-v', dest='verbose', action='store_true', help='Print status messages.')
options = parser.parse_args()

# Check that the log file exists. A missing --source is reported the same way
# instead of crashing inside os.path.exists(None).
if not options.source or not os.path.exists(options.source):
    print("Generator log file {0} does not exists.".format(options.source))
    raise SystemExit(1)

# Process the log: collect one (lon, lat, type) tuple per recognised line.
points = []
with open(options.source) as logfile:
    # Iterate the file lazily rather than loading every line with readlines().
    for line in logfile:
        for f in filters:
            result = f(line)
            if result:
                points.append(result)
                break

# Print stats. Type 2 comes from parse_unknown_outgoing, type 1 from
# parse_double_points.
print("Found {0} points".format(len(points)))
print("Found {0} ways that do not lead to the external mwm and {1} roads that crossing the border several times.".format(
    sum(1 for point in points if point[2] == 2),
    sum(1 for point in points if point[2] == 1)
))

# Commit to the database.
conn = psycopg2.connect(options.connection)
try:
    cursor = conn.cursor()
    if options.truncate:
        print("Truncating old data...")
        cursor.execute("TRUNCATE TABLE points")
    for p in points:
        # Bind lon/lat as real parameters and build the point server-side,
        # rather than substituting placeholders inside a quoted WKT literal.
        cursor.execute(
            "INSERT INTO points (geom, type) VALUES (ST_SetSRID(ST_MakePoint(%s, %s), 4326), %s)",
            p
        )
    conn.commit()
finally:
    # Always release the connection; uncommitted work is discarded on error.
    conn.close()

View file

@ -1,7 +1,9 @@
#!/usr/bin/python #!/usr/bin/python
import psycopg2
import glob import glob
import psycopg2
def read_polygon(f): def read_polygon(f):
"""Reads an array of coordinates with the final 'END' line.""" """Reads an array of coordinates with the final 'END' line."""
coords = [] coords = []
@ -26,6 +28,7 @@ def read_polygon(f):
coords.append(coords[0]) coords.append(coords[0])
return '({})'.format(','.join(coords)) return '({})'.format(','.join(coords))
def read_multipolygon(f): def read_multipolygon(f):
"""Read the entire poly file and parse in into a WKT.""" """Read the entire poly file and parse in into a WKT."""
polygons = [] polygons = []
@ -53,6 +56,7 @@ def read_multipolygon(f):
else: else:
return "MULTIPOLYGON({})".format(','.join(polygons)) return "MULTIPOLYGON({})".format(','.join(polygons))
def convert_poly(input_file, cur): def convert_poly(input_file, cur):
"""Reads a multipolygon from input_file and inserts it into borders table.""" """Reads a multipolygon from input_file and inserts it into borders table."""
with open(input_file, 'r') as f: with open(input_file, 'r') as f:
@ -60,11 +64,12 @@ def convert_poly(input_file, cur):
wkt = read_multipolygon(f) wkt = read_multipolygon(f)
print ' ', name print ' ', name
try: try:
cur.execute('insert into borders (name, geom, modified) values (%s, ST_GeomFromText(%s), now())', (name, wkt)) cur.execute('INSERT INTO borders (name, geom, modified) VALUES (%s, ST_GeomFromText(%s), now())', (name, wkt))
except psycopg2.Error as e: except psycopg2.Error as e:
print wkt print wkt
raise e raise e
if __name__ == "__main__": if __name__ == "__main__":
conn = psycopg2.connect('dbname=borders') conn = psycopg2.connect('dbname=borders')
cur = conn.cursor() cur = conn.cursor()

View file

@ -1,9 +1,9 @@
import itertools import itertools
import json import json
import psycopg2
from collections import defaultdict from collections import defaultdict
import psycopg2
from config import ( from config import (
AUTOSPLIT_TABLE as autosplit_table, AUTOSPLIT_TABLE as autosplit_table,
OSM_TABLE as osm_table, OSM_TABLE as osm_table,
@ -34,7 +34,7 @@ class DisjointClusterUnion:
} }
def get_smallest_cluster(self): def get_smallest_cluster(self):
"""Find minimal cluster without big cities.""" """Find minimal cluster."""
smallest_cluster_id = min( smallest_cluster_id = min(
filter( filter(
lambda cluster_id: lambda cluster_id:
@ -140,9 +140,9 @@ def calculate_common_border_matrix(conn, subregion_ids):
SELECT b1.osm_id AS osm_id1, b2.osm_id AS osm_id2, SELECT b1.osm_id AS osm_id1, b2.osm_id AS osm_id2,
ST_Length(geography(ST_Intersection(b1.way, b2.way))) AS intersection ST_Length(geography(ST_Intersection(b1.way, b2.way))) AS intersection
FROM {osm_table} b1, {osm_table} b2 FROM {osm_table} b1, {osm_table} b2
WHERE b1.osm_id IN ({subregion_ids_str}) AND WHERE b1.osm_id IN ({subregion_ids_str})
b2.osm_id IN ({subregion_ids_str}) AND b2.osm_id IN ({subregion_ids_str})
AND b1.osm_id < b2.osm_id AND b1.osm_id < b2.osm_id
""" """
) )
common_border_matrix = {} # {subregion_id: { subregion_id: float} } where len > 0 common_border_matrix = {} # {subregion_id: { subregion_id: float} } where len > 0
@ -258,16 +258,16 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
#subregion_ids_array_str = f"{{','.join(str(x) for x in subregion_ids)}}" #subregion_ids_array_str = f"{{','.join(str(x) for x in subregion_ids)}}"
cluster_geometry_sql = get_union_sql(subregion_ids) cluster_geometry_sql = get_union_sql(subregion_ids)
cursor.execute(f""" cursor.execute(f"""
INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom, INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom,
mwm_size_thr, mwm_size_est) mwm_size_thr, mwm_size_est)
VALUES ( VALUES (
{dcu.region_id}, {dcu.region_id},
'{{{','.join(str(x) for x in subregion_ids)}}}', '{{{','.join(str(x) for x in subregion_ids)}}}',
({cluster_geometry_sql}), ({cluster_geometry_sql}),
{dcu.mwm_size_thr}, {dcu.mwm_size_thr},
{data['mwm_size_est']} {data['mwm_size_est']}
) )
""") """)
conn.commit() conn.commit()
@ -275,11 +275,11 @@ def get_region_and_country_names(conn, region_id):
cursor = conn.cursor() cursor = conn.cursor()
try: try:
cursor.execute( cursor.execute(
f"""SELECT name, f"""SELECT name,
(SELECT name (SELECT name
FROM {osm_table} FROM {osm_table}
WHERE admin_level = 2 AND ST_contains(way, b1.way) WHERE admin_level = 2 AND ST_Contains(way, b1.way)
) AS country_name ) AS country_name
FROM osm_borders b1 FROM osm_borders b1
WHERE osm_id = {region_id} WHERE osm_id = {region_id}
AND b1.osm_id NOT IN (-9086712) -- crunch, stub to exclude incorrect subregions AND b1.osm_id NOT IN (-9086712) -- crunch, stub to exclude incorrect subregions

View file

@ -1,9 +1,12 @@
#!/usr/bin/python3 #!/usr/bin/python3
import os, sys
import time
import logging import logging
import sys
import time
import psycopg2 import psycopg2
import config import config
try: try:
from daemon import runner from daemon import runner
HAS_DAEMON = True HAS_DAEMON = True
@ -25,13 +28,12 @@ CHECK_BORDERS_INTERVAL = 10
no_count_queries = [ no_count_queries = [
f""" f"""
SELECT id, name SELECT id, name
FROM FROM (
( SELECT id, name, SELECT id, name,
ST_Area(geography(geom))/1000000.0 area, ST_Area(geography(geom))/1000000.0 area,
ST_Area(geography(ST_Envelope(geom)))/1000000.0 env_area ST_Area(geography(ST_Envelope(geom)))/1000000.0 env_area
FROM {table} FROM {table}
WHERE {condition} WHERE {condition}) q
) q
WHERE area != 'NaN'::double precision WHERE area != 'NaN'::double precision
AND area <= env_area AND area <= env_area
AND env_area < 5000000 AND env_area < 5000000
@ -121,7 +123,6 @@ def init_logger():
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s") formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
handler = logging.FileHandler(config.DAEMON_LOG_PATH) handler = logging.FileHandler(config.DAEMON_LOG_PATH)
#handler = logging.StreamHandler()
handler.setFormatter(formatter) handler.setFormatter(formatter)
logger.addHandler(handler) logger.addHandler(handler)
return logger return logger

View file

@ -61,10 +61,10 @@ def _add_population_data(conn, subregions, need_cities):
cursor = conn.cursor() cursor = conn.cursor()
subregion_ids = ','.join(str(x) for x in subregions.keys()) subregion_ids = ','.join(str(x) for x in subregions.keys())
cursor.execute(f""" cursor.execute(f"""
SELECT b.osm_id, p.name, COALESCE(p.population, 0), p.place SELECT b.osm_id, p.name, coalesce(p.population, 0), p.place
FROM {osm_table} b, {osm_places_table} p FROM {osm_table} b, {osm_places_table} p
WHERE b.osm_id IN ({subregion_ids}) WHERE b.osm_id IN ({subregion_ids})
AND ST_CONTAINS(b.way, p.center) AND ST_Contains(b.way, p.center)
""" """
) )
for subregion_id, place_name, place_population, place_type in cursor: for subregion_id, place_name, place_population, place_type in cursor:
@ -116,10 +116,10 @@ def update_border_mwm_size_estimation(conn, border_id):
'hamlet_cnt': 0 'hamlet_cnt': 0
} }
cursor.execute(f""" cursor.execute(f"""
SELECT COALESCE(p.population, 0), p.place SELECT coalesce(p.population, 0), p.place
FROM {table} b, {config.OSM_PLACES_TABLE} p FROM {table} b, {config.OSM_PLACES_TABLE} p
WHERE b.id = %s WHERE b.id = %s
AND ST_CONTAINS(b.geom, p.center) AND ST_Contains(b.geom, p.center)
""", (border_id, )) """, (border_id, ))
for place_population, place_type in cursor: for place_population, place_type in cursor:
if place_type in ('city', 'town'): if place_type in ('city', 'town'):