SQL formatting

This commit is contained in:
Alexey Zakharenkov 2020-10-23 01:13:45 +03:00
parent fd2541a36b
commit c31a2caf0a
12 changed files with 93 additions and 158 deletions

View file

@ -11,9 +11,7 @@ RUN apt-get update && apt-get install -y \
python3 \
python3-psycopg2
##git clone https://github.com/mapsme/borders.git mapsme_borders
ARG PLANET_URL=http://download.geofabrik.de/europe/andorra-latest.osm.pbf
ARG PLANET_URL=${PLANET_URL}
ENV PLANET=planet-file

View file

@ -34,57 +34,53 @@ INSERT INTO osm_places
FROM planet_osm_point
WHERE place IN ('city', 'town', 'village', 'hamlet', 'isolated_dwelling');
create index osm_places_gist_idx on osm_places using gist(way);
CREATE INDEX osm_places_gist_idx ON osm_places USING gist (way);
-- Update node population with polygon population where
-- the polygon duplicates the node and node has no population
select count(*) from osm_places where g_type='point' and population is null;
UPDATE osm_places
SET population = q.max_population
FROM
(
SELECT n.osm_id node_id, greatest(p.population, n.population) max_population
FROM osm_places n, osm_places p
WHERE p.g_type='polygon' AND n.g_type='point'
AND ST_Contains(p.way, n.way)
AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0)
) q
WHERE g_type='point' and osm_id = q.node_id;
FROM (
SELECT n.osm_id node_id,
greatest(p.population, n.population) max_population
FROM osm_places n, osm_places p
WHERE p.g_type='polygon'
AND n.g_type='point'
AND ST_Contains(p.way, n.way)
AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0)
) q
WHERE g_type='point' AND osm_id = q.node_id;
-- Delete polygons that contain a node whose name includes, or is included in, the polygon's name
DELETE from osm_places WHERE g_type='polygon' and osm_id IN
(
SELECT p.osm_id
FROM osm_places n, osm_places p
WHERE p.g_type='polygon' AND n.g_type='point'
AND ST_Contains(p.way, n.way)
AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0)
);
-- Remove polygon places that duplicate a point place: the polygon must
-- spatially contain the point, and one of the two names must occur as a
-- substring of the other.
DELETE FROM osm_places
WHERE g_type = 'polygon'
  AND osm_id IN (
      SELECT poly.osm_id
      FROM osm_places AS poly
      INNER JOIN osm_places AS node
          ON ST_Contains(poly.way, node.way)
      WHERE poly.g_type = 'polygon'
        AND node.g_type = 'point'
        AND (
            strpos(node.name, poly.name) > 0
            OR strpos(poly.name, node.name) > 0
        )
  );
-- Convert [multi]polygons to points to speed up later "is city in region" queries
ALTER TABLE osm_places ADD COLUMN center geometry;
UPDATE osm_places c SET center =
(
CASE WHEN ST_Contains(way, ST_Centroid(way)) --true for 42972 out of 42999
THEN ST_Centroid(way)
-- for the rest 27 cities choose arbitrary point as a center
ELSE (
SELECT (ST_DumpPoints(way)).geom
FROM osm_places
WHERE osm_id = c.osm_id
LIMIT 1
)
END
);
-- Fill "center" for every row (intentionally no WHERE clause).
-- The centroid is used when it lies inside the geometry; otherwise the first
-- vertex of the row's own geometry is taken as an arbitrary representative.
-- The fallback reads p.way directly instead of looking the row up again by
-- osm_id: osm_id alone does not identify a row in this table (the other
-- statements always combine it with g_type), so a lookup by osm_id could
-- return a vertex that belongs to a different place.
UPDATE osm_places p
SET center = (
    CASE
        WHEN ST_Contains(p.way, ST_Centroid(p.way)) -- true for 99% of polygons
            THEN ST_Centroid(p.way)
        -- for the remaining 1% of city polygons choose an arbitrary vertex
        ELSE (
            SELECT (ST_DumpPoints(p.way)).geom
            LIMIT 1
        )
    END);
CREATE INDEX osm_places_center_gist_idx ON osm_places USING gist(center);
CREATE INDEX osm_places_center_gist_idx ON osm_places USING gist (center);
DROP INDEX osm_places_gist_idx;
ALTER TABLE osm_places DROP column way;
ALTER TABLE osm_places DROP COLUMN way;

View file

@ -6,6 +6,4 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-E
CREATE DATABASE gis;
CREATE DATABASE borders;
GRANT ALL PRIVILEGES ON DATABASE borders TO borders;
-- GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO borders;
EOSQL

View file

@ -38,7 +38,8 @@ node,way population text linear
EOSTYLE
fi
$OSM2PGSQL --slim --drop --hstore --style $OSM2PGSQL_STYLE -d $DATABASE -r o5m $OSM2PGSQL_KEYS $FILTERED_PLANET
$OSM2PGSQL --slim --drop --hstore --style $OSM2PGSQL_STYLE -d $DATABASE \
-r o5m $OSM2PGSQL_KEYS $FILTERED_PLANET
RET=$?
rm -f $FILTERED_PLANET
if [ "$OSM2PGSQL_STYLE_TMP" == "1" ]; then
@ -52,17 +53,16 @@ echo Creating osm_borders table
psql $DATABASE -c "
DROP TABLE IF EXISTS osm_borders;
CREATE TABLE osm_borders AS
SELECT
osm_id,
ST_Buffer(ST_Transform(ST_Collect(way),4326), 0) AS way,
admin_level::INT AS admin_level,
coalesce(max(\"name:en\"), max(name)) AS name
SELECT osm_id,
ST_Buffer(ST_Transform(ST_Collect(way),4326), 0) AS way,
admin_level::INT AS admin_level,
coalesce(max(\"name:en\"), max(name)) AS name
FROM planet_osm_polygon
WHERE boundary='administrative' AND osm_id < 0 AND admin_level IN ('2', '3', '4', '5', '6', '7')
GROUP BY osm_id, admin_level
HAVING coalesce(max(\"name:en\"), max(name)) IS NOT NULL;
ALTER TABLE osm_borders ADD PRIMARY KEY (osm_id);
;" || exit 3
ALTER TABLE osm_borders ADD PRIMARY KEY (osm_id);
" || exit 3
# Copy it to the borders database
echo Copying osm_borders table to the borders database

View file

@ -27,7 +27,9 @@ else
CONVERTED_PLANET=$PLANET
fi
$OSMFILTER $CONVERTED_PLANET --keep="boundary=administrative or ( place=city =town =hamlet =village =isolated_dwelling )" --out-o5m -o=$FILTERED_PLANET || exit 3
$OSMFILTER $CONVERTED_PLANET\
--keep="boundary=administrative or ( place=city =town =hamlet =village =isolated_dwelling )"\
--out-o5m -o=$FILTERED_PLANET\
|| exit 3
chmod +r $FILTERED_PLANET

View file

@ -21,15 +21,10 @@ with psycopg2.connect(f'dbname={options.database}') as conn:
(count, lat, lon) = (int(m.group(1)), float(m.group(2))/100, float(m.group(3))/100)
cur.execute(f'''
INSERT INTO {options.table} (count, tile)
VALUES (
%s,
ST_SetSRID(
ST_MakeBox2d(
ST_Point(%s, %s),
ST_Point(%s, %s)
),
4326
)
VALUES (%s,
ST_SetSRID(ST_MakeBox2d(ST_Point(%s, %s),
ST_Point(%s, %s)),
4326)
)
''', (count, lon, lat, lon + 0.01, lat + 0.01)
)

View file

@ -21,8 +21,8 @@ services:
context: ./db
dockerfile: Dockerfile.db
args:
PLANET_URL_full: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf
PLANET_URL: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf
PLANET_URL: https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf
PLANET_URL_small: http://download.geofabrik.de/africa/eritrea-latest.osm.pbf
container_name: db
restart: always
environment:

View file

@ -1,60 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import psycopg2
import os, argparse
def parse_double_points(line):
    """Extract the point of a "Double" warning from a generator log line.

    The line is split on whitespace. The latitude is the text after "(" in
    token 9 with its last character dropped; the longitude is the text before
    ")" in token 10.

    Returns a (lon, lat, 1) tuple (floats plus the warning type 1), or None
    when the line does not contain "Double" or does not have that layout.
    """
    if "Double" not in line:
        return None
    words = line.split()
    try:
        lat = words[9].split("(")[1][:-1]
        lon = words[10].split(")")[0]
        return float(lon), float(lat), 1
    except (IndexError, ValueError):
        # A line that merely mentions "Double" but is not in the expected
        # format must not abort processing of the whole log file.
        return None
def parse_unknown_outgoing(line):
    """Extract the point of an "Unknowing" warning from a generator log line.

    The line is split on whitespace; token 9 is read as the latitude and
    token 10 as the longitude.

    Returns a (lon, lat, 2) tuple (floats plus the warning type 2), or None
    when the line does not contain "Unknowing" or does not have that layout.
    """
    if "Unknowing" not in line:
        return None
    words = line.split()
    try:
        lat = words[9]
        lon = words[10]
        return float(lon), float(lat), 2
    except (IndexError, ValueError):
        # A line that merely mentions "Unknowing" but is not in the expected
        # format must not abort processing of the whole log file.
        return None
# Line parsers tried in order for every log line; the first one that returns
# a truthy result decides the point recorded for that line.
filters = (parse_double_points, parse_unknown_outgoing)
# Command-line interface.
parser = argparse.ArgumentParser(description='Extract borders warning points from generator log files to databse.')
parser.add_argument('-s', '--source', help='Generator log file path.')
parser.add_argument('-c', '--connection', help='Database connection string.')
parser.add_argument('-t', '--truncate', action='store_true', help='Truncate old data. WARINIG old data will be lost!')
# NOTE(review): options.verbose is parsed but not read in the code shown here.
parser.add_argument('-v', dest='verbose', action='store_true', help='Print status messages.')
options = parser.parse_args()
# Check that the log file exists; exit with status 1 otherwise.
if not os.path.exists(options.source):
print "Generator log file", options.source, "does not exists."
exit(1)
# Process the log: collect one (lon, lat, type) tuple per matching line.
points = []
with open(options.source) as logfile:
for line in logfile.readlines():
for f in filters:
result = f(line)
if result:
points.append(result)
break
# Print stats. Type 2 tuples come from parse_unknown_outgoing and type 1
# tuples from parse_double_points.
print "Found {0} points".format(len(points))
print "Found {0} ways that do not lead to the external mwm and {1} roads that crossing the border several times.". format(
len(filter(lambda a: a[2] == 2, points)), len(filter(lambda a: a[2] == 1, points))
)
# Write the points to the database.
conn = psycopg2.connect(options.connection)
cursor = conn.cursor()
# With --truncate the existing rows of the "points" table are removed first.
if options.truncate:
print "Truncating old data..."
cursor.execute("TRUNCATE TABLE points")
# Each tuple supplies the two coordinates of the WKT point (SRID 4326) and the
# value of the "type" column.
# NOTE(review): the first two placeholders sit inside a quoted SQL literal;
# presumably this relies on the driver rendering floats without quotes - confirm.
for p in points:
cursor.execute("INSERT into points (geom, type) VALUES (ST_GeomFromText('POINT(%s %s)', 4326), %s)", p)
conn.commit()

View file

@ -1,7 +1,9 @@
#!/usr/bin/python
import psycopg2
import glob
import psycopg2
def read_polygon(f):
"""Reads an array of coordinates with the final 'END' line."""
coords = []
@ -26,6 +28,7 @@ def read_polygon(f):
coords.append(coords[0])
return '({})'.format(','.join(coords))
def read_multipolygon(f):
"""Read the entire poly file and parse in into a WKT."""
polygons = []
@ -53,6 +56,7 @@ def read_multipolygon(f):
else:
return "MULTIPOLYGON({})".format(','.join(polygons))
def convert_poly(input_file, cur):
"""Reads a multipolygon from input_file and inserts it into borders table."""
with open(input_file, 'r') as f:
@ -60,11 +64,12 @@ def convert_poly(input_file, cur):
wkt = read_multipolygon(f)
print ' ', name
try:
cur.execute('insert into borders (name, geom, modified) values (%s, ST_GeomFromText(%s), now())', (name, wkt))
cur.execute('INSERT INTO borders (name, geom, modified) VALUES (%s, ST_GeomFromText(%s), now())', (name, wkt))
except psycopg2.Error as e:
print wkt
raise e
if __name__ == "__main__":
conn = psycopg2.connect('dbname=borders')
cur = conn.cursor()

View file

@ -1,9 +1,9 @@
import itertools
import json
import psycopg2
from collections import defaultdict
import psycopg2
from config import (
AUTOSPLIT_TABLE as autosplit_table,
OSM_TABLE as osm_table,
@ -34,7 +34,7 @@ class DisjointClusterUnion:
}
def get_smallest_cluster(self):
"""Find minimal cluster without big cities."""
"""Find minimal cluster."""
smallest_cluster_id = min(
filter(
lambda cluster_id:
@ -140,9 +140,9 @@ def calculate_common_border_matrix(conn, subregion_ids):
SELECT b1.osm_id AS osm_id1, b2.osm_id AS osm_id2,
ST_Length(geography(ST_Intersection(b1.way, b2.way))) AS intersection
FROM {osm_table} b1, {osm_table} b2
WHERE b1.osm_id IN ({subregion_ids_str}) AND
b2.osm_id IN ({subregion_ids_str})
AND b1.osm_id < b2.osm_id
WHERE b1.osm_id IN ({subregion_ids_str})
AND b2.osm_id IN ({subregion_ids_str})
AND b1.osm_id < b2.osm_id
"""
)
common_border_matrix = {} # {subregion_id: { subregion_id: float} } where len > 0
@ -258,16 +258,16 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
#subregion_ids_array_str = f"{{','.join(str(x) for x in subregion_ids)}}"
cluster_geometry_sql = get_union_sql(subregion_ids)
cursor.execute(f"""
INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom,
mwm_size_thr, mwm_size_est)
VALUES (
{dcu.region_id},
'{{{','.join(str(x) for x in subregion_ids)}}}',
({cluster_geometry_sql}),
{dcu.mwm_size_thr},
{data['mwm_size_est']}
)
""")
INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom,
mwm_size_thr, mwm_size_est)
VALUES (
{dcu.region_id},
'{{{','.join(str(x) for x in subregion_ids)}}}',
({cluster_geometry_sql}),
{dcu.mwm_size_thr},
{data['mwm_size_est']}
)
""")
conn.commit()
@ -275,11 +275,11 @@ def get_region_and_country_names(conn, region_id):
cursor = conn.cursor()
try:
cursor.execute(
f"""SELECT name,
(SELECT name
FROM {osm_table}
WHERE admin_level = 2 AND ST_contains(way, b1.way)
) AS country_name
f"""SELECT name,
(SELECT name
FROM {osm_table}
WHERE admin_level = 2 AND ST_Contains(way, b1.way)
) AS country_name
FROM osm_borders b1
WHERE osm_id = {region_id}
AND b1.osm_id NOT IN (-9086712) -- crunch, stub to exclude incorrect subregions

View file

@ -1,9 +1,12 @@
#!/usr/bin/python3
import os, sys
import time
import logging
import sys
import time
import psycopg2
import config
try:
from daemon import runner
HAS_DAEMON = True
@ -25,13 +28,12 @@ CHECK_BORDERS_INTERVAL = 10
no_count_queries = [
f"""
SELECT id, name
FROM
( SELECT id, name,
FROM (
SELECT id, name,
ST_Area(geography(geom))/1000000.0 area,
ST_Area(geography(ST_Envelope(geom)))/1000000.0 env_area
FROM {table}
WHERE {condition}
) q
WHERE {condition}) q
WHERE area != 'NaN'::double precision
AND area <= env_area
AND env_area < 5000000
@ -121,7 +123,6 @@ def init_logger():
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
handler = logging.FileHandler(config.DAEMON_LOG_PATH)
#handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)
return logger

View file

@ -61,10 +61,10 @@ def _add_population_data(conn, subregions, need_cities):
cursor = conn.cursor()
subregion_ids = ','.join(str(x) for x in subregions.keys())
cursor.execute(f"""
SELECT b.osm_id, p.name, COALESCE(p.population, 0), p.place
SELECT b.osm_id, p.name, coalesce(p.population, 0), p.place
FROM {osm_table} b, {osm_places_table} p
WHERE b.osm_id IN ({subregion_ids})
AND ST_CONTAINS(b.way, p.center)
AND ST_Contains(b.way, p.center)
"""
)
for subregion_id, place_name, place_population, place_type in cursor:
@ -116,10 +116,10 @@ def update_border_mwm_size_estimation(conn, border_id):
'hamlet_cnt': 0
}
cursor.execute(f"""
SELECT COALESCE(p.population, 0), p.place
SELECT coalesce(p.population, 0), p.place
FROM {table} b, {config.OSM_PLACES_TABLE} p
WHERE b.id = %s
AND ST_CONTAINS(b.geom, p.center)
AND ST_Contains(b.geom, p.center)
""", (border_id, ))
for place_population, place_type in cursor:
if place_type in ('city', 'town'):