From b13e31bff1dd2499b49ed65152dcd445ad73252d Mon Sep 17 00:00:00 2001
From: Alexey Zakharenkov
Date: Fri, 25 Sep 2020 12:50:57 +0300
Subject: [PATCH] Change db structure to use osm_places (not only cities/towns
 with population)

---
Review notes (free-form text between the "---" line and the first diff is
not part of the commit message):
* Line breaks, indentation and blank context lines of this patch were
  restored from a whitespace-collapsed copy; check the context and removed
  lines against the tree before applying. The post-image blob ids in the
  "index" lines were not recomputed after the edits listed below.
* create_tables.sql: index name typo "gits" corrected to "gist".
* create_osm_places_table.sql:
  - a population value must consist of 1-9 digits to be cast, so the ::int
    cast cannot overflow on a long digit string and abort the script;
  - the population UPDATE aggregates per node, so a node lying inside
    several same-named polygons gets a deterministic value;
  - the fallback center lookup also matches g_type, so a point row that
    shares its osm_id with a polygon row cannot be picked;
  - comments were brought in line with what the statements actually do.

 db/Dockerfile.db               |  4 +-
 db/create_osm_cities_table.sql | 67 -------------------------
 db/create_osm_places_table.sql | 90 ++++++++++++++++++++++++++++++++++
 db/create_tables.sql           | 12 +++--
 db/load_borders.sh             |  2 +-
 db/load_osm_cities_table.sh    |  4 --
 db/load_osm_places_table.sh    |  4 ++
 db/prepare_borders.sh          |  2 +-
 8 files changed, 105 insertions(+), 80 deletions(-)
 delete mode 100644 db/create_osm_cities_table.sql
 create mode 100644 db/create_osm_places_table.sql
 delete mode 100644 db/load_osm_cities_table.sh
 create mode 100644 db/load_osm_places_table.sh

diff --git a/db/Dockerfile.db b/db/Dockerfile.db
index 8e8ebba..e9bde1c 100644
--- a/db/Dockerfile.db
+++ b/db/Dockerfile.db
@@ -32,6 +32,6 @@ COPY create_extensions.sql /docker-entrypoint-initdb.d/01-create_extensions.sql
 COPY load_borders.sh /docker-entrypoint-initdb.d/10-load_borders.sh
 COPY create_tables.sql /docker-entrypoint-initdb.d/20-create_tables.sql
 COPY load_tiles.sh /docker-entrypoint-initdb.d/30-load_tiles.sh
-COPY create_osm_cities_table.sql /docker-entrypoint-initdb.d/40-create_osm_cities_table.sql
-COPY load_osm_cities_table.sh /docker-entrypoint-initdb.d/41-load_osm_cities_table.sh
+COPY create_osm_places_table.sql /docker-entrypoint-initdb.d/40-create_osm_places_table.sql
+COPY load_osm_places_table.sh /docker-entrypoint-initdb.d/41-load_osm_places_table.sh

diff --git a/db/create_osm_cities_table.sql b/db/create_osm_cities_table.sql
deleted file mode 100644
index 6789d12..0000000
--- a/db/create_osm_cities_table.sql
+++ /dev/null
@@ -1,67 +0,0 @@
-\c gis postgres
-
------------- Collect city polygons
-CREATE TABLE osm_cities AS
-  SELECT
-    osm_id,
-    place,
-    'polygon'::text AS g_type, -- geometry_type
-    max(regexp_replace(population, '[ .,]+', '', 'g')::int) AS population,
-    ST_Buffer(ST_Transform(ST_Collect(way),4326), 0) AS way,
-    coalesce(max("name"), max("name:en")) AS name
-  FROM planet_osm_polygon
-  WHERE place IN ('city', 'town')
-    AND regexp_replace(population, '[ .,]+', '', 'g') ~ '^\d+$'
-  GROUP BY osm_id, place;
-
------------- Collect city nodes
-INSERT INTO osm_cities
-  SELECT
-    osm_id,
-    place,
-    'point'::text AS g_type, -- geometry_type
-    regexp_replace(population, '[ .,]+', '', 'g')::int AS population,
-    ST_Transform(way,4326) AS way,
-    coalesce("name", "name:en") AS name
-  FROM planet_osm_point
-  WHERE place IN ('city', 'town')
-    AND regexp_replace(population, '[ .,]+', '', 'g') ~ '^\d+$';
-
-
-create index osm_cities_gist_idx on osm_cities using gist(way);
-
-
--- Delete polygons where exists a node within it with the same name
-
-DELETE from osm_cities WHERE g_type='polygon' and osm_id IN
-  (
-    SELECT p.osm_id
-    FROM osm_cities n, osm_cities p
-    WHERE p.g_type='polygon' AND n.g_type='point'
-      AND ST_Contains(p.way, n.way)
-      AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0)
-  );
-
-
--- Convert [multi]polygons to points - for further faster requests "is city in region"
-
-ALTER TABLE osm_cities ADD COLUMN center geometry;
-
-UPDATE osm_cities c SET center =
-  (
-    CASE WHEN ST_Contains(way, ST_Centroid(way)) --true for 42972 out of 42999
-      THEN ST_Centroid(way)
-      -- for the rest 27 cities choose arbitrary point as a center
-      ELSE (
-        SELECT (ST_DumpPoints(way)).geom
-        FROM osm_cities
-        WHERE osm_id = c.osm_id
-        LIMIT 1
-      )
-    END
-  );
-
-CREATE INDEX osm_cities_center_gist_idx ON osm_cities USING gist(center);
-DROP INDEX osm_cities_gist_idx;
-ALTER TABLE osm_cities DROP column way;
-
diff --git a/db/create_osm_places_table.sql b/db/create_osm_places_table.sql
new file mode 100644
index 0000000..46b84d3
--- /dev/null
+++ b/db/create_osm_places_table.sql
@@ -0,0 +1,90 @@
+\c gis postgres
+
+----------- Collect place polygons (population is NULL when it is not a 1-9 digit number)
+CREATE TABLE osm_places AS
+  SELECT
+    osm_id,
+    place,
+    'polygon'::text AS g_type, -- geometry_type
+    max(CASE
+          WHEN regexp_replace(population, '[ .,]+', '', 'g') ~ '^\d{1,9}$'
+          THEN regexp_replace(population, '[ .,]+', '', 'g')::int
+          ELSE NULL
+        END
+       ) AS population,
+    ST_Buffer(ST_Transform(ST_Collect(way),4326), 0) AS way,
+    coalesce(max("name"), max("name:en")) AS name
+  FROM planet_osm_polygon
+  WHERE place IN ('city', 'town', 'village', 'hamlet', 'isolated_dwelling')
+  GROUP BY osm_id, place;
+
+----------- Collect place nodes (same population parsing as for polygons)
+INSERT INTO osm_places
+  SELECT
+    osm_id,
+    place,
+    'point'::text AS g_type, -- geometry_type
+    CASE
+      WHEN regexp_replace(population, '[ .,]+', '', 'g') ~ '^\d{1,9}$'
+      THEN regexp_replace(population, '[ .,]+', '', 'g')::int
+      ELSE NULL
+    END AS population,
+    ST_Transform(way,4326) AS way,
+    coalesce("name", "name:en") AS name
+  FROM planet_osm_point
+  WHERE place IN ('city', 'town', 'village', 'hamlet', 'isolated_dwelling');
+
+
+create index osm_places_gist_idx on osm_places using gist(way);
+
+-- For every node lying inside a polygon with a matching name, set the node
+-- population to the largest of the node and polygon populations.
+-- The select below prints how many nodes have no population before the update.
+select count(*) from osm_places where g_type='point' and population is null;
+
+UPDATE osm_places
+SET population = q.max_population
+FROM
+(
+  SELECT n.osm_id node_id, max(greatest(p.population, n.population)) max_population
+  FROM osm_places n, osm_places p
+  WHERE p.g_type='polygon' AND n.g_type='point'
+    AND ST_Contains(p.way, n.way)
+    AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0)
+  GROUP BY n.osm_id -- one row per node, so the update result is deterministic
+) q
+WHERE g_type='point' and osm_id = q.node_id;
+
+-- Delete polygons that contain a node whose name matches the polygon name
+
+DELETE from osm_places WHERE g_type='polygon' and osm_id IN
+  (
+    SELECT p.osm_id
+    FROM osm_places n, osm_places p
+    WHERE p.g_type='polygon' AND n.g_type='point'
+      AND ST_Contains(p.way, n.way)
+      AND (strpos(n.name, p.name) > 0 OR strpos(p.name, n.name) > 0)
+  );
+
+
+-- Convert [multi]polygons to points - for further faster requests "is place in region"
+
+ALTER TABLE osm_places ADD COLUMN center geometry;
+
+UPDATE osm_places c SET center =
+  (
+    CASE WHEN ST_Contains(way, ST_Centroid(way)) -- 42972 of 42999 rows per the old osm_cities script
+      THEN ST_Centroid(way)
+      -- otherwise take an arbitrary vertex of the row's own geometry as the center
+      ELSE (
+        SELECT (ST_DumpPoints(way)).geom
+        FROM osm_places
+        WHERE osm_id = c.osm_id AND g_type = c.g_type
+        LIMIT 1
+      )
+    END
+  );
+
+CREATE INDEX osm_places_center_gist_idx ON osm_places USING gist(center);
+DROP INDEX osm_places_gist_idx;
+ALTER TABLE osm_places DROP column way;
diff --git a/db/create_tables.sql b/db/create_tables.sql
index db7dcc9..ac098b7 100644
--- a/db/create_tables.sql
+++ b/db/create_tables.sql
@@ -13,9 +13,10 @@ CREATE TABLE borders (
     disabled boolean NOT NULL DEFAULT FALSE,
     count_k INTEGER,
     modified TIMESTAMP NOT NULL,
-    cmnt VARCHAR(500)
+    cmnt VARCHAR(500),
+    mwm_size_est double precision
 );
-CREATE INDEX borders_idx ON borders USING gist (geom);
+CREATE INDEX borders_geom_gist_idx ON borders USING gist (geom);
 CREATE INDEX borders_parent_id_idx ON borders (parent_id);
 
 CREATE TABLE borders_backup (
@@ -28,14 +29,15 @@ CREATE TABLE borders_backup (
     count_k INTEGER,
     modified TIMESTAMP NOT NULL,
     cmnt VARCHAR(500),
+    mwm_size_est double precision,
     PRIMARY KEY (backup, id)
 );
 
 CREATE TABLE splitting (
     osm_border_id BIGINT NOT NULL REFERENCES osm_borders(osm_id), -- reference to parent osm region
     subregion_ids BIGINT[] NOT NULL,
-    city_population_thr INT NOT NULL,
-    cluster_population_thr INT NOT NULL,
+    mwm_size_est double precision NOT NULL,
+    mwm_size_thr double precision NOT NULL,
     geom geometry NOT NULL
 );
-CREATE INDEX splitting_idx ON splitting (osm_border_id, city_population_thr, cluster_population_thr);
+CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr);
diff --git a/db/load_borders.sh b/db/load_borders.sh
index 20e8926..f24ca42 100755
--- a/db/load_borders.sh
+++ b/db/load_borders.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 set -e
 OSM2PGSQL=osm2pgsql
 DATABASE=gis
diff --git a/db/load_osm_cities_table.sh b/db/load_osm_cities_table.sh
deleted file mode 100644
index aa9dce7..0000000
--- a/db/load_osm_cities_table.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-DATABASE=gis
-DATABASE_BORDERS=borders
-
-pg_dump -O -t osm_cities $DATABASE | psql -U borders $DATABASE_BORDERS
diff --git a/db/load_osm_places_table.sh b/db/load_osm_places_table.sh
new file mode 100644
index 0000000..1734198
--- /dev/null
+++ b/db/load_osm_places_table.sh
@@ -0,0 +1,4 @@
+DATABASE=gis
+DATABASE_BORDERS=borders
+
+pg_dump -O -t osm_places $DATABASE | psql -U borders $DATABASE_BORDERS
diff --git a/db/prepare_borders.sh b/db/prepare_borders.sh
index 6d7e906..97925ae 100644
--- a/db/prepare_borders.sh
+++ b/db/prepare_borders.sh
@@ -27,7 +27,7 @@ else
   CONVERTED_PLANET=$PLANET
 fi
 
-$OSMFILTER $CONVERTED_PLANET --keep="boundary=administrative or place=" --out-o5m -o=$FILTERED_PLANET || exit 3
+$OSMFILTER $CONVERTED_PLANET --keep="boundary=administrative or ( place=city =town =hamlet =village =isolated_dwelling )" --out-o5m -o=$FILTERED_PLANET || exit 3
 
 chmod +r $FILTERED_PLANET
