Change db structure to use osm_places (not only cities/towns with population)

This commit is contained in:
Alexey Zakharenkov 2020-09-25 12:50:57 +03:00 committed by Alexey Zakharenkov
parent 27c80f9879
commit b13e31bff1
8 changed files with 105 additions and 80 deletions

View file

@ -32,6 +32,6 @@ COPY create_extensions.sql /docker-entrypoint-initdb.d/01-create_extensions.sql
COPY load_borders.sh /docker-entrypoint-initdb.d/10-load_borders.sh
COPY create_tables.sql /docker-entrypoint-initdb.d/20-create_tables.sql
COPY load_tiles.sh /docker-entrypoint-initdb.d/30-load_tiles.sh
COPY create_osm_cities_table.sql /docker-entrypoint-initdb.d/40-create_osm_cities_table.sql
COPY load_osm_cities_table.sh /docker-entrypoint-initdb.d/41-load_osm_cities_table.sh
COPY create_osm_places_table.sql /docker-entrypoint-initdb.d/40-create_osm_places_table.sql
COPY load_osm_places_table.sh /docker-entrypoint-initdb.d/41-load_osm_places_table.sh

View file

@ -1,67 +0,0 @@
\c gis postgres
-- Step 1: create osm_cities from city/town polygons.
-- Only rows whose population tag is purely numeric (after stripping spaces,
-- dots and commas) are kept. Rows sharing (osm_id, place) are collected into
-- a single geometry, reprojected to SRID 4326 and passed through a
-- zero-width buffer.
CREATE TABLE osm_cities AS
SELECT
    poly.osm_id,
    poly.place,
    'polygon'::text AS g_type, -- geometry type marker
    max(regexp_replace(poly.population, '[ .,]+', '', 'g')::int) AS population,
    ST_Buffer(ST_Transform(ST_Collect(poly.way), 4326), 0) AS way,
    coalesce(max(poly."name"), max(poly."name:en")) AS name
FROM planet_osm_polygon AS poly
WHERE poly.place IN ('city', 'town')
  AND regexp_replace(poly.population, '[ .,]+', '', 'g') ~ '^\d+$'
GROUP BY poly.osm_id, poly.place;
-- Step 2: append city/town nodes that carry a numeric population tag.
-- Columns are produced in the same order as the table created above:
-- osm_id, place, g_type, population, way, name.
INSERT INTO osm_cities
SELECT
    pt.osm_id,
    pt.place,
    'point'::text AS g_type, -- geometry type marker
    regexp_replace(pt.population, '[ .,]+', '', 'g')::int AS population,
    ST_Transform(pt.way, 4326) AS way,
    coalesce(pt."name", pt."name:en") AS name
FROM planet_osm_point AS pt
WHERE pt.place IN ('city', 'town')
  AND regexp_replace(pt.population, '[ .,]+', '', 'g') ~ '^\d+$';
-- Spatial index on the geometry column, used by the ST_Contains join below.
CREATE INDEX osm_cities_gist_idx ON osm_cities USING gist(way);
-- Remove every polygon that contains a node whose name overlaps its own
-- (either name is a substring of the other): the node already represents it.
DELETE FROM osm_cities
WHERE g_type = 'polygon'
  AND osm_id IN (
      SELECT poly.osm_id
      FROM osm_cities AS poly
      INNER JOIN osm_cities AS node
          ON ST_Contains(poly.way, node.way)
      WHERE poly.g_type = 'polygon'
        AND node.g_type = 'point'
        AND (strpos(node.name, poly.name) > 0 OR strpos(poly.name, node.name) > 0)
  );
-- Reduce every geometry to a single representative point so that later
-- "is city in region" checks only have to test a point.
ALTER TABLE osm_cities ADD COLUMN center geometry;
UPDATE osm_cities AS c
SET center = CASE
    -- The centroid lies inside the geometry in 42972 of 42999 cases.
    WHEN ST_Contains(c.way, ST_Centroid(c.way))
        THEN ST_Centroid(c.way)
    -- For the remaining 27 cities take an arbitrary vertex of the geometry
    -- stored under the same osm_id.
    ELSE (
        SELECT (ST_DumpPoints(src.way)).geom
        FROM osm_cities AS src
        WHERE src.osm_id = c.osm_id
        LIMIT 1
    )
END;
-- From here on only the center point is needed: index it and drop the
-- original geometry together with its index.
CREATE INDEX osm_cities_center_gist_idx ON osm_cities USING gist(center);
DROP INDEX osm_cities_gist_idx;
ALTER TABLE osm_cities DROP COLUMN way;

View file

@ -0,0 +1,90 @@
\c gis postgres
----------- Collect place polygons
-- Creates osm_places from polygons tagged as city, town, village, hamlet or
-- isolated_dwelling. Rows sharing (osm_id, place) are collected into one
-- geometry, reprojected to SRID 4326 and passed through a zero-width buffer
-- (presumably to repair invalid geometries - confirm).
--
-- population: spaces, dots and commas are stripped from the tag; the value
-- is cast to int only when 1 to 9 digits remain, otherwise it is NULL.
-- The 9-digit cap keeps the cast inside the 32-bit integer range, so one
-- malformed tag (e.g. a very long digit string) cannot abort the whole load.
CREATE TABLE osm_places AS
SELECT
    osm_id,
    place,
    'polygon'::text AS g_type, -- geometry_type
    max(CASE
            WHEN regexp_replace(population, '[ .,]+', '', 'g') ~ '^\d{1,9}$'
                THEN regexp_replace(population, '[ .,]+', '', 'g')::int
            ELSE NULL
        END
    ) AS population,
    ST_Buffer(ST_Transform(ST_Collect(way), 4326), 0) AS way,
    coalesce(max("name"), max("name:en")) AS name
FROM planet_osm_polygon
WHERE place IN ('city', 'town', 'village', 'hamlet', 'isolated_dwelling')
GROUP BY osm_id, place;
----------- Collect place nodes
-- Appends nodes tagged as city, town, village, hamlet or isolated_dwelling.
-- The explicit column list keeps the INSERT correct even if the column order
-- of osm_places ever changes.
--
-- population: spaces, dots and commas are stripped from the tag; the value
-- is cast to int only when 1 to 9 digits remain, otherwise it is NULL.
-- The 9-digit cap keeps the cast inside the 32-bit integer range, so one
-- malformed tag cannot abort the whole load.
INSERT INTO osm_places (osm_id, place, g_type, population, way, name)
SELECT
    osm_id,
    place,
    'point'::text AS g_type, -- geometry_type
    CASE
        WHEN regexp_replace(population, '[ .,]+', '', 'g') ~ '^\d{1,9}$'
            THEN regexp_replace(population, '[ .,]+', '', 'g')::int
        ELSE NULL
    END AS population,
    ST_Transform(way, 4326) AS way,
    coalesce("name", "name:en") AS name
FROM planet_osm_point
WHERE place IN ('city', 'town', 'village', 'hamlet', 'isolated_dwelling');
-- Spatial index on the geometry column, used by the ST_Contains joins below.
CREATE INDEX osm_places_gist_idx ON osm_places USING gist(way);
-- Propagate population from a polygon to the node that duplicates it
-- (node lies inside the polygon and one name is a substring of the other).
-- greatest() ignores NULLs, so a node without population receives the
-- polygon's value and a node with population keeps the larger of the two.
-- Diagnostic output: number of nodes without population before the update.
SELECT count(*) FROM osm_places WHERE g_type = 'point' AND population IS NULL;
-- A node can fall inside several matching polygons. UPDATE ... FROM would
-- then apply an arbitrary one of the joined rows, so the candidates are
-- aggregated to exactly one row per node (the largest value).
UPDATE osm_places AS target
SET population = q.max_population
FROM (
    SELECT
        node.osm_id AS node_id,
        max(greatest(poly.population, node.population)) AS max_population
    FROM osm_places AS node
    INNER JOIN osm_places AS poly
        ON ST_Contains(poly.way, node.way)
    WHERE poly.g_type = 'polygon'
      AND node.g_type = 'point'
      AND (strpos(node.name, poly.name) > 0 OR strpos(poly.name, node.name) > 0)
    GROUP BY node.osm_id
) AS q
WHERE target.g_type = 'point'
  AND target.osm_id = q.node_id;
-- Remove every polygon that contains a node whose name overlaps its own
-- (either name is a substring of the other): the node already represents it.
DELETE FROM osm_places
WHERE g_type = 'polygon'
  AND osm_id IN (
      SELECT poly.osm_id
      FROM osm_places AS poly
      INNER JOIN osm_places AS node
          ON ST_Contains(poly.way, node.way)
      WHERE poly.g_type = 'polygon'
        AND node.g_type = 'point'
        AND (strpos(node.name, poly.name) > 0 OR strpos(poly.name, node.name) > 0)
  );
-- Convert [multi]polygons to points - for further faster requests "is place in region"
ALTER TABLE osm_places ADD COLUMN center geometry;
-- The fallback uses the row's own geometry. Looking the geometry up again by
-- osm_id alone is unsafe, because osm_id is not unique across point and
-- polygon rows, and an arbitrary vertex is a poorer representative than a
-- point guaranteed to lie on the surface.
UPDATE osm_places
SET center = CASE
    -- The centroid lies inside the geometry in the vast majority of cases
    -- (and always for point rows, where it is the point itself).
    WHEN ST_Contains(way, ST_Centroid(way))
        THEN ST_Centroid(way)
    -- Otherwise (e.g. crescent-shaped or multi-part places) choose a point
    -- that is guaranteed to lie on the geometry.
    WHEN NOT ST_IsEmpty(way)
        THEN ST_PointOnSurface(way)
    -- Empty geometries have no representative point.
    ELSE NULL
END;
-- From here on only the center point is needed: index it and drop the
-- original geometry together with its index.
CREATE INDEX osm_places_center_gist_idx ON osm_places USING gist(center);
DROP INDEX osm_places_gist_idx;
ALTER TABLE osm_places DROP COLUMN way;

View file

@ -13,9 +13,10 @@ CREATE TABLE borders (
disabled boolean NOT NULL DEFAULT FALSE,
count_k INTEGER,
modified TIMESTAMP NOT NULL,
cmnt VARCHAR(500)
cmnt VARCHAR(500),
mwm_size_est double precision
);
CREATE INDEX borders_idx ON borders USING gist (geom);
CREATE INDEX borders_geom_gits_idx ON borders USING gist (geom);
CREATE INDEX borders_parent_id_idx ON borders (parent_id);
CREATE TABLE borders_backup (
@ -28,14 +29,15 @@ CREATE TABLE borders_backup (
count_k INTEGER,
modified TIMESTAMP NOT NULL,
cmnt VARCHAR(500),
mwm_size_est double precision,
PRIMARY KEY (backup, id)
);
CREATE TABLE splitting (
osm_border_id BIGINT NOT NULL REFERENCES osm_borders(osm_id), -- reference to parent osm region
subregion_ids BIGINT[] NOT NULL,
city_population_thr INT NOT NULL,
cluster_population_thr INT NOT NULL,
mwm_size_est double precision NOT NULL,
mwm_size_thr double precision NOT NULL,
geom geometry NOT NULL
);
CREATE INDEX splitting_idx ON splitting (osm_border_id, city_population_thr, cluster_population_thr);
CREATE INDEX splitting_idx ON splitting (osm_border_id, mwm_size_thr);

View file

@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
set -e
OSM2PGSQL=osm2pgsql
DATABASE=gis

View file

@ -1,4 +0,0 @@
# Copy the osm_cities table from the source database into the borders
# database: dump it without ownership commands (-O) and replay the dump
# through psql connected as user "borders".
DATABASE=gis
DATABASE_BORDERS=borders
pg_dump -O -t osm_cities "$DATABASE" \
    | psql -U borders "$DATABASE_BORDERS"

View file

@ -0,0 +1,4 @@
# Copy the osm_places table from the source database into the borders
# database: dump it without ownership commands (-O) and replay the dump
# through psql connected as user "borders".
DATABASE=gis
DATABASE_BORDERS=borders
pg_dump -O -t osm_places "$DATABASE" \
    | psql -U borders "$DATABASE_BORDERS"

View file

@ -27,7 +27,7 @@ else
CONVERTED_PLANET=$PLANET
fi
$OSMFILTER $CONVERTED_PLANET --keep="boundary=administrative or place=" --out-o5m -o=$FILTERED_PLANET || exit 3
$OSMFILTER $CONVERTED_PLANET --keep="boundary=administrative or ( place=city =town =hamlet =village =isolated_dwelling )" --out-o5m -o=$FILTERED_PLANET || exit 3
chmod +r $FILTERED_PLANET