borders/web/app/countries_structure.py
2020-10-02 12:30:52 +03:00

411 lines
14 KiB
Python

import itertools
import config
from subregions import get_subregions_info
# Database table names taken from the project config.
# `table` is the borders table this module deletes from and inserts into.
table = config.TABLE
# `osm_table` is the source table queried for OSM names and geometry (`way`).
osm_table = config.OSM_TABLE
# Not referenced in the visible part of this file.
autosplit_table = config.AUTOSPLIT_TABLE
# admin_level => list of countries which should be initially divided at one
# admin level. Countries listed under level 2 get an empty list of initial
# levels in country_initial_levels, i.e. they are not subdivided at first.
# Notes marked with '!' are reminders about manual post-processing.
unilevel_countries = {
    2: [
        'Afghanistan',
        'Albania',
        'Algeria',
        'Andorra',
        'Angola',
        'Antigua and Barbuda',
        'Armenia',
        'Australia',  # need to be divided at level 4 but has many small islands of level 4
        'Azerbaijan',  # has 2 non-covering 3-level regions
        'Bahrain',
        'Barbados',
        'Belize',
        'Benin',
        'Bermuda',
        'Bhutan',
        'Botswana',
        'British Sovereign Base Areas',  # ! include into Cyprus
        'British Virgin Islands',
        'Bulgaria',
        'Burkina Faso',
        'Burundi',
        'Cambodia',
        'Cameroon',
        'Cape Verde',
        'Central African Republic',
        'Chad',
        'Chile',
        'Colombia',
        'Comoros',
        'Congo-Brazzaville',  # BUG with autodivision at level 4
        'Cook Islands',
        'Costa Rica',
        'Croatia',  # next level = 6
        'Cuba',
        'Cyprus',
        "Côte d'Ivoire",
        'Democratic Republic of the Congo',
        'Djibouti',
        'Dominica',
        'Dominican Republic',
        'East Timor',
        'Ecuador',
        'Egypt',
        'El Salvador',
        'Equatorial Guinea',
        'Eritrea',
        'Estonia',
        'Eswatini',
        'Ethiopia',
        'Falkland Islands',
        'Faroe Islands',
        'Federated States of Micronesia',
        'Fiji',
        'Gabon',
        'Georgia',
        'Ghana',
        'Gibraltar',
        'Greenland',
        'Grenada',
        'Guatemala',
        'Guernsey',
        'Guinea',
        'Guinea-Bissau',
        'Guyana',
        'Haiti',
        'Honduras',
        'Iceland',
        'Indonesia',
        'Iran',
        'Iraq',
        'Isle of Man',
        'Israel',  # ! don't forget to separate Jerusalem
        'Jamaica',
        'Jersey',
        'Jordan',
        'Kazakhstan',
        'Kenya',  # ! level 3 doesn't cover the whole country
        'Kiribati',
        'Kosovo',
        'Kuwait',
        'Kyrgyzstan',
        'Laos',
        'Latvia',
        'Lebanon',
        'Liberia',
        'Libya',
        'Liechtenstein',
        'Lithuania',
        'Luxembourg',
        'Madagascar',
        'Malaysia',
        'Maldives',
        'Mali',
        'Malta',
        'Marshall Islands',
        'Martinique',
        'Mauritania',
        'Mauritius',
        'Mexico',
        'Moldova',
        'Monaco',
        'Mongolia',
        'Montenegro',
        'Montserrat',
        'Mozambique',
        'Myanmar',
        'Namibia',
        'Nauru',
        'Nicaragua',
        'Niger',
        'Nigeria',
        'Niue',
        'North Korea',
        'North Macedonia',
        'Oman',
        'Palau',
        # ! 'Palestina' is not a country in OSM - need make an mwm
        'Panama',
        'Papua New Guinea',
        'Peru',  # need split-merge
        'Philippines',  # split at level 3 and merge or not merge
        'Qatar',
        'Romania',  # need split-merge
        'Rwanda',
        'Saint Helena, Ascension and Tristan da Cunha',
        'Saint Kitts and Nevis',
        'Saint Lucia',
        'Saint Vincent and the Grenadines',
        'San Marino',
        'Samoa',
        'Saudi Arabia',
        'Senegal',
        'Seychelles',
        'Sierra Leone',
        'Singapore',
        'Slovakia',  # ! split at level 3 then 4, and add Bratislava region (4)
        'Slovenia',
        'Solomon Islands',
        'Somalia',
        'South Georgia and the South Sandwich Islands',
        'South Korea',
        'South Sudan',
        'South Ossetia',  # ! don't forget to divide from Georgia
        'Sri Lanka',
        'Sudan',
        'São Tomé and Príncipe',
        'Suriname',
        'Switzerland',
        'Syria',
        'Taiwan',
        'Tajikistan',
        'Thailand',
        'The Bahamas',
        'The Gambia',
        'Togo',
        'Tokelau',
        'Tonga',
        'Trinidad and Tobago',
        'Tunisia',
        'Turkmenistan',
        'Turks and Caicos Islands',
        'Tuvalu',
        'United Arab Emirates',  # was misspelled as 'United Arab Emirate'
        'Uruguay',
        'Uzbekistan',
        'Vanuatu',
        'Venezuela',  # level 3 not comprehensive
        'Vietnam',
        # ! don't forget 'Wallis and Futuna', belongs to France
        'Yemen',
        'Zambia',
        'Zimbabwe',
    ],
    3: [
        'Malawi',
        'Nepal',  # ! one region is lost after division
        'Pakistan',
        'Paraguay',
        'Tanzania',
        'Turkey',
        'Uganda',
    ],
    4: [
        'Austria',
        'Bangladesh',
        'Belarus',  # maybe need merge capital region with the province
        'Belgium',  # maybe need merge capital region into encompassing province
        'Bolivia',
        'Bosnia and Herzegovina',  # other levels - 5, 6, 7 - are incomplete.
        'Canada',
        'China',  # ! don't forget about Macau and Hong Kong of level 3 not covered by level 4
        'Denmark',
        'Greece',  # ! has one small 3-level subregion!
        'Hungary',  # maybe multilevel division at levels [4, 5] ?
        'India',
        'Italy',
        'Japan',  # ? About 50 4-level subregions, some of which requires further division
        'Morocco',  # ! not all regions appear after substitution with level 4
        'New Zealand',  # ! don't forget islands to the north and south
        'Norway',
        'Poland',  # 380(!) subregions of AL=6
        'Portugal',
        'Russia',
        'Serbia',
        'South Africa',
        'Spain',
        'Ukraine',
        'United States',
    ],
    5: [
        'Ireland',  # ! 5-level don't cover the whole country
    ],
    6: [
        'Czechia',
    ],
}
# Country name => list of admin levels at which it should be initially divided,
# applied in order. E.g. 'Germany': [4, 5] implies that the country is divided
# at level 4 first, then all 4-level subregions are divided into subregions of
# level 5 (if any).
multilevel_countries = {
    'Brazil': [3, 4],
    'Finland': [3, 6],  # [3,5,6] in more fresh data? # division by level 6 seems ideal
    'France': [3, 4],
    'Germany': [4, 5],  # not the whole country is covered by units of AL=5
    'Netherlands': [3, 4],  # there are Caribbean lands of level both 3 and 4
    'Sweden': [3, 4],  # division by level 4 seems ideal
    'United Kingdom': [4, 5],  # level 5 is necessary but not comprehensive
}
# Country name => list of admin levels for the initial division.
# Single-level countries come first: level 2 means "do not subdivide" and
# is stored as an empty list, any deeper level as a one-element list.
country_initial_levels = {
    country: ([level] if level > 2 else [])
    for level, countries in unilevel_countries.items()
    for country in countries
}
# Multilevel countries are added afterwards and win on a name clash.
country_initial_levels.update(multilevel_countries)
class CountryStructureException(Exception):
    """Error raised by this module while building the country structure."""
def _clear_borders(conn):
    """Delete every row from the borders table and commit the change."""
    conn.cursor().execute(f"DELETE FROM {table}")
    conn.commit()
def _find_subregions(conn, osm_ids, next_level, regions):
    """Collect subregions of admin level `next_level` for the given regions.

    For every id in `osm_ids` the subregions are requested from
    get_subregions_info(). Each subregion found is recorded in `regions`
    (modified in place) with its 'name', 'mwm_size_est' and 'parent_id'.

    Returns the list of subregion ids in the order they were found.
    """
    found_ids = []
    for parent_id in osm_ids:
        children = get_subregions_info(
            conn, parent_id, table, next_level, need_cities=False
        )
        for child_id, child in children.items():
            # Reuse an existing entry for this id, if any, and overwrite
            # the three fields this function is responsible for.
            regions.setdefault(child_id, {}).update({
                'name': child['name'],
                'mwm_size_est': child['mwm_size_est'],
                'parent_id': parent_id,
            })
            found_ids.append(child_id)
    return found_ids
def _create_regions(conn, osm_ids, regions):
    """Insert one row per region into the borders table.

    `osm_ids` is an iterable of region ids; nothing is done if it is empty.
    `regions` maps each id to a dict with a mandatory 'name' and optional
    'parent_id' and 'mwm_size_est'. The geometry is copied from the `way`
    column of the OSM table, and `modified` is set to now().

    An optional field that is missing or set to None is written as SQL NULL.
    The transaction is not committed here; that is left to the caller.
    """
    if not osm_ids:
        return
    osm_ids = list(osm_ids)  # to ensure order: VALUES rows must match names

    def _sql_literal(osm_id, key):
        # A missing key and an explicit None both mean "unknown". Without
        # this check None would be rendered as the bare word None in SQL.
        value = regions[osm_id].get(key)
        return 'NULL' if value is None else f'{value}'

    # Only the name goes through a driver placeholder; the numeric fields
    # are embedded into the statement text as before.
    sql_values = ','.join(
        f'({osm_id},'
        '%s,'
        f"{_sql_literal(osm_id, 'parent_id')},"
        f"{_sql_literal(osm_id, 'mwm_size_est')},"
        f'(SELECT way FROM {osm_table} WHERE osm_id={osm_id}),'
        'now())'
        for osm_id in osm_ids
    )
    names = tuple(regions[osm_id]['name'] for osm_id in osm_ids)

    insert_sql = (
        "\n"
        f"INSERT INTO {table} (id, name, parent_id, mwm_size_est, geom, modified)\n"
        f"VALUES {sql_values}\n"
    )
    cursor = conn.cursor()
    cursor.execute(insert_sql, names)
def _make_country_structure(conn, country_osm_id):
    """Create the border of one country and its initial subdivision.

    The country itself is always inserted. If country_initial_levels lists
    admin levels for the country name, the country is divided level by
    level: regions created at one level are the parents for the next.

    Raises CountryStructureException if a level has to be processed but
    the previous step produced no regions.
    """
    country_name = get_osm_border_name_by_osm_id(conn, country_osm_id)
    # osm_id => {'name': name, 'mwm_size_est': size, 'parent_id': parent_id}
    # TODO: fill in 'mwm_size_est' for the country itself
    regions = {country_osm_id: {'name': country_name}}
    _create_regions(conn, [country_osm_id], regions)

    admin_levels = country_initial_levels.get(country_name)
    if not admin_levels:
        # Unknown country or no initial division requested.
        return

    parent_ids = [country_osm_id]
    parent_level = 2  # the country border itself
    for admin_level in admin_levels:
        if not parent_ids:
            raise CountryStructureException(
                f"Empty prev_region_ids at {country_name}, "
                f"AL={admin_level}, prev-AL={parent_level}"
            )
        parent_ids = _find_subregions(conn, parent_ids, admin_level, regions)
        _create_regions(conn, parent_ids, regions)
        parent_level = admin_level
def create_countries_initial_structure(conn):
    """Rebuild the borders table from scratch with the initial structure.

    Clears the borders table, then builds the structure of every
    admin_level=2 region found in the OSM table except 'Ukraine', and
    commits once at the end.
    """
    _clear_borders(conn)
    cursor = conn.cursor()
    # TODO: process overlapping countries, like Ukraine and Russia with common Crimea
    # For debugging the selection can be narrowed with an extra condition,
    # e.g.  and name in ('Germany', 'Luxembourg', 'Austria')
    # or    and name in (<the keys of country_initial_levels>)
    countries_query = (
        "\n"
        "SELECT osm_id, name\n"
        f"FROM {osm_table}\n"
        "WHERE admin_level = 2 and name != 'Ukraine'\n"
    )
    cursor.execute(countries_query)
    for row in cursor:
        country_osm_id = row[0]
        _make_country_structure(conn, country_osm_id)
    conn.commit()
def get_osm_border_name_by_osm_id(conn, osm_id):
    """Return the name stored in the OSM table for the given osm_id.

    Raises CountryStructureException if no row is found for that id.
    """
    name_query = (
        "\n"
        f"SELECT name FROM {osm_table}\n"
        "WHERE osm_id = %s\n"
    )
    cursor = conn.cursor()
    cursor.execute(name_query, (osm_id,))
    row = cursor.fetchone()
    if row:
        return row[0]
    raise CountryStructureException(f'Not found region with osm_id="{osm_id}"')
def _get_country_osm_id_by_name(conn, name):
    """Return the osm_id (as int) of the admin_level=2 region called `name`.

    Raises CountryStructureException if the name matches more than one
    country or none at all.
    """
    id_query = (
        "\n"
        f"SELECT osm_id FROM {osm_table}\n"
        "WHERE admin_level = 2 AND name = %s\n"
    )
    cursor = conn.cursor()
    cursor.execute(id_query, (name,))
    if cursor.rowcount > 1:
        raise CountryStructureException(f'More than one country "{name}"')
    row = cursor.fetchone()
    if row:
        return int(row[0])
    raise CountryStructureException(f'Not found country "{name}"')
# Split'n'merge plan. Each entry is a tuple of:
#   (large region name,
#    admin_level of that region (2 in most cases),
#    admin_level to split'n'merge,
#    admin_level of the subregions to split into)
# NOTE(review): the consumer of this list is not in the visible part of the
# file; the field meanings above restate the original author's note.
splitting = [
    # large region name, admin_level (2 in most cases), admin_level to split'n'merge, into subregions of what admin_level
    ('Germany', 2, 4, 6),  # Half of the country is covered by units of AL=5
    ('Metropolitan France', 3, 4, 6),
    ('Spain', 2, 4, 6),
    ('Portugal', 2, 4, 6),
    ('Belgium', 2, 4, 6),
    ('Italy', 2, 4, 6),
    ('Switzerland', 2, 2, 4),  # has admin_level=5
    ('Austria', 2, 4, 6),
    ('Poland', 2, 4, 6),  # 380(!) of AL=6
    ('Czechia', 2, 6, 7),
    ('Ukraine', 2, 4, 6),  # should merge back to region=4 level clusters
    ('United Kingdom', 2, 5, 6),  # whole country is divided by level 4; level 5 is necessary but not comprehensive
    ('Denmark', 2, 4, 7),
    ('Norway', 2, 4, 7),
    ('Sweden', 2, 4, 7),  # though division by level 4 is currently ideal
    ('Finland', 2, 6, 7),  # though division by level 6 is currently ideal
    ('Estonia', 2, 2, 6),
    ('Latvia', 2, 4, 6),  # the whole country takes 56Mb, all 6-level units should merge into 4-level clusters
    ('Lithuania', 2, 2, 4),  # now Lithuania has 2 mwms of size 60Mb each
    ('Belarus', 2, 2, 4),  # 6 regions + Minsk city. Would it be merged with the region?
    ('Slovakia', 2, 2, 4),  # there are no subregions 5, 6, 7. Must leave all 8 4-level regions
    ('Hungary', 2, 5, 6),
    #('Slovenia', 2, 2, 8), # no levels 3,4,5,6; level 7 incomplete.
    ('Croatia', 2, 2, 6),
    ('Bosnia and Herzegovina', 2, 2, 4),  # other levels - 5, 6, 7 - are incomplete.
    ('Serbia', 2, 4, 6),
    ('Romania', 2, 2, 4),
    ('Bulgaria', 2, 2, 4),
    ('Greece', 2, 4, 5),  # has 7 4-level regions, must merge 5-level to them again
    ('Ireland', 2, 5, 6),  # 5-level don't cover the whole country! Still...
    ('Turkey', 2, 3, 4),
]