diff --git a/db/tiles2pg.py b/db/tiles2pg.py index 5baf92c..7e46a69 100755 --- a/db/tiles2pg.py +++ b/db/tiles2pg.py @@ -1,40 +1,47 @@ #!/usr/bin/python3 import argparse +import logging import re import sys import psycopg2 -parser = argparse.ArgumentParser(description='Import tiles from CSV into a database') -parser.add_argument('-t', '--table', default='tiles', help='Target directory (default=tiles)') -parser.add_argument('-d', '--database', default='borders', help='Database name (default=borders)') -parser.add_argument('-v', dest='verbose', action='store_true', help='Print status messages') -options = parser.parse_args() +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Import tiles from CSV into a database') + parser.add_argument('-t', '--table', default='tiles', help='Target directory (default=tiles)') + parser.add_argument('-d', '--database', default='borders', help='Database name (default=borders)') + parser.add_argument('-v', dest='verbose', action='store_true', help='Print status messages') + options = parser.parse_args() -with psycopg2.connect(f'dbname={options.database}') as conn: - with conn.cursor() as cur: - cnt = 0 - for line in sys.stdin: - m = re.match(r'^\s*(\d+)\s+(-?\d+)\s+(-?\d+)', line) - if m: - (count, lat, lon) = (int(m.group(1)), float(m.group(2))/100, float(m.group(3))/100) - cur.execute(f''' - INSERT INTO {options.table} (count, tile) - VALUES (%s, - ST_SetSRID(ST_MakeBox2d(ST_Point(%s, %s), - ST_Point(%s, %s)), - 4326) - ) - ''', (count, lon, lat, lon + 0.01, lat + 0.01) - ) - cnt += 1 - else: - print(line) + log_level = logging.INFO if options.verbose else logging.WARNING + logging.basicConfig(level=log_level, format='%(levelname)s: %(message)s') - if options.verbose: - print('Commit') - conn.commit() - if options.verbose: - print(f'Uploaded {cnt} tiles') + COUNT_LAT_LON_RE = r'^\s*(\d+)\s+(-?\d+)\s+(-?\d+)' + + with psycopg2.connect(f'dbname={options.database}') as conn: + with conn.cursor() as cur: + cnt = 0 + for line in sys.stdin: + m = re.match(COUNT_LAT_LON_RE, line) + if m: + (count, lat, lon) = (int(m.group(1)), + float(m.group(2))/100, + float(m.group(3))/100) + cur.execute(f''' + INSERT INTO {options.table} (count, tile) + VALUES (%s, + ST_SetSRID(ST_MakeBox2d(ST_Point(%s, %s), + ST_Point(%s, %s)), + 4326) + ) + ''', (count, lon, lat, lon + 0.01, lat + 0.01) + ) + cnt += 1 + else: + logging.warning(f"Incorrect count-lat-lon line '{line}'") + + logging.info('Commit') + conn.commit() + logging.info(f'Uploaded {cnt} tiles') diff --git a/web/app/auto_split.py b/web/app/auto_split.py index b199661..379058c 100644 --- a/web/app/auto_split.py +++ b/web/app/auto_split.py @@ -36,19 +36,13 @@ class DisjointClusterUnion: def get_smallest_cluster(self): """Find minimal cluster.""" smallest_cluster_id = min( - filter( - lambda cluster_id: - not self.clusters[cluster_id]['finished'], - self.clusters.keys() - ), + (cluster_id for cluster_id in self.clusters.keys() + if not self.clusters[cluster_id]['finished']), default=None, key=lambda cluster_id: self.clusters[cluster_id]['mwm_size_est'] ) return smallest_cluster_id - def mark_cluster_finished(self, cluster_id): - self.clusters[cluster_id]['finished'] = True - def find_cluster(self, subregion_id): if self.representatives[subregion_id] == subregion_id: return subregion_id @@ -57,13 +51,6 @@ class DisjointClusterUnion: self.representatives[subregion_id] = representative return representative - def get_cluster_mwm_size_est(self, subregion_id): - cluster_id = self.find_cluster(subregion_id) - return self.clusters[cluster_id]['mwm_size_est'] - - def get_cluster_count(self): - return len(self.clusters) - def union(self, cluster_id1, cluster_id2): # To make it more deterministic retained_cluster_id = max(cluster_id1, cluster_id2) @@ -89,13 +76,13 @@ class DisjointClusterUnion: def get_best_cluster_to_join_with(small_cluster_id, - dcu: DisjointClusterUnion, - common_border_matrix): + common_border_matrix, + dcu: DisjointClusterUnion): if small_cluster_id not in common_border_matrix: # This may be if a subregion is isolated, # like Bezirk Lienz inside Tyrol, Austria return None - common_borders = defaultdict(lambda: 0.0) # cluster representative => common border length + common_borders = defaultdict(float) # cluster representative => common border length subregion_ids = dcu.get_cluster_subregion_ids(small_cluster_id) for subregion_id in subregion_ids: for other_subregion_id, length in common_border_matrix[subregion_id].items(): @@ -104,24 +91,25 @@ def get_best_cluster_to_join_with(small_cluster_id, common_borders[other_cluster_id] += length if not common_borders: return None + total_common_border_length = sum(common_borders.values()) - total_adjacent_mwm_size_est = sum(dcu.get_cluster_mwm_size_est(x) for x in common_borders) - choice_criterion = ( - ( - lambda cluster_id: ( - common_borders[cluster_id]/total_common_border_length + - -dcu.get_cluster_mwm_size_est(cluster_id)/total_adjacent_mwm_size_est - ) - ) if total_adjacent_mwm_size_est else - lambda cluster_id: ( - common_borders[cluster_id]/total_common_border_length + total_adjacent_mwm_size_est = sum(dcu.clusters[x]['mwm_size_est'] for x in common_borders) + + if total_adjacent_mwm_size_est: + choice_criterion = lambda cluster_id: ( + common_borders[cluster_id] / total_common_border_length + + -dcu.clusters[cluster_id]['mwm_size_est'] / total_adjacent_mwm_size_est ) - ) + else: + choice_criterion = lambda cluster_id: ( + common_borders[cluster_id] / total_common_border_length + ) + best_cluster_id = max( filter( lambda cluster_id: ( - dcu.clusters[small_cluster_id]['mwm_size_est'] + - dcu.clusters[cluster_id]['mwm_size_est'] <= dcu.mwm_size_thr + (dcu.clusters[small_cluster_id]['mwm_size_est'] + + dcu.clusters[cluster_id]['mwm_size_est']) <= dcu.mwm_size_thr ), common_borders.keys() ), @@ -169,17 +157,17 @@ def find_golden_splitting(conn, border_id, next_level, all_subregion_ids = dcu.get_all_subregion_ids() common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids) - i = 0 while True: - if dcu.get_cluster_count() == 1: + if len(dcu.clusters) == 1: return dcu - i += 1 smallest_cluster_id = dcu.get_smallest_cluster() if not smallest_cluster_id: return dcu - best_cluster_id = get_best_cluster_to_join_with(smallest_cluster_id, dcu, common_border_matrix) + best_cluster_id = get_best_cluster_to_join_with(smallest_cluster_id, + common_border_matrix, + dcu) if not best_cluster_id: - dcu.mark_cluster_finished(smallest_cluster_id) + dcu.clusters[smallest_cluster_id]['finished'] = True continue assert (smallest_cluster_id != best_cluster_id), f"{smallest_cluster_id}" dcu.union(smallest_cluster_id, best_cluster_id) @@ -194,18 +182,20 @@ def get_union_sql(subregion_ids): """ else: return f""" - SELECT ST_UNION( + SELECT ST_Union( ({get_union_sql(subregion_ids[0:1])}), ({get_union_sql(subregion_ids[1: ])}) ) """ + def get_geojson(conn, union_sql): cursor = conn.cursor() cursor.execute(f"""SELECT ST_AsGeoJSON(({union_sql}))""") rec = cursor.fetchone() return rec[0] + def write_polygons_to_poly(file, polygons, name_prefix): pcounter = 1 for polygon in polygons: @@ -255,14 +245,14 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion): """) for cluster_id, data in dcu.clusters.items(): subregion_ids = data['subregion_ids'] - #subregion_ids_array_str = f"{{','.join(str(x) for x in subregion_ids)}}" + subregion_ids_array_str = '{' + ','.join(str(x) for x in subregion_ids) + '}' cluster_geometry_sql = get_union_sql(subregion_ids) cursor.execute(f""" INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom, mwm_size_thr, mwm_size_est) VALUES ( {dcu.region_id}, - '{{{','.join(str(x) for x in subregion_ids)}}}', + '{subregion_ids_array_str}', ({cluster_geometry_sql}), {dcu.mwm_size_thr}, {data['mwm_size_est']} @@ -274,25 +264,25 @@ def save_splitting_to_db(conn, dcu: DisjointClusterUnion): def get_region_and_country_names(conn, region_id): cursor = conn.cursor() try: - cursor.execute( - f"""SELECT name, - (SELECT name - FROM {osm_table} - WHERE admin_level = 2 AND ST_Contains(way, b1.way) - ) AS country_name - FROM osm_borders b1 - WHERE osm_id = {region_id} - AND b1.osm_id NOT IN (-9086712) -- crunch, stub to exclude incorrect subregions - """ - ) - region_name, country_name = cursor.fetchone() + cursor.execute(f""" + SELECT name, + (SELECT name + FROM {osm_table} + WHERE admin_level = 2 + AND ST_Contains(way, b1.way)) AS country_name + FROM osm_borders b1 + WHERE osm_id = {region_id} + AND b1.osm_id NOT IN (-9086712) -- crunch, stub to exclude incorrect subregions + """ + ) + region_name, country_name = cursor.fetchone() except psycopg2.errors.CardinalityViolation: conn.rollback() - cursor.execute( - f"""SELECT name - FROM {osm_table} b1 - WHERE osm_id = {region_id} - """ + cursor.execute(f""" + SELECT name + FROM {osm_table} b1 + WHERE osm_id = {region_id} + """ ) region_name = cursor.fetchone()[0] country_name = None @@ -311,14 +301,13 @@ def split_region(conn, region_id, next_level, if dcu is None: return - save_splitting(dcu, conn, save_to_files, country_region_name) + save_splitting(conn, dcu, save_to_files, country_region_name) -def save_splitting(dcu: DisjointClusterUnion, conn, +def save_splitting(conn, dcu: DisjointClusterUnion, save_to_files=None, country_region_name=None): save_splitting_to_db(conn, dcu) if save_to_files: print(f"Saving {country_region_name}") filename_prefix = f"{country_region_name}-{dcu.city_population_thr}" save_splitting_to_file(conn, dcu, filename_prefix) - diff --git a/web/app/borders_api.py b/web/app/borders_api.py index e05e516..d5d5150 100755 --- a/web/app/borders_api.py +++ b/web/app/borders_api.py @@ -505,10 +505,11 @@ def is_administrative_region(region_id): osm_table = config.OSM_TABLE cur = g.conn.cursor() cur.execute(f""" - SELECT osm_id FROM {osm_table} WHERE osm_id = %s + SELECT count(1) FROM {osm_table} WHERE osm_id = %s """, (region_id,) ) - return bool(cur.rowcount > 0) + count = cur.fetchone()[0] + return (count > 0) def find_osm_child_regions(region_id): cursor = g.conn.cursor() @@ -529,13 +530,13 @@ def find_osm_child_regions(region_id): def is_leaf(region_id): cursor = g.conn.cursor() cursor.execute(f""" - SELECT 1 + SELECT count(1) FROM {config.TABLE} WHERE parent_id = %s - LIMIT 1 """, (region_id,) ) - return cursor.rowcount == 0 + count = cursor.fetchone()[0] + return (count == 0) def get_region_country(region_id): """Returns the uppermost predecessor of the region in the hierarchy, @@ -605,16 +606,16 @@ def divide_preview(): next_level = int(request.args.get('next_level')) except ValueError: return jsonify(status="Not a number in next level") - is_admin = is_administrative_region(region_id) + is_admin_region = is_administrative_region(region_id) region_ids = [region_id] apply_to_similar = (request.args.get('apply_to_similar') == 'true') if apply_to_similar: - if not is_admin: + if not is_admin_region: return jsonify(status="Could not use 'apply to similar' for non-administrative regions") region_ids = get_similar_regions(region_id, only_leaves=True) auto_divide = (request.args.get('auto_divide') == 'true') if auto_divide: - if not is_admin: + if not is_admin_region: return jsonify(status="Could not apply auto-division to non-administrative regions") try: mwm_size_thr = int(request.args.get('mwm_size_thr')) @@ -722,16 +723,16 @@ def divide(): next_level = int(request.args.get('next_level')) except ValueError: return jsonify(status="Not a number in next level") - is_admin = is_administrative_region(region_id) + is_admin_region = is_administrative_region(region_id) apply_to_similar = (request.args.get('apply_to_similar') == 'true') region_ids = [region_id] if apply_to_similar: - if not is_admin: + if not is_admin_region: return jsonify(status="Could not use 'apply to similar' for non-administrative regions") region_ids = get_similar_regions(region_id, only_leaves=True) auto_divide = (request.args.get('auto_divide') == 'true') if auto_divide: - if not is_admin: + if not is_admin_region: return jsonify(status="Could not apply auto-division to non-administrative regions") try: mwm_size_thr = int(request.args.get('mwm_size_thr')) @@ -755,8 +756,8 @@ def divide_into_subregions_one(region_id, next_level): subregions = get_subregions_info(g.conn, region_id, table, next_level, need_cities=False) cursor = g.conn.cursor() - is_admin = is_administrative_region(region_id) - if is_admin: + is_admin_region = is_administrative_region(region_id) + if is_admin_region: for subregion_id, data in subregions.items(): cursor.execute(f""" INSERT INTO {table} @@ -1093,11 +1094,22 @@ def josm_borders_along(): xml = xml + '' return Response(xml, mimetype='application/x-osm+xml') + +XML_ATTR_ESCAPINGS = { + '&': '&', + '>': '>', + '<': '<', + '\n': ' ', + '\r': ' ', + '\t': ' ', + '"': '"' +} + + def quoteattr(value): - value = value.replace('&', '&').replace('>', '>').replace('<', '<') - value = value.replace('\n', ' ').replace('\r', ' ').replace('\t', ' ') - value = value.replace('"', '"') - return '"{}"'.format(value) + for char, replacement in XML_ATTR_ESCAPINGS.items(): + value = value.replace(char, replacement) + return f'"{value}"' def ring_hash(refs): #return json.dumps(refs) @@ -1150,16 +1162,23 @@ def import_error(msg): else: return jsonify(status=msg) -def extend_bbox(bbox, x, y=None): - if y is not None: - x = [x, y, x, y] - bbox[0] = min(bbox[0], x[0]) - bbox[1] = min(bbox[1], x[1]) - bbox[2] = max(bbox[2], x[2]) - bbox[3] = max(bbox[3], x[3]) +def extend_bbox(bbox, *args): + """Extend bbox to include another bbox or point.""" + assert len(args) in (1, 2) + if len(args) == 1: + another_bbox = args[0] + else: + another_bbox = [args[0], args[1], args[0], args[1]] + bbox[0] = min(bbox[0], another_bbox[0]) + bbox[1] = min(bbox[1], another_bbox[1]) + bbox[2] = max(bbox[2], another_bbox[2]) + bbox[3] = max(bbox[3], another_bbox[3]) def bbox_contains(outer, inner): - return outer[0] <= inner[0] and outer[1] <= inner[1] and outer[2] >= inner[2] and outer[3] >= inner[3] + return (outer[0] <= inner[0] and + outer[1] <= inner[1] and + outer[2] >= inner[2] and + outer[3] >= inner[3]) @app.route('/import', methods=['POST']) def import_osm(): @@ -1563,10 +1582,10 @@ def statistics(): GROUP BY name""" ) result = [] - for res in cur: - coord = json.loads(res[4])['coordinates'] - result.append({'name': res[0], 'outer': res[1], 'min_area': res[2], - 'inner': res[3], 'lon': coord[0], 'lat': coord[1]}) + for (name, outer, min_area, inner, coords) in cur: + coord = json.loads(coords)['coordinates'] + result.append({'name': name, 'outer': outer, 'min_area': min_area, + 'inner': inner, 'lon': coord[0], 'lat': coord[1]}) return jsonify(regions=result) return jsonify(status='wrong group id')