From 2a97d57399e9d12dc8ce4455de56a89fdcedc949 Mon Sep 17 00:00:00 2001 From: Maksim Andrianov Date: Fri, 24 May 2019 15:58:48 +0300 Subject: [PATCH] [python] Added statistics. --- tools/python/maps_generator/__main__.py | 26 ++-- tools/python/maps_generator/generator/env.py | 6 + .../maps_generator/generator/gen_tool.py | 1 + .../maps_generator/generator/settings.py | 5 + .../maps_generator/generator/statistics.py | 116 ++++++++++++++++++ tools/python/maps_generator/maps_generator.py | 52 +++++++- .../var/etc/map_generator.ini.default | 5 +- .../var/etc/stats_types_config.txt | 59 +++++++++ 8 files changed, 255 insertions(+), 15 deletions(-) create mode 100644 tools/python/maps_generator/generator/statistics.py create mode 100644 tools/python/maps_generator/var/etc/stats_types_config.txt diff --git a/tools/python/maps_generator/__main__.py b/tools/python/maps_generator/__main__.py index ead17107d2..97a55b9371 100644 --- a/tools/python/maps_generator/__main__.py +++ b/tools/python/maps_generator/__main__.py @@ -8,7 +8,8 @@ from .generator.exceptions import ContinueError, SkipError, ValidationError from .maps_generator import (generate_maps, generate_coasts, reset_to_stage, ALL_STAGES, stage_download_production_external, stage_descriptions, stage_ugc, stage_popularity, - stage_localads, stages_as_string) + stage_localads, stage_statistics, + stages_as_string) from .utils.collections import unique logger = logging.getLogger("maps_generator") @@ -25,16 +26,16 @@ def parse_options(): nargs="?", type=str, help="Continue the last build or specified in CONTINUE from the " - "last stopped stage.") + "last stopped stage.") parser.add_argument( "--countries", type=str, default="", help="List of regions, separated by a comma or a semicolon, or path to " - "file with regions, separated by a line break, for which maps" - " will be built. The names of the regions can be seen " - "in omim/data/borders. 
It is necessary to set names without " - "any extension.") + "file with regions, separated by a line break, for which maps" + " will be built. The names of the regions can be seen " + "in omim/data/borders. It is necessary to set names without " + "any extension.") parser.add_argument( "--skip", type=str, @@ -58,7 +59,7 @@ def parse_options(): default=False, action="store_true", help="Build production maps. In another case, 'osm only maps' are built" - " - maps without additional data and advertising.") + " - maps without additional data and advertising.") return vars(parser.parse_args()) @@ -131,10 +132,13 @@ def main(): ] options["skip"] = options_skip if not options["production"]: - options["skip"] += stages_as_string(stage_download_production_external, - stage_ugc, stage_popularity, - stage_descriptions, - stage_localads) + options["skip"] += stages_as_string( + stage_download_production_external, + stage_ugc, stage_popularity, + stage_descriptions, + stage_localads, + stage_statistics + ) if not all(s in ALL_STAGES for s in options["skip"]): raise SkipError(f"Stages {set(options['skip']) - set(ALL_STAGES)} " f"not found.") diff --git a/tools/python/maps_generator/generator/env.py b/tools/python/maps_generator/generator/env.py index 7d0211993b..ad8dca2851 100644 --- a/tools/python/maps_generator/generator/env.py +++ b/tools/python/maps_generator/generator/env.py @@ -152,6 +152,12 @@ class Env: self._create_if_not_exist(path) return path + @property + def stats_path(self): + path = os.path.join(self.out_path, "stats") + self._create_if_not_exist(path) + return path + @property def types_path(self): return os.path.join(self.user_resource_path, "types.txt") diff --git a/tools/python/maps_generator/generator/gen_tool.py b/tools/python/maps_generator/generator/gen_tool.py index ca8a05d4e5..1899a12291 100644 --- a/tools/python/maps_generator/generator/gen_tool.py +++ b/tools/python/maps_generator/generator/gen_tool.py @@ -37,6 +37,7 @@ class GenTool: "no_ads": bool, 
"preprocess": bool, "split_by_polygons": bool, + "type_statistics": bool, "planet_version": int, "booking_data": str, "brands_data": str, diff --git a/tools/python/maps_generator/generator/settings.py b/tools/python/maps_generator/generator/settings.py index b0adebe3cd..8df50f2a38 100644 --- a/tools/python/maps_generator/generator/settings.py +++ b/tools/python/maps_generator/generator/settings.py @@ -50,6 +50,8 @@ SUBWAY_URL = "" FOOD_URL = "" FOOD_TRANSLATIONS_URL = "" +STATS_TYPES_CONFIG = "" + PLANET = "planet" GEN_TOOL = "generator_tool" @@ -120,6 +122,9 @@ FOOD_URL = _get_opt(config, "External", "FOOD_URL", FOOD_URL) FOOD_TRANSLATIONS_URL = _get_opt(config, "External", "FOOD_TRANSLATIONS_URL", FOOD_TRANSLATIONS_URL) +STATS_TYPES_CONFIG = _get_opt_path(config, "Stats", "STATS_TYPES_CONFIG", + STATS_TYPES_CONFIG) + PLANET_O5M = os.path.join(MAIN_OUT_PATH, PLANET + ".o5m") PLANET_PBF = os.path.join(MAIN_OUT_PATH, PLANET + ".osm.pbf") PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom") diff --git a/tools/python/maps_generator/generator/statistics.py b/tools/python/maps_generator/generator/statistics.py new file mode 100644 index 0000000000..6051e97c51 --- /dev/null +++ b/tools/python/maps_generator/generator/statistics.py @@ -0,0 +1,116 @@ +import re +import os +import datetime +from collections import defaultdict + + +RE_STAT = re.compile(r"(?:\d+\. )?([\w:|-]+?)\|: " + r"size = \d+; " + r"count = (\d+); " + r"length = ([0-9.e+-]+) m; " + r"area = ([0-9.e+-]+) m²; " + r"names = (\d+)\s*") + +RE_TIME_DELTA = re.compile(r'^(?:(?P<days>-?\d+) (days?, )?)?' + r'((?:(?P<hours>-?\d+):)(?=\d+:\d+))?' + r'(?:(?P<minutes>-?\d+):)?' 
+ r'(?P<seconds>-?\d+)' + r'(?:\.(?P<microseconds>\d{1,6})\d{0,6})?$') + +RE_FINISH_STAGE = re.compile(r"(.*)Stage (\w+): finished in (.+)$") + + +def read_stat(f): + stats = [] + for line in f: + m = RE_STAT.match(line) + stats.append({ + "name": m.group(1).replace("|", "-"), + "cnt": int(m.group(2)), + "len": float(m.group(3)), + "area": float(m.group(4)), + "names": int(m.group(5)) + }) + return stats + + +def read_config(f): + config = [] + for line in f: + columns = [c.strip() for c in line.split(";", 2)] + columns[0] = re.compile(columns[0]) + columns[1] = columns[1].lower() + config.append(columns) + return config + + +def process_stat(config, stats): + result = {} + for param in config: + res = 0 + for typ in stats: + if param[0].match(typ["name"]): + if param[1] == "len": + res += typ["len"] + elif param[1] == "area": + res += typ["area"] + elif param[1] == "cnt_names": + res += typ["names"] + else: + res += typ["cnt"] + result[str(param[0]) + param[1]] = res + return result + + +def format_res(res, typ): + if typ == "len": + unit = "м" + elif typ == "area": + unit = "м²" + else: + unit = "шт." 
+ return res, unit + + +def make_stats(config_path, stats_path): + with open(config_path) as f: + config = read_config(f) + with open(stats_path) as f: + stats = process_stat(config, read_stat(f)) + lines = [] + for param in config: + k = str(param[0]) + param[1] + st = format_res(stats[k], param[1]) + lines.append({"type": param[2], "quantity": st[0], "unit": st[1]}) + return lines + + +def parse_time(time_str): + parts = RE_TIME_DELTA.match(time_str) + if not parts: + return + parts = parts.groupdict() + time_params = {} + for name, param in parts.items(): + if param: + time_params[name] = int(param) + return datetime.timedelta(**time_params) + + +def get_stages_info(log_path): + result = defaultdict(lambda: defaultdict(dict)) + for file in os.listdir(log_path): + path = os.path.join(log_path, file) + with open(path) as f: + for line in f: + m = RE_FINISH_STAGE.match(line) + if not m: + continue + stage_name = m.group(2) + dt = parse_time(m.group(3)) + if file.startswith("stage_"): + result["stages"][stage_name] = dt + else: + country = file.split(".")[0] + result["countries"][country][stage_name] = dt + return result diff --git a/tools/python/maps_generator/maps_generator.py b/tools/python/maps_generator/maps_generator.py index 4018a56863..78e15d7868 100644 --- a/tools/python/maps_generator/maps_generator.py +++ b/tools/python/maps_generator/maps_generator.py @@ -3,7 +3,10 @@ import os import shutil from functools import partial from multiprocessing.pool import ThreadPool +from collections import defaultdict import multiprocessing +import json +import datetime from descriptions.descriptions_downloader import (check_and_get_checker, download_from_wikipedia_tags, @@ -22,6 +25,7 @@ from .generator.env import (planet_lock_file, build_lock_file, from .generator.exceptions import (ContinueError, BadExitStatusError, wait_and_raise_if_fail) from .generator.gen_tool import run_gen_tool +from .generator.statistics import make_stats, get_stages_info from .utils.file import 
is_verified, download_file, make_tarfile logger = logging.getLogger("maps_generator") @@ -238,8 +242,9 @@ def stage_external_resources(env): for ttf_file in resources: shutil.copy2(ttf_file, env.intermediate_path) - shutil.copy2(os.path.join(env.user_resource_path, "WorldCoasts_obsolete.mwm"), - env.mwm_path) + shutil.copy2( + os.path.join(env.user_resource_path, "WorldCoasts_obsolete.mwm"), + env.mwm_path) for file in os.listdir(env.mwm_path): if file.startswith(WORLD_NAME) and file.endswith(".mwm"): @@ -259,6 +264,46 @@ def stage_localads(env): make_tarfile(f"{env.localads_path}.tar.gz", env.localads_path) +@stage +def stage_statistics(env): + result = defaultdict(lambda: defaultdict(dict)) + + @country_stage_log + def stage_mwm_statistics(env, country, **kwargs): + stats_tmp = os.path.join(env.draft_path, f"{country}.stat") + with open(stats_tmp, "w") as f: + maps_stages.run_gen_tool_with_recovery_country( + env, + env.gen_tool, + out=f, + err=env.get_subprocess_out(country), + data_path=env.mwm_path, + user_resource_path=env.user_resource_path, + type_statistics=True, + output=country, + **kwargs + ) + result["countries"][country]["types"] = \ + make_stats(settings.STATS_TYPES_CONFIG, stats_tmp) + + mwms = env.get_mwm_names() + countries = filter(lambda x: x not in WORLDS_NAMES, mwms) + with ThreadPool() as pool: + pool.map(partial(stage_mwm_statistics, env), countries) + stages_info = get_stages_info(env.log_path) + result["stages"] = stages_info["stages"] + for c in stages_info["countries"]: + result["countries"][c]["stages"] = stages_info["countries"][c] + + def default(o): + if isinstance(o, datetime.timedelta): + return str(o) + + with open(os.path.join(env.stats_path, "stats.json"), "w") as f: + json.dump(result, f, ensure_ascii=False, sort_keys=True, + indent=2, default=default) + + @stage def stage_cleanup(env): osm2ft_path = os.path.join(env.out_path, "osm2ft") @@ -283,7 +328,7 @@ STAGES = [s.__name__ for s in stage_download_and_convert_planet, 
stage_update_planet, stage_coastline, stage_preprocess, stage_features, stage_mwm, stage_descriptions, stage_countries_txt, stage_external_resources, - stage_localads, stage_cleanup)] + stage_localads, stage_statistics, stage_cleanup)] ALL_STAGES = STAGES + COUNTRIES_STAGES @@ -343,6 +388,7 @@ def generate_maps(env): stage_countries_txt(env) stage_external_resources(env) stage_localads(env) + stage_statistics(env) stage_cleanup(env) diff --git a/tools/python/maps_generator/var/etc/map_generator.ini.default b/tools/python/maps_generator/var/etc/map_generator.ini.default index 2fc56d6361..593781d5a0 100644 --- a/tools/python/maps_generator/var/etc/map_generator.ini.default +++ b/tools/python/maps_generator/var/etc/map_generator.ini.default @@ -32,5 +32,8 @@ OSM_TOOLS_PATH: ~/osmctools # POPULARITY_URL: # SUBWAY_URL: # FOOD_URL: -# FOOD_TRANSLATIONS_URL: +# FOOD_TRANSLATIONS_URL: + +[Stats] +STATS_TYPES_CONFIG: ${Developer:OMIM_PATH}/tools/python/maps_generator/var/etc/stats_types_config.txt diff --git a/tools/python/maps_generator/var/etc/stats_types_config.txt b/tools/python/maps_generator/var/etc/stats_types_config.txt new file mode 100644 index 0000000000..3026e9f86e --- /dev/null +++ b/tools/python/maps_generator/var/etc/stats_types_config.txt @@ -0,0 +1,59 @@ +barrier-(fence|gate);len;Заборы +building;cnt;Здания +(amenity|shop|historic)-.*;cnt;POI +(amenity|shop|historic)-.*;cnt_names;POI c именами +amenity-(cafe|restaurant|fast_food).*;cnt;Кафе и рестораны +amenity-(pub|bar);cnt;Бары и пабы +amenity-kindergarten;cnt;Детские сады +amenity-(school|university|college);cnt;Школы и университеты +amenity-parking.*;cnt;Автостоянки +amenity-parking.*;area;Автостоянки +amenity-pharmacy;cnt;Аптеки +amenity-place_of_worship.*;cnt;Храмы +amenity-(hospital|doctors);cnt;Больницы и поликлиники +amenity-toilets;cnt;Туалеты +amenity-(waste_disposal|recycling);cnt;Мусорные баки 
+highway-(motorway|trunk|primary|secondary|tertiary|residential|unclassified|service|track|living_street)(_link)?(-.*)?;len;Автодорожная сеть +highway-(footway|path|pedestrian|steps).*;len;Пешеходные дорожки +highway-.*-bridge;len;Мосты +highway-.*-tunnel;len;Туннели +highway-(footway|path|steps)-bridge;len;Пешеходные мосты +highway-(footway|path|steps)-tunnel;len;Пешеходные туннели +highway-steps.*;len;Лестницы +highway-speed_camera;cnt;Камеры контроля скорости +internet_access-wlan;cnt;Точки доступа Wi-Fi +leisure-(pitch|stadium|playing_fields|track|sports_centre).*;cnt;Спортплощадки и комплексы +leisure-playground;cnt;Детские площадки +man_made-lighthouse;cnt;Маяки +man_made-windmill;cnt;Ветряные мельницы +man_made-pipeline.*;len;Трубопроводы +natural-beach;cnt;Пляжи +natural-tree;cnt;Отдельностоящие деревья +natural-waterfall;cnt;Водопады +piste:type.*;len;Лыжни +place-(city.*|town|village|hamlet);cnt;Населённые пункты +place-island;cnt;Острова +power-(minor_)?line.*;len;Линии электропередачи +power-(pole|tower);cnt;Опоры ЛЭП +railway-(rail|monorail|light_rail|narrow_gauge|preserved|siding|spur|yard|disused|incline).*;len;Железные дороги +railway-.*-(bridge|tunnel);len;Железнодорожные мосты и туннели +railway-(razed|abandoned).*;len;Снятые ветки ж/д +railway-narrow_gauge.*;len;Узкоколейные ж/д +railway-tram(-.*)?;len;Трамвайные пути +railway-(halt|station);cnt;Станции железной дороги +railway-subway.*;len;Линии метро +highway-bus_stop|railway-tram_stop;cnt;Остановки наземного транспорта +shop-bakery;cnt;Пекарни +shop-books;cnt;Книжные магазины +shop-clothes;cnt;Магазины одежды +shop-shoes;cnt;Магазины обуви +shop-(convenience|supermarket);cnt;Продуктовые магазины +shop-florist;cnt;Цветочные салоны +shop-(hairdresser|beauty);cnt;Парикмахерские и салоны красоты +tourism-(guest_house|hos?tel|motel);cnt;Гостиницы и хостелы +tourism-(attraction|viewpoint);cnt;Достопримечательности и точки обзора +waterway-(canal|river|stream)(-.*)?;len;Реки, каналы и ручьи 
+landuse-cemetery.*;area;Кладбища +leisure-park.*;area;Парки +natural-beach;area;Пляжи +sponsored-booking;cnt;Booking отели