From 20ef14ae010669c909dd7b4d2408d1924ed57479 Mon Sep 17 00:00:00 2001 From: Maksim Andrianov Date: Tue, 28 Jan 2020 04:35:13 +0300 Subject: [PATCH] [generator][python] Refactored maps_generator. --- tools/python/maps_generator/__init__.py | 12 + tools/python/maps_generator/__main__.py | 226 +++--- .../maps_generator/generator/coastline.py | 82 ++- .../maps_generator/generator/decorators.py | 93 --- tools/python/maps_generator/generator/env.py | 641 +++++++++++------- .../maps_generator/generator/gen_tool.py | 41 +- .../maps_generator/generator/generation.py | 127 ++++ .../maps_generator/generator/osmtools.py | 88 ++- .../maps_generator/generator/settings.py | 279 +++++--- .../python/maps_generator/generator/stages.py | 412 +++++------ .../generator/stages_declaration.py | 421 ++++++++++++ .../maps_generator/generator/statistics.py | 47 +- .../python/maps_generator/generator/status.py | 17 +- .../python/maps_generator/generator/steps.py | 286 ++++++++ tools/python/maps_generator/maps_generator.py | 455 ++----------- tools/python/maps_generator/utils/algo.py | 14 + .../maps_generator/utils/collections.py | 4 - tools/python/maps_generator/utils/file.py | 25 +- tools/python/maps_generator/utils/log.py | 7 +- 19 files changed, 2001 insertions(+), 1276 deletions(-) delete mode 100644 tools/python/maps_generator/generator/decorators.py create mode 100644 tools/python/maps_generator/generator/generation.py create mode 100644 tools/python/maps_generator/generator/stages_declaration.py create mode 100644 tools/python/maps_generator/generator/steps.py create mode 100644 tools/python/maps_generator/utils/algo.py delete mode 100644 tools/python/maps_generator/utils/collections.py diff --git a/tools/python/maps_generator/__init__.py b/tools/python/maps_generator/__init__.py index e69de29bb2..103e180770 100644 --- a/tools/python/maps_generator/__init__.py +++ b/tools/python/maps_generator/__init__.py @@ -0,0 +1,12 @@ +import os + +from .generator import settings + +CONFIG_PATH = os.path.join( + os.path.dirname(os.path.join(os.path.realpath(__file__))), + "var", + "etc", + "map_generator.ini", +) + +settings.init(CONFIG_PATH) diff --git a/tools/python/maps_generator/__main__.py b/tools/python/maps_generator/__main__.py index fbc11ee6f7..4bc738e74e 100644 --- a/tools/python/maps_generator/__main__.py +++ b/tools/python/maps_generator/__main__.py @@ -1,22 +1,25 @@ import logging import os -from argparse import ArgumentParser, RawDescriptionHelpFormatter +from argparse import ArgumentParser +from argparse import RawDescriptionHelpFormatter from .generator import settings -from .generator.env import (Env, find_last_build_dir, WORLDS_NAMES, - WORLD_NAME, WORLD_COASTS_NAME) -from .generator.exceptions import ContinueError, SkipError, ValidationError -from .maps_generator import (generate_maps, generate_coasts, reset_to_stage, - ALL_STAGES, stage_download_production_external, - stage_descriptions, stage_ugc, stage_popularity, - stage_localads, stage_statistics, stage_srtm, - stages_as_string, stage_as_string, stage_coastline, - stage_update_planet) -from .utils.collections import unique +from .generator import stages +from .generator import stages_declaration as sd +from .generator.env import Env +from .generator.env import PathProvider +from .generator.env import WORLDS_NAMES +from .generator.env import find_last_build_dir +from .generator.env import get_all_countries_list +from .generator.exceptions import ContinueError +from .generator.exceptions import SkipError +from .generator.exceptions import ValidationError +from .maps_generator import generate_coasts +from .maps_generator import generate_maps +from .utils.algo import unique logger = logging.getLogger("maps_generator") - examples = """Examples: 1) Non-standard planet with coastlines If you want to generate maps for Japan you must complete the following steps: @@ -68,11 +71,12 @@ examples = """Examples: def parse_options(): - parser = ArgumentParser(description="Tool for generation maps for maps.me " - "application.", - epilog=examples, - formatter_class=RawDescriptionHelpFormatter, - parents=[settings.parser]) + parser = ArgumentParser( + description="Tool for generation maps for maps.me " "application.", + epilog=examples, + formatter_class=RawDescriptionHelpFormatter, + parents=[settings.parser], + ) parser.add_argument( "-c", "--continue", @@ -80,51 +84,61 @@ def parse_options(): nargs="?", type=str, help="Continue the last build or specified in CONTINUE from the " - "last stopped stage.") + "last stopped stage.", + ) parser.add_argument( "--countries", type=str, default="", help="List of regions, separated by a comma or a semicolon, or path to " - "file with regions, separated by a line break, for which maps" - " will be built. The names of the regions can be seen " - "in omim/data/borders. It is necessary to set names without " - "any extension.") + "file with regions, separated by a line break, for which maps" + " will be built. The names of the regions can be seen " + "in omim/data/borders. It is necessary to set names without " + "any extension.", + ) parser.add_argument( "--without_countries", type=str, default="", - help="List of regions to exclude them from generation. Syntax is the same as for --countries.") + help="List of regions to exclude them from generation. Syntax is the same as for --countries.", + ) parser.add_argument( "--skip", type=str, default="", help=f"List of stages, separated by a comma or a semicolon, " f"for which building will be skipped. Available skip stages: " - f"{', '.join([s.replace('stage_', '') for s in ALL_STAGES])}.") + f"{', '.join([s.replace('stage_', '') for s in stages.stages.get_visible_stages_names()])}.", + ) parser.add_argument( "--from_stage", type=str, default="", help=f"Stage from which maps will be rebuild. Available stages: " - f"{', '.join([s.replace('stage_', '') for s in ALL_STAGES])}.") + f"{', '.join([s.replace('stage_', '') for s in stages.stages.get_visible_stages_names()])}.", + ) parser.add_argument( "--coasts", default=False, action="store_true", - help="Build only WorldCoasts.raw and WorldCoasts.rawgeom files") + help="Build only WorldCoasts.raw and WorldCoasts.rawgeom files", + ) parser.add_argument( "--production", default=False, action="store_true", help="Build production maps. In another case, 'osm only maps' are built" - " - maps without additional data and advertising.") + " - maps without additional data and advertising.", + ) parser.add_argument( "--order", type=str, - default=os.path.join(os.path.dirname(os.path.abspath(__file__)), - "var/etc/file_generation_order.txt"), - help="Mwm generation order.") + default=os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "var/etc/file_generation_order.txt", + ), + help="Mwm generation order.", + ) return vars(parser.parse_args()) @@ -133,15 +147,25 @@ def main(): root.addHandler(logging.NullHandler()) options = parse_options() + # Processing of 'continue' option. + # If 'continue' is set maps generation is continued from the last build + # that is found automatically. build_name = None if options["continue"] is None or options["continue"]: d = find_last_build_dir(options["continue"]) if d is None: - raise ContinueError("The build cannot continue: the last build " - "directory was not found.") + raise ContinueError( + "The build cannot continue: the last build " "directory was not found." + ) build_name = d - options["build_name"] = build_name + # Processing of 'countries' option. + # There is processing 'countries' and 'without_countries' options. + # Option 'without_countries' has more priority than 'countries'. + # Options 'countries' and 'without_countries' can include '*'. + # For example: '--countries="UK*, Japan"*' means + # '--countries="UK_England_East Midlands, UK_England_East of England_Essex, ..., + # Japan_Chubu Region_Aichi_Nagoya, Japan_Chubu Region_Aichi_Toyohashi, ..."'. countries_line = "" without_countries_line = "" if "COUNTRIES" in os.environ: @@ -152,14 +176,9 @@ def main(): countries_line = "*" if options["without_countries"]: - without_countries_line = options["without_countries"] + without_countries_line = options["without_countries"] - borders_path = os.path.join(settings.USER_RESOURCE_PATH, "borders") - all_countries = [ - f.replace(".poly", "") for f in os.listdir(borders_path) - if os.path.isfile(os.path.join(borders_path, f)) - ] - all_countries += list(WORLDS_NAMES) + all_countries = get_all_countries_list(PathProvider.borders_path()) def end_star_compare(prefix, full): return full.startswith(prefix) @@ -168,48 +187,45 @@ def main(): return a == b def get_countries_set_from_line(line): - countries = [] - used_countries = set() - countries_list = [] - if os.path.isfile(line): - with open(line) as f: - countries_list = [x.strip() for x in f] - elif line: - countries_list = [ - x.strip() for x in line.replace(";", ",").split(",") - ] + countries = [] + used_countries = set() + countries_list = [] + if os.path.isfile(line): + with open(line) as f: + countries_list = [x.strip() for x in f] + elif line: + countries_list = [x.strip() for x in line.replace(";", ",").split(",")] - for country_item in countries_list: - cmp = compare - _raw_country = country_item[:] - if _raw_country and _raw_country[-1] == "*": - _raw_country = _raw_country.replace("*", "") - cmp = end_star_compare + for country_item in countries_list: + cmp = compare + _raw_country = country_item[:] + if _raw_country and _raw_country[-1] == "*": + _raw_country = _raw_country.replace("*", "") + cmp = end_star_compare - for country in all_countries: - if cmp(_raw_country, country): - used_countries.add(country_item) - countries.append(country) + for country in all_countries: + if cmp(_raw_country, country): + used_countries.add(country_item) + countries.append(country) - countries = unique(countries) - diff = set(countries_list) - used_countries - if diff: - raise ValidationError(f"Bad input countries {', '.join(diff)}") - return set(countries) + countries = unique(countries) + diff = set(countries_list) - used_countries + if diff: + raise ValidationError(f"Bad input countries {', '.join(diff)}") + return set(countries) countries = get_countries_set_from_line(countries_line) without_countries = get_countries_set_from_line(without_countries_line) countries -= without_countries countries = list(countries) - options["countries"] = countries if countries else all_countries - - options["build_all_countries"] = False - if len(countries) == len(all_countries): - options["build_all_countries"] = True + if not countries: + countries = all_countries + # Processing of 'order' option. + # It defines an order of countries generation using a file from 'order' path. if options["order"]: ordered_countries = [] - countries = set(options["countries"]) + countries = set(countries) with open(options["order"]) as file: for c in file: if c.strip().startswith("#"): @@ -219,48 +235,46 @@ def main(): ordered_countries.append(c) countries.remove(c) if countries: - raise ValueError(f"{options['order']} does not have an order " - f"for {countries}.") - options["countries"] = ordered_countries + raise ValueError( + f"{options['order']} does not have an order " f"for {countries}." + ) + countries = ordered_countries - options_skip = [] + # Processing of 'skip' option. + skipped_stages = set() if options["skip"]: - options_skip = [ - f"stage_{s.strip()}" - for s in options["skip"].replace(";", ",").split(",") - ] - options["skip"] = options_skip - if not options["production"]: - options["skip"] += stages_as_string( - stage_download_production_external, - stage_ugc, - stage_popularity, - stage_srtm, - stage_descriptions, - stage_localads, - stage_statistics - ) - if not all(s in ALL_STAGES for s in options["skip"]): - raise SkipError(f"Stages {set(options['skip']) - set(ALL_STAGES)} " - f"not found.") + for s in options["skip"].replace(";", ",").split(","): + stage = f"stage_{s.strip()}" + if not stages.stages.is_valid_stage_name(stage): + raise SkipError(f"Stage {stage} not found.") + skipped_stages.add(stage) if settings.PLANET_URL != settings.DEFAULT_PLANET_URL: - options["skip"] += stages_as_string(stage_update_planet) + skipped_stages.add(stages.get_stage_name(sd.StageUpdatePlanet)) - if stage_as_string(stage_coastline) in options["skip"]: - worlds_names = [x for x in options["countries"] if x in WORLDS_NAMES] - if worlds_names: - raise SkipError(f"You can not skip {stages_as_string(stage_coastline)}" - f" if you want to generate {WORLDS_NAMES}." - f" You can exclude them with --without_countries option.") + stage_coastline_name = stages.get_stage_name(sd.StageCoastline) + if stage_coastline_name in skipped_stages: + if any(x in WORLDS_NAMES for x in options["countries"]): + raise SkipError( + f"You can not skip {stage_coastline_name}" + f" if you want to generate {WORLDS_NAMES}." + f" You can exclude them with --without_countries option." + ) - env = Env(options) - if env.from_stage: - reset_to_stage(env.from_stage, env) - if env.coasts: - generate_coasts(env) + # Make env and run maps generation. + env = Env( + countries=countries, + production=options["production"], + build_name=build_name, + skipped_stages=skipped_stages, + ) + from_stage = None + if options["from_stage"]: + from_stage = f"stage_{options['from_stage']}" + if options["coasts"]: + generate_coasts(env, from_stage) else: - generate_maps(env) + generate_maps(env, from_stage) env.finish() diff --git a/tools/python/maps_generator/generator/coastline.py b/tools/python/maps_generator/generator/coastline.py index eb5f8f6b1d..b7de5006df 100644 --- a/tools/python/maps_generator/generator/coastline.py +++ b/tools/python/maps_generator/generator/coastline.py @@ -1,41 +1,65 @@ +""" +This file contains api for osmfilter and generator_tool to generate coastline. +""" import os import subprocess from . import settings +from .env import Env from .gen_tool import run_gen_tool from .osmtools import osmfilter -def filter_coastline(name_executable, in_file, out_file, - output=subprocess.DEVNULL, error=subprocess.DEVNULL): - osmfilter(name_executable, in_file, out_file, output=output, - error=error, keep="", keep_ways="natural=coastline", - keep_nodes="capital=yes place=town =city") +def filter_coastline( + name_executable, + in_file, + out_file, + output=subprocess.DEVNULL, + error=subprocess.DEVNULL, +): + osmfilter( + name_executable, + in_file, + out_file, + output=output, + error=error, + keep="", + keep_ways="natural=coastline", + keep_nodes="capital=yes place=town =city", + ) -def make_coastline(env): - coastline_o5m = os.path.join(env.coastline_path, "coastline.o5m") - filter_coastline(env[settings.OSM_TOOL_FILTER], settings.PLANET_O5M, - coastline_o5m, output=env.get_subprocess_out(), - error=env.get_subprocess_out()) +def make_coastline(env: Env): + coastline_o5m = os.path.join(env.paths.coastline_path, "coastline.o5m") + filter_coastline( + env[settings.OSM_TOOL_FILTER], + settings.PLANET_O5M, + coastline_o5m, + output=env.get_subprocess_out(), + error=env.get_subprocess_out(), + ) - run_gen_tool(env.gen_tool, - out=env.get_subprocess_out(), - err=env.get_subprocess_out(), - intermediate_data_path=env.coastline_path, - osm_file_type="o5m", - osm_file_name=coastline_o5m, - node_storage=env.node_storage, - user_resource_path=env.user_resource_path, - preprocess=True) + run_gen_tool( + env.gen_tool, + out=env.get_subprocess_out(), + err=env.get_subprocess_out(), + intermediate_data_path=env.paths.coastline_path, + osm_file_type="o5m", + osm_file_name=coastline_o5m, + node_storage=env.node_storage, + user_resource_path=env.paths.user_resource_path, + preprocess=True, + ) - run_gen_tool(env.gen_tool, - out=env.get_subprocess_out(), - err=env.get_subprocess_out(), - intermediate_data_path=env.coastline_path, - osm_file_type="o5m", - osm_file_name=coastline_o5m, - node_storage=env.node_storage, - user_resource_path=env.user_resource_path, - make_coasts=True, - fail_on_coasts=True) + run_gen_tool( + env.gen_tool, + out=env.get_subprocess_out(), + err=env.get_subprocess_out(), + intermediate_data_path=env.paths.coastline_path, + osm_file_type="o5m", + osm_file_name=coastline_o5m, + node_storage=env.node_storage, + user_resource_path=env.paths.user_resource_path, + make_coasts=True, + fail_on_coasts=True, + ) diff --git a/tools/python/maps_generator/generator/decorators.py b/tools/python/maps_generator/generator/decorators.py deleted file mode 100644 index 2bf4aa32b6..0000000000 --- a/tools/python/maps_generator/generator/decorators.py +++ /dev/null @@ -1,93 +0,0 @@ -import datetime -import logging -import os -import time -from functools import wraps - -from .env import Env -from .status import Status -from ..utils.log import create_file_logger, DummyObject - -logger = logging.getLogger("maps_generator") - - -def stage(func): - @wraps(func) - def wrap(env: Env, *args, **kwargs): - func_name = func.__name__ - stage_formatted = " ".join(func_name.split("_")).capitalize() - logfile = os.path.join(env.log_path, f"{func_name}.log") - log_handler = logging.FileHandler(logfile) - logger.addHandler(log_handler) - if not env.is_accepted_stage(func): - logger.info(f"{stage_formatted} was not accepted.") - logger.removeHandler(log_handler) - return - main_status = env.main_status - main_status.init(env.main_status_path, func_name) - if main_status.need_skip(): - logger.warning(f"{stage_formatted} was skipped.") - logger.removeHandler(log_handler) - return - main_status.update_status() - logger.info(f"{stage_formatted}: start ...") - t = time.time() - env.set_subprocess_out(log_handler.stream) - func(env, *args, **kwargs) - d = time.time() - t - logger.info(f"{stage_formatted}: finished in " - f"{str(datetime.timedelta(seconds=d))}") - logger.removeHandler(log_handler) - - return wrap - - -def country_stage_status(func): - @wraps(func) - def wrap(env: Env, country: str, *args, **kwargs): - func_name = func.__name__ - _logger = DummyObject() - countries_meta = env.countries_meta - if "logger" in countries_meta[country]: - _logger, _ = countries_meta[country]["logger"] - stage_formatted = " ".join(func_name.split("_")).capitalize() - if not env.is_accepted_stage(func): - _logger.info(f"{stage_formatted} was not accepted.") - return - if "status" not in countries_meta[country]: - countries_meta[country]["status"] = Status() - status = countries_meta[country]["status"] - status_file = os.path.join(env.status_path, f"{country}.status") - status.init(status_file, func_name) - if status.need_skip(): - _logger.warning(f"{stage_formatted} was skipped.") - return - status.update_status() - func(env, country, *args, **kwargs) - - return wrap - - -def country_stage_log(func): - @wraps(func) - def wrap(env: Env, country: str, *args, **kwargs): - func_name = func.__name__ - log_file = os.path.join(env.log_path, f"{country}.log") - countries_meta = env.countries_meta - if "logger" not in countries_meta[country]: - countries_meta[country]["logger"] = create_file_logger(log_file) - _logger, log_handler = countries_meta[country]["logger"] - stage_formatted = " ".join(func_name.split("_")).capitalize() - _logger.info(f"{stage_formatted}: start ...") - t = time.time() - env.set_subprocess_out(log_handler.stream, country) - func(env, country, *args, logger=_logger, **kwargs) - d = time.time() - t - _logger.info(f"{stage_formatted}: finished in " - f"{str(datetime.timedelta(seconds=d))}") - - return wrap - - -def country_stage(func): - return country_stage_log(country_stage_status(func)) diff --git a/tools/python/maps_generator/generator/env.py b/tools/python/maps_generator/generator/env.py index 1241b87ccb..2be799c4e6 100644 --- a/tools/python/maps_generator/generator/env.py +++ b/tools/python/maps_generator/generator/env.py @@ -5,11 +5,21 @@ import logging.config import os import shutil import sys +from functools import wraps +from typing import Any +from typing import AnyStr +from typing import Callable +from typing import Dict +from typing import List +from typing import Optional +from typing import Set from . import settings from .osmtools import build_osmtools from .status import Status -from ..utils.file import find_executable, is_executable, symlink_force +from ..utils.file import find_executable +from ..utils.file import is_executable +from ..utils.file import symlink_force logger = logging.getLogger("maps_generator") @@ -19,28 +29,72 @@ WORLD_COASTS_NAME = "WorldCoasts" WORLDS_NAMES = {WORLD_NAME, WORLD_COASTS_NAME} -def _write_version(out_path, version): - with open(os.path.join(out_path, settings.VERSION_FILE_NAME), "w") as f: - f.write(str(version)) +def get_all_countries_list(borders_path: AnyStr) -> List[AnyStr]: + """Returns all countries including World and WorldCoasts.""" + return [ + f.replace(".poly", "") + for f in os.listdir(borders_path) + if os.path.isfile(os.path.join(borders_path, f)) + ] + list(WORLDS_NAMES) -def _read_version(version_path): - with open(version_path) as f: - line = f.readline().strip() - try: - return int(line) - except ValueError: - logger.exception(f"Cast '{line}' to int error.") - return 0 +def create_if_not_exist_path(path: AnyStr) -> bool: + """Creates directory if it doesn't exist.""" + try: + os.mkdir(path) + logger.info(f"Create {path} ...") + return True + except FileExistsError: + return False -def find_last_build_dir(hint): - if hint: +def create_if_not_exist(func: Callable[..., AnyStr]) -> Callable[..., AnyStr]: + """ + It's a decorator, that wraps func in create_if_not_exist_path, + that returns a path. + """ + + @wraps(func) + def wrapper(*args, **kwargs): + path = func(*args, **kwargs) + create_if_not_exist_path(path) + return path + + return wrapper + + +class Version: + """It's used for writing and reading a generation version.""" + + @staticmethod + def write(out_path: AnyStr, version: AnyStr): + with open(os.path.join(out_path, settings.VERSION_FILE_NAME), "w") as f: + f.write(str(version)) + + @staticmethod + def read(version_path: AnyStr) -> int: + with open(version_path) as f: + line = f.readline().strip() + try: + return int(line) + except ValueError: + logger.exception(f"Cast '{line}' to int error.") + return 0 + + +def find_last_build_dir(hint: Optional[AnyStr] = None) -> Optional[AnyStr]: + """ + It tries to find a last generation directory. If it's found function + returns path of last generation directory. Otherwise returns None. + """ + if hint is not None: p = os.path.join(settings.MAIN_OUT_PATH, hint) return hint if os.path.exists(p) else None try: - paths = [os.path.join(settings.MAIN_OUT_PATH, f) - for f in os.listdir(settings.MAIN_OUT_PATH)] + paths = [ + os.path.join(settings.MAIN_OUT_PATH, f) + for f in os.listdir(settings.MAIN_OUT_PATH) + ] except FileNotFoundError: logger.exception(f"{settings.MAIN_OUT_PATH} not found.") return None @@ -50,327 +104,396 @@ def find_last_build_dir(hint): if not os.path.isfile(version_path): versions.append(0) else: - versions.append(_read_version(version_path)) + versions.append(Version.read(version_path)) pairs = sorted(zip(paths, versions), key=lambda p: p[1], reverse=True) - return (None if not pairs or pairs[0][1] == 0 - else pairs[0][0].split(os.sep)[-1]) + return None if not pairs or pairs[0][1] == 0 else pairs[0][0].split(os.sep)[-1] -def planet_lock_file(): - return f"{settings.PLANET_O5M}.lock" +class PathProvider: + """ + PathProvider is used for building paths for a maps generation. + """ + def __init__(self, build_path: AnyStr, mwm_version: AnyStr): + self.build_path = build_path + self.mwm_version = mwm_version -def build_lock_file(out_path): - return f"{os.path.join(out_path, 'lock')}.lock" + create_if_not_exist_path(self.build_path) + + @property + @create_if_not_exist + def intermediate_data_path(self) -> AnyStr: + """ + intermediate_data_path contains intermediate files, + for example downloaded external files, that are needed for genration, + *.mwm.tmp files, etc. + """ + return os.path.join(self.build_path, "intermediate_data") + + @property + @create_if_not_exist + def data_path(self) -> AnyStr: + """It's a synonum for intermediate_data_path.""" + return self.intermediate_data_path + + @property + @create_if_not_exist + def intermediate_tmp_path(self) -> AnyStr: + """intermediate_tmp_path contains *.mwm.tmp files.""" + return os.path.join(self.intermediate_data_path, "tmp") + + @property + @create_if_not_exist + def mwm_path(self) -> AnyStr: + """mwm_path contains *.mwm files.""" + return os.path.join(self.build_path, self.mwm_version) + + @property + @create_if_not_exist + def log_path(self) -> AnyStr: + """mwm_path log files.""" + return os.path.join(self.build_path, "logs") + + @property + @create_if_not_exist + def generation_borders_path(self) -> AnyStr: + """ + generation_borders_path contains *.poly files, that define + which .mwm files are generated. + """ + return os.path.join(self.intermediate_data_path, "borders") + + @property + @create_if_not_exist + def draft_path(self) -> AnyStr: + """draft_path is used for saving temporary intermediate files.""" + return os.path.join(self.build_path, "draft") + + @property + @create_if_not_exist + def osm2ft_path(self) -> AnyStr: + """osm2ft_path contains osmId<->ftId mappings.""" + return os.path.join(self.build_path, "osm2ft") + + @property + @create_if_not_exist + def coastline_path(self) -> AnyStr: + """coastline_path is used for a coastline generation.""" + return os.path.join(self.intermediate_data_path, "coasts") + + @property + @create_if_not_exist + def coastline_tmp_path(self) -> AnyStr: + """coastline_tmp_path is used for a coastline generation.""" + return os.path.join(self.coastline_path, "tmp") + + @property + @create_if_not_exist + def status_path(self) -> AnyStr: + """status_path contains status files.""" + return os.path.join(self.build_path, "status") + + @property + @create_if_not_exist + def descriptions_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "descriptions") + + @property + @create_if_not_exist + def stats_path(self) -> AnyStr: + return os.path.join(self.build_path, "stats") + + @property + @create_if_not_exist + def transit_path(self) -> AnyStr: + return self.intermediate_data_path + + @property + def main_status_path(self) -> AnyStr: + return os.path.join(self.status_path, "stages.status") + + @property + def packed_polygons_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "packed_polygons.bin") + + @property + def localads_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, f"localads_{self.mwm_version}") + + @property + def types_path(self) -> AnyStr: + return os.path.join(self.user_resource_path, "types.txt") + + @property + def external_resources_path(self) -> AnyStr: + return os.path.join(self.mwm_path, "external_resources.txt") + + @property + def id_to_wikidata_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "id_to_wikidata.csv") + + @property + def wiki_url_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "wiki_urls.txt") + + @property + def ugc_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "ugc_db.sqlite3") + + @property + def hotels_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "hotels.csv") + + @property + def promo_catalog_cities_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "promo_catalog_cities.json") + + @property + def promo_catalog_countries_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "promo_catalog_countries.json") + + @property + def popularity_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "popular_places.csv") + + @property + def subway_path(self) -> AnyStr: + return os.path.join( + self.intermediate_data_path, "mapsme_osm_subways.transit.json" + ) + + @property + def food_paths(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "ids_food.json") + + @property + def food_translations_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "translations_food.json") + + @property + def uk_postcodes_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "uk_postcodes") + + @property + def us_postcodes_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "us_postcodes") + + @property + def cities_boundaries_path(self) -> AnyStr: + return os.path.join(self.intermediate_data_path, "cities_boundaries.bin") + + @property + def hierarchy_path(self) -> AnyStr: + return os.path.join(self.user_resource_path, "hierarchy.txt") + + @property + def old_to_new_path(self) -> AnyStr: + return os.path.join(self.user_resource_path, "old_vs_new.csv") + + @property + def borders_to_osm_path(self) -> AnyStr: + return os.path.join(self.user_resource_path, "borders_vs_osm.csv") + + @property + def countries_synonyms_path(self) -> AnyStr: + return os.path.join(self.user_resource_path, "countries_synonyms.csv") + + @property + def counties_txt_path(self) -> AnyStr: + return os.path.join(self.mwm_path, "countries.txt") + + @property + def user_resource_path(self) -> AnyStr: + return settings.USER_RESOURCE_PATH + + @staticmethod + def srtm_path() -> AnyStr: + return settings.SRTM_PATH + + @staticmethod + def borders_path() -> AnyStr: + return os.path.join(settings.USER_RESOURCE_PATH, "borders") + + @staticmethod + @create_if_not_exist + def tmp_dir(): + return settings.TMPDIR class Env: - def __init__(self, options): - Env._logging_setup() + """ + Env provides a generation environment. It sets up instruments and paths, + that are used for a maps generation. It stores state of the maps generation. + """ + + def __init__( + self, + countries: Optional[List[AnyStr]] = None, + production: bool = False, + build_name: Optional[AnyStr] = None, + skipped_stages: Optional[Set[AnyStr]] = None, + ): + self.setup_logging() + logger.info("Start setup ...") - for k, v in Env._osm_tools_setup().items(): - setattr(self, k, v) - for k, v in options.items(): + os.environ["TMPDIR"] = PathProvider.tmp_dir() + for k, v in self.setup_osm_tools().items(): setattr(self, k, v) - self.gen_tool = Env._generator_tool_setup() + self.gen_tool = self.setup_generator_tool() - setup_options = Env._out_path_setup(self.build_name) - self.out_path, _, self.mwm_version, self.planet_version = setup_options - logger.info(f"Out path is {self.out_path}.") - - self.intermediate_path = Env._intermediate_path_setup(self.out_path) - self.data_path = self.intermediate_path - self.intermediate_tmp_path = os.path.join(self.intermediate_path, "tmp") - self._create_if_not_exist(self.intermediate_tmp_path) - Env._tmp_dir_setup() - - self.mwm_path = os.path.join(self.out_path, str(self.mwm_version)) - self._create_if_not_exist(self.mwm_path) - - self.log_path = os.path.join(self.out_path, "logs") - self._create_if_not_exist(self.log_path) - - self.temp_borders_path = self._prepare_borders() - - self.draft_path = os.path.join(self.out_path, "draft") - self._create_if_not_exist(self.draft_path) - symlink_force(self.temp_borders_path, - os.path.join(self.draft_path, "borders")) - - self.osm2ft_path = os.path.join(self.out_path, "osm2ft") - self._create_if_not_exist(self.osm2ft_path) - for x in os.listdir(self.osm2ft_path): - p = os.path.join(self.osm2ft_path, x) - if os.path.isfile(p) and x.endswith(".mwm.osm2ft"): - shutil.move(p, os.path.join(self.mwm_path, x)) + self.production = production + self.countries = countries + self.skipped_stages = set() if skipped_stages is None else skipped_stages + if self.countries is None: + self.countries = get_all_countries_list(PathProvider.borders_path()) self.node_storage = settings.NODE_STORAGE - self.user_resource_path = settings.USER_RESOURCE_PATH - self.coastline_path = os.path.join(self.intermediate_path, "coasts") - self._create_if_not_exist(self.coastline_path) + version_format = "%Y_%m_%d__%H_%M_%S" + dt = None + if build_name is None: + dt = datetime.datetime.now() + build_name = dt.strftime(version_format) + else: + dt = datetime.datetime.strptime(build_name, version_format) - self.status_path = os.path.join(self.out_path, "status") - self._create_if_not_exist(self.status_path) - self.countries_meta = collections.defaultdict(dict) + self.mwm_version = dt.strftime("%y%m%d") + self.planet_version = dt.strftime("%s") + self.build_path = os.path.join(settings.MAIN_OUT_PATH, build_name) + self.build_name = build_name + + logger.info(f"Build path is {self.build_path}.") + + self.paths = PathProvider(self.build_path, self.mwm_version) + + Version.write(self.build_path, self.planet_version) + self.setup_borders() + self.setup_osm2ft() - self.main_status_path = os.path.join(self.status_path, "stages.status") self.main_status = Status() - - self.coastline_tmp_path = os.path.join(self.coastline_path, "tmp") - self._create_if_not_exist(self.coastline_tmp_path) - - self.srtm_path = settings.SRTM_PATH - - self._subprocess_out = None - self._subprocess_countries_out = {} - - _write_version(self.out_path, self.planet_version) - - self._skipped_stages = set(self.skip) + # self.countries_meta stores log files and statuses for each country. + self.countries_meta = collections.defaultdict(dict) + self.subprocess_out = None + self.subprocess_countries_out = {} printed_countries = ", ".join(self.countries) if len(self.countries) > 50: - printed_countries = (f"{', '.join(self.countries[:25])}, ..., " - f"{', '.join(self.countries[-25:])}") - logger.info(f"The following {len(self.countries)} maps will build: " - f"{printed_countries}.") + printed_countries = ( + f"{', '.join(self.countries[:25])}, ..., " + f"{', '.join(self.countries[-25:])}" + ) + logger.info( + f"The following {len(self.countries)} maps will build: " + f"{printed_countries}." + ) logger.info("Finish setup") - def get_mwm_names(self): + def __getitem__(self, item): + return self.__dict__[item] + + def get_tmp_mwm_names(self) -> List[AnyStr]: tmp_ext = ".mwm.tmp" existing_names = set() - for f in os.listdir(self.intermediate_tmp_path): - path = os.path.join(self.intermediate_tmp_path, f) + for f in os.listdir(self.paths.intermediate_tmp_path): + path = os.path.join(self.paths.intermediate_tmp_path, f) if f.endswith(tmp_ext) and os.path.isfile(path): name = f.replace(tmp_ext, "") if name in self.countries: existing_names.add(name) return [c for c in self.countries if c in existing_names] - def is_accepted_stage(self, stage): - return stage.__name__ not in self._skipped_stages + def add_skipped_stage_name(self, stage_name: AnyStr): + self.skipped_stages.add(stage_name) - @property - def descriptions_path(self): - path = os.path.join(self.intermediate_path, "descriptions") - self._create_if_not_exist(path) - return path - - @property - def packed_polygons_path(self): - return os.path.join(self.intermediate_path, "packed_polygons.bin") - - @property - def localads_path(self): - path = os.path.join(self.out_path, f"localads_{self.mwm_version}") - self._create_if_not_exist(path) - return path - - @property - def stats_path(self): - path = os.path.join(self.out_path, "stats") - self._create_if_not_exist(path) - return path - - @property - def types_path(self): - return os.path.join(self.user_resource_path, "types.txt") - - @property - def external_resources_path(self): - return os.path.join(self.mwm_path, "external_resources.txt") - - @property - def id_to_wikidata_path(self): - return os.path.join(self.intermediate_path, "id_to_wikidata.csv") - - @property - def wiki_url_path(self): - return os.path.join(self.intermediate_path, "wiki_urls.txt") - - @property - def ugc_path(self): - return os.path.join(self.intermediate_path, "ugc_db.sqlite3") - - @property - def hotels_path(self): - return os.path.join(self.intermediate_path, "hotels.csv") - - @property - def promo_catalog_cities_path(self): - return os.path.join(self.intermediate_path, "promo_catalog_cities.json") - - @property - def promo_catalog_countries_path(self): - return os.path.join(self.intermediate_path, - "promo_catalog_countries.json") - - @property - def popularity_path(self): - return os.path.join(self.intermediate_path, "popular_places.csv") - - @property - def subway_path(self): - return os.path.join(self.intermediate_path, - "mapsme_osm_subways.transit.json") - - @property - def food_paths(self): - return os.path.join(self.intermediate_path, "ids_food.json") - - @property - def food_translations_path(self): - return os.path.join(self.intermediate_path, "translations_food.json") - - @property - def uk_postcodes_path(self): - return os.path.join(self.intermediate_path, "uk_postcodes") - - @property - def us_postcodes_path(self): - return os.path.join(self.intermediate_path, "us_postcodes") - - @property - def cities_boundaries_path(self): - return os.path.join(self.intermediate_path, "cities_boundaries.bin") - - @property - def transit_path(self): - return self.intermediate_path - - @property - def hierarchy_path(self): - return os.path.join(self.user_resource_path, "hierarchy.txt") - - @property - def old_to_new_path(self): - return os.path.join(self.user_resource_path, "old_vs_new.csv") - - @property - def borders_to_osm_path(self): - return os.path.join(self.user_resource_path, "borders_vs_osm.csv") - - @property - def countries_synonyms_path(self): - return os.path.join(self.user_resource_path, "countries_synonyms.csv") - - @property - def counties_txt_path(self): - return os.path.join(self.mwm_path, "countries.txt") - - def __getitem__(self, item): - return self.__dict__[item] + def is_skipped_stage_name(self, stage_name: AnyStr) -> bool: + return stage_name not in self.skipped_stages def finish(self): self.main_status.finish() - def finish_mwm(self, mwm_name): + def finish_mwm(self, mwm_name: AnyStr): self.countries_meta[mwm_name]["status"].finish() - def set_subprocess_out(self, subprocess_out, country=None): + def set_subprocess_out(self, subprocess_out: Any, country: Optional[AnyStr] = None): if country is None: - self._subprocess_out = subprocess_out + self.subprocess_out = subprocess_out else: - self._subprocess_countries_out[country] = subprocess_out + self.subprocess_countries_out[country] = subprocess_out - def get_subprocess_out(self, country=None): + def get_subprocess_out(self, country: Optional[AnyStr] = None): if country is None: - return self._subprocess_out + return self.subprocess_out else: - return self._subprocess_countries_out[country] + return self.subprocess_countries_out[country] @staticmethod - def _logging_setup(): + def setup_logging(): def exception_handler(type, value, tb): - logger.exception(f"Uncaught exception: {str(value)}", - exc_info=(type, value, tb)) + logger.exception( + f"Uncaught exception: {str(value)}", exc_info=(type, value, tb) + ) logging.config.dictConfig(settings.LOGGING) sys.excepthook = exception_handler @staticmethod - def _generator_tool_setup(): + def setup_generator_tool() -> AnyStr: logger.info("Check generator tool ...") gen_tool_path = shutil.which(settings.GEN_TOOL) if gen_tool_path is None: logger.info(f"Find generator tool in {settings.BUILD_PATH} ...") - gen_tool_path = find_executable(settings.BUILD_PATH, - settings.GEN_TOOL) + gen_tool_path = find_executable(settings.BUILD_PATH, settings.GEN_TOOL) logger.info(f"Generator found - {gen_tool_path}") return gen_tool_path @staticmethod - def _osm_tools_setup(): + def setup_osm_tools() -> Dict[AnyStr, AnyStr]: path = settings.OSM_TOOLS_PATH osm_tool_names = [ - settings.OSM_TOOL_CONVERT, settings.OSM_TOOL_UPDATE, - settings.OSM_TOOL_FILTER + settings.OSM_TOOL_CONVERT, + settings.OSM_TOOL_UPDATE, + settings.OSM_TOOL_FILTER, ] logger.info("Check osm tools ...") - if not Env._create_if_not_exist(path): + if not create_if_not_exist_path(path): tmp_paths = [os.path.join(path, t) for t in osm_tool_names] if all([is_executable(t) for t in tmp_paths]): osm_tool_paths = dict(zip(osm_tool_names, tmp_paths)) - logger.info( - f"Osm tools found - {', '.join(osm_tool_paths.values())}") + logger.info(f"Osm tools found - {', '.join(osm_tool_paths.values())}") return osm_tool_paths tmp_paths = [shutil.which(t) for t in osm_tool_names] if all(tmp_paths): osm_tool_paths = dict(zip(osm_tool_names, tmp_paths)) - logger.info( - f"Osm tools found - {', '.join(osm_tool_paths.values())}") + logger.info(f"Osm tools found - {', '.join(osm_tool_paths.values())}") return osm_tool_paths logger.info("Build osm tools ...") return build_osmtools(settings.OSM_TOOLS_SRC_PATH) - @staticmethod - def _out_path_setup(build_name): - dt = datetime.datetime.now() - version_format = "%Y_%m_%d__%H_%M_%S" - if build_name: - dt = datetime.datetime.strptime(build_name, version_format) + def setup_borders(self): + temp_borders = self.paths.generation_borders_path + # It is needed in case of rebuilding several mwms. + for filename in os.listdir(temp_borders): + file_path = os.path.join(temp_borders, filename) + if os.path.isfile(file_path) or os.path.islink(file_path): + os.unlink(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) - s = dt.strftime(version_format) - mwm_version = dt.strftime("%y%m%d") - planet_version = int(dt.strftime("%s")) - - out_path = os.path.join(settings.MAIN_OUT_PATH, s) - Env._create_if_not_exist(settings.MAIN_OUT_PATH) - Env._create_if_not_exist(out_path) - return out_path, s, mwm_version, planet_version - - @staticmethod - def _intermediate_path_setup(out_path): - intermediate_path = os.path.join(out_path, "intermediate_data") - Env._create_if_not_exist(intermediate_path) - return intermediate_path - - @staticmethod - def _tmp_dir_setup(): - Env._create_if_not_exist(settings.TMPDIR) - os.environ["TMPDIR"] = settings.TMPDIR - - @staticmethod - def _create_if_not_exist(path): - try: - os.mkdir(path) - logger.info(f"Create {path} ...") - return True - except FileExistsError: - return False - - def _prepare_borders(self): - borders = "borders" - temp_borders = os.path.join(self.intermediate_path, borders) - Env._create_if_not_exist(temp_borders) - borders = os.path.join(settings.USER_RESOURCE_PATH, borders) + borders = PathProvider.borders_path() for x in self.countries: if x in WORLDS_NAMES: continue - shutil.copy2(f"{os.path.join(borders, x)}.poly", temp_borders) - return temp_borders + + poly = f"{x}.poly" + os.symlink(os.path.join(borders, poly), os.path.join(temp_borders, poly)) + symlink_force(temp_borders, os.path.join(self.paths.draft_path, "borders")) + + def setup_osm2ft(self): + for x in os.listdir(self.paths.osm2ft_path): + p = os.path.join(self.paths.osm2ft_path, x) + if os.path.isfile(p) and x.endswith(".mwm.osm2ft"): + shutil.move(p, os.path.join(self.paths.mwm_path, x)) diff --git a/tools/python/maps_generator/generator/gen_tool.py b/tools/python/maps_generator/generator/gen_tool.py index 2cda09f178..0cb027e583 100644 --- a/tools/python/maps_generator/generator/gen_tool.py +++ b/tools/python/maps_generator/generator/gen_tool.py @@ -3,8 +3,9 @@ import logging import os import subprocess -from .exceptions import (OptionNotFound, ValidationError, - wait_and_raise_if_fail) +from .exceptions import OptionNotFound +from .exceptions import ValidationError +from .exceptions import wait_and_raise_if_fail logger = logging.getLogger("maps_generator") @@ -43,7 +44,6 @@ class GenTool: "split_by_polygons": bool, "type_statistics": bool, "version": bool, - "planet_version": int, "booking_data": str, "promo_catalog_cities": str, "brands_data": str, @@ -61,6 +61,7 @@ class GenTool: "osm_file_name": str, "osm_file_type": str, "output": str, + "planet_version": str, "popular_places_data": str, "regions_features": str, "regions_index": str, @@ -74,8 +75,9 @@ class GenTool: "wikipedia_pages": str, } - def __init__(self, name_executable, out=subprocess.DEVNULL, - err=subprocess.DEVNULL, **options): + def __init__( + self, name_executable, out=subprocess.DEVNULL, err=subprocess.DEVNULL, **options + ): self.name_executable = name_executable self.subprocess = None self.output = out @@ -100,8 +102,10 @@ class GenTool: raise OptionNotFound(f"{k} is unavailable option") if type(v) is not GenTool.OPTIONS[k]: - raise ValidationError(f"{k} required {str(GenTool.OPTIONS[k])}," - f" but not {str(type(v))}") + raise ValidationError( + f"{k} required {str(GenTool.OPTIONS[k])}," + f" but not {str(type(v))}" + ) self.options[k] = str(v).lower() if type(v) is bool else v return self @@ -109,11 +113,13 @@ class GenTool: def run_async(self): assert self.subprocess is None, "You forgot to call wait()" cmd = self._collect_cmd() - self.subprocess = subprocess.Popen(cmd, stdout=self.output, - stderr=self.error, env=os.environ) + self.subprocess = subprocess.Popen( + cmd, stdout=self.output, stderr=self.error, env=os.environ + ) - self.logger.info(f"Run generator tool [{self.get_build_version()}]:" - f" {' '.join(cmd)} ") + self.logger.info( + f"Run generator tool [{self.get_build_version()}]:" f" {' '.join(cmd)} " + ) return self def wait(self): @@ -131,19 +137,20 @@ class GenTool: return c def get_build_version(self): - p = subprocess.Popen([self.name_executable, "--version"], - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env=os.environ) + p = subprocess.Popen( + [self.name_executable, "--version"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=os.environ, + ) wait_and_raise_if_fail(p) out, err = p.communicate() return out.decode("utf-8").replace("\n", " ").strip() def _collect_cmd(self): - options = ["".join(["--", k, "=", str(v)]) for k, v in - self.options.items()] + options = ["".join(["--", k, "=", str(v)]) for k, v in self.options.items()] return [self.name_executable, *options] def run_gen_tool(*args, **kwargs): GenTool(*args, **kwargs).run() - diff --git a/tools/python/maps_generator/generator/generation.py b/tools/python/maps_generator/generator/generation.py new file mode 100644 index 0000000000..c8352d098d --- /dev/null +++ b/tools/python/maps_generator/generator/generation.py @@ -0,0 +1,127 @@ +import os +from typing import AnyStr +from typing import Optional +from typing import Type +from typing import Union + +import filelock + +from . import settings +from .env import Env +from .exceptions import ContinueError +from .stages import Stage +from .stages import get_stage_name +from .stages import stages +from .status import Status + + +class Generation: + """ + Generation describes process of a map generation. It contains stages. + + For example: + generation = Generation(env) + generation.add_stage(s1) + generation.add_stage(s2) + generation.run() + """ + + def __init__(self, env: Env): + self.env = env + self.stages = [] + + for country_stage in stages.countries_stages: + if self.is_skipped_stage(country_stage): + stage_name = get_stage_name(country_stage) + self.env.add_skipped_stage_name(stage_name) + + def is_skipped_stage(self, stage: Union[Type[Stage], Stage]) -> bool: + return stage.is_production_only and not self.env.production + + def add_stage(self, stage: Stage): + if self.is_skipped_stage(stage): + stage_name = get_stage_name(stage) + self.env.add_skipped_stage_name(stage_name) + else: + self.stages.append(stage) + + def run(self, from_stage: Optional[AnyStr] = None): + if from_stage is not None: + self.reset_to_stage(from_stage) + + planet_lock = filelock.FileLock(f"{settings.PLANET_O5M}.lock", timeout=1) + build_lock = filelock.FileLock( + f"{os.path.join(self.env.paths.build_path, 'lock')}.lock" + ) + try: + for stage in self.stages: + if stage.need_planet_lock: + planet_lock.acquire() + else: + planet_lock.release() + + if stage.need_build_lock: + build_lock.acquire() + else: + build_lock.release() + + stage(self.env) + finally: + planet_lock.release() + build_lock.release() + + def reset_to_stage(self, stage_name: AnyStr): + """ + Resets generation state to stage_name. + Status files are overwritten new statuses according stage_name. + It supposes that stages have next representation: + stage1, ..., stage_mwm[country_stage_1, ..., country_stage_M], ..., stageN + """ + countries_statuses_paths = [ + os.path.join(self.env.paths.status_path, f) + for f in os.listdir(self.env.paths.status_path) + if os.path.isfile(os.path.join(self.env.paths.status_path, f)) + and os.path.join(self.env.paths.status_path, f) + != self.env.paths.main_status_path + ] + + def set_countries_stage(st): + for path in countries_statuses_paths: + Status(path, st).update_status() + + def finish_countries_stage(): + for path in countries_statuses_paths: + Status(path).finish() + + high_level_stages = [get_stage_name(s) for s in self.stages] + if not ( + stage_name in high_level_stages + or any(stage_name == get_stage_name(s) for s in stages.countries_stages) + ): + raise ContinueError( + f"Stage {stage_name} not in {', '.join(high_level_stages)}." + ) + + if not os.path.exists(self.env.paths.main_status_path): + raise ContinueError( + f"Status file {self.env.paths.main_status_path} not found." + ) + + if not os.path.exists(self.env.paths.status_path): + raise ContinueError(f"Status path {self.env.paths.status_path} not found.") + + mwm_stage_name = get_stage_name(stages.mwm_stage) + stage_mwm_index = high_level_stages.index(mwm_stage_name) + + main_status = None + if stage_name in high_level_stages[: stage_mwm_index + 1]: + main_status = stage_name + set_countries_stage("") + elif stage_name in high_level_stages[stage_mwm_index + 1 :]: + main_status = stage_name + finish_countries_stage() + else: + main_status = get_stage_name(stages.mwm_stage) + set_countries_stage(stage_name) + + Status(self.env.paths.main_status_path, main_status).update_status() diff --git a/tools/python/maps_generator/generator/osmtools.py b/tools/python/maps_generator/generator/osmtools.py index ffdd144234..2e9709ca8c 100644 --- a/tools/python/maps_generator/generator/osmtools.py +++ b/tools/python/maps_generator/generator/osmtools.py @@ -2,23 +2,29 @@ import os import subprocess from . import settings -from .exceptions import wait_and_raise_if_fail, BadExitStatusError +from .exceptions import BadExitStatusError +from .exceptions import wait_and_raise_if_fail def build_osmtools(path, output=subprocess.DEVNULL, error=subprocess.DEVNULL): - src = {settings.OSM_TOOL_UPDATE: "osmupdate.c", - settings.OSM_TOOL_FILTER: "osmfilter.c", - settings.OSM_TOOL_CONVERT: "osmconvert.c"} + src = { + settings.OSM_TOOL_UPDATE: "osmupdate.c", + settings.OSM_TOOL_FILTER: "osmfilter.c", + settings.OSM_TOOL_CONVERT: "osmconvert.c", + } ld_flags = ("-lz",) cc = [] result = {} for executable, src in src.items(): out = os.path.join(settings.OSM_TOOLS_PATH, executable) - op = [settings.OSM_TOOLS_CC, - *settings.OSM_TOOLS_CC_FLAGS, - "-o", out, - os.path.join(path, src), - *ld_flags] + op = [ + settings.OSM_TOOLS_CC, + *settings.OSM_TOOLS_CC_FLAGS, + "-o", + out, + os.path.join(path, src), + *ld_flags, + ] s = subprocess.Popen(op, stdout=output, stderr=error) cc.append(s) result[executable] = out @@ -33,45 +39,81 @@ def build_osmtools(path, output=subprocess.DEVNULL, error=subprocess.DEVNULL): return result -def osmconvert(name_executable, in_file, out_file, output=subprocess.DEVNULL, - error=subprocess.DEVNULL, run_async=False, **kwargs): +def osmconvert( + name_executable, + in_file, + out_file, + output=subprocess.DEVNULL, + error=subprocess.DEVNULL, + run_async=False, + **kwargs, +): env = os.environ.copy() env["PATH"] = f"{settings.OSM_TOOLS_PATH}:{env['PATH']}" p = subprocess.Popen( [ - name_executable, in_file, "--drop-author", "--drop-version", - "--out-o5m", f"-o={out_file}" + name_executable, + in_file, + "--drop-author", + "--drop-version", + "--out-o5m", + f"-o={out_file}", ], - env=env, stdout=output, stderr=error) + env=env, + stdout=output, + stderr=error, + ) if run_async: return p else: wait_and_raise_if_fail(p) -def osmupdate(name_executable, in_file, out_file, output=subprocess.DEVNULL, - error=subprocess.DEVNULL, run_async=False, **kwargs): +def osmupdate( + name_executable, + in_file, + out_file, + output=subprocess.DEVNULL, + error=subprocess.DEVNULL, + run_async=False, + **kwargs, +): env = os.environ.copy() env["PATH"] = f"{settings.OSM_TOOLS_PATH}:{env['PATH']}" p = subprocess.Popen( [ - name_executable, "--drop-author", "--drop-version", "--out-o5m", + name_executable, + "--drop-author", + "--drop-version", + "--out-o5m", "-v", - in_file, out_file + in_file, + out_file, ], - env=env, stdout=output, stderr=error) + env=env, + stdout=output, + stderr=error, + ) if run_async: return p else: wait_and_raise_if_fail(p) -def osmfilter(name_executable, in_file, out_file, output=subprocess.DEVNULL, - error=subprocess.DEVNULL, run_async=False, **kwargs): +def osmfilter( + name_executable, + in_file, + out_file, + output=subprocess.DEVNULL, + error=subprocess.DEVNULL, + run_async=False, + **kwargs, +): env = os.environ.copy() env["PATH"] = f"{settings.OSM_TOOLS_PATH}:{env['PATH']}" - args = ([name_executable, in_file, f"-o={out_file}"] + - [f"--{k.replace('_', '-')}={v}" for k, v in kwargs.items()]) + args = [name_executable, in_file, f"-o={out_file}"] + [ + f"--{k.replace('_', '-')}={v}" for k, v in kwargs.items() + ] p = subprocess.Popen(args, env=env, stdout=output, stderr=error) if run_async: return p diff --git a/tools/python/maps_generator/generator/settings.py b/tools/python/maps_generator/generator/settings.py index 96fc8dde0b..f0919a7e26 100644 --- a/tools/python/maps_generator/generator/settings.py +++ b/tools/python/maps_generator/generator/settings.py @@ -2,20 +2,25 @@ import argparse import multiprocessing import os import sys -from configparser import ConfigParser, ExtendedInterpolation +from configparser import ConfigParser +from configparser import ExtendedInterpolation from pathlib import Path +from typing import Any +from typing import AnyStr +from ..utils.md5 import md5 from ..utils.system import total_virtual_memory -SETTINGS_PATH = os.path.dirname(os.path.join(os.path.realpath(__file__))) - parser = argparse.ArgumentParser(add_help=False) opt_config = "--config" parser.add_argument(opt_config, type=str, default="", help="Path to config") -def get_config_path(): - config_path = os.path.join(SETTINGS_PATH, "../var/etc/map_generator.ini") +def get_config_path(config_path: AnyStr): + """ + It tries to get an opt_config value. + If doesn't get the value a function returns config_path. + """ argv = sys.argv indexes = (-1, -1) for i, opt in enumerate(argv): @@ -27,27 +32,76 @@ def get_config_path(): if indexes[1] > len(argv): return config_path - args = argv[indexes[0]: indexes[1]] + args = argv[indexes[0] : indexes[1]] return parser.parse_args(args).config if args else config_path -DEBUG = True -HOME_PATH = str(Path.home()) -WORK_PATH = HOME_PATH -TMPDIR = os.path.join(HOME_PATH, "tmp") -MAIN_OUT_PATH = os.path.join(WORK_PATH, "generation") +class CfgReader: + """ + Config reader. + There are 3 way of getting an option. In priority order: + 1. From system env. + 2. From config. + 3. From default values. + + For using the option from system env you can build an option name as + MM__ + [SECTION_NAME] + _ + [VALUE_NAME]. + """ + + def __init__(self, default_settings_path: AnyStr): + self.config = ConfigParser(interpolation=ExtendedInterpolation()) + self.config.read([get_config_path(default_settings_path)]) + + def get_opt(self, s: AnyStr, v: AnyStr, default: Any = None): + val = CfgReader._get_env_val(s, v) + if val is not None: + return val + + return self.config.get(s, v) if self.config.has_option(s, v) else default + + def get_opt_path(self, s: AnyStr, v: AnyStr, default: AnyStr = ""): + return os.path.expanduser(self.get_opt(s, v, default)) + + @staticmethod + def _get_env_val(s: AnyStr, v: AnyStr): + return os.environ.get(f"MM__{s.upper()}_{v.upper()}") -VERSION_FILE_NAME = "version.txt" -# External resources DEFAULT_PLANET_URL = "https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf" -DEFAULT_PLANET_MD5_URL = DEFAULT_PLANET_URL + ".md5" +DEFAULT_PLANET_MD5_URL = md5(DEFAULT_PLANET_URL) + +# Main section: +# If DEBUG is True, a little special planet is downloaded. +DEBUG = True +_HOME_PATH = str(Path.home()) +_WORK_PATH = _HOME_PATH +TMPDIR = os.path.join(_HOME_PATH, "tmp") +MAIN_OUT_PATH = os.path.join(_WORK_PATH, "generation") + +# Developer section: +BUILD_PATH = os.path.join(_WORK_PATH, "omim-build-release") +OMIM_PATH = os.path.join(_WORK_PATH, "omim") + +# Osm tools section: +OSM_TOOLS_SRC_PATH = os.path.join(OMIM_PATH, "tools", "osmctools") +OSM_TOOLS_PATH = os.path.join(_WORK_PATH, "osmctools") + +# Generator tool section: +NODE_STORAGE = "mem" if total_virtual_memory() / 10 ** 9 >= 64 else "map" +USER_RESOURCE_PATH = os.path.join(OMIM_PATH, "data") + +# Logging section: +LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log") + +# External resources section: +PLANET_URL = DEFAULT_PLANET_URL +PLANET_MD5_URL = DEFAULT_PLANET_MD5_URL PLANET_COASTS_URL = "" UGC_URL = "" HOTELS_URL = "" PROMO_CATALOG_CITIES_URL = "" PROMO_CATALOG_COUNTRIES_URL = "" -POPULARITY_URL= "" +POPULARITY_URL = "" SUBWAY_URL = "" FOOD_URL = "" FOOD_TRANSLATIONS_URL = "" @@ -55,86 +109,27 @@ UK_POSTCODES_URL = "" US_POSTCODES_URL = "" SRTM_PATH = "" +# Stats section: STATS_TYPES_CONFIG = "" +# Other variables: PLANET = "planet" - GEN_TOOL = "generator_tool" +VERSION_FILE_NAME = "version.txt" -BUILD_PATH = os.path.join(WORK_PATH, "omim-build-release") -OMIM_PATH = os.path.join(WORK_PATH, "omim") - -# generator_tool -NODE_STORAGE = "mem" if total_virtual_memory() / 10 ** 9 >= 64 else "map" -USER_RESOURCE_PATH = os.path.join(OMIM_PATH, "data") - -# osm tools +# Osm tools: OSM_TOOL_CONVERT = "osmconvert" OSM_TOOL_FILTER = "osmfilter" OSM_TOOL_UPDATE = "osmupdate" -OSM_TOOLS_SRC_PATH = os.path.join(OMIM_PATH, "tools", "osmctools") -OSM_TOOLS_PATH = os.path.join(WORK_PATH, "osmctools") OSM_TOOLS_CC = "cc" -OSM_TOOLS_CC_FLAGS = ["-O3", ] +OSM_TOOLS_CC_FLAGS = [ + "-O3", +] -# system +# System: CPU_COUNT = multiprocessing.cpu_count() -# Try to read a config and to overload default settings -config = ConfigParser(interpolation=ExtendedInterpolation()) -config.read([get_config_path()]) - - -def _get_opt(config, s, v, default=None): - return config.get(s, v) if config.has_option(s, v) else default - - -def _get_opt_path(config, s, v, default=""): - return os.path.expanduser(_get_opt(config, s, v, default)) - - -_DEBUG = _get_opt(config, "Main", "DEBUG") -DEBUG = DEBUG if _DEBUG is None else int(_DEBUG) -MAIN_OUT_PATH = _get_opt_path(config, "Main", "MAIN_OUT_PATH", MAIN_OUT_PATH) - -# logging -LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log") - -TMPDIR = _get_opt_path(config, "Main", "TMPDIR", TMPDIR) - -BUILD_PATH = _get_opt_path(config, "Developer", "BUILD_PATH", BUILD_PATH) -OMIM_PATH = _get_opt_path(config, "Developer", "OMIM_PATH", OMIM_PATH) - -USER_RESOURCE_PATH = _get_opt_path(config, "Generator tool", - "USER_RESOURCE_PATH", USER_RESOURCE_PATH) - -NODE_STORAGE = _get_opt(config, "Generator tool", "NODE_STORAGE", NODE_STORAGE) - -OSM_TOOLS_SRC_PATH = _get_opt_path(config, "Osm tools", "OSM_TOOLS_SRC_PATH", OSM_TOOLS_SRC_PATH) -OSM_TOOLS_PATH = _get_opt_path(config, "Osm tools", "OSM_TOOLS_PATH", OSM_TOOLS_PATH) - -LOG_FILE_PATH = _get_opt_path(config, "Logging", "MAIN_LOG", LOG_FILE_PATH) -os.makedirs(os.path.dirname(os.path.abspath(LOG_FILE_PATH)), exist_ok=True) - -PLANET_URL = _get_opt_path(config, "External", "PLANET_URL", DEFAULT_PLANET_URL) -PLANET_MD5_URL = _get_opt_path(config, "External", "PLANET_MD5_URL", DEFAULT_PLANET_MD5_URL) -PLANET_COASTS_URL = _get_opt_path(config, "External", "PLANET_COASTS_URL", PLANET_COASTS_URL) -UGC_URL = _get_opt_path(config, "External", "UGC_URL", UGC_URL) -HOTELS_URL = _get_opt_path(config, "External", "HOTELS_URL", HOTELS_URL) -PROMO_CATALOG_CITIES_URL = _get_opt_path(config, "External", "PROMO_CATALOG_CITIES_URL", PROMO_CATALOG_CITIES_URL) -PROMO_CATALOG_COUNTRIES_URL = _get_opt_path(config, "External", "PROMO_CATALOG_COUNTRIES_URL", PROMO_CATALOG_COUNTRIES_URL) -POPULARITY_URL = _get_opt_path(config, "External", "POPULARITY_URL", POPULARITY_URL) -SUBWAY_URL = _get_opt(config, "External", "SUBWAY_URL", SUBWAY_URL) -FOOD_URL = _get_opt(config, "External", "FOOD_URL", FOOD_URL) -UK_POSTCODES_URL = _get_opt(config, "External", "UK_POSTCODES_URL", UK_POSTCODES_URL) -US_POSTCODES_URL = _get_opt(config, "External", "US_POSTCODES_URL", US_POSTCODES_URL) -FOOD_TRANSLATIONS_URL = _get_opt(config, "External", "FOOD_TRANSLATIONS_URL", - FOOD_TRANSLATIONS_URL) -SRTM_PATH = _get_opt_path(config, "External", "SRTM_PATH", SRTM_PATH) - -STATS_TYPES_CONFIG = _get_opt_path(config, "Stats", "STATS_TYPES_CONFIG", - STATS_TYPES_CONFIG) - +# Planet and coasts: PLANET_O5M = os.path.join(MAIN_OUT_PATH, PLANET + ".o5m") PLANET_PBF = os.path.join(MAIN_OUT_PATH, PLANET + ".osm.pbf") PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom") @@ -149,28 +144,130 @@ LOGGING = { "version": 1, "disable_existing_loggers": False, "formatters": { - "standard": { - "format": "[%(asctime)s] %(levelname)s %(module)s %(message)s" - }, + "standard": {"format": "[%(asctime)s] %(levelname)s %(module)s %(message)s"}, }, "handlers": { "stdout": { "level": "INFO", "class": "logging.StreamHandler", - "formatter": "standard" + "formatter": "standard", }, "file": { "level": "DEBUG", "class": "logging.handlers.WatchedFileHandler", "formatter": "standard", - "filename": LOG_FILE_PATH - } + "filename": LOG_FILE_PATH, + }, }, "loggers": { "maps_generator": { "handlers": ["stdout", "file"], "level": "DEBUG", - "propagate": True + "propagate": True, } - } + }, } + + +def init(default_settings_path: AnyStr): + # Try to read a config and to overload default settings + cfg = CfgReader(default_settings_path) + + # Main section: + global DEBUG + global MAIN_OUT_PATH + global TMPDIR + _DEBUG = cfg.get_opt("Main", "DEBUG") + DEBUG = DEBUG if _DEBUG is None else int(_DEBUG) + MAIN_OUT_PATH = cfg.get_opt_path("Main", "MAIN_OUT_PATH", MAIN_OUT_PATH) + TMPDIR = cfg.get_opt_path("Main", "TMPDIR", TMPDIR) + + # Developer section: + global BUILD_PATH + global OMIM_PATH + BUILD_PATH = cfg.get_opt_path("Developer", "BUILD_PATH", BUILD_PATH) + OMIM_PATH = cfg.get_opt_path("Developer", "OMIM_PATH", OMIM_PATH) + + # Osm tools section: + global OSM_TOOLS_SRC_PATH + global OSM_TOOLS_PATH + OSM_TOOLS_SRC_PATH = cfg.get_opt_path( + "Osm tools", "OSM_TOOLS_SRC_PATH", OSM_TOOLS_SRC_PATH + ) + OSM_TOOLS_PATH = cfg.get_opt_path("Osm tools", "OSM_TOOLS_PATH", OSM_TOOLS_PATH) + + # Generator tool section: + global USER_RESOURCE_PATH + global NODE_STORAGE + USER_RESOURCE_PATH = cfg.get_opt_path( + "Generator tool", "USER_RESOURCE_PATH", USER_RESOURCE_PATH + ) + NODE_STORAGE = cfg.get_opt("Generator tool", "NODE_STORAGE", NODE_STORAGE) + + # Logging section: + global LOG_FILE_PATH + global LOGGING + LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log") + LOG_FILE_PATH = cfg.get_opt_path("Logging", "MAIN_LOG", LOG_FILE_PATH) + os.makedirs(os.path.dirname(os.path.abspath(LOG_FILE_PATH)), exist_ok=True) + LOGGING["handlers"]["file"]["filename"] = LOG_FILE_PATH + + # External sction: + global PLANET_URL + global PLANET_MD5_URL + global PLANET_COASTS_URL + global UGC_URL + global HOTELS_URL + global PROMO_CATALOG_CITIES_URL + global PROMO_CATALOG_COUNTRIES_URL + global POPULARITY_URL + global SUBWAY_URL + global FOOD_URL + global UK_POSTCODES_URL + global US_POSTCODES_URL + global FOOD_TRANSLATIONS_URL + global SRTM_PATH + + PLANET_URL = cfg.get_opt_path("External", "PLANET_URL", PLANET_URL) + PLANET_MD5_URL = cfg.get_opt_path("External", "PLANET_MD5_URL", PLANET_MD5_URL) + PLANET_COASTS_URL = cfg.get_opt_path( + "External", "PLANET_COASTS_URL", PLANET_COASTS_URL + ) + UGC_URL = cfg.get_opt_path("External", "UGC_URL", UGC_URL) + HOTELS_URL = cfg.get_opt_path("External", "HOTELS_URL", HOTELS_URL) + PROMO_CATALOG_CITIES_URL = cfg.get_opt_path( + "External", "PROMO_CATALOG_CITIES_URL", PROMO_CATALOG_CITIES_URL + ) + PROMO_CATALOG_COUNTRIES_URL = cfg.get_opt_path( + "External", "PROMO_CATALOG_COUNTRIES_URL", PROMO_CATALOG_COUNTRIES_URL + ) + POPULARITY_URL = cfg.get_opt_path("External", "POPULARITY_URL", POPULARITY_URL) + SUBWAY_URL = cfg.get_opt("External", "SUBWAY_URL", SUBWAY_URL) + FOOD_URL = cfg.get_opt("External", "FOOD_URL", FOOD_URL) + + UK_POSTCODES_URL = cfg.get_opt("External", "UK_POSTCODES_URL", UK_POSTCODES_URL) + US_POSTCODES_URL = cfg.get_opt("External", "US_POSTCODES_URL", US_POSTCODES_URL) + FOOD_TRANSLATIONS_URL = cfg.get_opt( + "External", "FOOD_TRANSLATIONS_URL", FOOD_TRANSLATIONS_URL + ) + SRTM_PATH = cfg.get_opt_path("External", "SRTM_PATH", SRTM_PATH) + + # Stats section: + global STATS_TYPES_CONFIG + STATS_TYPES_CONFIG = cfg.get_opt_path( + "Stats", "STATS_TYPES_CONFIG", STATS_TYPES_CONFIG + ) + + # Planet and costs: + global PLANET_O5M + global PLANET_PBF + global PLANET_COASTS_GEOM_URL + global PLANET_COASTS_RAWGEOM_URL + PLANET_O5M = os.path.join(MAIN_OUT_PATH, PLANET + ".o5m") + PLANET_PBF = os.path.join(MAIN_OUT_PATH, PLANET + ".osm.pbf") + PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom") + PLANET_COASTS_RAWGEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.rawgeom") + + if DEBUG: + PLANET_URL = "http://osmz.ru/mwm/islands/islands.o5m" + PLANET_MD5_URL = "https://cloud.mail.ru/public/5v2F/f7cSaEXBC" diff --git a/tools/python/maps_generator/generator/stages.py b/tools/python/maps_generator/generator/stages.py index 15d49a089f..8003132485 100644 --- a/tools/python/maps_generator/generator/stages.py +++ b/tools/python/maps_generator/generator/stages.py @@ -1,239 +1,239 @@ +"""" +This file contains some decorators that define stages. +There are two main types of stages: + 1. stage - a high level stage + 2. country_stage - a stage that applies to countries files(*.mwm). + +country_stage might be inside stage. There are country stages inside mwm_stage. +mwm_stage is only one stage that contains country_stages. +""" +import datetime import logging import os -import shutil -import subprocess +import time +from abc import ABC +from abc import abstractmethod +from typing import AnyStr +from typing import List +from typing import Optional +from typing import Type +from typing import Union -from . import settings -from .env import WORLD_NAME, WORLDS_NAMES -from .gen_tool import run_gen_tool -from .osmtools import osmconvert, osmupdate -from ..utils.file import download_file, is_verified -from ..utils.file import symlink_force -from ..utils.md5 import write_md5sum, md5 +from .env import Env +from .status import Status +from ..utils.algo import camel_case_split +from ..utils.log import DummyObject +from ..utils.log import create_file_logger logger = logging.getLogger("maps_generator") -def download_planet(planet): - download_file(settings.PLANET_URL, planet) - download_file(settings.PLANET_MD5_URL, md5(planet)) +class Stage(ABC): + need_planet_lock = False + need_build_lock = False + is_helper = False + is_mwm_stage = False + is_production_only = False + + def __init__(self, **args): + self.args = args + + def __call__(self, env: Env): + return self.apply(env, **self.args) + + @abstractmethod + def apply(self, *args, **kwargs): + pass -def convert_planet(tool, in_planet, out_planet, output=subprocess.DEVNULL, - error=subprocess.DEVNULL): - osmconvert(tool, in_planet, out_planet, output=output, error=error) - write_md5sum(out_planet, md5(out_planet)) +def get_stage_name(stage: Union[Type[Stage], Stage]) -> AnyStr: + n = stage.__class__.__name__ if isinstance(stage, Stage) else stage.__name__ + return "_".join(w.lower() for w in camel_case_split(n)) -def stage_download_and_convert_planet(env, force_download, **kwargs): - if force_download or not is_verified(settings.PLANET_PBF): - download_planet(settings.PLANET_PBF) +class Stages: + """Stages class is used for storing all stages.""" - convert_planet(env[settings.OSM_TOOL_CONVERT], - settings.PLANET_PBF, settings.PLANET_O5M, - output=env.get_subprocess_out(), - error=env.get_subprocess_out()) - os.remove(settings.PLANET_PBF) - os.remove(md5(settings.PLANET_PBF)) + def __init__(self): + self.mwm_stage: Optional[Type[Stage]] = None + self.countries_stages: List[Type[Stage]] = [] + self.stages: List[Type[Stage]] = [] + self.helper_stages: List[Type[Stage]] = [] + + def set_mwm_stage(self, stage: Type[Stage]): + assert self.mwm_stage is None + self.mwm_stage = stage + + def add_helper_stage(self, stage: Type[Stage]): + self.helper_stages.append(stage) + + def add_country_stage(self, stage: Type[Stage]): + self.countries_stages.append(stage) + + def add_stage(self, stage: Type[Stage]): + self.stages.append(stage) + + def get_visible_stages_names(self) -> List[AnyStr]: + """Returns all stages names except helper stages names.""" + stages = [] + for s in self.stages: + stages.append(get_stage_name(s)) + if s == self.mwm_stage: + stages += [get_stage_name(st) for st in self.countries_stages] + return stages + + def is_valid_stage_name(self, stage_name) -> bool: + return get_stage_name(self.mwm_stage) == stage_name or any( + any(stage_name == get_stage_name(x) for x in c) + for c in [self.countries_stages, self.stages, self.helper_stages] + ) -def stage_update_planet(env, **kwargs): - tmp = settings.PLANET_O5M + ".tmp" - osmupdate(env[settings.OSM_TOOL_UPDATE], settings.PLANET_O5M, tmp, - output=env.get_subprocess_out(), - error=env.get_subprocess_out(), - **kwargs) - os.remove(settings.PLANET_O5M) - os.rename(tmp, settings.PLANET_O5M) - write_md5sum(settings.PLANET_O5M, md5(settings.PLANET_O5M)) +# A global variable stage contains all possible stages. +stages = Stages() -def stage_preprocess(env, **kwargs): - run_gen_tool(env.gen_tool, - out=env.get_subprocess_out(), - err=env.get_subprocess_out(), - intermediate_data_path=env.intermediate_path, - osm_file_type="o5m", - osm_file_name=settings.PLANET_O5M, - node_storage=env.node_storage, - user_resource_path=env.user_resource_path, - preprocess=True, - **kwargs) +def outer_stage(stage: Type[Stage]) -> Type[Stage]: + """It's decorator that defines high level stage.""" + if stage.is_helper: + stages.add_helper_stage(stage) + else: + stages.add_stage(stage) + if stage.is_mwm_stage: + stages.set_mwm_stage(stage) + + def new_apply(method): + def apply(obj: Stage, env: Env, *args, **kwargs): + name = get_stage_name(obj) + stage_formatted = " ".join(name.split("_")).capitalize() + logfile = os.path.join(env.paths.log_path, f"{name}.log") + log_handler = logging.FileHandler(logfile) + logger.addHandler(log_handler) + if not env.is_skipped_stage_name(name): + logger.info(f"{stage_formatted} was not accepted.") + logger.removeHandler(log_handler) + return + + main_status = env.main_status + main_status.init(env.paths.main_status_path, name) + if main_status.need_skip(): + logger.warning(f"{stage_formatted} was skipped.") + logger.removeHandler(log_handler) + return + + main_status.update_status() + logger.info(f"{stage_formatted}: start ...") + t = time.time() + env.set_subprocess_out(log_handler.stream) + method(obj, env, *args, **kwargs) + d = time.time() - t + logger.info( + f"{stage_formatted}: finished in " + f"{str(datetime.timedelta(seconds=d))}" + ) + logger.removeHandler(log_handler) + + return apply + + stage.apply = new_apply(stage.apply) + return stage -def stage_features(env, **kwargs): - extra = {} - if env.production: - extra["add_ads"] = True - if any(x not in WORLDS_NAMES for x in env.countries): - extra["generate_packed_borders"] = True - if any(x == WORLD_NAME for x in env.countries): - extra["generate_world"] = True - if env.build_all_countries: - extra["have_borders_for_whole_world"] = True +def country_stage_status(stage: Type[Stage]) -> Type[Stage]: + """It's helper decorator that works with status file.""" - run_gen_tool( - env.gen_tool, - out=env.get_subprocess_out(), - err=env.get_subprocess_out(), - data_path=env.data_path, - intermediate_data_path=env.intermediate_path, - osm_file_type="o5m", - osm_file_name=settings.PLANET_O5M, - node_storage=env.node_storage, - user_resource_path=env.user_resource_path, - dump_cities_boundaries=True, - cities_boundaries_data=env.cities_boundaries_path, - generate_features=True, - **extra, - **kwargs - ) + def new_apply(method): + def apply(obj: Stage, env: Env, country: AnyStr, *args, **kwargs): + name = get_stage_name(obj) + _logger = DummyObject() + countries_meta = env.countries_meta + if "logger" in countries_meta[country]: + _logger, _ = countries_meta[country]["logger"] + + stage_formatted = " ".join(name.split("_")).capitalize() + + if not env.is_skipped_stage_name(name): + _logger.info(f"{stage_formatted} was not accepted.") + return + + if "status" not in countries_meta[country]: + countries_meta[country]["status"] = Status() + + status = countries_meta[country]["status"] + status_file = os.path.join(env.paths.status_path, f"{country}.status") + status.init(status_file, name) + if status.need_skip(): + _logger.warning(f"{stage_formatted} was skipped.") + return + + status.update_status() + method(obj, env, country, *args, **kwargs) + + return apply + + stage.apply = new_apply(stage.apply) + return stage -def run_gen_tool_with_recovery_country(env, *args, **kwargs): - if "data_path" not in kwargs or "output" not in kwargs: - logger.warning("The call run_gen_tool() will be without recovery.") - run_gen_tool(*args, **kwargs) - prev_data_path = kwargs["data_path"] - mwm = f"{kwargs['output']}.mwm" - osm2ft = f"{mwm}.osm2ft" - kwargs["data_path"] = env.draft_path - symlink_force(os.path.join(prev_data_path, osm2ft), - os.path.join(env.draft_path, osm2ft)) - shutil.copy(os.path.join(prev_data_path, mwm), - os.path.join(env.draft_path, mwm)) - run_gen_tool(*args, **kwargs) - shutil.move(os.path.join(env.draft_path, mwm), - os.path.join(prev_data_path, mwm)) - kwargs["data_path"] = prev_data_path +def country_stage_log(stage: Type[Stage]) -> Type[Stage]: + """It's helper decorator that works with log file.""" + + def new_apply(method): + def apply(obj: Stage, env: Env, country: AnyStr, *args, **kwargs): + name = get_stage_name(obj) + log_file = os.path.join(env.paths.log_path, f"{country}.log") + countries_meta = env.countries_meta + if "logger" not in countries_meta[country]: + countries_meta[country]["logger"] = create_file_logger(log_file) + + _logger, log_handler = countries_meta[country]["logger"] + stage_formatted = " ".join(name.split("_")).capitalize() + _logger.info(f"{stage_formatted}: start ...") + t = time.time() + env.set_subprocess_out(log_handler.stream, country) + method(obj, env, country, *args, logger=_logger, **kwargs) + d = time.time() - t + _logger.info( + f"{stage_formatted}: finished in " + f"{str(datetime.timedelta(seconds=d))}" + ) + + return apply + + stage.apply = new_apply(stage.apply) + return stage -def _generate_common_index(env, country, **kwargs): - run_gen_tool(env.gen_tool, - out=env.get_subprocess_out(country), - err=env.get_subprocess_out(country), - data_path=env.mwm_path, - intermediate_data_path=env.intermediate_path, - user_resource_path=env.user_resource_path, - node_storage=env.node_storage, - planet_version=env.planet_version, - generate_geometry=True, - generate_index=True, - output=country, - **kwargs) +def country_stage(stage: Type[Stage]) -> Type[Stage]: + """It's decorator that defines country stage.""" + if stage.is_helper: + stages.add_helper_stage(stage) + else: + stages.add_country_stage(stage) + + return country_stage_log(country_stage_status(stage)) -def stage_index_world(env, country, **kwargs): - _generate_common_index(env, country, - generate_search_index=True, - cities_boundaries_data=env.cities_boundaries_path, - generate_cities_boundaries=True, - **kwargs) +def mwm_stage(stage: Type[Stage]) -> Type[Stage]: + stage.is_mwm_stage = True + return stage -def stage_cities_ids_world(env, country, **kwargs): - run_gen_tool_with_recovery_country( - env, - env.gen_tool, - out=env.get_subprocess_out(country), - err=env.get_subprocess_out(country), - data_path=env.mwm_path, - user_resource_path=env.user_resource_path, - output=country, - generate_cities_ids=True, - **kwargs - ) +def planet_lock(stage: Type[Stage]) -> Type[Stage]: + stage.need_planet_lock = True + return stage -def stage_index(env, country, **kwargs): - _generate_common_index(env, country, - generate_search_index=True, - **kwargs) +def build_lock(stage: Type[Stage]) -> Type[Stage]: + stage.need_build_lock = True + return stage -def stage_coastline_index(env, country, **kwargs): - _generate_common_index(env, country, **kwargs) +def production_only(stage: Type[Stage]) -> Type[Stage]: + stage.is_production_only = True + return stage -def stage_ugc(env, country, **kwargs): - run_gen_tool_with_recovery_country( - env, - env.gen_tool, - out=env.get_subprocess_out(country), - err=env.get_subprocess_out(country), - data_path=env.mwm_path, - intermediate_data_path=env.intermediate_path, - user_resource_path=env.user_resource_path, - ugc_data=env.ugc_path, - output=country, - **kwargs - ) - - -def stage_popularity(env, country, **kwargs): - run_gen_tool_with_recovery_country( - env, - env.gen_tool, - out=env.get_subprocess_out(country), - err=env.get_subprocess_out(country), - data_path=env.mwm_path, - intermediate_data_path=env.intermediate_path, - user_resource_path=env.user_resource_path, - popular_places_data=env.popularity_path, - generate_popular_places=True, - output=country, - **kwargs - ) - - -def stage_srtm(env, country, **kwargs): - run_gen_tool_with_recovery_country( - env, - env.gen_tool, - out=env.get_subprocess_out(country), - err=env.get_subprocess_out(country), - data_path=env.mwm_path, - intermediate_data_path=env.intermediate_path, - user_resource_path=env.user_resource_path, - srtm_path=env.srtm_path, - output=country, - **kwargs - ) - - -def stage_routing(env, country, **kwargs): - run_gen_tool_with_recovery_country( - env, - env.gen_tool, - out=env.get_subprocess_out(country), - err=env.get_subprocess_out(country), - data_path=env.mwm_path, - intermediate_data_path=env.intermediate_path, - user_resource_path=env.user_resource_path, - cities_boundaries_data=env.cities_boundaries_path, - generate_maxspeed=True, - make_city_roads=True, - make_cross_mwm=True, - disable_cross_mwm_progress=True, - generate_cameras=True, - make_routing_index=True, - generate_traffic_keys=True, - output=country, - **kwargs - ) - - -def stage_routing_transit(env, country, **kwargs): - run_gen_tool_with_recovery_country( - env, - env.gen_tool, - out=env.get_subprocess_out(country), - err=env.get_subprocess_out(country), - data_path=env.mwm_path, - intermediate_data_path=env.intermediate_path, - user_resource_path=env.user_resource_path, - transit_path=env.transit_path, - make_transit_cross_mwm=True, - output=country, - **kwargs - ) +def helper_stage(stage: Type[Stage]) -> Type[Stage]: + stage.is_helper = True + return stage diff --git a/tools/python/maps_generator/generator/stages_declaration.py b/tools/python/maps_generator/generator/stages_declaration.py new file mode 100644 index 0000000000..7b9bb2001d --- /dev/null +++ b/tools/python/maps_generator/generator/stages_declaration.py @@ -0,0 +1,421 @@ +"""" +This file contains possible stages that maps_generator can run. +Some algorithms suppose a maps genration processes looks like: + stage1, ..., stage_mwm[country_stage_1, ..., country_stage_M], ..., stageN +Only stage_mwm can contain country_ +""" +import datetime +import json +import logging +import multiprocessing +import os +import shutil +import tarfile +from collections import defaultdict +from functools import partial +from multiprocessing.pool import ThreadPool +from typing import Type + +from descriptions.descriptions_downloader import check_and_get_checker +from descriptions.descriptions_downloader import download_from_wikidata_tags +from descriptions.descriptions_downloader import download_from_wikipedia_tags +from post_generation.hierarchy_to_countries import hierarchy_to_countries +from post_generation.inject_promo_ids import inject_promo_ids +from post_generation.localads_mwm_to_csv import create_csv +from . import coastline +from . import settings +from . import steps +from .env import Env +from .env import WORLDS_NAMES +from .env import WORLD_COASTS_NAME +from .env import WORLD_NAME +from .exceptions import BadExitStatusError +from .gen_tool import run_gen_tool +from .stages import Stage +from .stages import build_lock +from .stages import country_stage +from .stages import get_stage_name +from .stages import helper_stage +from .stages import mwm_stage +from .stages import outer_stage +from .stages import planet_lock +from .stages import production_only +from .statistics import get_stages_info +from .statistics import make_stats +from ..utils.file import download_files +from ..utils.file import is_verified + +logger = logging.getLogger("maps_generator") + + +def is_skipped(env: Env, stage: Type[Stage]) -> bool: + return env.is_skipped_stage_name(get_stage_name(stage)) + + +@outer_stage +class StageDownloadExternal(Stage): + def apply(self, env: Env): + download_files( + {settings.SUBWAY_URL: env.paths.subway_path,} + ) + + +@outer_stage +@production_only +class StageDownloadProductionExternal(Stage): + def apply(self, env: Env): + download_files( + { + settings.UGC_URL: env.paths.ugc_path, + settings.HOTELS_URL: env.paths.hotels_path, + settings.PROMO_CATALOG_CITIES_URL: env.paths.promo_catalog_cities_path, + settings.PROMO_CATALOG_COUNTRIES_URL: env.paths.promo_catalog_countries_path, + settings.POPULARITY_URL: env.paths.popularity_path, + settings.FOOD_URL: env.paths.food_paths, + settings.FOOD_TRANSLATIONS_URL: env.paths.food_translations_path, + settings.UK_POSTCODES_URL: env.paths.uk_postcodes_path, + settings.US_POSTCODES_URL: env.paths.us_postcodes_path, + } + ) + + +@outer_stage +@planet_lock +class StageDownloadAndConvertPlanet(Stage): + def apply(self, env: Env, **kwargs): + force_download = not is_skipped(env, StageUpdatePlanet) + if force_download or not is_verified(settings.PLANET_O5M): + steps.step_download_and_convert_planet( + env, force_download=force_download, **kwargs + ) + + +@outer_stage +@planet_lock +class StageUpdatePlanet(Stage): + def apply(self, env: Env, **kwargs): + if not settings.DEBUG: + steps.step_update_planet(env, **kwargs) + + +@outer_stage +@build_lock +class StageCoastline(Stage): + def apply(self, env: Env, use_old_if_fail=True): + coasts_geom = "WorldCoasts.geom" + coasts_rawgeom = "WorldCoasts.rawgeom" + try: + coastline.make_coastline(env) + except BadExitStatusError as e: + if not use_old_if_fail: + raise e + + logger.info("Build costs failed. Try to download the costs...") + download_files( + { + settings.PLANET_COASTS_GEOM_URL: os.path.join( + env.paths.coastline_path, coasts_geom + ), + settings.PLANET_COASTS_RAWGEOM_URL: os.path.join( + env.paths.coastline_path, coasts_rawgeom + ), + } + ) + + for f in [coasts_geom, coasts_rawgeom]: + path = os.path.join(env.paths.coastline_path, f) + shutil.copy2(path, env.paths.intermediate_data_path) + + +@outer_stage +@build_lock +class StagePreprocess(Stage): + def apply(self, env: Env, **kwargs): + steps.step_preprocess(env, **kwargs) + + +@outer_stage +@build_lock +class StageFeatures(Stage): + def apply(self, env: Env): + extra = {} + if is_skipped(env, StageDescriptions): + extra["idToWikidata"] = env.paths.id_to_wikidata_path + if is_skipped(env, StageDownloadProductionExternal): + extra["booking_data"] = env.paths.hotels_path + extra["promo_catalog_cities"] = env.paths.promo_catalog_cities_path + extra["popular_places_data"] = env.paths.popularity_path + extra["brands_data"] = env.paths.food_paths + extra["brands_translations_data"] = env.paths.food_translations_path + if is_skipped(env, StageCoastline): + extra["emit_coasts"] = True + + steps.step_features(env, **extra) + if os.path.exists(env.paths.packed_polygons_path): + shutil.copy2(env.paths.packed_polygons_path, env.paths.mwm_path) + + +@outer_stage +@build_lock +@production_only +@helper_stage +class StageDownloadDescriptions(Stage): + def apply(self, env: Env): + if not is_skipped(env, StageDescriptions): + return + + run_gen_tool( + env.gen_tool, + out=env.get_subprocess_out(), + err=env.get_subprocess_out(), + intermediate_data_path=env.paths.intermediate_data_path, + user_resource_path=env.paths.user_resource_path, + dump_wikipedia_urls=env.paths.wiki_url_path, + idToWikidata=env.paths.id_to_wikidata_path, + ) + + langs = ("en", "ru", "es", "fr", "de") + checker = check_and_get_checker(env.paths.popularity_path) + download_from_wikipedia_tags( + env.paths.wiki_url_path, env.paths.descriptions_path, langs, checker + ) + download_from_wikidata_tags( + env.paths.id_to_wikidata_path, env.paths.descriptions_path, langs, checker + ) + + +@outer_stage +@build_lock +@mwm_stage +class StageMwm(Stage): + def apply(self, env: Env): + def build(country): + StageIndex(country=country)(env) + StageUgc(country=country)(env) + StagePopularity(country=country)(env) + StageSrtm(country=country)(env) + StageDescriptions(country=country)(env) + StageRouting(country=country)(env) + StageRoutingTransit(country=country)(env) + env.finish_mwm(country) + + def build_world(country): + StageIndex(country=country)(env) + StageCitiesIdsWorld(country=country)(env) + env.finish_mwm(country) + + def build_world_coasts(country): + StageIndex(country=country)(env) + env.finish_mwm(country) + + specific = {WORLD_NAME: build_world, WORLD_COASTS_NAME: build_world_coasts} + names = env.get_tmp_mwm_names() + with ThreadPool() as pool: + pool.map( + lambda c: specific[c](c) if c in specific else build(c), + names, + chunksize=1, + ) + + +@country_stage +@build_lock +class StageIndex(Stage): + def apply(self, env: Env, country, **kwargs): + if country == WORLD_NAME: + steps.step_index_world(env, country, **kwargs) + elif country == WORLD_COASTS_NAME: + steps.step_coastline_index(env, country, **kwargs) + else: + extra = {} + if is_skipped(env, StageDownloadProductionExternal): + extra["uk_postcodes_dataset"] = env.paths.uk_postcodes_path + extra["us_postcodes_dataset"] = env.paths.us_postcodes_path + steps.step_index(env, country, **kwargs, **extra) + + +@country_stage +@build_lock +@production_only +class StageCitiesIdsWorld(Stage): + def apply(self, env: Env, country, **kwargs): + steps.step_cities_ids_world(env, country, **kwargs) + + +@country_stage +@build_lock +@production_only +class StageUgc(Stage): + def apply(self, env: Env, country, **kwargs): + steps.step_ugc(env, country, **kwargs) + + +@country_stage +@build_lock +@production_only +class StagePopularity(Stage): + def apply(self, env: Env, country, **kwargs): + steps.step_popularity(env, country, **kwargs) + + +@country_stage +@build_lock +@production_only +class StageSrtm(Stage): + def apply(self, env: Env, country, **kwargs): + steps.step_srtm(env, country, **kwargs) + + +@country_stage +@build_lock +@production_only +class StageDescriptions(Stage): + def apply(self, env: Env, country, **kwargs): + steps.step_description(env, country, **kwargs) + + +@country_stage +@build_lock +class StageRouting(Stage): + def apply(self, env: Env, country, **kwargs): + steps.step_routing(env, country, **kwargs) + + +@country_stage +@build_lock +class StageRoutingTransit(Stage): + def apply(self, env: Env, country, **kwargs): + steps.step_routing_transit(env, country, **kwargs) + + +@outer_stage +@build_lock +class StageCountriesTxt(Stage): + def apply(self, env: Env): + countries = hierarchy_to_countries( + env.paths.old_to_new_path, + env.paths.borders_to_osm_path, + env.paths.countries_synonyms_path, + env.paths.hierarchy_path, + env.paths.mwm_path, + env.paths.mwm_version, + ) + if is_skipped(env, StageDownloadProductionExternal): + countries_json = json.loads(countries) + inject_promo_ids( + countries_json, + env.paths.promo_catalog_cities_path, + env.paths.promo_catalog_countries_path, + env.paths.mwm_path, + env.paths.types_path, + env.paths.mwm_path, + ) + countries = json.dumps(countries_json, ensure_ascii=True, indent=1) + with open(env.paths.counties_txt_path, "w") as f: + f.write(countries) + + +@outer_stage +@build_lock +class StageExternalResources(Stage): + def apply(self, env: Env): + black_list = {"00_roboto_regular.ttf"} + resources = [ + os.path.join(env.paths.user_resource_path, file) + for file in os.listdir(env.paths.user_resource_path) + if file.endswith(".ttf") and file not in black_list + ] + for ttf_file in resources: + shutil.copy2(ttf_file, env.paths.mwm_path) + + for file in os.listdir(env.paths.mwm_path): + if file.startswith(WORLD_NAME) and file.endswith(".mwm"): + resources.append(os.path.join(env.paths.mwm_path, file)) + + resources.sort() + with open(env.paths.external_resources_path, "w") as f: + for resource in resources: + fd = os.open(resource, os.O_RDONLY) + f.write(f"{os.path.basename(resource)} {os.fstat(fd).st_size}\n") + + +@outer_stage +@build_lock +@production_only +class StageLocalads(Stage): + def apply(self, env: Env): + create_csv( + env.paths.localads_path, + env.paths.mwm_path, + env.paths.mwm_path, + env.paths.types_path, + env.mwm_version, + multiprocessing.cpu_count(), + ) + with tarfile.open(f"{env.paths.localads_path}.tar.gz", "w:gz") as tar: + for filename in os.listdir(env.paths.localads_path): + tar.add( + os.path.join(env.paths.localads_path, filename), arcname=filename + ) + + +@outer_stage +@build_lock +class StageStatistics(Stage): + def apply(self, env: Env): + result = defaultdict(lambda: defaultdict(dict)) + + def stage_mwm_statistics(env: Env, country, **kwargs): + stats_tmp = os.path.join(env.paths.draft_path, f"{country}.stat") + with open(os.devnull, "w") as dev_null: + with open(stats_tmp, "w") as f: + steps.run_gen_tool_with_recovery_country( + env, + env.gen_tool, + out=f, + err=dev_null, + data_path=env.paths.mwm_path, + user_resource_path=env.paths.user_resource_path, + type_statistics=True, + output=country, + **kwargs, + ) + result["countries"][country]["types"] = make_stats( + settings.STATS_TYPES_CONFIG, stats_tmp + ) + + names = env.get_tmp_mwm_names() + countries = filter(lambda x: x not in WORLDS_NAMES, names) + with ThreadPool() as pool: + pool.map(partial(stage_mwm_statistics, env), countries) + + steps_info = get_stages_info(env.paths.log_path, {"statistics"}) + result["steps"] = steps_info["steps"] + for c in steps_info["countries"]: + result["countries"][c]["steps"] = steps_info["countries"][c] + + def default(o): + if isinstance(o, datetime.timedelta): + return str(o) + + with open(os.path.join(env.paths.stats_path, "stats.json"), "w") as f: + json.dump( + result, f, ensure_ascii=False, sort_keys=True, indent=2, default=default + ) + + +@outer_stage +@build_lock +class StageCleanup(Stage): + def apply(self, env: Env): + logger.info( + f"osm2ft files will be moved from {env.paths.build_path} " + f"to {env.paths.osm2ft_path}." + ) + for x in os.listdir(env.paths.mwm_path): + p = os.path.join(env.paths.mwm_path, x) + if os.path.isfile(p) and x.endswith(".mwm.osm2ft"): + shutil.move(p, os.path.join(env.paths.osm2ft_path, x)) + + logger.info(f"{env.paths.draft_path} will be removed.") + shutil.rmtree(env.paths.draft_path) diff --git a/tools/python/maps_generator/generator/statistics.py b/tools/python/maps_generator/generator/statistics.py index 3dd0dc761e..9ae9195bb1 100644 --- a/tools/python/maps_generator/generator/statistics.py +++ b/tools/python/maps_generator/generator/statistics.py @@ -1,23 +1,26 @@ -import re -import os import datetime +import os +import re from collections import defaultdict from .exceptions import ParseError +RE_STAT = re.compile( + r"(?:\d+\. )?([\w:|-]+?)\|: " + r"size = \d+; " + r"count = (\d+); " + r"length = ([0-9.e+-]+) m; " + r"area = ([0-9.e+-]+) m²; " + r"names = (\d+)\s*" +) -RE_STAT = re.compile(r"(?:\d+\. )?([\w:|-]+?)\|: " - r"size = \d+; " - r"count = (\d+); " - r"length = ([0-9.e+-]+) m; " - r"area = ([0-9.e+-]+) m²; " - r"names = (\d+)\s*") - -RE_TIME_DELTA = re.compile(r'^(?:(?P-?\d+) (days?, )?)?' - r'((?:(?P-?\d+):)(?=\d+:\d+))?' - r'(?:(?P-?\d+):)?' - r'(?P-?\d+)' - r'(?:\.(?P\d{1,6})\d{0,6})?$') +RE_TIME_DELTA = re.compile( + r"^(?:(?P-?\d+) (days?, )?)?" + r"((?:(?P-?\d+):)(?=\d+:\d+))?" + r"(?:(?P-?\d+):)?" + r"(?P-?\d+)" + r"(?:\.(?P\d{1,6})\d{0,6})?$" +) RE_FINISH_STAGE = re.compile(r"(.*)Stage (.+): finished in (.+)$") @@ -26,13 +29,15 @@ def read_stat(f): stats = [] for line in f: m = RE_STAT.match(line) - stats.append({ - "name": m.group(1).replace("|", "-"), - "cnt": int(m.group(2)), - "len": float(m.group(3)), - "area": float(m.group(4)), - "names": int(m.group(5)) - }) + stats.append( + { + "name": m.group(1).replace("|", "-"), + "cnt": int(m.group(2)), + "len": float(m.group(3)), + "area": float(m.group(4)), + "names": int(m.group(5)), + } + ) return stats diff --git a/tools/python/maps_generator/generator/status.py b/tools/python/maps_generator/generator/status.py index c637313e27..2bd6c627ca 100644 --- a/tools/python/maps_generator/generator/status.py +++ b/tools/python/maps_generator/generator/status.py @@ -1,26 +1,29 @@ import os +from typing import AnyStr class Status: - def __init__(self): - self.stat_path = None - self.stat_next = None + """Status is used for recovering and continuation maps generation.""" + + def __init__(self, stat_path: AnyStr = None, stat_next: AnyStr = None): + self.stat_path = stat_path + self.stat_next = stat_next self.stat_saved = None self.find = False - def init(self, stat_path, stat_next): + def init(self, stat_path: AnyStr, stat_next: AnyStr): self.stat_path = stat_path self.stat_next = stat_next if os.path.exists(self.stat_path) and os.path.isfile(self.stat_path): with open(self.stat_path) as status: self.stat_saved = status.read() if not self.find: - self.find = self.stat_saved is None or not self.need_skip() + self.find = not self.stat_saved or not self.need_skip() - def need_skip(self): + def need_skip(self) -> bool: if self.find: return False - return self.stat_saved is not None and self.stat_next != self.stat_saved + return self.stat_saved and self.stat_next != self.stat_saved def update_status(self): with open(self.stat_path, "w") as status: diff --git a/tools/python/maps_generator/generator/steps.py b/tools/python/maps_generator/generator/steps.py new file mode 100644 index 0000000000..b4339ae9df --- /dev/null +++ b/tools/python/maps_generator/generator/steps.py @@ -0,0 +1,286 @@ +""" +This file contains basic api for generator_tool and osm tools to generate maps. +""" +import logging +import os +import shutil +import subprocess +from typing import AnyStr + +from . import settings +from .env import Env +from .env import PathProvider +from .env import WORLDS_NAMES +from .env import WORLD_NAME +from .env import get_all_countries_list +from .gen_tool import run_gen_tool +from .osmtools import osmconvert +from .osmtools import osmupdate +from ..utils.file import download_file +from ..utils.file import is_verified +from ..utils.file import symlink_force +from ..utils.md5 import md5 +from ..utils.md5 import write_md5sum + +logger = logging.getLogger("maps_generator") + + +def download_planet(planet: AnyStr): + download_file(settings.PLANET_URL, planet) + download_file(settings.PLANET_MD5_URL, md5(planet)) + + +def convert_planet( + tool: AnyStr, + in_planet: AnyStr, + out_planet: AnyStr, + output=subprocess.DEVNULL, + error=subprocess.DEVNULL, +): + osmconvert(tool, in_planet, out_planet, output=output, error=error) + write_md5sum(out_planet, md5(out_planet)) + + +def step_download_and_convert_planet(env: Env, force_download: bool, **kwargs): + if force_download or not is_verified(settings.PLANET_PBF): + download_planet(settings.PLANET_PBF) + + convert_planet( + env[settings.OSM_TOOL_CONVERT], + settings.PLANET_PBF, + settings.PLANET_O5M, + output=env.get_subprocess_out(), + error=env.get_subprocess_out(), + ) + os.remove(settings.PLANET_PBF) + os.remove(md5(settings.PLANET_PBF)) + + +def step_update_planet(env: Env, **kwargs): + tmp = settings.PLANET_O5M + ".tmp" + osmupdate( + env[settings.OSM_TOOL_UPDATE], + settings.PLANET_O5M, + tmp, + output=env.get_subprocess_out(), + error=env.get_subprocess_out(), + **kwargs, + ) + os.remove(settings.PLANET_O5M) + os.rename(tmp, settings.PLANET_O5M) + write_md5sum(settings.PLANET_O5M, md5(settings.PLANET_O5M)) + + +def step_preprocess(env: Env, **kwargs): + run_gen_tool( + env.gen_tool, + out=env.get_subprocess_out(), + err=env.get_subprocess_out(), + intermediate_data_path=env.paths.intermediate_data_path, + osm_file_type="o5m", + osm_file_name=settings.PLANET_O5M, + node_storage=env.node_storage, + user_resource_path=env.paths.user_resource_path, + preprocess=True, + **kwargs, + ) + + +def step_features(env: Env, **kwargs): + extra = {} + if env.production: + extra["add_ads"] = True + if any(x not in WORLDS_NAMES for x in env.countries): + extra["generate_packed_borders"] = True + if any(x == WORLD_NAME for x in env.countries): + extra["generate_world"] = True + if len(env.countries) == len(get_all_countries_list(PathProvider.borders_path())): + extra["have_borders_for_whole_world"] = True + + run_gen_tool( + env.gen_tool, + out=env.get_subprocess_out(), + err=env.get_subprocess_out(), + data_path=env.paths.data_path, + intermediate_data_path=env.paths.intermediate_data_path, + osm_file_type="o5m", + osm_file_name=settings.PLANET_O5M, + node_storage=env.node_storage, + user_resource_path=env.paths.user_resource_path, + dump_cities_boundaries=True, + cities_boundaries_data=env.paths.cities_boundaries_path, + generate_features=True, + **extra, + **kwargs, + ) + + +def run_gen_tool_with_recovery_country(env: Env, *args, **kwargs): + if "data_path" not in kwargs or "output" not in kwargs: + logger.warning("The call run_gen_tool() will be without recovery.") + run_gen_tool(*args, **kwargs) + + prev_data_path = kwargs["data_path"] + mwm = f"{kwargs['output']}.mwm" + osm2ft = f"{mwm}.osm2ft" + kwargs["data_path"] = env.paths.draft_path + symlink_force( + os.path.join(prev_data_path, osm2ft), os.path.join(env.paths.draft_path, osm2ft) + ) + shutil.copy( + os.path.join(prev_data_path, mwm), os.path.join(env.paths.draft_path, mwm) + ) + run_gen_tool(*args, **kwargs) + shutil.move( + os.path.join(env.paths.draft_path, mwm), os.path.join(prev_data_path, mwm) + ) + kwargs["data_path"] = prev_data_path + + +def _generate_common_index(env: Env, country: AnyStr, **kwargs): + run_gen_tool( + env.gen_tool, + out=env.get_subprocess_out(country), + err=env.get_subprocess_out(country), + data_path=env.paths.mwm_path, + intermediate_data_path=env.paths.intermediate_data_path, + user_resource_path=env.paths.user_resource_path, + node_storage=env.node_storage, + planet_version=env.planet_version, + generate_geometry=True, + generate_index=True, + output=country, + **kwargs, + ) + + +def step_index_world(env: Env, country: AnyStr, **kwargs): + _generate_common_index( + env, + country, + generate_search_index=True, + cities_boundaries_data=env.paths.cities_boundaries_path, + generate_cities_boundaries=True, + **kwargs, + ) + + +def step_cities_ids_world(env: Env, country: AnyStr, **kwargs): + run_gen_tool_with_recovery_country( + env, + env.gen_tool, + out=env.get_subprocess_out(country), + err=env.get_subprocess_out(country), + data_path=env.paths.mwm_path, + user_resource_path=env.paths.user_resource_path, + output=country, + generate_cities_ids=True, + **kwargs, + ) + + +def step_index(env: Env, country: AnyStr, **kwargs): + _generate_common_index(env, country, generate_search_index=True, **kwargs) + + +def step_coastline_index(env: Env, country: AnyStr, **kwargs): + _generate_common_index(env, country, **kwargs) + + +def step_ugc(env: Env, country: AnyStr, **kwargs): + run_gen_tool_with_recovery_country( + env, + env.gen_tool, + out=env.get_subprocess_out(country), + err=env.get_subprocess_out(country), + data_path=env.paths.mwm_path, + intermediate_data_path=env.paths.intermediate_data_path, + user_resource_path=env.paths.user_resource_path, + ugc_data=env.paths.ugc_path, + output=country, + **kwargs, + ) + + +def step_popularity(env: Env, country: AnyStr, **kwargs): + run_gen_tool_with_recovery_country( + env, + env.gen_tool, + out=env.get_subprocess_out(country), + err=env.get_subprocess_out(country), + data_path=env.paths.mwm_path, + intermediate_data_path=env.paths.intermediate_data_path, + user_resource_path=env.paths.user_resource_path, + popular_places_data=env.paths.popularity_path, + generate_popular_places=True, + output=country, + **kwargs, + ) + + +def step_srtm(env: Env, country: AnyStr, **kwargs): + run_gen_tool_with_recovery_country( + env, + env.gen_tool, + out=env.get_subprocess_out(country), + err=env.get_subprocess_out(country), + data_path=env.paths.mwm_path, + intermediate_data_path=env.paths.intermediate_data_path, + user_resource_path=env.paths.user_resource_path, + srtm_path=env.paths.srtm_path(), + output=country, + **kwargs, + ) + + +def step_description(env: Env, country: AnyStr, **kwargs): + run_gen_tool_with_recovery_country( + env, + env.gen_tool, + out=env.get_subprocess_out(country), + err=env.get_subprocess_out(country), + data_path=env.paths.mwm_path, + user_resource_path=env.paths.user_resource_path, + wikipedia_pages=env.paths.descriptions_path, + idToWikidata=env.paths.id_to_wikidata_path, + output=country, + **kwargs, + ) + + +def step_routing(env: Env, country: AnyStr, **kwargs): + run_gen_tool_with_recovery_country( + env, + env.gen_tool, + out=env.get_subprocess_out(country), + err=env.get_subprocess_out(country), + data_path=env.paths.mwm_path, + intermediate_data_path=env.paths.intermediate_data_path, + user_resource_path=env.paths.user_resource_path, + cities_boundaries_data=env.paths.cities_boundaries_path, + generate_maxspeed=True, + make_city_roads=True, + make_cross_mwm=True, + disable_cross_mwm_progress=True, + generate_cameras=True, + make_routing_index=True, + generate_traffic_keys=True, + output=country, + **kwargs, + ) + + +def step_routing_transit(env: Env, country: AnyStr, **kwargs): + run_gen_tool_with_recovery_country( + env, + env.gen_tool, + out=env.get_subprocess_out(country), + err=env.get_subprocess_out(country), + data_path=env.paths.mwm_path, + intermediate_data_path=env.paths.intermediate_data_path, + user_resource_path=env.paths.user_resource_path, + transit_path=env.paths.transit_path, + make_transit_cross_mwm=True, + output=country, + **kwargs, + ) diff --git a/tools/python/maps_generator/maps_generator.py b/tools/python/maps_generator/maps_generator.py index a4d7fa1526..61d825a1b9 100644 --- a/tools/python/maps_generator/maps_generator.py +++ b/tools/python/maps_generator/maps_generator.py @@ -1,416 +1,51 @@ -import datetime -import json import logging -import multiprocessing -import os -import shutil -import tarfile -from collections import defaultdict -from functools import partial -from multiprocessing.pool import ThreadPool +from typing import AnyStr +from typing import Optional -from descriptions.descriptions_downloader import (check_and_get_checker, - download_from_wikipedia_tags, - download_from_wikidata_tags) -from filelock import FileLock -from post_generation.hierarchy_to_countries import hierarchy_to_countries -from post_generation.inject_promo_ids import inject_promo_ids -from post_generation.localads_mwm_to_csv import create_csv - -from .generator import stages -from .generator import coastline -from .generator import settings -from .generator.decorators import stage, country_stage, country_stage_log -from .generator.env import (planet_lock_file, build_lock_file, - WORLD_COASTS_NAME, WORLD_NAME, WORLDS_NAMES) -from .generator.exceptions import ContinueError, BadExitStatusError -from .generator.gen_tool import run_gen_tool -from .generator.statistics import make_stats, get_stages_info -from .utils.file import is_verified, download_file +from .generator import stages_declaration as sd +from .generator.env import Env +from .generator.generation import Generation logger = logging.getLogger("maps_generator") -def download_external(url_to_path: dict): - for k, v in url_to_path.items(): - download_file(k, v) - - -@stage -def stage_download_and_convert_planet(env, **kwargs): - force_download = not env.is_accepted_stage(stage_update_planet) - if force_download or not is_verified(settings.PLANET_O5M): - stages.stage_download_and_convert_planet(env, force_download=force_download, - **kwargs) - - -@stage -def stage_update_planet(env, **kwargs): - if not settings.DEBUG: - stages.stage_update_planet(env, **kwargs) - - -@stage -def stage_download_external(env): - download_external({ - settings.SUBWAY_URL: env.subway_path, - }) - - -@stage -def stage_download_production_external(env): - download_external({ - settings.UGC_URL: env.ugc_path, - settings.HOTELS_URL: env.hotels_path, - settings.PROMO_CATALOG_CITIES_URL: env.promo_catalog_cities_path, - settings.PROMO_CATALOG_COUNTRIES_URL: env.promo_catalog_countries_path, - settings.POPULARITY_URL: env.popularity_path, - settings.FOOD_URL: env.food_paths, - settings.FOOD_TRANSLATIONS_URL: env.food_translations_path, - settings.UK_POSTCODES_URL: env.uk_postcodes_path, - settings.US_POSTCODES_URL: env.us_postcodes_path, - }) - - -@stage -def stage_preprocess(env, **kwargs): - stages.stage_preprocess(env, **kwargs) - - -@stage -def stage_features(env): - extra = {} - if env.is_accepted_stage(stage_descriptions): - extra["idToWikidata"] = env.id_to_wikidata_path - if env.is_accepted_stage(stage_download_production_external): - extra["booking_data"] = env.hotels_path - extra["promo_catalog_cities"] = env.promo_catalog_cities_path - extra["popular_places_data"] = env.popularity_path - extra["brands_data"] = env.food_paths - extra["brands_translations_data"] = env.food_translations_path - if env.is_accepted_stage(stage_coastline): - extra["emit_coasts"]=True - - stages.stage_features(env, **extra) - if os.path.exists(env.packed_polygons_path): - shutil.copy2(env.packed_polygons_path, env.mwm_path) - - -@stage -def stage_coastline(env): - coasts_geom = "WorldCoasts.geom" - coasts_rawgeom = "WorldCoasts.rawgeom" - try: - coastline.make_coastline(env) - except BadExitStatusError: - logger.info("Build costs failed. Try to download the costs...") - download_external({ - settings.PLANET_COASTS_GEOM_URL: - os.path.join(env.coastline_path, coasts_geom), - settings.PLANET_COASTS_RAWGEOM_URL: - os.path.join(env.coastline_path, coasts_rawgeom) - }) - - for f in [coasts_geom, coasts_rawgeom]: - path = os.path.join(env.coastline_path, f) - shutil.copy2(path, env.intermediate_path) - - -@country_stage -def stage_index(env, country, **kwargs): - if country == WORLD_NAME: - stages.stage_index_world(env, country, **kwargs) - elif country == WORLD_COASTS_NAME: - stages.stage_coastline_index(env, country, **kwargs) - else: - extra = {} - if env.is_accepted_stage(stage_download_production_external): - extra["uk_postcodes_dataset"] = env.uk_postcodes_path - extra["us_postcodes_dataset"] = env.us_postcodes_path - stages.stage_index(env, country, **kwargs, **extra) - - -@country_stage -def stage_cities_ids_world(env, country, **kwargs): - stages.stage_cities_ids_world(env, country, **kwargs) - - -@country_stage -def stage_ugc(env, country, **kwargs): - stages.stage_ugc(env, country, **kwargs) - - -@country_stage -def stage_popularity(env, country, **kwargs): - stages.stage_popularity(env, country, **kwargs) - - -@country_stage -def stage_srtm(env, country, **kwargs): - stages.stage_srtm(env, country, **kwargs) - - -@country_stage -def stage_routing(env, country, **kwargs): - stages.stage_routing(env, country, **kwargs) - - -@country_stage -def stage_routing_transit(env, country, **kwargs): - stages.stage_routing_transit(env, country, **kwargs) - - -@stage -def stage_mwm(env): - def build(country): - stage_index(env, country) - stage_ugc(env, country) - stage_popularity(env, country) - stage_srtm(env, country) - stage_routing(env, country) - stage_routing_transit(env, country) - env.finish_mwm(country) - - def build_world(country): - stage_index(env, country) - stage_cities_ids_world(env, country) - env.finish_mwm(country) - - def build_world_coasts(country): - stage_index(env, country) - env.finish_mwm(country) - - specific = { - WORLD_NAME: build_world, - WORLD_COASTS_NAME: build_world_coasts - } - - mwms = env.get_mwm_names() - with ThreadPool() as pool: - pool.map(lambda c: specific[c](c) if c in specific else build(c), mwms, - chunksize=1) - - -@stage -def stage_descriptions(env): - run_gen_tool(env.gen_tool, - out=env.get_subprocess_out(), - err=env.get_subprocess_out(), - intermediate_data_path=env.intermediate_path, - user_resource_path=env.user_resource_path, - dump_wikipedia_urls=env.wiki_url_path, - idToWikidata=env.id_to_wikidata_path) - - langs = ("en", "ru", "es", "fr", "de") - checker = check_and_get_checker(env.popularity_path) - download_from_wikipedia_tags(env.wiki_url_path, env.descriptions_path, - langs, checker) - download_from_wikidata_tags(env.id_to_wikidata_path, env.descriptions_path, - langs, checker) - - @country_stage_log - def stage_write_descriptions(env, country, **kwargs): - stages.run_gen_tool_with_recovery_country( - env, - env.gen_tool, - out=env.get_subprocess_out(country), - err=env.get_subprocess_out(country), - data_path=env.mwm_path, - user_resource_path=env.user_resource_path, - wikipedia_pages=env.descriptions_path, - idToWikidata=env.id_to_wikidata_path, - output=country, - **kwargs - ) - - mwms = env.get_mwm_names() - countries = filter(lambda x: x not in WORLDS_NAMES, mwms) - with ThreadPool() as pool: - pool.map(partial(stage_write_descriptions, env), countries) - - -@stage -def stage_countries_txt(env): - countries = hierarchy_to_countries(env.old_to_new_path, - env.borders_to_osm_path, - env.countries_synonyms_path, - env.hierarchy_path, env.mwm_path, - env.mwm_version) - if env.is_accepted_stage(stage_download_production_external): - countries_json = json.loads(countries) - inject_promo_ids(countries_json, env.promo_catalog_cities_path, - env.promo_catalog_countries_path, env.mwm_path, - env.types_path, env.mwm_path) - countries = json.dumps(countries_json, ensure_ascii=True, indent=1) - - with open(env.counties_txt_path, "w") as f: - f.write(countries) - - -@stage -def stage_external_resources(env): - black_list = {"00_roboto_regular.ttf"} - resources = [os.path.join(env.user_resource_path, file) - for file in os.listdir(env.user_resource_path) - if file.endswith(".ttf") and file not in black_list] - for ttf_file in resources: - shutil.copy2(ttf_file, env.mwm_path) - - for file in os.listdir(env.mwm_path): - if file.startswith(WORLD_NAME) and file.endswith(".mwm"): - resources.append(os.path.join(env.mwm_path, file)) - - resources.sort() - with open(env.external_resources_path, "w") as f: - for resource in resources: - fd = os.open(resource, os.O_RDONLY) - f.write(f"{os.path.basename(resource)} {os.fstat(fd).st_size}\n") - - -@stage -def stage_localads(env): - create_csv(env.localads_path, env.mwm_path, env.mwm_path, env.types_path, - env.mwm_version, multiprocessing.cpu_count()) - with tarfile.open(f"{env.localads_path}.tar.gz", "w:gz") as tar: - for filename in os.listdir(env.localads_path): - tar.add(os.path.join(env.localads_path, filename), arcname=filename) - - -@stage -def stage_statistics(env): - result = defaultdict(lambda: defaultdict(dict)) - - @country_stage_log - def stage_mwm_statistics(env, country, **kwargs): - stats_tmp = os.path.join(env.draft_path, f"{country}.stat") - with open(stats_tmp, "w") as f: - stages.run_gen_tool_with_recovery_country( - env, - env.gen_tool, - out=f, - err=env.get_subprocess_out(country), - data_path=env.mwm_path, - user_resource_path=env.user_resource_path, - type_statistics=True, - output=country, - **kwargs - ) - result["countries"][country]["types"] = \ - make_stats(settings.STATS_TYPES_CONFIG, stats_tmp) - - mwms = env.get_mwm_names() - countries = filter(lambda x: x not in WORLDS_NAMES, mwms) - with ThreadPool() as pool: - pool.map(partial(stage_mwm_statistics, env), countries) - stages_info = get_stages_info(env.log_path, {"statistics"}) - result["stages"] = stages_info["stages"] - for c in stages_info["countries"]: - result["countries"][c]["stages"] = stages_info["countries"][c] - - def default(o): - if isinstance(o, datetime.timedelta): - return str(o) - - with open(os.path.join(env.stats_path, "stats.json"), "w") as f: - json.dump(result, f, ensure_ascii=False, sort_keys=True, - indent=2, default=default) - - -@stage -def stage_cleanup(env): - logger.info(f"osm2ft files will be moved from {env.out_path} " - f"to {env.osm2ft_path}.") - for x in os.listdir(env.mwm_path): - p = os.path.join(env.mwm_path, x) - if os.path.isfile(p) and x.endswith(".mwm.osm2ft"): - shutil.move(p, os.path.join(env.osm2ft_path, x)) - - logger.info(f"{env.draft_path} will be removed.") - shutil.rmtree(env.draft_path) - - -MWM_STAGE = stage_mwm.__name__ -COUNTRIES_STAGES = [s.__name__ for s in - (stage_index, stage_ugc, stage_popularity, stage_srtm, - stage_routing, stage_routing_transit)] -STAGES = [s.__name__ for s in - (stage_download_external, stage_download_production_external, - stage_download_and_convert_planet, stage_update_planet, - stage_coastline, stage_preprocess, stage_features, stage_mwm, - stage_descriptions, stage_countries_txt, stage_external_resources, - stage_localads, stage_statistics, stage_cleanup)] - -ALL_STAGES = STAGES + COUNTRIES_STAGES - - -def stages_as_string(*args): - return [x.__name__ for x in args] - - -def stage_as_string(stage): - return stage.__name__ - - -def reset_to_stage(stage_name, env): - def set_countries_stage(n): - statuses = [os.path.join(env.status_path, f) - for f in os.listdir(env.status_path) - if os.path.isfile(os.path.join(env.status_path, f)) and - os.path.join(env.status_path, f) != env.main_status_path] - for s in statuses: - with open(s, "w") as f: - f.write(n) - - _stage = f"stage_{stage_name}" - stage_mwm_index = STAGES.index(MWM_STAGE) - if _stage not in ALL_STAGES: - raise ContinueError( - f"Stage {stage_name} not in {', '.join(ALL_STAGES)}.") - if not os.path.exists(env.main_status_path): - raise ContinueError(f"Status file {env.main_status_path} not found.") - if not os.path.exists(env.status_path): - raise ContinueError(f"Status path {env.status_path} not found.") - - main_status = None - if _stage in STAGES[:stage_mwm_index + 1]: - main_status = _stage - set_countries_stage(COUNTRIES_STAGES[0]) - elif _stage in STAGES[stage_mwm_index + 1:]: - main_status = _stage - elif _stage in COUNTRIES_STAGES: - main_status = MWM_STAGE - set_countries_stage(_stage) - - logger.info(f"New active status is {main_status}.") - with open(env.main_status_path, "w") as f: - f.write(main_status) - - -def generate_maps(env): - stage_download_external(env) - stage_download_production_external(env) - with FileLock(planet_lock_file(), timeout=1) as planet_lock: - stage_download_and_convert_planet(env) - stage_update_planet(env) - with FileLock(build_lock_file(env.out_path), timeout=1): - stage_coastline(env) - stage_preprocess(env) - stage_features(env) - planet_lock.release() - stage_mwm(env) - stage_descriptions(env) - stage_countries_txt(env) - stage_external_resources(env) - stage_localads(env) - stage_statistics(env) - stage_cleanup(env) - - -def generate_coasts(env): - with FileLock(planet_lock_file(), timeout=1) as planet_lock: - stage_download_and_convert_planet(env) - stage_update_planet(env) - with FileLock(build_lock_file(env.out_path), timeout=1): - stage_coastline(env) - planet_lock.release() - stage_cleanup(env) +def run_generation(env, stages, from_stage: Optional[AnyStr] = None): + generation = Generation(env) + for s in stages: + generation.add_stage(s) + + generation.run(from_stage) + + +def generate_maps(env: Env, from_stage: Optional[AnyStr] = None): + """"Runs maps generation.""" + stages = [ + sd.StageDownloadExternal(), + sd.StageDownloadProductionExternal(), + sd.StageDownloadAndConvertPlanet(), + sd.StageUpdatePlanet(), + sd.StageCoastline(), + sd.StagePreprocess(), + sd.StageFeatures(), + sd.StageDownloadDescriptions(), + sd.StageMwm(), + sd.StageCountriesTxt(), + sd.StageExternalResources(), + sd.StageLocalads(), + sd.StageStatistics(), + sd.StageCleanup(), + ] + + run_generation(env, stages, from_stage) + + +def generate_coasts(env: Env, from_stage: Optional[AnyStr] = None): + """Runs coasts generation.""" + stages = [ + sd.StageDownloadAndConvertPlanet(), + sd.StageUpdatePlanet(), + sd.StageCoastline(), + sd.StageCleanup(), + ] + + run_generation(env, stages, from_stage) diff --git a/tools/python/maps_generator/utils/algo.py b/tools/python/maps_generator/utils/algo.py new file mode 100644 index 0000000000..5d2da6bf7d --- /dev/null +++ b/tools/python/maps_generator/utils/algo.py @@ -0,0 +1,14 @@ +from re import finditer + + +def unique(s): + seen = set() + seen_add = seen.add + return [x for x in s if not (x in seen or seen_add(x))] + + +def camel_case_split(identifier): + matches = finditer( + ".+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)", identifier + ) + return [m.group(0) for m in matches] diff --git a/tools/python/maps_generator/utils/collections.py b/tools/python/maps_generator/utils/collections.py deleted file mode 100644 index 28a56e7417..0000000000 --- a/tools/python/maps_generator/utils/collections.py +++ /dev/null @@ -1,4 +0,0 @@ -def unique(s): - seen = set() - seen_add = seen.add - return [x for x in s if not (x in seen or seen_add(x))] diff --git a/tools/python/maps_generator/utils/file.py b/tools/python/maps_generator/utils/file.py index a766835025..dcce455474 100644 --- a/tools/python/maps_generator/utils/file.py +++ b/tools/python/maps_generator/utils/file.py @@ -5,18 +5,22 @@ import logging import os import shutil import urllib.request +from typing import AnyStr +from typing import Dict +from typing import Optional -from .md5 import md5, check_md5 +from .md5 import check_md5 +from .md5 import md5 logger = logging.getLogger("maps_generator") -def is_executable(fpath): +def is_executable(fpath: AnyStr) -> bool: return os.path.isfile(fpath) and os.access(fpath, os.X_OK) @functools.lru_cache() -def find_executable(path, exe=None): +def find_executable(path: AnyStr, exe: Optional[AnyStr] = None) -> AnyStr: if exe is None: if is_executable(path): return path @@ -29,27 +33,32 @@ def find_executable(path, exe=None): raise FileNotFoundError(f"{exe} not found in {path}") -def download_file(url, name): +def download_file(url: AnyStr, name: AnyStr): logger.info(f"Trying to download {name} from {url}.") urllib.request.urlretrieve(url, name) logger.info(f"File {name} was downloaded from {url}.") -def is_exists_file_and_md5(name): +def download_files(url_to_path: Dict[AnyStr, AnyStr]): + for k, v in url_to_path.items(): + download_file(k, v) + + +def is_exists_file_and_md5(name: AnyStr) -> bool: return os.path.isfile(name) and os.path.isfile(md5(name)) -def is_verified(name): +def is_verified(name: AnyStr) -> bool: return is_exists_file_and_md5(name) and check_md5(name, md5(name)) -def copy_overwrite(from_path, to_path): +def copy_overwrite(from_path: AnyStr, to_path: AnyStr): if os.path.exists(to_path): shutil.rmtree(to_path) shutil.copytree(from_path, to_path) -def symlink_force(target, link_name): +def symlink_force(target: AnyStr, link_name: AnyStr): try: os.symlink(target, link_name) except OSError as e: diff --git a/tools/python/maps_generator/utils/log.py b/tools/python/maps_generator/utils/log.py index dde7bc204b..4acb564fc7 100644 --- a/tools/python/maps_generator/utils/log.py +++ b/tools/python/maps_generator/utils/log.py @@ -6,8 +6,11 @@ class DummyObject: return lambda *args: None -def create_file_logger(file, level=logging.DEBUG, - format="[%(asctime)s] %(levelname)s %(module)s %(message)s"): +def create_file_logger( + file, + level=logging.DEBUG, + format="[%(asctime)s] %(levelname)s %(module)s %(message)s", +): logger = logging.getLogger(file) logger.setLevel(level) formatter = logging.Formatter(format)