diff --git a/tools/python/maps_generator/README.md b/tools/python/maps_generator/README.md index 9ebaab12cd..dc277160ba 100644 --- a/tools/python/maps_generator/README.md +++ b/tools/python/maps_generator/README.md @@ -141,7 +141,7 @@ optional arguments: Continue the last build or specified in CONTINUE from the last stopped stage. -s SUFFIX, --suffix SUFFIX - Suffix the name of a build directory. + Suffix of the name of a build directory. --countries COUNTRIES List of regions, separated by a comma or a semicolon, or path to file with regions, separated by a line @@ -153,22 +153,23 @@ optional arguments: Syntax is the same as for --countries. --skip SKIP List of stages, separated by a comma or a semicolon, for which building will be skipped. Available skip - stages: DownloadExternal, DownloadProductionExternal, - DownloadAndConvertPlanet, UpdatePlanet, Coastline, - Preprocess, Features, Mwm, Index, CitiesIdsWorld, Ugc, - Popularity, Srtm, Descriptions, Routing, - RoutingTransit, CountriesTxt, ExternalResources, - LocalAds, Statistics, Cleanup. + stages: DownloadAndConvertPlanet, UpdatePlanet, + Coastline, Preprocess, Features, Mwm, Index, + CitiesIdsWorld, Ugc, Popularity, Srtm, IsolinesInfo, + Descriptions, Routing, RoutingTransit, CountriesTxt, + ExternalResources, LocalAds, Statistics, Cleanup. --from_stage FROM_STAGE Stage from which maps will be rebuild. Available - stages: DownloadExternal, DownloadProductionExternal, - DownloadAndConvertPlanet, UpdatePlanet, Coastline, - Preprocess, Features, Mwm, Index, CitiesIdsWorld, Ugc, - Popularity, Srtm, Descriptions, Routing, - RoutingTransit, CountriesTxt, ExternalResources, - LocalAds, Statistics, Cleanup. + stages: DownloadAndConvertPlanet, UpdatePlanet, + Coastline, Preprocess, Features, Mwm, Index, + CitiesIdsWorld, Ugc, Popularity, Srtm, IsolinesInfo, + Descriptions, Routing, RoutingTransit, CountriesTxt, + ExternalResources, LocalAds, Statistics, Cleanup. 
--coasts Build only WorldCoasts.raw and WorldCoasts.rawgeom files + --force_download_files + If build is continued, files will always be downloaded + again. --production Build production maps. In another case, 'osm only maps' are built - maps without additional data and advertising. diff --git a/tools/python/maps_generator/__main__.py b/tools/python/maps_generator/__main__.py index 86856e3720..b5d3443e0f 100644 --- a/tools/python/maps_generator/__main__.py +++ b/tools/python/maps_generator/__main__.py @@ -130,6 +130,12 @@ def parse_options(): action="store_true", help="Build only WorldCoasts.raw and WorldCoasts.rawgeom files", ) + parser.add_argument( + "--force_download_files", + default=False, + action="store_true", + help="If build is continued, files will always be downloaded again.", + ) parser.add_argument( "--production", default=False, @@ -278,6 +284,7 @@ def main(): build_name=build_name, build_suffix=options.suffix, skipped_stages=skipped_stages, + force_download_files=options.force_download_files, ) from_stage = None if options.from_stage: diff --git a/tools/python/maps_generator/generator/env.py b/tools/python/maps_generator/generator/env.py index 3827c61c20..3c3defbc14 100644 --- a/tools/python/maps_generator/generator/env.py +++ b/tools/python/maps_generator/generator/env.py @@ -348,6 +348,7 @@ class Env: build_name: Optional[AnyStr] = None, build_suffix: AnyStr = "", skipped_stages: Optional[Set[Type[Stage]]] = None, + force_download_files: bool = False ): self.setup_logging() @@ -359,6 +360,7 @@ class Env: self.gen_tool = self.setup_generator_tool() self.production = production + self.force_download_files = force_download_files self.countries = countries self.skipped_stages = set() if skipped_stages is None else skipped_stages if self.countries is None: diff --git a/tools/python/maps_generator/generator/stages.py b/tools/python/maps_generator/generator/stages.py index 8b9b162b1c..89df59e296 100644 ---
a/tools/python/maps_generator/generator/stages.py +++ b/tools/python/maps_generator/generator/stages.py @@ -14,6 +14,7 @@ import time from abc import ABC from abc import abstractmethod from collections import defaultdict +from multiprocessing import Lock from typing import AnyStr from typing import Callable from typing import List @@ -22,12 +23,20 @@ from typing import Type from typing import Union from maps_generator.generator.status import Status +from maps_generator.utils.file import download_files from maps_generator.utils.log import DummyObject from maps_generator.utils.log import create_file_logger logger = logging.getLogger("maps_generator") +class InternalDependency: + def __init__(self, url, path_method, mode=""): + self.url = url + self.path_method = path_method + self.mode = mode + + class Stage(ABC): need_planet_lock = False need_build_lock = False @@ -258,3 +267,40 @@ def helper_stage_for(*deps) -> Callable[[Type[Stage],], Type[Stage]]: return stage return wrapper + + +def depends_from_internal(*deps) -> Callable[[Type[Stage],], Type[Stage]]: + def new_apply(method): + def apply(obj: Stage, env: "Env", *args, **kwargs): + if hasattr(obj, "internal_dependencies") and obj.internal_dependencies: + with obj.depends_from_internal_lock: + if not obj.depends_from_internal_downloaded: + deps = {} + for d in obj.internal_dependencies: + if "p" in d.mode and not env.production: + continue + + path = None + if isinstance(d.path_method, property): + path = d.path_method.__get__(env.paths) + + assert path is not None, type(d.path_method) + deps[d.url] = path + + if deps: + download_files(deps, env.force_download_files) + + obj.depends_from_internal_downloaded = True + + return method(obj, env, *args, **kwargs) + + return apply + + def wrapper(stage: Type[Stage]) -> Type[Stage]: + stage.internal_dependencies = deps + stage.depends_from_internal_lock = Lock() + stage.depends_from_internal_downloaded = False + stage.apply = new_apply(stage.apply) + return stage + + return
wrapper diff --git a/tools/python/maps_generator/generator/stages_declaration.py b/tools/python/maps_generator/generator/stages_declaration.py index 41b704d8f2..041dfbcd09 100644 --- a/tools/python/maps_generator/generator/stages_declaration.py +++ b/tools/python/maps_generator/generator/stages_declaration.py @@ -30,9 +30,11 @@ from maps_generator.generator.env import WORLD_COASTS_NAME from maps_generator.generator.env import WORLD_NAME from maps_generator.generator.exceptions import BadExitStatusError from maps_generator.generator.gen_tool import run_gen_tool +from maps_generator.generator.stages import InternalDependency as D from maps_generator.generator.stages import Stage from maps_generator.generator.stages import build_lock from maps_generator.generator.stages import country_stage +from maps_generator.generator.stages import depends_from_internal from maps_generator.generator.stages import helper_stage_for from maps_generator.generator.stages import mwm_stage from maps_generator.generator.stages import outer_stage @@ -54,33 +56,6 @@ def is_accepted(env: Env, stage: Type[Stage]) -> bool: return env.is_accepted_stage(stage) -@outer_stage -class StageDownloadExternal(Stage): - def apply(self, env: Env): - download_files( - {settings.SUBWAY_URL: env.paths.subway_path,} - ) - - -@outer_stage -@production_only -class StageDownloadProductionExternal(Stage): - def apply(self, env: Env): - download_files( - { - settings.UGC_URL: env.paths.ugc_path, - settings.HOTELS_URL: env.paths.hotels_path, - settings.PROMO_CATALOG_CITIES_URL: env.paths.promo_catalog_cities_path, - settings.PROMO_CATALOG_COUNTRIES_URL: env.paths.promo_catalog_countries_path, - settings.POPULARITY_URL: env.paths.popularity_path, - settings.FOOD_URL: env.paths.food_paths, - settings.FOOD_TRANSLATIONS_URL: env.paths.food_translations_path, - settings.UK_POSTCODES_URL: env.paths.uk_postcodes_path, - settings.US_POSTCODES_URL: env.paths.us_postcodes_path, - } - ) - - @outer_stage @planet_lock class 
StageDownloadAndConvertPlanet(Stage): @@ -135,13 +110,20 @@ class StagePreprocess(Stage): @outer_stage +@depends_from_internal( + D(settings.HOTELS_URL, PathProvider.hotels_path, "p"), + D(settings.PROMO_CATALOG_CITIES_URL, PathProvider.promo_catalog_cities_path, "p"), + D(settings.POPULARITY_URL, PathProvider.popularity_path, "p"), + D(settings.FOOD_URL, PathProvider.food_paths, "p"), + D(settings.FOOD_TRANSLATIONS_URL, PathProvider.food_translations_path, "p"), +) @build_lock class StageFeatures(Stage): def apply(self, env: Env): extra = {} if is_accepted(env, StageDescriptions): extra.update({"idToWikidata": env.paths.id_to_wikidata_path}) - if is_accepted(env, StageDownloadProductionExternal): + if env.production: extra.update( { "booking_data": env.paths.hotels_path, @@ -219,6 +201,10 @@ class StageMwm(Stage): @country_stage +@depends_from_internal( + D(settings.UK_POSTCODES_URL, PathProvider.uk_postcodes_path, "p"), + D(settings.US_POSTCODES_URL, PathProvider.us_postcodes_path, "p"), +) @build_lock class StageIndex(Stage): def apply(self, env: Env, country, **kwargs): @@ -227,7 +213,7 @@ class StageIndex(Stage): elif country == WORLD_COASTS_NAME: steps.step_coastline_index(env, country, **kwargs) else: - if is_accepted(env, StageDownloadProductionExternal): + if env.production: kwargs.update( { "uk_postcodes_dataset": env.paths.uk_postcodes_path, @@ -246,6 +232,7 @@ class StageCitiesIdsWorld(Stage): @country_stage +@depends_from_internal(D(settings.UGC_URL, PathProvider.ugc_path),) @build_lock @production_only class StageUgc(Stage): @@ -293,6 +280,7 @@ class StageRouting(Stage): @country_stage +@depends_from_internal(D(settings.SUBWAY_URL, PathProvider.subway_path),) @build_lock class StageRoutingTransit(Stage): def apply(self, env: Env, country, **kwargs): @@ -300,6 +288,9 @@ class StageRoutingTransit(Stage): @outer_stage +@depends_from_internal( + D(settings.PROMO_CATALOG_COUNTRIES_URL, PathProvider.promo_catalog_countries_path, "p") +) @build_lock class 
StageCountriesTxt(Stage): def apply(self, env: Env): @@ -311,7 +302,7 @@ class StageCountriesTxt(Stage): env.paths.mwm_path, env.paths.mwm_version, ) - if is_accepted(env, StageDownloadProductionExternal): + if env.production: countries_json = json.loads(countries) inject_promo_ids( countries_json, diff --git a/tools/python/maps_generator/generator/steps.py b/tools/python/maps_generator/generator/steps.py index 12dce586a9..3cdcd5ce69 100644 --- a/tools/python/maps_generator/generator/steps.py +++ b/tools/python/maps_generator/generator/steps.py @@ -17,7 +17,7 @@ from maps_generator.generator.env import get_all_countries_list from maps_generator.generator.gen_tool import run_gen_tool from maps_generator.generator.osmtools import osmconvert from maps_generator.generator.osmtools import osmupdate -from maps_generator.utils.file import download_file +from maps_generator.utils.file import download_files from maps_generator.utils.file import is_verified from maps_generator.utils.file import symlink_force from maps_generator.utils.md5 import md5 @@ -36,11 +36,6 @@ def multithread_run_if_one_country(func): return wrap -def download_planet(planet: AnyStr): - download_file(settings.PLANET_URL, planet) - download_file(settings.PLANET_MD5_URL, md5(planet)) - - def convert_planet( tool: AnyStr, in_planet: AnyStr, @@ -54,7 +49,13 @@ def convert_planet( def step_download_and_convert_planet(env: Env, force_download: bool, **kwargs): if force_download or not is_verified(env.paths.planet_osm_pbf): - download_planet(env.paths.planet_osm_pbf) + download_files( + { + settings.PLANET_URL: env.paths.planet_osm_pbf, + settings.PLANET_MD5_URL: md5(env.paths.planet_osm_pbf), + }, + env.force_download_files, + ) convert_planet( env[settings.OSM_TOOL_CONVERT], diff --git a/tools/python/maps_generator/maps_generator.py b/tools/python/maps_generator/maps_generator.py index a3e4a820f3..2ace39fb72 100644 --- a/tools/python/maps_generator/maps_generator.py +++ 
b/tools/python/maps_generator/maps_generator.py @@ -20,8 +20,6 @@ def run_generation(env, stages, from_stage: Optional[AnyStr] = None): def generate_maps(env: Env, from_stage: Optional[AnyStr] = None): """"Runs maps generation.""" stages = [ - sd.StageDownloadExternal(), - sd.StageDownloadProductionExternal(), sd.StageDownloadAndConvertPlanet(), sd.StageUpdatePlanet(), sd.StageCoastline(), diff --git a/tools/python/maps_generator/utils/file.py b/tools/python/maps_generator/utils/file.py index fe599116e5..68e45404cf 100644 --- a/tools/python/maps_generator/utils/file.py +++ b/tools/python/maps_generator/utils/file.py @@ -5,6 +5,8 @@ import logging import os import shutil import urllib.request +from functools import partial +from multiprocessing.pool import ThreadPool from typing import AnyStr from typing import Dict from typing import Optional @@ -33,15 +35,24 @@ def find_executable(path: AnyStr, exe: Optional[AnyStr] = None) -> AnyStr: raise FileNotFoundError(f"{exe} not found in {path}") -def download_file(url: AnyStr, name: AnyStr): +def download_file(url: AnyStr, name: AnyStr, download_if_exists: bool = True): logger.info(f"Trying to download {name} from {url}.") - urllib.request.urlretrieve(url, name) + if not download_if_exists and os.path.exists(name): + logger.info(f"File {name} already exists.") + return + + tmp_name = f"{name}__" + urllib.request.urlretrieve(url, tmp_name) + shutil.move(tmp_name, name) logger.info(f"File {name} was downloaded from {url}.") -def download_files(url_to_path: Dict[AnyStr, AnyStr]): - for k, v in url_to_path.items(): - download_file(k, v) +def download_files(url_to_path: Dict[AnyStr, AnyStr], download_if_exists: bool = True): + with ThreadPool() as pool: + pool.starmap( + partial(download_file, download_if_exists=download_if_exists), + url_to_path.items(), + ) def is_exists_file_and_md5(name: AnyStr) -> bool: