[generator][python] Improved working with internal deps.

This commit is contained in:
Maksim Andrianov 2020-04-03 12:20:09 +03:00 committed by mpimenov
parent 14739dbaf2
commit 9b0cfdcb2b
8 changed files with 114 additions and 57 deletions

View file

@ -141,7 +141,7 @@ optional arguments:
Continue the last build or specified in CONTINUE from
the last stopped stage.
-s SUFFIX, --suffix SUFFIX
Suffix the name of a build directory.
Suffix of the name of a build directory.
--countries COUNTRIES
List of regions, separated by a comma or a semicolon,
or path to file with regions, separated by a line
@ -153,22 +153,23 @@ optional arguments:
Syntax is the same as for --countries.
--skip SKIP List of stages, separated by a comma or a semicolon,
for which building will be skipped. Available skip
stages: DownloadExternal, DownloadProductionExternal,
DownloadAndConvertPlanet, UpdatePlanet, Coastline,
Preprocess, Features, Mwm, Index, CitiesIdsWorld, Ugc,
Popularity, Srtm, Descriptions, Routing,
RoutingTransit, CountriesTxt, ExternalResources,
LocalAds, Statistics, Cleanup.
stages: DownloadAndConvertPlanet, UpdatePlanet,
Coastline, Preprocess, Features, Mwm, Index,
CitiesIdsWorld, Ugc, Popularity, Srtm, IsolinesInfo,
Descriptions, Routing, RoutingTransit, CountriesTxt,
ExternalResources, LocalAds, Statistics, Cleanup.
--from_stage FROM_STAGE
Stage from which maps will be rebuilt. Available
stages: DownloadExternal, DownloadProductionExternal,
DownloadAndConvertPlanet, UpdatePlanet, Coastline,
Preprocess, Features, Mwm, Index, CitiesIdsWorld, Ugc,
Popularity, Srtm, Descriptions, Routing,
RoutingTransit, CountriesTxt, ExternalResources,
LocalAds, Statistics, Cleanup.
stages: DownloadAndConvertPlanet, UpdatePlanet,
Coastline, Preprocess, Features, Mwm, Index,
CitiesIdsWorld, Ugc, Popularity, Srtm, IsolinesInfo,
Descriptions, Routing, RoutingTransit, CountriesTxt,
ExternalResources, LocalAds, Statistics, Cleanup.
--coasts Build only WorldCoasts.raw and WorldCoasts.rawgeom
files
--force_download_files
If build is continued, files will always be downloaded
again.
--production Build production maps. Otherwise, 'osm only
maps' are built - maps without additional data and
advertising.

View file

@ -130,6 +130,12 @@ def parse_options():
action="store_true",
help="Build only WorldCoasts.raw and WorldCoasts.rawgeom files",
)
parser.add_argument(
"--force_download_files",
default=False,
action="store_true",
help="If build is continued, files will always be downloaded again.",
)
parser.add_argument(
"--production",
default=False,
@ -278,6 +284,7 @@ def main():
build_name=build_name,
build_suffix=options.suffix,
skipped_stages=skipped_stages,
# argparse.Namespace exposes parsed options as attributes; it is not subscriptable.
force_download_files=options.force_download_files,
)
from_stage = None
if options.from_stage:

View file

@ -348,6 +348,7 @@ class Env:
build_name: Optional[AnyStr] = None,
build_suffix: AnyStr = "",
skipped_stages: Optional[Set[Type[Stage]]] = None,
force_download_files: bool = False
):
self.setup_logging()
@ -359,6 +360,7 @@ class Env:
self.gen_tool = self.setup_generator_tool()
self.production = production
self.force_download_files = force_download_files
self.countries = countries
self.skipped_stages = set() if skipped_stages is None else skipped_stages
if self.countries is None:

View file

@ -14,6 +14,7 @@ import time
from abc import ABC
from abc import abstractmethod
from collections import defaultdict
from multiprocessing import Lock
from typing import AnyStr
from typing import Callable
from typing import List
@ -22,12 +23,20 @@ from typing import Type
from typing import Union
from maps_generator.generator.status import Status
from maps_generator.utils.file import download_files
from maps_generator.utils.log import DummyObject
from maps_generator.utils.log import create_file_logger
logger = logging.getLogger("maps_generator")
class InternalDependency:
    """Plain record describing one downloadable dependency of a stage.

    Attributes:
        url: source address of the dependency.
        path_method: object used to resolve the destination path; the
            `depends_from_internal` decorator in this module only resolves
            it when it is a `property` (it is bound to `env.paths` there).
        mode: string of flags; `depends_from_internal` skips a dependency
            whose mode contains "p" when `env.production` is false.
    """

    def __init__(self, url, path_method, mode="") -> None:
        # Stored as given; no validation or normalisation is performed here.
        self.url, self.path_method, self.mode = url, path_method, mode
class Stage(ABC):
need_planet_lock = False
need_build_lock = False
@ -258,3 +267,40 @@ def helper_stage_for(*deps) -> Callable[[Type[Stage],], Type[Stage]]:
return stage
return wrapper
def depends_from_internal(*deps) -> Callable[[Type[Stage],], Type[Stage]]:
    """Class decorator that fetches a stage's dependencies before it is applied.

    The decorated stage class receives three class attributes
    (`internal_dependencies`, `depends_from_internal_lock`,
    `depends_from_internal_downloaded`) and its `apply` is replaced by a
    wrapper that downloads the dependencies and then calls the original
    `apply`.

    :param deps: dependency descriptors exposing `url`, `path_method` and
        `mode` (see InternalDependency).
    :return: a decorator taking a Stage subclass and returning the same class.
    :raises TypeError: at apply time, when a destination path cannot be
        resolved for a dependency.
    """

    def new_apply(method):
        def apply(obj: Stage, env: "Env", *args, **kwargs):
            if getattr(obj, "internal_dependencies", None):
                # The lock is shared through the class, so concurrent apply()
                # calls do not start the same downloads twice.
                with obj.depends_from_internal_lock:
                    if not obj.depends_from_internal_downloaded:
                        url_to_path = {}
                        for dep in obj.internal_dependencies:
                            # "p" in mode: needed only when env.production is set.
                            if "p" in dep.mode and not env.production:
                                continue

                            path = None
                            if isinstance(dep.path_method, property):
                                # Evaluate the property against the env's paths object.
                                path = dep.path_method.__get__(env.paths)

                            # Explicit raise instead of assert: asserts are
                            # stripped under -O and a None path would then be
                            # handed to the downloader.
                            if path is None:
                                raise TypeError(
                                    f"Cannot resolve a path for {dep.url}: "
                                    f"{type(dep.path_method)}"
                                )
                            url_to_path[dep.url] = path

                        if url_to_path:
                            download_files(url_to_path, env.force_download_files)

                        # Set on the instance; the class-level flag stays False.
                        obj.depends_from_internal_downloaded = True

            # Propagate whatever the wrapped apply() returns.
            return method(obj, env, *args, **kwargs)

        return apply

    def wrapper(stage: Type[Stage]) -> Type[Stage]:
        stage.internal_dependencies = deps
        stage.depends_from_internal_lock = Lock()
        stage.depends_from_internal_downloaded = False
        stage.apply = new_apply(stage.apply)
        return stage

    return wrapper

View file

@ -30,9 +30,11 @@ from maps_generator.generator.env import WORLD_COASTS_NAME
from maps_generator.generator.env import WORLD_NAME
from maps_generator.generator.exceptions import BadExitStatusError
from maps_generator.generator.gen_tool import run_gen_tool
from maps_generator.generator.stages import InternalDependency as D
from maps_generator.generator.stages import Stage
from maps_generator.generator.stages import build_lock
from maps_generator.generator.stages import country_stage
from maps_generator.generator.stages import depends_from_internal
from maps_generator.generator.stages import helper_stage_for
from maps_generator.generator.stages import mwm_stage
from maps_generator.generator.stages import outer_stage
@ -54,33 +56,6 @@ def is_accepted(env: Env, stage: Type[Stage]) -> bool:
return env.is_accepted_stage(stage)
@outer_stage
class StageDownloadExternal(Stage):
    """Outer stage that downloads settings.SUBWAY_URL to env.paths.subway_path."""

    def apply(self, env: Env):
        # Single url -> destination mapping handed to the shared downloader.
        subway_source = {settings.SUBWAY_URL: env.paths.subway_path}
        download_files(subway_source)
@outer_stage
@production_only
class StageDownloadProductionExternal(Stage):
    """Outer, production-only stage that downloads the files listed in apply()."""

    def apply(self, env: Env):
        # (url, destination) pairs, kept in the same order as the mapping
        # that is passed to download_files.
        sources = (
            (settings.UGC_URL, env.paths.ugc_path),
            (settings.HOTELS_URL, env.paths.hotels_path),
            (settings.PROMO_CATALOG_CITIES_URL, env.paths.promo_catalog_cities_path),
            (settings.PROMO_CATALOG_COUNTRIES_URL, env.paths.promo_catalog_countries_path),
            (settings.POPULARITY_URL, env.paths.popularity_path),
            (settings.FOOD_URL, env.paths.food_paths),
            (settings.FOOD_TRANSLATIONS_URL, env.paths.food_translations_path),
            (settings.UK_POSTCODES_URL, env.paths.uk_postcodes_path),
            (settings.US_POSTCODES_URL, env.paths.us_postcodes_path),
        )
        download_files(dict(sources))
@outer_stage
@planet_lock
class StageDownloadAndConvertPlanet(Stage):
@ -135,13 +110,20 @@ class StagePreprocess(Stage):
@outer_stage
@depends_from_internal(
D(settings.HOTELS_URL, PathProvider.hotels_path, "p"),
D(settings.PROMO_CATALOG_CITIES_URL, PathProvider.promo_catalog_cities_path, "p"),
D(settings.POPULARITY_URL, PathProvider.popularity_path, "p"),
D(settings.FOOD_URL, PathProvider.food_paths, "p"),
D(settings.FOOD_TRANSLATIONS_URL, PathProvider.food_translations_path, "p"),
)
@build_lock
class StageFeatures(Stage):
def apply(self, env: Env):
extra = {}
if is_accepted(env, StageDescriptions):
extra.update({"idToWikidata": env.paths.id_to_wikidata_path})
if is_accepted(env, StageDownloadProductionExternal):
if env.production:
extra.update(
{
"booking_data": env.paths.hotels_path,
@ -219,6 +201,10 @@ class StageMwm(Stage):
@country_stage
@depends_from_internal(
D(settings.UK_POSTCODES_URL, PathProvider.uk_postcodes_path, "p"),
D(settings.US_POSTCODES_URL, PathProvider.us_postcodes_path, "p"),
)
@build_lock
class StageIndex(Stage):
def apply(self, env: Env, country, **kwargs):
@ -227,7 +213,7 @@ class StageIndex(Stage):
elif country == WORLD_COASTS_NAME:
steps.step_coastline_index(env, country, **kwargs)
else:
if is_accepted(env, StageDownloadProductionExternal):
if env.production:
kwargs.update(
{
"uk_postcodes_dataset": env.paths.uk_postcodes_path,
@ -246,6 +232,7 @@ class StageCitiesIdsWorld(Stage):
@country_stage
@depends_from_internal(D(settings.UGC_URL, PathProvider.ugc_path),)
@build_lock
@production_only
class StageUgc(Stage):
@ -293,6 +280,7 @@ class StageRouting(Stage):
@country_stage
@depends_from_internal(D(settings.SUBWAY_URL, PathProvider.subway_path),)
@build_lock
class StageRoutingTransit(Stage):
def apply(self, env: Env, country, **kwargs):
@ -300,6 +288,9 @@ class StageRoutingTransit(Stage):
@outer_stage
@depends_from_internal(
D(settings.PROMO_CATALOG_COUNTRIES_URL, PathProvider.promo_catalog_countries_path, "p")
)
@build_lock
class StageCountriesTxt(Stage):
def apply(self, env: Env):
@ -311,7 +302,7 @@ class StageCountriesTxt(Stage):
env.paths.mwm_path,
env.paths.mwm_version,
)
if is_accepted(env, StageDownloadProductionExternal):
if env.production:
countries_json = json.loads(countries)
inject_promo_ids(
countries_json,

View file

@ -17,7 +17,7 @@ from maps_generator.generator.env import get_all_countries_list
from maps_generator.generator.gen_tool import run_gen_tool
from maps_generator.generator.osmtools import osmconvert
from maps_generator.generator.osmtools import osmupdate
from maps_generator.utils.file import download_file
from maps_generator.utils.file import download_files
from maps_generator.utils.file import is_verified
from maps_generator.utils.file import symlink_force
from maps_generator.utils.md5 import md5
@ -36,11 +36,6 @@ def multithread_run_if_one_country(func):
return wrap
def download_planet(planet: AnyStr):
download_file(settings.PLANET_URL, planet)
download_file(settings.PLANET_MD5_URL, md5(planet))
def convert_planet(
tool: AnyStr,
in_planet: AnyStr,
@ -54,7 +49,13 @@ def convert_planet(
def step_download_and_convert_planet(env: Env, force_download: bool, **kwargs):
if force_download or not is_verified(env.paths.planet_osm_pbf):
download_planet(env.paths.planet_osm_pbf)
download_files(
{
settings.PLANET_URL: env.paths.planet_osm_pbf,
settings.PLANET_MD5_URL: md5(env.paths.planet_osm_pbf),
},
env.force_download_files,
)
convert_planet(
env[settings.OSM_TOOL_CONVERT],

View file

@ -20,8 +20,6 @@ def run_generation(env, stages, from_stage: Optional[AnyStr] = None):
def generate_maps(env: Env, from_stage: Optional[AnyStr] = None):
""""Runs maps generation."""
stages = [
sd.StageDownloadExternal(),
sd.StageDownloadProductionExternal(),
sd.StageDownloadAndConvertPlanet(),
sd.StageUpdatePlanet(),
sd.StageCoastline(),

View file

@ -5,6 +5,8 @@ import logging
import os
import shutil
import urllib.request
from functools import partial
from multiprocessing.pool import ThreadPool
from typing import AnyStr
from typing import Dict
from typing import Optional
@ -33,15 +35,24 @@ def find_executable(path: AnyStr, exe: Optional[AnyStr] = None) -> AnyStr:
raise FileNotFoundError(f"{exe} not found in {path}")
def download_file(url: AnyStr, name: AnyStr, download_if_exists: bool = True):
    """Download `url` into the file `name`.

    The payload is written to a temporary file (`name` followed by "__") and
    moved over `name` only after the transfer has finished, so `name` is not
    replaced by a partially downloaded file.

    :param url: address to fetch.
    :param name: destination path.
    :param download_if_exists: when False and `name` already exists, the
        download is skipped.
    """
    logger.info(f"Trying to download {name} from {url}.")
    if not download_if_exists and os.path.exists(name):
        logger.info(f"File {name} already exists.")
        return

    tmp_name = f"{name}__"
    try:
        urllib.request.urlretrieve(url, tmp_name)
        shutil.move(tmp_name, name)
    finally:
        # After a successful move the temporary file is gone; this only
        # removes the partial file left by a failed or interrupted download.
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
    logger.info(f"File {name} was downloaded from {url}.")
def download_files(url_to_path: Dict[AnyStr, AnyStr], download_if_exists: bool = True):
    """Download several files concurrently using a thread pool.

    :param url_to_path: mapping of source url to destination path.
    :param download_if_exists: forwarded to download_file for every entry.
    """
    # Nothing to fetch: do not start a pool at all.
    if not url_to_path:
        return

    fetch = partial(download_file, download_if_exists=download_if_exists)
    # ThreadPool() defaults to os.cpu_count() workers; never start more
    # workers than there are files to download.
    workers = min(len(url_to_path), os.cpu_count() or 1)
    with ThreadPool(processes=workers) as pool:
        pool.starmap(fetch, url_to_path.items())
def is_exists_file_and_md5(name: AnyStr) -> bool: