From 95c6bd87095ec7d62102bacb94be9f002b5bcb42 Mon Sep 17 00:00:00 2001 From: Maksim Andrianov Date: Tue, 16 Apr 2019 18:29:28 +0300 Subject: [PATCH] [generator] Added python wrapper over generator_tool. --- tools/python/maps_generator/README.md | 160 +++++++++ tools/python/maps_generator/__init__.py | 0 tools/python/maps_generator/__main__.py | 143 ++++++++ .../maps_generator/generator/__init__.py | 0 .../maps_generator/generator/basic_stages.py | 60 ++++ .../maps_generator/generator/coastline.py | 49 +++ .../maps_generator/generator/decorators.py | 89 +++++ tools/python/maps_generator/generator/env.py | 305 ++++++++++++++++++ .../maps_generator/generator/exceptions.py | 30 ++ .../maps_generator/generator/gen_tool.py | 129 ++++++++ .../maps_generator/generator/maps_stages.py | 129 ++++++++ .../maps_generator/generator/osmtools.py | 79 +++++ .../maps_generator/generator/settings.py | 157 +++++++++ .../python/maps_generator/generator/status.py | 31 ++ tools/python/maps_generator/maps_generator.py | 298 +++++++++++++++++ tools/python/maps_generator/requirements.txt | 3 + tools/python/maps_generator/utils/__init__.py | 0 .../maps_generator/utils/collections.py | 4 + tools/python/maps_generator/utils/file.py | 57 ++++ tools/python/maps_generator/utils/log.py | 18 ++ tools/python/maps_generator/utils/md5.py | 31 ++ tools/python/maps_generator/utils/system.py | 9 + .../var/etc/map_generator.ini.default | 35 ++ .../post_generation/hierarchy_to_countries.py | 2 + 24 files changed, 1818 insertions(+) create mode 100644 tools/python/maps_generator/README.md create mode 100644 tools/python/maps_generator/__init__.py create mode 100644 tools/python/maps_generator/__main__.py create mode 100644 tools/python/maps_generator/generator/__init__.py create mode 100644 tools/python/maps_generator/generator/basic_stages.py create mode 100644 tools/python/maps_generator/generator/coastline.py create mode 100644 tools/python/maps_generator/generator/decorators.py create mode 100644 
tools/python/maps_generator/generator/env.py create mode 100644 tools/python/maps_generator/generator/exceptions.py create mode 100644 tools/python/maps_generator/generator/gen_tool.py create mode 100644 tools/python/maps_generator/generator/maps_stages.py create mode 100644 tools/python/maps_generator/generator/osmtools.py create mode 100644 tools/python/maps_generator/generator/settings.py create mode 100644 tools/python/maps_generator/generator/status.py create mode 100644 tools/python/maps_generator/maps_generator.py create mode 100644 tools/python/maps_generator/requirements.txt create mode 100644 tools/python/maps_generator/utils/__init__.py create mode 100644 tools/python/maps_generator/utils/collections.py create mode 100644 tools/python/maps_generator/utils/file.py create mode 100644 tools/python/maps_generator/utils/log.py create mode 100644 tools/python/maps_generator/utils/md5.py create mode 100644 tools/python/maps_generator/utils/system.py create mode 100644 tools/python/maps_generator/var/etc/map_generator.ini.default diff --git a/tools/python/maps_generator/README.md b/tools/python/maps_generator/README.md new file mode 100644 index 0000000000..fbca0102c5 --- /dev/null +++ b/tools/python/maps_generator/README.md @@ -0,0 +1,160 @@ +# maps_generator +It is a tool for generating maps for the maps.me application. + +## Setup +You must have Python 3.6 or newer and complete the following steps: +0. Change directory: +```sh +$ cd omim/tools/python/maps_generator +``` +1. [Install generator_tool.](https://github.com/mapsme/omim/blob/master/docs/INSTALL.md) +2. Install dependencies: +```sh +maps_generator$ pip3 install -r requirements.txt +``` + +3. Make ini file: +```sh +maps_generator$ cp var/etc/map_generator.ini.default var/etc/map_generator.ini +``` + +4. Edit ini file: +```sh +maps_generator$ vim var/etc/map_generator.ini +``` + +```ini +[Main] +# The path where the planet will be downloaded and the maps are generated. 
+MAIN_OUT_PATH: ~/maps_build +# If the flag DEBUG is set a special small planet file will be downloaded. +DEBUG: 1 + + +[Developer] +# The path where the generator_tool will be searched. +BUILD_PATH: ~/omim-build-release +# The path to the project directory omim. +OMIM_PATH: ~/omim + + +[Generator tool] +# The path to the omim/data. +USER_RESOURCE_PATH: ${Developer:OMIM_PATH}/data +# Do not change it. This is determined automatically. +# NODE_STORAGE: map + + +[Osm tools] +# The path to the osmctools sources. +OSM_TOOLS_SRC_PATH: ${Developer:OMIM_PATH}/tools/osmctools +# The path where osmctools will be searched or will be built. +OSM_TOOLS_PATH: ~/osmctools + + +[Logging] +# The path where maps_generator log will be saved. +# LOG_FILE_PATH: generation.log + + +[External] +# The url to the planet file. +# PLANET_URL: +# The url to the file with md5 sum of the planet. +# PLANET_MD5_URL: + +# The url to the subway file. +# SUBWAY_URL: + +# Urls for production maps generation. +# UGC_URL: +# HOTELS_URL: +# POPULARITY_URL: +# FOOD_URL: +# FOOD_TRANSLATIONS_URL: +``` + +## Usage +```sh +$ cd omim/tools/python +python$ python3.6 -m maps_generator -h +``` + +``` +usage: __main__.py [-h] [--config CONFIG] [-c [CONTINUE]] + [--countries COUNTRIES] [--skip SKIP] + [--from_stage FROM_STAGE] [--production] + +Tool for generation maps for maps.me application. + +optional arguments: + -h, --help show this help message and exit + --config CONFIG Path to config + -c [CONTINUE], --continue [CONTINUE] + Continue the last build or specified in CONTINUE from + the last stopped stage. + --countries COUNTRIES + List of regions, separated by a comma or a semicolon, + or path to file with regions, separated by a line + break, for which maps will be built. The names of the + regions can be seen in omim/data/borders. It is + necessary to set names without any extension. + --skip SKIP List of stages, separated by a comma or a semicolon, + for which building will be skipped. 
Available skip + stages: download_external, + download_production_external, + download_and_convert_planet, update_planet, coastline, + preprocess, features, mwm, descriptions, + countries_txt, cleanup, index, ugc, popularity, + routing, routing_transit. + --from_stage FROM_STAGE + Stage from which maps will be rebuild. Available + stages: download_external, + download_production_external, + download_and_convert_planet, update_planet, coastline, + preprocess, features, mwm, descriptions, + countries_txt, cleanup, index, ugc, popularity, + routing, routing_transit. + --production Build production maps. In another case, 'osm only + maps' are built - maps without additional data and + advertising. +``` + +If you are not from the maps.me team, then you do not need the option --production when generating maps. + +To generate maps for the whole world you need 400 GB of hard disk space and a computer with more than 64 GB of RAM. +### Examples +#### Non-standard planet +If I want to generate maps for Japan I must complete the following steps: +1. Open https://download.geofabrik.de/asia/japan.html and copy the URLs of the osm.pbf +file and its md5sum. +2. Edit ini file: +```sh +maps_generator$ vim var/etc/map_generator.ini +``` + +```ini +[Main] +... +DEBUG: 0 +... +[External] +PLANET_URL: https://download.geofabrik.de/asia/japan-latest.osm.pbf +PLANET_MD5_URL: https://download.geofabrik.de/asia/japan-latest.osm.pbf.md5 +... +``` + +3. Run +```sh +python$ python3.6 -m maps_generator --countries="World, WorldCoasts, Japan_*" --skip="update_planet" + +``` +We must skip the step of updating the planet, because it is a non-standard planet. +#### Rebuild stages: +For example, you changed the routing code in the omim project and want to regenerate maps. +You must have a previous generation. 
You may regenerate from stage routing only for two mwms: + +```sh +python$ python3.6 -m maps_generator -c --from_stage="routing" --countries="Japan_Kinki Region_Osaka_Osaka, Japan_Chugoku Region_Tottori" + +``` \ No newline at end of file diff --git a/tools/python/maps_generator/__init__.py b/tools/python/maps_generator/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/python/maps_generator/__main__.py b/tools/python/maps_generator/__main__.py new file mode 100644 index 0000000000..7b7fe83722 --- /dev/null +++ b/tools/python/maps_generator/__main__.py @@ -0,0 +1,143 @@ +import logging +import os +from argparse import ArgumentParser + +from .generator import settings +from .generator.env import Env, find_last_build_dir, WORLDS_NAMES +from .generator.exceptions import ContinueError, SkipError, ValidationError +from .maps_generator import (start, reset_to_stage, ALL_STAGES, + stage_download_production_external, + stage_descriptions, stage_ugc, stage_popularity, + stages_as_string) +from .utils.collections import unique + +logger = logging.getLogger("maps_generator") + + +def parse_options(): + parser = ArgumentParser(description="Tool for generation maps for maps.me " + "application.", + parents=[settings.parser]) + parser.add_argument( + "-c", + "--continue", + default="", + nargs="?", + type=str, + help="Continue the last build or specified in CONTINUE from the " + "last stopped stage.") + parser.add_argument( + "--countries", + type=str, + default="", + help="List of regions, separated by a comma or a semicolon, or path to " + "file with regions, separated by a line break, for which maps" + " will be built. The names of the regions can be seen " + "in omim/data/borders. It is necessary to set names without " + "any extension.") + parser.add_argument( + "--skip", + type=str, + default="", + help=f"List of stages, separated by a comma or a semicolon, " + f"for which building will be skipped. 
Available skip stages: " + f"{', '.join([s.replace('stage_', '') for s in ALL_STAGES])}.") + parser.add_argument( + "--from_stage", + type=str, + default="", + help=f"Stage from which maps will be rebuild. Available stages: " + f"{', '.join([s.replace('stage_', '') for s in ALL_STAGES])}.") + parser.add_argument( + "--production", + default=False, + action="store_true", + help="Build production maps. In another case, 'osm only maps' are built" + " - maps without additional data and advertising.") + return vars(parser.parse_args()) + + +def main(): + root = logging.getLogger() + root.addHandler(logging.NullHandler()) + options = parse_options() + + build_name = None + if options["continue"] is None or options["continue"]: + d = find_last_build_dir(options["continue"]) + if d is None: + raise ContinueError("The build cannot continue: the last build " + "directory was not found.") + build_name = d + options["build_name"] = build_name + + countries_line = "" + if "COUNTRIES" in os.environ: + countries_line = os.environ["COUNTRIES"] + if options["countries"]: + countries_line = options["countries"] + raw_countries = [] + if os.path.isfile(countries_line): + with open(countries_line) as f: + raw_countries = [x.strip() for x in f] + if countries_line: + raw_countries = [ + x.strip() for x in countries_line.replace(";", ",").split(",") + ] + + borders_path = os.path.join(settings.USER_RESOURCE_PATH, "borders") + all_countries = [ + f.replace(".poly", "") for f in os.listdir(borders_path) + if os.path.isfile(os.path.join(borders_path, f)) + ] + all_countries += list(WORLDS_NAMES) + countries = [] + used_countries = set() + + def end_star_compare(prefix, full): + return full.startswith(prefix) + + def compare(a, b): + return a == b + + for raw_country in raw_countries: + cmp = compare + _raw_country = raw_country[:] + if _raw_country and _raw_country[-1] == "*": + _raw_country = _raw_country.replace("*", "") + cmp = end_star_compare + + for country in all_countries: + if 
cmp(_raw_country, country): + used_countries.add(raw_country) + countries.append(country) + + countries = unique(countries) + diff = set(raw_countries) - used_countries + if diff: + raise ValidationError(f"Bad input countries {', '.join(diff)}") + options["countries"] = countries if countries else all_countries + + options_skip = [] + if options["skip"]: + options_skip = [ + f"stage_{s.strip()}" + for s in options["skip"].replace(";", ",").split(",") + ] + options["skip"] = options_skip + if not options["production"]: + options["skip"] += stages_as_string(stage_download_production_external, + stage_ugc, stage_popularity, + stage_descriptions) + if not all(s in ALL_STAGES for s in options["skip"]): + raise SkipError(f"Stages {set(options['skip']) - set(ALL_STAGES)} " + f"not found.") + + env = Env(options) + if env.from_stage: + reset_to_stage(env.from_stage, env) + start(env) + env.finish() + + +main() diff --git a/tools/python/maps_generator/generator/__init__.py b/tools/python/maps_generator/generator/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/python/maps_generator/generator/basic_stages.py b/tools/python/maps_generator/generator/basic_stages.py new file mode 100644 index 0000000000..abc4a4c092 --- /dev/null +++ b/tools/python/maps_generator/generator/basic_stages.py @@ -0,0 +1,60 @@ +import os +import subprocess + +from ..utils.file import download_file, is_verified +from ..utils.md5 import write_md5sum, md5 +from . 
import settings +from .gen_tool import run_gen_tool +from .osmtools import osmconvert, osmupdate +from .exceptions import wait_and_raise_if_fail + + +def download_planet(planet, output=subprocess.DEVNULL, + error=subprocess.DEVNULL): + p = download_file(settings.PLANET_URL, planet, output=output, error=error) + m = download_file(settings.PLANET_MD5_URL, md5(planet), output=output, + error=error) + wait_and_raise_if_fail(p) + wait_and_raise_if_fail(m) + + +def convert_planet(tool, in_planet, out_planet, output=subprocess.DEVNULL, + error=subprocess.DEVNULL): + osmconvert(tool, in_planet, out_planet, output=output, error=error) + write_md5sum(out_planet, md5(out_planet)) + + +def stage_download_and_convert_planet(env, **kwargs): + if not is_verified(settings.PLANET_PBF): + download_planet(settings.PLANET_PBF, output=env.subprocess_out, + error=env.subprocess_out) + + convert_planet(env[settings.OSM_TOOL_CONVERT], + settings.PLANET_PBF, settings.PLANET_O5M, + output=env.subprocess_out, + error=env.subprocess_out) + os.remove(settings.PLANET_PBF) + os.remove(md5(settings.PLANET_PBF)) + + +def stage_update_planet(env, **kwargs): + tmp = settings.PLANET_O5M + ".tmp" + osmupdate(env[settings.OSM_TOOL_UPDATE], settings.PLANET_O5M, tmp, + output=env.subprocess_out, + error=env.subprocess_out, + **kwargs) + os.remove(settings.PLANET_O5M) + os.rename(tmp, settings.PLANET_O5M) + write_md5sum(settings.PLANET_O5M, md5(settings.PLANET_O5M)) + + +def stage_preprocess(env, **kwargs): + run_gen_tool(env.gen_tool, + out=env.subprocess_out, + err=env.subprocess_out, + intermediate_data_path=env.intermediate_path, + osm_file_type="o5m", + osm_file_name=settings.PLANET_O5M, + node_storage=env.node_storage, + preprocess=True, + **kwargs) diff --git a/tools/python/maps_generator/generator/coastline.py b/tools/python/maps_generator/generator/coastline.py new file mode 100644 index 0000000000..eef2efd46b --- /dev/null +++ b/tools/python/maps_generator/generator/coastline.py @@ -0,0 +1,49 @@ 
+import os +import subprocess + +from . import settings +from .exceptions import wait_and_raise_if_fail +from .gen_tool import run_gen_tool +from .osmtools import osmfilter + + +def filter_coastline(name_executable, in_file, out_file, + output=subprocess.DEVNULL, error=subprocess.DEVNULL): + osmfilter(name_executable, in_file, out_file, output=output, + error=error, keep="", keep_ways="natural=coastline", + keep_nodes="capital=yes place=town =city") + + +def make_coastline(env): + coastline_o5m = os.path.join(env.coastline_path, "coastline.o5m") + filter_coastline(env[settings.OSM_TOOL_FILTER], settings.PLANET_O5M, + coastline_o5m, output=env.subprocess_out, + error=env.subprocess_out) + + run_gen_tool(env.gen_tool, + out=env.subprocess_out, + err=env.subprocess_out, + intermediate_data_path=env.coastline_path, + osm_file_type="o5m", + osm_file_name=coastline_o5m, + node_storage=env.node_storage, + preprocess=True) + + run_gen_tool(env.gen_tool, + out=env.subprocess_out, + err=env.subprocess_out, + intermediate_data_path=env.coastline_path, + osm_file_type="o5m", + osm_file_name=coastline_o5m, + node_storage=env.node_storage, + user_resource_path=env.user_resource_path, + make_coasts=True, + fail_on_coasts=True) + + prefix = "WorldCoasts" + coastline_files = [] + for f in os.listdir(env.coastline_path): + path = os.path.join(env.coastline_path, f) + if os.path.isfile(path) and f.startswith(prefix): + coastline_files.append(path) + return coastline_files diff --git a/tools/python/maps_generator/generator/decorators.py b/tools/python/maps_generator/generator/decorators.py new file mode 100644 index 0000000000..4c6c1083ff --- /dev/null +++ b/tools/python/maps_generator/generator/decorators.py @@ -0,0 +1,89 @@ +import datetime +import logging +import os +import time +from functools import wraps + +from .env import Env +from .status import Status +from ..utils.log import create_file_logger, DummyObject + +logger = logging.getLogger("maps_generator") + + +def 
stage(func): + @wraps(func) + def wrap(env: Env, *args, **kwargs): + func_name = func.__name__ + stage_formatted = " ".join(func_name.split("_")).capitalize() + if not env.is_accepted_stage(func_name): + logger.info(f"{stage_formatted} was not accepted.") + return + main_status = env.main_status + main_status.init(env.main_status_path, func_name) + if main_status.need_skip(): + logger.warning(f"{stage_formatted} was skipped.") + return + main_status.update_status() + logger.info(f"{stage_formatted}: start ...") + t = time.time() + with open(os.path.join(env.log_path, f"{func_name}.log"), "w") as l: + env.set_subprocess_out(l) + func(env, *args, **kwargs) + d = time.time() - t + logger.info(f"{stage_formatted}: finished in " + f"{str(datetime.timedelta(seconds=d))}") + + return wrap + + +def country_stage_status(func): + @wraps(func) + def wrap(env: Env, country: str, *args, **kwargs): + func_name = func.__name__ + _logger = DummyObject() + countries_meta = env.countries_meta + if "logger" in countries_meta[country]: + _logger = countries_meta[country]["logger"] + stage_formatted = " ".join(func_name.split("_")).capitalize() + if not env.is_accepted_stage(func_name): + _logger.info(f"{stage_formatted} was not accepted.") + return + if "status" not in countries_meta[country]: + countries_meta[country]["status"] = Status() + status = countries_meta[country]["status"] + status_file = os.path.join(env.status_path, f"{country}.status") + status.init(status_file, func_name) + if status.need_skip(): + _logger.warning(f"{stage_formatted} was skipped.") + return + status.update_status() + func(env, country, *args, **kwargs) + + return wrap + + +def country_stage_log(func): + @wraps(func) + def wrap(env: Env, country: str, *args, **kwargs): + func_name = func.__name__ + log_file = os.path.join(env.log_path, f"{country}.log") + countries_meta = env.countries_meta + if "logger" not in countries_meta[country]: + countries_meta[country]["logger"] = create_file_logger(log_file) + 
_logger = countries_meta[country]["logger"] + stage_formatted = " ".join(func_name.split("_")).capitalize() + _logger.info(f"{stage_formatted}: start ...") + t = time.time() + with open(log_file, "a+") as l: + env.set_subprocess_out(l) + func(env, country, *args, logger=_logger, **kwargs) + d = time.time() - t + _logger.info(f"{stage_formatted}: finished in " + f"{str(datetime.timedelta(seconds=d))}") + + return wrap + + +def country_stage(func): + return country_stage_log(country_stage_status(func)) diff --git a/tools/python/maps_generator/generator/env.py b/tools/python/maps_generator/generator/env.py new file mode 100644 index 0000000000..474c89a987 --- /dev/null +++ b/tools/python/maps_generator/generator/env.py @@ -0,0 +1,305 @@ +import collections +import datetime +import logging +import logging.config +import os +import shutil +import sys + +from . import settings +from .osmtools import build_osmtools +from .status import Status +from ..utils.file import find_executable, is_executable, symlink_force + +logger = logging.getLogger("maps_generator") + +WORLD_NAME = "World" +WORLD_COASTS_NAME = "WorldCoasts" + +WORLDS_NAMES = {WORLD_NAME, WORLD_COASTS_NAME} + + +def _write_version(out_path, version): + with open(os.path.join(out_path, settings.VERSION_FILE_NAME), "w") as f: + f.write(str(version)) + + +def _read_version(version_path): + with open(version_path) as f: + line = f.readline().strip() + try: + return int(line) + except ValueError: + logger.exception(f"Cast '{line}' to int error.") + return 0 + + +def find_last_build_dir(hint): + if hint: + p = os.path.join(settings.MAIN_OUT_PATH, hint) + return hint if os.path.exists(p) else None + try: + paths = [os.path.join(settings.MAIN_OUT_PATH, f) + for f in os.listdir(settings.MAIN_OUT_PATH)] + except FileNotFoundError: + logger.exception(f"{settings.MAIN_OUT_PATH} not found.") + return None + versions = [] + for path in paths: + version_path = os.path.join(path, settings.VERSION_FILE_NAME) + if not 
os.path.isfile(version_path): + versions.append(0) + else: + versions.append(_read_version(version_path)) + pairs = sorted(zip(paths, versions), key=lambda p: p[1], reverse=True) + return (None if not pairs or pairs[0][1] == 0 + else pairs[0][0].split(os.sep)[-1]) + + +def planet_lock_file(): + return f"{settings.PLANET_O5M}.lock" + + +def build_lock_file(out_path): + return f"{os.path.join(out_path, 'lock')}.lock" + + +class Env: + def __init__(self, options): + Env._logging_setup() + logger.info("Start setup ...") + for k, v in Env._osm_tools_setup().items(): + setattr(self, k, v) + for k, v in options.items(): + setattr(self, k, v) + + self.gen_tool = Env._generator_tool_setup() + + setup_options = Env._out_path_setup(self.build_name) + self.out_path, _, self.mwm_version, self.planet_version = setup_options + logger.info(f"Out path is {self.out_path}.") + + self.intermediate_path = Env._intermediate_path_setup(self.out_path) + self.data_path = self.intermediate_path + self.intermediate_tmp_path = os.path.join(self.intermediate_path, "tmp") + self._create_if_not_exist(self.intermediate_tmp_path) + Env._tmp_dir_setup() + + self.mwm_path = os.path.join(self.out_path, str(self.mwm_version)) + self._create_if_not_exist(self.mwm_path) + + self.log_path = os.path.join(self.out_path, "logs") + self._create_if_not_exist(self.log_path) + + self.temp_borders_path = self._prepare_borders() + + self.draft_path = os.path.join(self.out_path, "draft") + self._create_if_not_exist(self.draft_path) + symlink_force(self.temp_borders_path, + os.path.join(self.draft_path, "borders")) + + self.node_storage = settings.NODE_STORAGE + self.user_resource_path = settings.USER_RESOURCE_PATH + + self.coastline_path = os.path.join(self.intermediate_path, "coasts") + self._create_if_not_exist(self.coastline_path) + + self.status_path = os.path.join(self.out_path, "status") + self._create_if_not_exist(self.status_path) + self.countries_meta = collections.defaultdict(dict) + + 
self.main_status_path = os.path.join(self.status_path, "stages.status") + self.main_status = Status() + + self.coastline_tmp_path = os.path.join(self.coastline_path, "tmp") + self._create_if_not_exist(self.coastline_tmp_path) + self.subprocess_out = None + + self.descriptions_path = os.path.join(self.intermediate_path, + "descriptions") + self._create_if_not_exist(self.descriptions_path) + + _write_version(self.out_path, self.planet_version) + + self._skipped_stages = set(self.skip) + logger.info( + f"The following maps will build: {', '.join(self.countries)}.") + logger.info("Finish setup") + + def get_mwm_names(self): + tmp_ext = ".mwm.tmp" + mwm_names = [] + for f in os.listdir(self.intermediate_tmp_path): + path = os.path.join(self.intermediate_tmp_path, f) + if f.endswith(tmp_ext) and os.path.isfile(path): + name = f.replace(tmp_ext, "") + if name in self.countries: + mwm_names.append(name) + return mwm_names + + def is_accepted_stage(self, stage_name): + return stage_name not in self._skipped_stages + + @property + def id_to_wikidata_path(self): + return os.path.join(self.intermediate_path, "id_to_wikidata.csv") + + @property + def wiki_url_path(self): + return os.path.join(self.intermediate_path, "wiki_urls.txt") + + @property + def ugc_path(self): + return os.path.join(self.intermediate_path, "ugc_db.sqlite3") + + @property + def hotels_path(self): + return os.path.join(self.intermediate_path, "hotels.csv") + + @property + def popularity_path(self): + return os.path.join(self.intermediate_path, "popular_places.csv") + + @property + def subway_path(self): + return os.path.join(self.intermediate_path, + "mapsme_osm_subways.transit.json") + + @property + def food_paths(self): + return os.path.join(self.intermediate_path, "ids_food.json") + + @property + def food_translations_path(self): + return os.path.join(self.intermediate_path, "translations_food.json") + + @property + def cities_boundaries_path(self): + return os.path.join(self.intermediate_path, 
"cities_boundaries.bin") + + @property + def transit_path(self): + return self.intermediate_path + + @property + def hierarchy_path(self): + return os.path.join(self.user_resource_path, "hierarchy.txt") + + @property + def old_to_new_path(self): + return os.path.join(self.user_resource_path, "old_vs_new.csv") + + @property + def borders_to_osm_path(self): + return os.path.join(self.user_resource_path, "borders_vs_osm.csv") + + @property + def counties_txt_path(self): + return os.path.join(self.mwm_path, "countries.txt") + + def __getitem__(self, item): + return self.__dict__[item] + + def finish(self): + self.main_status.finish() + + def finish_mwm(self, mwm_name): + self.countries_meta[mwm_name]["status"].finish() + + def set_subprocess_out(self, subprocess_out): + self.subprocess_out = subprocess_out + + @staticmethod + def _logging_setup(): + def exception_handler(type, value, tb): + logger.exception( + f"Uncaught exception: {str(value)}", + exc_info=(type, value, tb)) + + logging.config.dictConfig(settings.LOGGING) + sys.excepthook = exception_handler + + @staticmethod + def _generator_tool_setup(): + logger.info("Check generator tool ...") + gen_tool_path = shutil.which(settings.GEN_TOOL) + if gen_tool_path is None: + logger.info(f"Find generator tool in {settings.BUILD_PATH} ...") + gen_tool_path = find_executable(settings.BUILD_PATH, + settings.GEN_TOOL) + logger.info(f"Generator found - {gen_tool_path}") + return gen_tool_path + + @staticmethod + def _osm_tools_setup(): + path = settings.OSM_TOOLS_PATH + osm_tool_names = [ + settings.OSM_TOOL_CONVERT, settings.OSM_TOOL_UPDATE, + settings.OSM_TOOL_FILTER + ] + + logger.info("Check osm tools ...") + if Env._create_if_not_exist(path): + tmp_paths = [os.path.join(path, t) for t in osm_tool_names] + if all([is_executable(t) for t in tmp_paths]): + osm_tool_paths = dict(zip(osm_tool_names, tmp_paths)) + logger.info( + f"Osm tools found - {', '.join(osm_tool_paths.values())}") + return osm_tool_paths + + tmp_paths 
= [shutil.which(t) for t in osm_tool_names] + if all(tmp_paths): + osm_tool_paths = dict(zip(osm_tool_names, tmp_paths)) + logger.info( + f"Osm tools found - {', '.join(osm_tool_paths.values())}") + return osm_tool_paths + + logger.info("Build osm tools ...") + return build_osmtools(settings.OSM_TOOLS_SRC_PATH) + + @staticmethod + def _out_path_setup(build_name): + dt = datetime.datetime.now() + version_format = "%Y_%m_%d__%H_%M_%S" + if build_name: + dt = datetime.datetime.strptime(build_name, version_format) + + s = dt.strftime(version_format) + mwm_version = dt.strftime("%y%m%d") + planet_version = int(dt.strftime("%s")) + + out_path = os.path.join(settings.MAIN_OUT_PATH, s) + Env._create_if_not_exist(settings.MAIN_OUT_PATH) + Env._create_if_not_exist(out_path) + return out_path, s, mwm_version, planet_version + + @staticmethod + def _intermediate_path_setup(out_path): + intermediate_path = os.path.join(out_path, "intermediate_data") + Env._create_if_not_exist(intermediate_path) + return intermediate_path + + @staticmethod + def _tmp_dir_setup(): + Env._create_if_not_exist(settings.TMPDIR) + os.environ["TMPDIR"] = settings.TMPDIR + + @staticmethod + def _create_if_not_exist(path): + try: + os.mkdir(path) + logger.info(f"Create {path} ...") + return True + except FileExistsError: + return False + + def _prepare_borders(self): + borders = "borders" + temp_borders = os.path.join(self.intermediate_path, borders) + Env._create_if_not_exist(temp_borders) + borders = os.path.join(settings.USER_RESOURCE_PATH, borders) + for x in self.countries: + if x in WORLDS_NAMES: + continue + shutil.copy2(f"{os.path.join(borders, x)}.poly", temp_borders) + return temp_borders diff --git a/tools/python/maps_generator/generator/exceptions.py b/tools/python/maps_generator/generator/exceptions.py new file mode 100644 index 0000000000..7aa40c0538 --- /dev/null +++ b/tools/python/maps_generator/generator/exceptions.py @@ -0,0 +1,30 @@ +import os + + +class MapsGeneratorError(Exception): 
+ pass + + +class OptionNotFound(MapsGeneratorError): + pass + + +class ValidationError(MapsGeneratorError): + pass + + +class ContinueError(MapsGeneratorError): + pass + + +class SkipError(MapsGeneratorError): + pass + + +class BadExitStatusError(MapsGeneratorError): + pass + + +def wait_and_raise_if_fail(p): + if p.wait() != os.EX_OK: + raise BadExitStatusError(f"The launch of {' '.join(p.args)} failed.") diff --git a/tools/python/maps_generator/generator/gen_tool.py b/tools/python/maps_generator/generator/gen_tool.py new file mode 100644 index 0000000000..ca8a05d4e5 --- /dev/null +++ b/tools/python/maps_generator/generator/gen_tool.py @@ -0,0 +1,129 @@ +import copy +import logging +import os +import subprocess + +from .exceptions import (OptionNotFound, ValidationError, + wait_and_raise_if_fail) + +logger = logging.getLogger("maps_generator") + + +class GenTool: + OPTIONS = { + "disable_cross_mwm_progress": bool, + "dump_cities_boundaries": bool, + "emit_coasts": bool, + "fail_on_coasts": bool, + "generate_cameras": bool, + "generate_features": bool, + "generate_geometry": bool, + "generate_geo_objects_features": bool, + "generate_geo_objects_index": bool, + "generate_index": bool, + "generate_maxspeed": bool, + "generate_popular_places": bool, + "generate_region_features": bool, + "generate_regions": bool, + "generate_regions_kv": bool, + "generate_search_index": bool, + "generate_traffic_keys": bool, + "generate_world": bool, + "make_city_roads": bool, + "make_coasts": bool, + "make_cross_mwm": bool, + "make_routing_index": bool, + "make_transit_cross_mwm": bool, + "no_ads": bool, + "preprocess": bool, + "split_by_polygons": bool, + "planet_version": int, + "booking_data": str, + "brands_data": str, + "brands_translations_data": str, + "cities_boundaries_data": str, + "data_path": str, + "dump_wikipedia_urls": str, + "geo_objects_features": str, + "geo_objects_key_value": str, + "ids_without_addresses": str, + "idToWikidata": str, + "intermediate_data_path": 
str, + "nodes_list_path": str, + "node_storage": str, + "osm_file_name": str, + "osm_file_type": str, + "output": str, + "popular_places_data": str, + "regions_features": str, + "regions_index": str, + "regions_key_value": str, + "transit_path": str, + "ugc_data": str, + "user_resource_path": str, + "wikipedia_pages": str, + } + + def __init__(self, name_executable, out=subprocess.DEVNULL, + err=subprocess.DEVNULL, **options): + self.name_executable = name_executable + self.subprocess = None + self.output = out + self.error = err + self.options = {} + self.logger = logger + self.add_options(**options) + + @property + def args(self): + return self._collect_cmd() + + def add_options(self, **options): + if "logger" in options: + self.logger = options["logger"] + + for k, v in options.items(): + if k == "logger": + continue + + if k not in GenTool.OPTIONS: + raise OptionNotFound(f"{k} is unavailable option") + + if type(v) is not GenTool.OPTIONS[k]: + raise ValidationError(f"{k} required {str(GenTool.OPTIONS[k])}," + f" but not {str(type(v))}") + + self.options[k] = str(v).lower() if type(v) is bool else v + return self + + def run_async(self): + assert self.subprocess is None, "You forgot to call wait()" + cmd = self._collect_cmd() + self.subprocess = subprocess.Popen(cmd, stdout=self.output, + stderr=self.error, env=os.environ) + + self.logger.info(f"Run command {' '.join(cmd)}") + return self + + def wait(self): + code = self.subprocess.wait() + self.subprocess = None + return code + + def run(self): + self.run_async() + wait_and_raise_if_fail(self) + + def branch(self): + c = GenTool(self.name_executable, out=self.output, err=self.error) + c.options = copy.deepcopy(self.options) + return c + + def _collect_cmd(self): + options = ["".join(["--", k, "=", str(v)]) for k, v in + self.options.items()] + return [self.name_executable, *options] + + +def run_gen_tool(*args, **kwargs): + GenTool(*args, **kwargs).run() diff --git 
def run_gen_tool_with_recovery_country(env, *args, **kwargs):
    """Run the generator tool on a draft copy of a country's mwm file.

    The ``<output>.mwm`` file is copied from ``data_path`` into
    ``env.draft_path`` (its ``.osm2ft`` companion is symlinked there), the
    tool is run with ``data_path`` pointing at the draft directory, and the
    resulting mwm is moved back only after the tool has returned. If the
    tool raises, the mwm in the original ``data_path`` is left as it was.

    When ``data_path`` or ``output`` is missing from ``kwargs`` the draft
    copy cannot be prepared, so the tool is simply run once as-is.

    Args:
        env: object providing ``draft_path``.
        *args, **kwargs: forwarded to ``run_gen_tool``.
    """
    if "data_path" not in kwargs or "output" not in kwargs:
        logger.warning("The call run_gen_tool() will be without recovery.")
        run_gen_tool(*args, **kwargs)
        # Nothing more to do: without this return the code below would
        # raise KeyError on the very option that was reported missing.
        return

    prev_data_path = kwargs["data_path"]
    mwm = f"{kwargs['output']}.mwm"
    osm2ft = f"{mwm}.osm2ft"
    kwargs["data_path"] = env.draft_path
    symlink_force(os.path.join(prev_data_path, osm2ft),
                  os.path.join(env.draft_path, osm2ft))
    shutil.copy(os.path.join(prev_data_path, mwm),
                os.path.join(env.draft_path, mwm))
    run_gen_tool(*args, **kwargs)
    shutil.move(os.path.join(env.draft_path, mwm),
                os.path.join(prev_data_path, mwm))
def stage_ugc(env, country, **kwargs):
    """Run the generator tool for ``country`` with ``ugc_data`` set to
    ``env.ugc_path``, working on a draft copy of the mwm.

    Extra keyword arguments are forwarded to the tool unchanged.
    """
    tool_options = dict(
        out=env.subprocess_out,
        err=env.subprocess_out,
        data_path=env.mwm_path,
        intermediate_data_path=env.intermediate_path,
        user_resource_path=env.user_resource_path,
        ugc_data=env.ugc_path,
        output=country,
    )
    run_gen_tool_with_recovery_country(env, env.gen_tool,
                                       **tool_options, **kwargs)
def build_osmtools(path, output=subprocess.DEVNULL, error=subprocess.DEVNULL):
    """Compile osmupdate, osmfilter and osmconvert from the sources in ``path``.

    One compiler process is started per tool, so the builds run
    concurrently; all of them are waited for before the result is reported.

    Args:
        path: directory containing ``osmupdate.c``, ``osmfilter.c`` and
            ``osmconvert.c``.
        output: stdout target passed to every compiler process.
        error: stderr target passed to every compiler process.

    Returns:
        dict mapping each tool name to the path of its built executable
        inside ``settings.OSM_TOOLS_PATH``.

    Raises:
        BadExitStatusError: at least one compiler exited with a non-zero
            status; the message lists every failed command, one per line.
    """
    sources = {settings.OSM_TOOL_UPDATE: "osmupdate.c",
               settings.OSM_TOOL_FILTER: "osmfilter.c",
               settings.OSM_TOOL_CONVERT: "osmconvert.c"}
    ld_flags = ("-lz",)
    compilers = []
    result = {}
    for executable, source_name in sources.items():
        out = os.path.join(settings.OSM_TOOLS_PATH, executable)
        command = [settings.OSM_TOOLS_CC,
                   *settings.OSM_TOOLS_CC_FLAGS,
                   "-o", out,
                   os.path.join(path, source_name),
                   *ld_flags]
        compilers.append(subprocess.Popen(command, stdout=output,
                                          stderr=error))
        result[executable] = out

    # Wait for every compiler, even after a failure, so no process is left
    # running and all failures are reported together.
    messages = [f"The launch of {' '.join(c.args)} failed."
                for c in compilers if c.wait() != os.EX_OK]
    if messages:
        raise BadExitStatusError("\n".join(messages))

    return result
def get_config_path():
    """Return the config path given on the command line, or the default.

    ``sys.argv`` is scanned for ``--config=PATH`` and ``--config PATH``; the
    last occurrence wins. The default ``../var/etc/map_generator.ini``
    (relative to this module's directory) is returned when the option is
    absent or when ``--config`` is the final argument and has no value.
    """
    default_path = os.path.join(SETTINGS_PATH, "../var/etc/map_generator.ini")
    argv = sys.argv
    inline_prefix = f"{opt_config}="
    start = stop = -1
    for position, argument in enumerate(argv):
        if argument == opt_config:
            # The value is the next argument.
            start, stop = position, position + 2
        elif argument.startswith(inline_prefix):
            start, stop = position, position + 1

    if stop > len(argv):
        return default_path

    selected = argv[start:stop]
    if not selected:
        return default_path
    return parser.parse_args(selected).config
= os.path.join(WORK_PATH, "omim") + +# generator_tool +NODE_STORAGE = "mem" if total_virtual_memory() / 10 ** 9 >= 64 else "map" +USER_RESOURCE_PATH = os.path.join(OMIM_PATH, "data") + +# osm tools +OSM_TOOL_CONVERT = "osmconvert" +OSM_TOOL_FILTER = "osmfilter" +OSM_TOOL_UPDATE = "osmupdate" +OSM_TOOLS_SRC_PATH = os.path.join(OMIM_PATH, "tools", "osmctools") +OSM_TOOLS_PATH = os.path.join(WORK_PATH, "osmctools") +OSM_TOOLS_CC = "cc" +OSM_TOOLS_CC_FLAGS = ["-O3", ] + +# system +CPU_COUNT = multiprocessing.cpu_count() + +# Try to read a config and to overload default settings +config = ConfigParser(interpolation=ExtendedInterpolation()) +config.read([get_config_path()]) + + +def _get_opt(config, s, v, default=None): + return config.get(s, v) if config.has_option(s, v) else default + + +def _get_opt_path(config, s, v, default=""): + return os.path.expanduser(_get_opt(config, s, v, default)) + + +_DEBUG = _get_opt(config, "Main", "DEBUG") +DEBUG = DEBUG if _DEBUG is None else int(_DEBUG) +MAIN_OUT_PATH = _get_opt_path(config, "Main", "MAIN_OUT_PATH", MAIN_OUT_PATH) + +# logging +LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log") + +TMPDIR = _get_opt_path(config, "Main", "TMPDIR", TMPDIR) + +BUILD_PATH = _get_opt_path(config, "Developer", "BUILD_PATH", BUILD_PATH) +OMIM_PATH = _get_opt_path(config, "Developer", "OMIM_PATH", OMIM_PATH) + +USER_RESOURCE_PATH = _get_opt_path(config, "Generator tool", + "USER_RESOURCE_PATH", USER_RESOURCE_PATH) + +NODE_STORAGE = _get_opt(config, "Generator tool", "NODE_STORAGE", NODE_STORAGE) + +OSM_TOOLS_SRC_PATH = _get_opt_path(config, "Osm tools", "OSM_TOOLS_SRC_PATH", OSM_TOOLS_SRC_PATH) +OSM_TOOLS_PATH = _get_opt_path(config, "Osm tools", "OSM_TOOLS_PATH", OSM_TOOLS_PATH) + +LOG_FILE_PATH = _get_opt_path(config, "Logging", "MAIN_LOG", LOG_FILE_PATH) +os.makedirs(os.path.dirname(os.path.abspath(LOG_FILE_PATH)), exist_ok=True) + +PLANET_URL = _get_opt_path(config, "External", "PLANET_URL", PLANET_URL) +PLANET_MD5_URL = 
class Status:
    """Tracks a status file holding the name of the last recorded stage.

    ``find`` starts as False and becomes True on the first ``init`` call
    where either no status was read from the file or the saved status
    equals ``stat_next``. Once it is True, ``need_skip`` always answers False.
    """

    def __init__(self):
        self.stat_path = None
        self.stat_next = None
        self.stat_saved = None
        self.find = False

    def init(self, stat_path, stat_next):
        """Point at ``stat_path``, set the upcoming status and load the
        saved one if the file exists."""
        self.stat_path = stat_path
        self.stat_next = stat_next
        if os.path.isfile(self.stat_path):
            with open(self.stat_path) as saved:
                self.stat_saved = saved.read()
        if self.find:
            return
        self.find = self.stat_saved is None or not self.need_skip()

    def need_skip(self):
        """True while a saved status exists, differs from ``stat_next``
        and ``find`` has not been set yet."""
        if self.find or self.stat_saved is None:
            return False
        return self.stat_next != self.stat_saved

    def update_status(self):
        """Overwrite the status file with ``stat_next``."""
        self._store(self.stat_next)

    def finish(self):
        """Overwrite the status file with the literal "finish"."""
        self._store("finish")

    def _store(self, text):
        # Single place that rewrites the status file.
        with open(self.stat_path, "w") as status:
            status.write(text)
@stage
def stage_features(env):
    """Run the generator tool over the planet o5m file with
    ``generate_features`` enabled.

    Optional inputs are added depending on which stages ``env`` accepts,
    whether this is a production build and which countries are requested.
    """
    extra = {}
    if env.is_accepted_stage(stage_descriptions.__name__):
        extra.update(idToWikidata=env.id_to_wikidata_path)
    if env.is_accepted_stage(stage_download_production_external.__name__):
        extra.update(
            booking_data=env.hotels_path,
            popular_places_data=env.popularity_path,
            brands_data=env.food_paths,
            brands_translations_data=env.food_translations_path,
        )
    if not env.production:
        extra.update(no_ads=True)
    # At least one requested mwm is not one of the world files.
    if not all(name in WORLDS_NAMES for name in env.countries):
        extra.update(split_by_polygons=True)
    if any(name == WORLD_NAME for name in env.countries):
        extra.update(generate_world=True)

    base_options = dict(
        out=env.subprocess_out,
        err=env.subprocess_out,
        data_path=env.data_path,
        intermediate_data_path=env.intermediate_path,
        osm_file_type="o5m",
        osm_file_name=settings.PLANET_O5M,
        node_storage=env.node_storage,
        user_resource_path=env.user_resource_path,
        dump_cities_boundaries=True,
        cities_boundaries_data=env.cities_boundaries_path,
        generate_features=True,
        emit_coasts=True,
    )
    run_gen_tool(env.gen_tool, **base_options, **extra)
@stage
def stage_mwm(env):
    """Build every mwm returned by ``env.get_mwm_names()`` in a thread pool.

    The mwms named ``WORLD_NAME`` and ``WORLD_COASTS_NAME`` only go through
    the index stage; every other mwm goes through index, ugc, popularity,
    routing and transit routing. Each build ends with ``env.finish_mwm``.
    """
    def build_country(name):
        for country_stage_fn in (stage_index, stage_ugc, stage_popularity,
                                 stage_routing, stage_routing_transit):
            country_stage_fn(env, name)
        env.finish_mwm(name)

    def build_index_only(name):
        stage_index(env, name)
        env.finish_mwm(name)

    builders = {
        WORLD_NAME: build_index_only,
        WORLD_COASTS_NAME: build_index_only,
    }

    def build_one(name):
        return builders.get(name, build_country)(name)

    mwms = env.get_mwm_names()
    with ThreadPool() as pool:
        pool.map(build_one, mwms)
@stage
def stage_cleanup(env):
    """Move the ``*.mwm.osm2ft`` files out of the mwm directory and remove
    the draft directory.

    The files are collected in ``<env.out_path>/osm2ft``, which is created
    when missing. Only regular files directly inside ``env.mwm_path`` are
    moved.
    """
    osm2ft_path = os.path.join(env.out_path, "osm2ft")
    os.makedirs(osm2ft_path, exist_ok=True)
    # Report the directory the files are really taken from (env.mwm_path).
    logger.info(f"osm2ft files will be moved from {env.mwm_path} "
                f"to {osm2ft_path}.")
    for x in os.listdir(env.mwm_path):
        p = os.path.join(env.mwm_path, x)
        if os.path.isfile(p) and x.endswith(".mwm.osm2ft"):
            shutil.move(p, os.path.join(osm2ft_path, x))

    logger.info(f"{env.draft_path} will be removed.")
    shutil.rmtree(env.draft_path)
def start(env):
    """Run all generation stages in order.

    The planet lock is held while the planet is downloaded/updated and
    while the coastline, preprocess and features stages run; it is released
    explicitly before the mwm stages start. The build lock for
    ``env.out_path`` is held from the coastline stage until cleanup ends.
    """
    for run_stage in (stage_download_external,
                      stage_download_production_external):
        run_stage(env)

    with FileLock(planet_lock_file(), timeout=1) as planet_lock:
        for run_stage in (stage_download_and_convert_planet,
                          stage_update_planet):
            run_stage(env)

        with FileLock(build_lock_file(env.out_path), timeout=1):
            for run_stage in (stage_coastline, stage_preprocess,
                              stage_features):
                run_stage(env)

            # The planet file is not needed by the remaining stages.
            planet_lock.release()

            for run_stage in (stage_mwm, stage_descriptions,
                              stage_countries_txt, stage_cleanup):
                run_stage(env)
def unique(s):
    """Return the items of ``s`` as a list without duplicates, keeping the
    order of first occurrence. Items must be hashable."""
    seen = set()
    ordered = []
    for item in s:
        if item not in seen:
            seen.add(item)
            ordered.append(item)
    return ordered
def md5sum(name, block_size=4096):
    """Return the hexadecimal MD5 digest of the file ``name``.

    The file is read in chunks of ``block_size`` bytes, so it never has to
    fit in memory.
    """
    d = hashlib.md5()
    with open(name, mode="rb") as f:
        # read() returns b"" at end of file, which stops the iteration.
        for chunk in iter(lambda: f.read(block_size), b""):
            d.update(chunk)
    return d.hexdigest()


def write_md5sum(fname, name):
    """Write the MD5 digest of ``fname`` into the file ``name``."""
    # Hash first: if hashing fails, no empty checksum file is left behind.
    digest = md5sum(fname)
    with open(name, mode="w") as f:
        f.write(digest)


def check_md5(fname, name):
    """Return True when the digest stored in ``name`` matches ``fname``.

    Only the first whitespace-separated token of ``name`` is compared, so
    both a bare digest and the ``<digest>  <file>`` layout are accepted.

    Raises:
        AssertionError: ``name`` contains no digest (empty or whitespace
            only).
    """
    with open(name, "r") as f:
        fields = f.read().split()
    if not fields:
        # Raised explicitly instead of via `assert` so the check survives
        # `python -O`; the exception type is kept for existing callers.
        raise AssertionError(f"The file {name} is empty")
    # The checksum file is validated before the data file is hashed.
    return fields[0] == md5sum(fname)


def md5(name):
    """Return the name of the checksum file that belongs to ``name``."""
    return f"{name}.md5"
+BUILD_PATH: ~/omim-build-release +OMIM_PATH: ~/omim + + +[Generator tool] +USER_RESOURCE_PATH: ${Developer:OMIM_PATH}/data +# auto detection +# NODE_STORAGE: map + + +[Osm tools] +OSM_TOOLS_SRC_PATH: ${Developer:OMIM_PATH}/tools/osmctools +OSM_TOOLS_PATH: ~/osmctools + + +[Logging] +# MAIN_LOG: generation.log + + +[External] +# PLANET_URL: +# PLANET_MD5_URL: +# UGC_URL: +# HOTELS_URL: +# POPULARITY_URL: +# SUBWAY_URL: +# FOOD_URL: +# FOOD_TRANSLATIONS_URL: + diff --git a/tools/python/post_generation/hierarchy_to_countries.py b/tools/python/post_generation/hierarchy_to_countries.py index a7348e4608..e63f33cca2 100755 --- a/tools/python/post_generation/hierarchy_to_countries.py +++ b/tools/python/post_generation/hierarchy_to_countries.py @@ -115,6 +115,8 @@ def hierarchy_to_countries(old_vs_new_csv_path, borders_vs_osm_csv_path, def fill_last(last, stack): name = last["id"] + if not os.path.exists(os.path.join(target_path, f"{name}.mwm")): + return last["s"] = get_mwm_size(target_path, name) last["sha1_base64"] = get_mwm_hash(target_path, name) if last["s"] >= 0: