[generator][python] Refactored maps_generator.

This commit is contained in:
Maksim Andrianov 2020-01-28 04:35:13 +03:00 committed by mpimenov
parent 4420d22185
commit 20ef14ae01
19 changed files with 2001 additions and 1276 deletions

View file

@ -0,0 +1,12 @@
import os
from .generator import settings
CONFIG_PATH = os.path.join(
os.path.dirname(os.path.join(os.path.realpath(__file__))),
"var",
"etc",
"map_generator.ini",
)
settings.init(CONFIG_PATH)

View file

@ -1,22 +1,25 @@
import logging
import os
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from argparse import ArgumentParser
from argparse import RawDescriptionHelpFormatter
from .generator import settings
from .generator.env import (Env, find_last_build_dir, WORLDS_NAMES,
WORLD_NAME, WORLD_COASTS_NAME)
from .generator.exceptions import ContinueError, SkipError, ValidationError
from .maps_generator import (generate_maps, generate_coasts, reset_to_stage,
ALL_STAGES, stage_download_production_external,
stage_descriptions, stage_ugc, stage_popularity,
stage_localads, stage_statistics, stage_srtm,
stages_as_string, stage_as_string, stage_coastline,
stage_update_planet)
from .utils.collections import unique
from .generator import stages
from .generator import stages_declaration as sd
from .generator.env import Env
from .generator.env import PathProvider
from .generator.env import WORLDS_NAMES
from .generator.env import find_last_build_dir
from .generator.env import get_all_countries_list
from .generator.exceptions import ContinueError
from .generator.exceptions import SkipError
from .generator.exceptions import ValidationError
from .maps_generator import generate_coasts
from .maps_generator import generate_maps
from .utils.algo import unique
logger = logging.getLogger("maps_generator")
examples = """Examples:
1) Non-standard planet with coastlines
If you want to generate maps for Japan you must complete the following steps:
@ -68,11 +71,12 @@ examples = """Examples:
def parse_options():
parser = ArgumentParser(description="Tool for generation maps for maps.me "
"application.",
epilog=examples,
formatter_class=RawDescriptionHelpFormatter,
parents=[settings.parser])
parser = ArgumentParser(
description="Tool for generation maps for maps.me " "application.",
epilog=examples,
formatter_class=RawDescriptionHelpFormatter,
parents=[settings.parser],
)
parser.add_argument(
"-c",
"--continue",
@ -80,51 +84,61 @@ def parse_options():
nargs="?",
type=str,
help="Continue the last build or specified in CONTINUE from the "
"last stopped stage.")
"last stopped stage.",
)
parser.add_argument(
"--countries",
type=str,
default="",
help="List of regions, separated by a comma or a semicolon, or path to "
"file with regions, separated by a line break, for which maps"
" will be built. The names of the regions can be seen "
"in omim/data/borders. It is necessary to set names without "
"any extension.")
"file with regions, separated by a line break, for which maps"
" will be built. The names of the regions can be seen "
"in omim/data/borders. It is necessary to set names without "
"any extension.",
)
parser.add_argument(
"--without_countries",
type=str,
default="",
help="List of regions to exclude them from generation. Syntax is the same as for --countries.")
help="List of regions to exclude them from generation. Syntax is the same as for --countries.",
)
parser.add_argument(
"--skip",
type=str,
default="",
help=f"List of stages, separated by a comma or a semicolon, "
f"for which building will be skipped. Available skip stages: "
f"{', '.join([s.replace('stage_', '') for s in ALL_STAGES])}.")
f"{', '.join([s.replace('stage_', '') for s in stages.stages.get_visible_stages_names()])}.",
)
parser.add_argument(
"--from_stage",
type=str,
default="",
help=f"Stage from which maps will be rebuild. Available stages: "
f"{', '.join([s.replace('stage_', '') for s in ALL_STAGES])}.")
f"{', '.join([s.replace('stage_', '') for s in stages.stages.get_visible_stages_names()])}.",
)
parser.add_argument(
"--coasts",
default=False,
action="store_true",
help="Build only WorldCoasts.raw and WorldCoasts.rawgeom files")
help="Build only WorldCoasts.raw and WorldCoasts.rawgeom files",
)
parser.add_argument(
"--production",
default=False,
action="store_true",
help="Build production maps. In another case, 'osm only maps' are built"
" - maps without additional data and advertising.")
" - maps without additional data and advertising.",
)
parser.add_argument(
"--order",
type=str,
default=os.path.join(os.path.dirname(os.path.abspath(__file__)),
"var/etc/file_generation_order.txt"),
help="Mwm generation order.")
default=os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"var/etc/file_generation_order.txt",
),
help="Mwm generation order.",
)
return vars(parser.parse_args())
@ -133,15 +147,25 @@ def main():
root.addHandler(logging.NullHandler())
options = parse_options()
# Processing of 'continue' option.
# If 'continue' is set maps generation is continued from the last build
# that is found automatically.
build_name = None
if options["continue"] is None or options["continue"]:
d = find_last_build_dir(options["continue"])
if d is None:
raise ContinueError("The build cannot continue: the last build "
"directory was not found.")
raise ContinueError(
"The build cannot continue: the last build " "directory was not found."
)
build_name = d
options["build_name"] = build_name
# Processing of 'countries' option.
# There is processing 'countries' and 'without_countries' options.
# Option 'without_countries' has more priority than 'countries'.
# Options 'countries' and 'without_countries' can include '*'.
# For example: '--countries="UK*, Japan"*' means
# '--countries="UK_England_East Midlands, UK_England_East of England_Essex, ...,
# Japan_Chubu Region_Aichi_Nagoya, Japan_Chubu Region_Aichi_Toyohashi, ..."'.
countries_line = ""
without_countries_line = ""
if "COUNTRIES" in os.environ:
@ -152,14 +176,9 @@ def main():
countries_line = "*"
if options["without_countries"]:
without_countries_line = options["without_countries"]
without_countries_line = options["without_countries"]
borders_path = os.path.join(settings.USER_RESOURCE_PATH, "borders")
all_countries = [
f.replace(".poly", "") for f in os.listdir(borders_path)
if os.path.isfile(os.path.join(borders_path, f))
]
all_countries += list(WORLDS_NAMES)
all_countries = get_all_countries_list(PathProvider.borders_path())
def end_star_compare(prefix, full):
return full.startswith(prefix)
@ -168,48 +187,45 @@ def main():
return a == b
def get_countries_set_from_line(line):
countries = []
used_countries = set()
countries_list = []
if os.path.isfile(line):
with open(line) as f:
countries_list = [x.strip() for x in f]
elif line:
countries_list = [
x.strip() for x in line.replace(";", ",").split(",")
]
countries = []
used_countries = set()
countries_list = []
if os.path.isfile(line):
with open(line) as f:
countries_list = [x.strip() for x in f]
elif line:
countries_list = [x.strip() for x in line.replace(";", ",").split(",")]
for country_item in countries_list:
cmp = compare
_raw_country = country_item[:]
if _raw_country and _raw_country[-1] == "*":
_raw_country = _raw_country.replace("*", "")
cmp = end_star_compare
for country_item in countries_list:
cmp = compare
_raw_country = country_item[:]
if _raw_country and _raw_country[-1] == "*":
_raw_country = _raw_country.replace("*", "")
cmp = end_star_compare
for country in all_countries:
if cmp(_raw_country, country):
used_countries.add(country_item)
countries.append(country)
for country in all_countries:
if cmp(_raw_country, country):
used_countries.add(country_item)
countries.append(country)
countries = unique(countries)
diff = set(countries_list) - used_countries
if diff:
raise ValidationError(f"Bad input countries {', '.join(diff)}")
return set(countries)
countries = unique(countries)
diff = set(countries_list) - used_countries
if diff:
raise ValidationError(f"Bad input countries {', '.join(diff)}")
return set(countries)
countries = get_countries_set_from_line(countries_line)
without_countries = get_countries_set_from_line(without_countries_line)
countries -= without_countries
countries = list(countries)
options["countries"] = countries if countries else all_countries
options["build_all_countries"] = False
if len(countries) == len(all_countries):
options["build_all_countries"] = True
if not countries:
countries = all_countries
# Processing of 'order' option.
# It defines an order of countries generation using a file from 'order' path.
if options["order"]:
ordered_countries = []
countries = set(options["countries"])
countries = set(countries)
with open(options["order"]) as file:
for c in file:
if c.strip().startswith("#"):
@ -219,48 +235,46 @@ def main():
ordered_countries.append(c)
countries.remove(c)
if countries:
raise ValueError(f"{options['order']} does not have an order "
f"for {countries}.")
options["countries"] = ordered_countries
raise ValueError(
f"{options['order']} does not have an order " f"for {countries}."
)
countries = ordered_countries
options_skip = []
# Processing of 'skip' option.
skipped_stages = set()
if options["skip"]:
options_skip = [
f"stage_{s.strip()}"
for s in options["skip"].replace(";", ",").split(",")
]
options["skip"] = options_skip
if not options["production"]:
options["skip"] += stages_as_string(
stage_download_production_external,
stage_ugc,
stage_popularity,
stage_srtm,
stage_descriptions,
stage_localads,
stage_statistics
)
if not all(s in ALL_STAGES for s in options["skip"]):
raise SkipError(f"Stages {set(options['skip']) - set(ALL_STAGES)} "
f"not found.")
for s in options["skip"].replace(";", ",").split(","):
stage = f"stage_{s.strip()}"
if not stages.stages.is_valid_stage_name(stage):
raise SkipError(f"Stage {stage} not found.")
skipped_stages.add(stage)
if settings.PLANET_URL != settings.DEFAULT_PLANET_URL:
options["skip"] += stages_as_string(stage_update_planet)
skipped_stages.add(stages.get_stage_name(sd.StageUpdatePlanet))
if stage_as_string(stage_coastline) in options["skip"]:
worlds_names = [x for x in options["countries"] if x in WORLDS_NAMES]
if worlds_names:
raise SkipError(f"You can not skip {stages_as_string(stage_coastline)}"
f" if you want to generate {WORLDS_NAMES}."
f" You can exclude them with --without_countries option.")
stage_coastline_name = stages.get_stage_name(sd.StageCoastline)
if stage_coastline_name in skipped_stages:
if any(x in WORLDS_NAMES for x in options["countries"]):
raise SkipError(
f"You can not skip {stage_coastline_name}"
f" if you want to generate {WORLDS_NAMES}."
f" You can exclude them with --without_countries option."
)
env = Env(options)
if env.from_stage:
reset_to_stage(env.from_stage, env)
if env.coasts:
generate_coasts(env)
# Make env and run maps generation.
env = Env(
countries=countries,
production=options["production"],
build_name=build_name,
skipped_stages=skipped_stages,
)
from_stage = None
if options["from_stage"]:
from_stage = f"stage_{options['from_stage']}"
if options["coasts"]:
generate_coasts(env, from_stage)
else:
generate_maps(env)
generate_maps(env, from_stage)
env.finish()

View file

@ -1,41 +1,65 @@
"""
This file contains api for osmfilter and generator_tool to generate coastline.
"""
import os
import subprocess
from . import settings
from .env import Env
from .gen_tool import run_gen_tool
from .osmtools import osmfilter
def filter_coastline(name_executable, in_file, out_file,
output=subprocess.DEVNULL, error=subprocess.DEVNULL):
osmfilter(name_executable, in_file, out_file, output=output,
error=error, keep="", keep_ways="natural=coastline",
keep_nodes="capital=yes place=town =city")
def filter_coastline(
name_executable,
in_file,
out_file,
output=subprocess.DEVNULL,
error=subprocess.DEVNULL,
):
osmfilter(
name_executable,
in_file,
out_file,
output=output,
error=error,
keep="",
keep_ways="natural=coastline",
keep_nodes="capital=yes place=town =city",
)
def make_coastline(env):
coastline_o5m = os.path.join(env.coastline_path, "coastline.o5m")
filter_coastline(env[settings.OSM_TOOL_FILTER], settings.PLANET_O5M,
coastline_o5m, output=env.get_subprocess_out(),
error=env.get_subprocess_out())
def make_coastline(env: Env):
coastline_o5m = os.path.join(env.paths.coastline_path, "coastline.o5m")
filter_coastline(
env[settings.OSM_TOOL_FILTER],
settings.PLANET_O5M,
coastline_o5m,
output=env.get_subprocess_out(),
error=env.get_subprocess_out(),
)
run_gen_tool(env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
intermediate_data_path=env.coastline_path,
osm_file_type="o5m",
osm_file_name=coastline_o5m,
node_storage=env.node_storage,
user_resource_path=env.user_resource_path,
preprocess=True)
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
intermediate_data_path=env.paths.coastline_path,
osm_file_type="o5m",
osm_file_name=coastline_o5m,
node_storage=env.node_storage,
user_resource_path=env.paths.user_resource_path,
preprocess=True,
)
run_gen_tool(env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
intermediate_data_path=env.coastline_path,
osm_file_type="o5m",
osm_file_name=coastline_o5m,
node_storage=env.node_storage,
user_resource_path=env.user_resource_path,
make_coasts=True,
fail_on_coasts=True)
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
intermediate_data_path=env.paths.coastline_path,
osm_file_type="o5m",
osm_file_name=coastline_o5m,
node_storage=env.node_storage,
user_resource_path=env.paths.user_resource_path,
make_coasts=True,
fail_on_coasts=True,
)

View file

@ -1,93 +0,0 @@
import datetime
import logging
import os
import time
from functools import wraps
from .env import Env
from .status import Status
from ..utils.log import create_file_logger, DummyObject
logger = logging.getLogger("maps_generator")
def stage(func):
@wraps(func)
def wrap(env: Env, *args, **kwargs):
func_name = func.__name__
stage_formatted = " ".join(func_name.split("_")).capitalize()
logfile = os.path.join(env.log_path, f"{func_name}.log")
log_handler = logging.FileHandler(logfile)
logger.addHandler(log_handler)
if not env.is_accepted_stage(func):
logger.info(f"{stage_formatted} was not accepted.")
logger.removeHandler(log_handler)
return
main_status = env.main_status
main_status.init(env.main_status_path, func_name)
if main_status.need_skip():
logger.warning(f"{stage_formatted} was skipped.")
logger.removeHandler(log_handler)
return
main_status.update_status()
logger.info(f"{stage_formatted}: start ...")
t = time.time()
env.set_subprocess_out(log_handler.stream)
func(env, *args, **kwargs)
d = time.time() - t
logger.info(f"{stage_formatted}: finished in "
f"{str(datetime.timedelta(seconds=d))}")
logger.removeHandler(log_handler)
return wrap
def country_stage_status(func):
@wraps(func)
def wrap(env: Env, country: str, *args, **kwargs):
func_name = func.__name__
_logger = DummyObject()
countries_meta = env.countries_meta
if "logger" in countries_meta[country]:
_logger, _ = countries_meta[country]["logger"]
stage_formatted = " ".join(func_name.split("_")).capitalize()
if not env.is_accepted_stage(func):
_logger.info(f"{stage_formatted} was not accepted.")
return
if "status" not in countries_meta[country]:
countries_meta[country]["status"] = Status()
status = countries_meta[country]["status"]
status_file = os.path.join(env.status_path, f"{country}.status")
status.init(status_file, func_name)
if status.need_skip():
_logger.warning(f"{stage_formatted} was skipped.")
return
status.update_status()
func(env, country, *args, **kwargs)
return wrap
def country_stage_log(func):
@wraps(func)
def wrap(env: Env, country: str, *args, **kwargs):
func_name = func.__name__
log_file = os.path.join(env.log_path, f"{country}.log")
countries_meta = env.countries_meta
if "logger" not in countries_meta[country]:
countries_meta[country]["logger"] = create_file_logger(log_file)
_logger, log_handler = countries_meta[country]["logger"]
stage_formatted = " ".join(func_name.split("_")).capitalize()
_logger.info(f"{stage_formatted}: start ...")
t = time.time()
env.set_subprocess_out(log_handler.stream, country)
func(env, country, *args, logger=_logger, **kwargs)
d = time.time() - t
_logger.info(f"{stage_formatted}: finished in "
f"{str(datetime.timedelta(seconds=d))}")
return wrap
def country_stage(func):
return country_stage_log(country_stage_status(func))

View file

@ -5,11 +5,21 @@ import logging.config
import os
import shutil
import sys
from functools import wraps
from typing import Any
from typing import AnyStr
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from typing import Set
from . import settings
from .osmtools import build_osmtools
from .status import Status
from ..utils.file import find_executable, is_executable, symlink_force
from ..utils.file import find_executable
from ..utils.file import is_executable
from ..utils.file import symlink_force
logger = logging.getLogger("maps_generator")
@ -19,28 +29,72 @@ WORLD_COASTS_NAME = "WorldCoasts"
WORLDS_NAMES = {WORLD_NAME, WORLD_COASTS_NAME}
def _write_version(out_path, version):
with open(os.path.join(out_path, settings.VERSION_FILE_NAME), "w") as f:
f.write(str(version))
def get_all_countries_list(borders_path: AnyStr) -> List[AnyStr]:
"""Returns all countries including World and WorldCoasts."""
return [
f.replace(".poly", "")
for f in os.listdir(borders_path)
if os.path.isfile(os.path.join(borders_path, f))
] + list(WORLDS_NAMES)
def _read_version(version_path):
with open(version_path) as f:
line = f.readline().strip()
try:
return int(line)
except ValueError:
logger.exception(f"Cast '{line}' to int error.")
return 0
def create_if_not_exist_path(path: AnyStr) -> bool:
"""Creates directory if it doesn't exist."""
try:
os.mkdir(path)
logger.info(f"Create {path} ...")
return True
except FileExistsError:
return False
def find_last_build_dir(hint):
if hint:
def create_if_not_exist(func: Callable[..., AnyStr]) -> Callable[..., AnyStr]:
"""
It's a decorator, that wraps func in create_if_not_exist_path,
that returns a path.
"""
@wraps(func)
def wrapper(*args, **kwargs):
path = func(*args, **kwargs)
create_if_not_exist_path(path)
return path
return wrapper
class Version:
"""It's used for writing and reading a generation version."""
@staticmethod
def write(out_path: AnyStr, version: AnyStr):
with open(os.path.join(out_path, settings.VERSION_FILE_NAME), "w") as f:
f.write(str(version))
@staticmethod
def read(version_path: AnyStr) -> int:
with open(version_path) as f:
line = f.readline().strip()
try:
return int(line)
except ValueError:
logger.exception(f"Cast '{line}' to int error.")
return 0
def find_last_build_dir(hint: Optional[AnyStr] = None) -> Optional[AnyStr]:
"""
It tries to find a last generation directory. If it's found function
returns path of last generation directory. Otherwise returns None.
"""
if hint is not None:
p = os.path.join(settings.MAIN_OUT_PATH, hint)
return hint if os.path.exists(p) else None
try:
paths = [os.path.join(settings.MAIN_OUT_PATH, f)
for f in os.listdir(settings.MAIN_OUT_PATH)]
paths = [
os.path.join(settings.MAIN_OUT_PATH, f)
for f in os.listdir(settings.MAIN_OUT_PATH)
]
except FileNotFoundError:
logger.exception(f"{settings.MAIN_OUT_PATH} not found.")
return None
@ -50,327 +104,396 @@ def find_last_build_dir(hint):
if not os.path.isfile(version_path):
versions.append(0)
else:
versions.append(_read_version(version_path))
versions.append(Version.read(version_path))
pairs = sorted(zip(paths, versions), key=lambda p: p[1], reverse=True)
return (None if not pairs or pairs[0][1] == 0
else pairs[0][0].split(os.sep)[-1])
return None if not pairs or pairs[0][1] == 0 else pairs[0][0].split(os.sep)[-1]
def planet_lock_file():
return f"{settings.PLANET_O5M}.lock"
class PathProvider:
"""
PathProvider is used for building paths for a maps generation.
"""
def __init__(self, build_path: AnyStr, mwm_version: AnyStr):
self.build_path = build_path
self.mwm_version = mwm_version
def build_lock_file(out_path):
return f"{os.path.join(out_path, 'lock')}.lock"
create_if_not_exist_path(self.build_path)
@property
@create_if_not_exist
def intermediate_data_path(self) -> AnyStr:
"""
intermediate_data_path contains intermediate files,
for example downloaded external files, that are needed for genration,
*.mwm.tmp files, etc.
"""
return os.path.join(self.build_path, "intermediate_data")
@property
@create_if_not_exist
def data_path(self) -> AnyStr:
"""It's a synonum for intermediate_data_path."""
return self.intermediate_data_path
@property
@create_if_not_exist
def intermediate_tmp_path(self) -> AnyStr:
"""intermediate_tmp_path contains *.mwm.tmp files."""
return os.path.join(self.intermediate_data_path, "tmp")
@property
@create_if_not_exist
def mwm_path(self) -> AnyStr:
"""mwm_path contains *.mwm files."""
return os.path.join(self.build_path, self.mwm_version)
@property
@create_if_not_exist
def log_path(self) -> AnyStr:
"""mwm_path log files."""
return os.path.join(self.build_path, "logs")
@property
@create_if_not_exist
def generation_borders_path(self) -> AnyStr:
"""
generation_borders_path contains *.poly files, that define
which .mwm files are generated.
"""
return os.path.join(self.intermediate_data_path, "borders")
@property
@create_if_not_exist
def draft_path(self) -> AnyStr:
"""draft_path is used for saving temporary intermediate files."""
return os.path.join(self.build_path, "draft")
@property
@create_if_not_exist
def osm2ft_path(self) -> AnyStr:
"""osm2ft_path contains osmId<->ftId mappings."""
return os.path.join(self.build_path, "osm2ft")
@property
@create_if_not_exist
def coastline_path(self) -> AnyStr:
"""coastline_path is used for a coastline generation."""
return os.path.join(self.intermediate_data_path, "coasts")
@property
@create_if_not_exist
def coastline_tmp_path(self) -> AnyStr:
"""coastline_tmp_path is used for a coastline generation."""
return os.path.join(self.coastline_path, "tmp")
@property
@create_if_not_exist
def status_path(self) -> AnyStr:
"""status_path contains status files."""
return os.path.join(self.build_path, "status")
@property
@create_if_not_exist
def descriptions_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "descriptions")
@property
@create_if_not_exist
def stats_path(self) -> AnyStr:
return os.path.join(self.build_path, "stats")
@property
@create_if_not_exist
def transit_path(self) -> AnyStr:
return self.intermediate_data_path
@property
def main_status_path(self) -> AnyStr:
return os.path.join(self.status_path, "stages.status")
@property
def packed_polygons_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "packed_polygons.bin")
@property
def localads_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, f"localads_{self.mwm_version}")
@property
def types_path(self) -> AnyStr:
return os.path.join(self.user_resource_path, "types.txt")
@property
def external_resources_path(self) -> AnyStr:
return os.path.join(self.mwm_path, "external_resources.txt")
@property
def id_to_wikidata_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "id_to_wikidata.csv")
@property
def wiki_url_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "wiki_urls.txt")
@property
def ugc_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "ugc_db.sqlite3")
@property
def hotels_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "hotels.csv")
@property
def promo_catalog_cities_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "promo_catalog_cities.json")
@property
def promo_catalog_countries_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "promo_catalog_countries.json")
@property
def popularity_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "popular_places.csv")
@property
def subway_path(self) -> AnyStr:
return os.path.join(
self.intermediate_data_path, "mapsme_osm_subways.transit.json"
)
@property
def food_paths(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "ids_food.json")
@property
def food_translations_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "translations_food.json")
@property
def uk_postcodes_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "uk_postcodes")
@property
def us_postcodes_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "us_postcodes")
@property
def cities_boundaries_path(self) -> AnyStr:
return os.path.join(self.intermediate_data_path, "cities_boundaries.bin")
@property
def hierarchy_path(self) -> AnyStr:
return os.path.join(self.user_resource_path, "hierarchy.txt")
@property
def old_to_new_path(self) -> AnyStr:
return os.path.join(self.user_resource_path, "old_vs_new.csv")
@property
def borders_to_osm_path(self) -> AnyStr:
return os.path.join(self.user_resource_path, "borders_vs_osm.csv")
@property
def countries_synonyms_path(self) -> AnyStr:
return os.path.join(self.user_resource_path, "countries_synonyms.csv")
@property
def counties_txt_path(self) -> AnyStr:
return os.path.join(self.mwm_path, "countries.txt")
@property
def user_resource_path(self) -> AnyStr:
return settings.USER_RESOURCE_PATH
@staticmethod
def srtm_path() -> AnyStr:
return settings.SRTM_PATH
@staticmethod
def borders_path() -> AnyStr:
return os.path.join(settings.USER_RESOURCE_PATH, "borders")
@staticmethod
@create_if_not_exist
def tmp_dir():
return settings.TMPDIR
class Env:
def __init__(self, options):
Env._logging_setup()
"""
Env provides a generation environment. It sets up instruments and paths,
that are used for a maps generation. It stores state of the maps generation.
"""
def __init__(
self,
countries: Optional[List[AnyStr]] = None,
production: bool = False,
build_name: Optional[AnyStr] = None,
skipped_stages: Optional[Set[AnyStr]] = None,
):
self.setup_logging()
logger.info("Start setup ...")
for k, v in Env._osm_tools_setup().items():
setattr(self, k, v)
for k, v in options.items():
os.environ["TMPDIR"] = PathProvider.tmp_dir()
for k, v in self.setup_osm_tools().items():
setattr(self, k, v)
self.gen_tool = Env._generator_tool_setup()
self.gen_tool = self.setup_generator_tool()
setup_options = Env._out_path_setup(self.build_name)
self.out_path, _, self.mwm_version, self.planet_version = setup_options
logger.info(f"Out path is {self.out_path}.")
self.intermediate_path = Env._intermediate_path_setup(self.out_path)
self.data_path = self.intermediate_path
self.intermediate_tmp_path = os.path.join(self.intermediate_path, "tmp")
self._create_if_not_exist(self.intermediate_tmp_path)
Env._tmp_dir_setup()
self.mwm_path = os.path.join(self.out_path, str(self.mwm_version))
self._create_if_not_exist(self.mwm_path)
self.log_path = os.path.join(self.out_path, "logs")
self._create_if_not_exist(self.log_path)
self.temp_borders_path = self._prepare_borders()
self.draft_path = os.path.join(self.out_path, "draft")
self._create_if_not_exist(self.draft_path)
symlink_force(self.temp_borders_path,
os.path.join(self.draft_path, "borders"))
self.osm2ft_path = os.path.join(self.out_path, "osm2ft")
self._create_if_not_exist(self.osm2ft_path)
for x in os.listdir(self.osm2ft_path):
p = os.path.join(self.osm2ft_path, x)
if os.path.isfile(p) and x.endswith(".mwm.osm2ft"):
shutil.move(p, os.path.join(self.mwm_path, x))
self.production = production
self.countries = countries
self.skipped_stages = set() if skipped_stages is None else skipped_stages
if self.countries is None:
self.countries = get_all_countries_list(PathProvider.borders_path())
self.node_storage = settings.NODE_STORAGE
self.user_resource_path = settings.USER_RESOURCE_PATH
self.coastline_path = os.path.join(self.intermediate_path, "coasts")
self._create_if_not_exist(self.coastline_path)
version_format = "%Y_%m_%d__%H_%M_%S"
dt = None
if build_name is None:
dt = datetime.datetime.now()
build_name = dt.strftime(version_format)
else:
dt = datetime.datetime.strptime(build_name, version_format)
self.status_path = os.path.join(self.out_path, "status")
self._create_if_not_exist(self.status_path)
self.countries_meta = collections.defaultdict(dict)
self.mwm_version = dt.strftime("%y%m%d")
self.planet_version = dt.strftime("%s")
self.build_path = os.path.join(settings.MAIN_OUT_PATH, build_name)
self.build_name = build_name
logger.info(f"Build path is {self.build_path}.")
self.paths = PathProvider(self.build_path, self.mwm_version)
Version.write(self.build_path, self.planet_version)
self.setup_borders()
self.setup_osm2ft()
self.main_status_path = os.path.join(self.status_path, "stages.status")
self.main_status = Status()
self.coastline_tmp_path = os.path.join(self.coastline_path, "tmp")
self._create_if_not_exist(self.coastline_tmp_path)
self.srtm_path = settings.SRTM_PATH
self._subprocess_out = None
self._subprocess_countries_out = {}
_write_version(self.out_path, self.planet_version)
self._skipped_stages = set(self.skip)
# self.countries_meta stores log files and statuses for each country.
self.countries_meta = collections.defaultdict(dict)
self.subprocess_out = None
self.subprocess_countries_out = {}
printed_countries = ", ".join(self.countries)
if len(self.countries) > 50:
printed_countries = (f"{', '.join(self.countries[:25])}, ..., "
f"{', '.join(self.countries[-25:])}")
logger.info(f"The following {len(self.countries)} maps will build: "
f"{printed_countries}.")
printed_countries = (
f"{', '.join(self.countries[:25])}, ..., "
f"{', '.join(self.countries[-25:])}"
)
logger.info(
f"The following {len(self.countries)} maps will build: "
f"{printed_countries}."
)
logger.info("Finish setup")
def get_mwm_names(self):
def __getitem__(self, item):
return self.__dict__[item]
def get_tmp_mwm_names(self) -> List[AnyStr]:
tmp_ext = ".mwm.tmp"
existing_names = set()
for f in os.listdir(self.intermediate_tmp_path):
path = os.path.join(self.intermediate_tmp_path, f)
for f in os.listdir(self.paths.intermediate_tmp_path):
path = os.path.join(self.paths.intermediate_tmp_path, f)
if f.endswith(tmp_ext) and os.path.isfile(path):
name = f.replace(tmp_ext, "")
if name in self.countries:
existing_names.add(name)
return [c for c in self.countries if c in existing_names]
def is_accepted_stage(self, stage):
return stage.__name__ not in self._skipped_stages
def add_skipped_stage_name(self, stage_name: AnyStr):
self.skipped_stages.add(stage_name)
@property
def descriptions_path(self):
path = os.path.join(self.intermediate_path, "descriptions")
self._create_if_not_exist(path)
return path
@property
def packed_polygons_path(self):
return os.path.join(self.intermediate_path, "packed_polygons.bin")
@property
def localads_path(self):
path = os.path.join(self.out_path, f"localads_{self.mwm_version}")
self._create_if_not_exist(path)
return path
@property
def stats_path(self):
path = os.path.join(self.out_path, "stats")
self._create_if_not_exist(path)
return path
@property
def types_path(self):
return os.path.join(self.user_resource_path, "types.txt")
@property
def external_resources_path(self):
return os.path.join(self.mwm_path, "external_resources.txt")
@property
def id_to_wikidata_path(self):
return os.path.join(self.intermediate_path, "id_to_wikidata.csv")
@property
def wiki_url_path(self):
return os.path.join(self.intermediate_path, "wiki_urls.txt")
@property
def ugc_path(self):
return os.path.join(self.intermediate_path, "ugc_db.sqlite3")
@property
def hotels_path(self):
return os.path.join(self.intermediate_path, "hotels.csv")
@property
def promo_catalog_cities_path(self):
return os.path.join(self.intermediate_path, "promo_catalog_cities.json")
@property
def promo_catalog_countries_path(self):
return os.path.join(self.intermediate_path,
"promo_catalog_countries.json")
@property
def popularity_path(self):
return os.path.join(self.intermediate_path, "popular_places.csv")
@property
def subway_path(self):
return os.path.join(self.intermediate_path,
"mapsme_osm_subways.transit.json")
@property
def food_paths(self):
return os.path.join(self.intermediate_path, "ids_food.json")
@property
def food_translations_path(self):
return os.path.join(self.intermediate_path, "translations_food.json")
@property
def uk_postcodes_path(self):
return os.path.join(self.intermediate_path, "uk_postcodes")
@property
def us_postcodes_path(self):
return os.path.join(self.intermediate_path, "us_postcodes")
@property
def cities_boundaries_path(self):
return os.path.join(self.intermediate_path, "cities_boundaries.bin")
@property
def transit_path(self):
return self.intermediate_path
@property
def hierarchy_path(self):
return os.path.join(self.user_resource_path, "hierarchy.txt")
@property
def old_to_new_path(self):
return os.path.join(self.user_resource_path, "old_vs_new.csv")
@property
def borders_to_osm_path(self):
return os.path.join(self.user_resource_path, "borders_vs_osm.csv")
@property
def countries_synonyms_path(self):
return os.path.join(self.user_resource_path, "countries_synonyms.csv")
@property
def counties_txt_path(self):
return os.path.join(self.mwm_path, "countries.txt")
def __getitem__(self, item):
return self.__dict__[item]
def is_skipped_stage_name(self, stage_name: AnyStr) -> bool:
return stage_name not in self.skipped_stages
def finish(self):
self.main_status.finish()
def finish_mwm(self, mwm_name):
def finish_mwm(self, mwm_name: AnyStr):
self.countries_meta[mwm_name]["status"].finish()
def set_subprocess_out(self, subprocess_out, country=None):
def set_subprocess_out(self, subprocess_out: Any, country: Optional[AnyStr] = None):
if country is None:
self._subprocess_out = subprocess_out
self.subprocess_out = subprocess_out
else:
self._subprocess_countries_out[country] = subprocess_out
self.subprocess_countries_out[country] = subprocess_out
def get_subprocess_out(self, country=None):
def get_subprocess_out(self, country: Optional[AnyStr] = None):
if country is None:
return self._subprocess_out
return self.subprocess_out
else:
return self._subprocess_countries_out[country]
return self.subprocess_countries_out[country]
@staticmethod
def _logging_setup():
def setup_logging():
def exception_handler(type, value, tb):
logger.exception(f"Uncaught exception: {str(value)}",
exc_info=(type, value, tb))
logger.exception(
f"Uncaught exception: {str(value)}", exc_info=(type, value, tb)
)
logging.config.dictConfig(settings.LOGGING)
sys.excepthook = exception_handler
@staticmethod
def _generator_tool_setup():
def setup_generator_tool() -> AnyStr:
logger.info("Check generator tool ...")
gen_tool_path = shutil.which(settings.GEN_TOOL)
if gen_tool_path is None:
logger.info(f"Find generator tool in {settings.BUILD_PATH} ...")
gen_tool_path = find_executable(settings.BUILD_PATH,
settings.GEN_TOOL)
gen_tool_path = find_executable(settings.BUILD_PATH, settings.GEN_TOOL)
logger.info(f"Generator found - {gen_tool_path}")
return gen_tool_path
@staticmethod
def _osm_tools_setup():
def setup_osm_tools() -> Dict[AnyStr, AnyStr]:
path = settings.OSM_TOOLS_PATH
osm_tool_names = [
settings.OSM_TOOL_CONVERT, settings.OSM_TOOL_UPDATE,
settings.OSM_TOOL_FILTER
settings.OSM_TOOL_CONVERT,
settings.OSM_TOOL_UPDATE,
settings.OSM_TOOL_FILTER,
]
logger.info("Check osm tools ...")
if not Env._create_if_not_exist(path):
if not create_if_not_exist_path(path):
tmp_paths = [os.path.join(path, t) for t in osm_tool_names]
if all([is_executable(t) for t in tmp_paths]):
osm_tool_paths = dict(zip(osm_tool_names, tmp_paths))
logger.info(
f"Osm tools found - {', '.join(osm_tool_paths.values())}")
logger.info(f"Osm tools found - {', '.join(osm_tool_paths.values())}")
return osm_tool_paths
tmp_paths = [shutil.which(t) for t in osm_tool_names]
if all(tmp_paths):
osm_tool_paths = dict(zip(osm_tool_names, tmp_paths))
logger.info(
f"Osm tools found - {', '.join(osm_tool_paths.values())}")
logger.info(f"Osm tools found - {', '.join(osm_tool_paths.values())}")
return osm_tool_paths
logger.info("Build osm tools ...")
return build_osmtools(settings.OSM_TOOLS_SRC_PATH)
@staticmethod
def _out_path_setup(build_name):
dt = datetime.datetime.now()
version_format = "%Y_%m_%d__%H_%M_%S"
if build_name:
dt = datetime.datetime.strptime(build_name, version_format)
def setup_borders(self):
temp_borders = self.paths.generation_borders_path
# It is needed in case of rebuilding several mwms.
for filename in os.listdir(temp_borders):
file_path = os.path.join(temp_borders, filename)
if os.path.isfile(file_path) or os.path.islink(file_path):
os.unlink(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)
s = dt.strftime(version_format)
mwm_version = dt.strftime("%y%m%d")
planet_version = int(dt.strftime("%s"))
out_path = os.path.join(settings.MAIN_OUT_PATH, s)
Env._create_if_not_exist(settings.MAIN_OUT_PATH)
Env._create_if_not_exist(out_path)
return out_path, s, mwm_version, planet_version
@staticmethod
def _intermediate_path_setup(out_path):
intermediate_path = os.path.join(out_path, "intermediate_data")
Env._create_if_not_exist(intermediate_path)
return intermediate_path
@staticmethod
def _tmp_dir_setup():
Env._create_if_not_exist(settings.TMPDIR)
os.environ["TMPDIR"] = settings.TMPDIR
@staticmethod
def _create_if_not_exist(path):
try:
os.mkdir(path)
logger.info(f"Create {path} ...")
return True
except FileExistsError:
return False
def _prepare_borders(self):
borders = "borders"
temp_borders = os.path.join(self.intermediate_path, borders)
Env._create_if_not_exist(temp_borders)
borders = os.path.join(settings.USER_RESOURCE_PATH, borders)
borders = PathProvider.borders_path()
for x in self.countries:
if x in WORLDS_NAMES:
continue
shutil.copy2(f"{os.path.join(borders, x)}.poly", temp_borders)
return temp_borders
poly = f"{x}.poly"
os.symlink(os.path.join(borders, poly), os.path.join(temp_borders, poly))
symlink_force(temp_borders, os.path.join(self.paths.draft_path, "borders"))
def setup_osm2ft(self):
for x in os.listdir(self.paths.osm2ft_path):
p = os.path.join(self.paths.osm2ft_path, x)
if os.path.isfile(p) and x.endswith(".mwm.osm2ft"):
shutil.move(p, os.path.join(self.paths.mwm_path, x))

View file

@ -3,8 +3,9 @@ import logging
import os
import subprocess
from .exceptions import (OptionNotFound, ValidationError,
wait_and_raise_if_fail)
from .exceptions import OptionNotFound
from .exceptions import ValidationError
from .exceptions import wait_and_raise_if_fail
logger = logging.getLogger("maps_generator")
@ -43,7 +44,6 @@ class GenTool:
"split_by_polygons": bool,
"type_statistics": bool,
"version": bool,
"planet_version": int,
"booking_data": str,
"promo_catalog_cities": str,
"brands_data": str,
@ -61,6 +61,7 @@ class GenTool:
"osm_file_name": str,
"osm_file_type": str,
"output": str,
"planet_version": str,
"popular_places_data": str,
"regions_features": str,
"regions_index": str,
@ -74,8 +75,9 @@ class GenTool:
"wikipedia_pages": str,
}
def __init__(self, name_executable, out=subprocess.DEVNULL,
err=subprocess.DEVNULL, **options):
def __init__(
self, name_executable, out=subprocess.DEVNULL, err=subprocess.DEVNULL, **options
):
self.name_executable = name_executable
self.subprocess = None
self.output = out
@ -100,8 +102,10 @@ class GenTool:
raise OptionNotFound(f"{k} is unavailable option")
if type(v) is not GenTool.OPTIONS[k]:
raise ValidationError(f"{k} required {str(GenTool.OPTIONS[k])},"
f" but not {str(type(v))}")
raise ValidationError(
f"{k} required {str(GenTool.OPTIONS[k])},"
f" but not {str(type(v))}"
)
self.options[k] = str(v).lower() if type(v) is bool else v
return self
@ -109,11 +113,13 @@ class GenTool:
def run_async(self):
assert self.subprocess is None, "You forgot to call wait()"
cmd = self._collect_cmd()
self.subprocess = subprocess.Popen(cmd, stdout=self.output,
stderr=self.error, env=os.environ)
self.subprocess = subprocess.Popen(
cmd, stdout=self.output, stderr=self.error, env=os.environ
)
self.logger.info(f"Run generator tool [{self.get_build_version()}]:"
f" {' '.join(cmd)} ")
self.logger.info(
f"Run generator tool [{self.get_build_version()}]:" f" {' '.join(cmd)} "
)
return self
def wait(self):
@ -131,19 +137,20 @@ class GenTool:
return c
def get_build_version(self):
p = subprocess.Popen([self.name_executable, "--version"],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
env=os.environ)
p = subprocess.Popen(
[self.name_executable, "--version"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=os.environ,
)
wait_and_raise_if_fail(p)
out, err = p.communicate()
return out.decode("utf-8").replace("\n", " ").strip()
def _collect_cmd(self):
options = ["".join(["--", k, "=", str(v)]) for k, v in
self.options.items()]
options = ["".join(["--", k, "=", str(v)]) for k, v in self.options.items()]
return [self.name_executable, *options]
def run_gen_tool(*args, **kwargs):
GenTool(*args, **kwargs).run()

View file

@ -0,0 +1,127 @@
import os
from typing import AnyStr
from typing import Optional
from typing import Type
from typing import Union
import filelock
from . import settings
from .env import Env
from .exceptions import ContinueError
from .stages import Stage
from .stages import get_stage_name
from .stages import stages
from .status import Status
class Generation:
"""
Generation describes process of a map generation. It contains stages.
For example:
generation = Generation(env)
generation.add_stage(s1)
generation.add_stage(s2)
generation.run()
"""
def __init__(self, env: Env):
self.env = env
self.stages = []
for country_stage in stages.countries_stages:
if self.is_skipped_stage(country_stage):
stage_name = get_stage_name(country_stage)
self.env.add_skipped_stage_name(stage_name)
def is_skipped_stage(self, stage: Union[Type[Stage], Stage]) -> bool:
return stage.is_production_only and not self.env.production
def add_stage(self, stage: Stage):
if self.is_skipped_stage(stage):
stage_name = get_stage_name(stage)
self.env.add_skipped_stage_name(stage_name)
else:
self.stages.append(stage)
def run(self, from_stage: Optional[AnyStr] = None):
if from_stage is not None:
self.reset_to_stage(from_stage)
planet_lock = filelock.FileLock(f"{settings.PLANET_O5M}.lock", timeout=1)
build_lock = filelock.FileLock(
f"{os.path.join(self.env.paths.build_path, 'lock')}.lock"
)
try:
for stage in self.stages:
if stage.need_planet_lock:
planet_lock.acquire()
else:
planet_lock.release()
if stage.need_build_lock:
build_lock.acquire()
else:
build_lock.release()
stage(self.env)
finally:
planet_lock.release()
build_lock.release()
def reset_to_stage(self, stage_name: AnyStr):
"""
Resets generation state to stage_name.
Status files are overwritten new statuses according stage_name.
It supposes that stages have next representation:
stage1, ..., stage_mwm[country_stage_1, ..., country_stage_M], ..., stageN
"""
countries_statuses_paths = [
os.path.join(self.env.paths.status_path, f)
for f in os.listdir(self.env.paths.status_path)
if os.path.isfile(os.path.join(self.env.paths.status_path, f))
and os.path.join(self.env.paths.status_path, f)
!= self.env.paths.main_status_path
]
def set_countries_stage(st):
for path in countries_statuses_paths:
Status(path, st).update_status()
def finish_countries_stage():
for path in countries_statuses_paths:
Status(path).finish()
high_level_stages = [get_stage_name(s) for s in self.stages]
if not (
stage_name in high_level_stages
or any(stage_name == get_stage_name(s) for s in stages.countries_stages)
):
raise ContinueError(
f"Stage {stage_name} not in {', '.join(high_level_stages)}."
)
if not os.path.exists(self.env.paths.main_status_path):
raise ContinueError(
f"Status file {self.env.paths.main_status_path} not found."
)
if not os.path.exists(self.env.paths.status_path):
raise ContinueError(f"Status path {self.env.paths.status_path} not found.")
mwm_stage_name = get_stage_name(stages.mwm_stage)
stage_mwm_index = high_level_stages.index(mwm_stage_name)
main_status = None
if stage_name in high_level_stages[: stage_mwm_index + 1]:
main_status = stage_name
set_countries_stage("")
elif stage_name in high_level_stages[stage_mwm_index + 1 :]:
main_status = stage_name
finish_countries_stage()
else:
main_status = get_stage_name(stages.mwm_stage)
set_countries_stage(stage_name)
Status(self.env.paths.main_status_path, main_status).update_status()

View file

@ -2,23 +2,29 @@ import os
import subprocess
from . import settings
from .exceptions import wait_and_raise_if_fail, BadExitStatusError
from .exceptions import BadExitStatusError
from .exceptions import wait_and_raise_if_fail
def build_osmtools(path, output=subprocess.DEVNULL, error=subprocess.DEVNULL):
src = {settings.OSM_TOOL_UPDATE: "osmupdate.c",
settings.OSM_TOOL_FILTER: "osmfilter.c",
settings.OSM_TOOL_CONVERT: "osmconvert.c"}
src = {
settings.OSM_TOOL_UPDATE: "osmupdate.c",
settings.OSM_TOOL_FILTER: "osmfilter.c",
settings.OSM_TOOL_CONVERT: "osmconvert.c",
}
ld_flags = ("-lz",)
cc = []
result = {}
for executable, src in src.items():
out = os.path.join(settings.OSM_TOOLS_PATH, executable)
op = [settings.OSM_TOOLS_CC,
*settings.OSM_TOOLS_CC_FLAGS,
"-o", out,
os.path.join(path, src),
*ld_flags]
op = [
settings.OSM_TOOLS_CC,
*settings.OSM_TOOLS_CC_FLAGS,
"-o",
out,
os.path.join(path, src),
*ld_flags,
]
s = subprocess.Popen(op, stdout=output, stderr=error)
cc.append(s)
result[executable] = out
@ -33,45 +39,81 @@ def build_osmtools(path, output=subprocess.DEVNULL, error=subprocess.DEVNULL):
return result
def osmconvert(name_executable, in_file, out_file, output=subprocess.DEVNULL,
error=subprocess.DEVNULL, run_async=False, **kwargs):
def osmconvert(
name_executable,
in_file,
out_file,
output=subprocess.DEVNULL,
error=subprocess.DEVNULL,
run_async=False,
**kwargs,
):
env = os.environ.copy()
env["PATH"] = f"{settings.OSM_TOOLS_PATH}:{env['PATH']}"
p = subprocess.Popen(
[
name_executable, in_file, "--drop-author", "--drop-version",
"--out-o5m", f"-o={out_file}"
name_executable,
in_file,
"--drop-author",
"--drop-version",
"--out-o5m",
f"-o={out_file}",
],
env=env, stdout=output, stderr=error)
env=env,
stdout=output,
stderr=error,
)
if run_async:
return p
else:
wait_and_raise_if_fail(p)
def osmupdate(name_executable, in_file, out_file, output=subprocess.DEVNULL,
error=subprocess.DEVNULL, run_async=False, **kwargs):
def osmupdate(
name_executable,
in_file,
out_file,
output=subprocess.DEVNULL,
error=subprocess.DEVNULL,
run_async=False,
**kwargs,
):
env = os.environ.copy()
env["PATH"] = f"{settings.OSM_TOOLS_PATH}:{env['PATH']}"
p = subprocess.Popen(
[
name_executable, "--drop-author", "--drop-version", "--out-o5m",
name_executable,
"--drop-author",
"--drop-version",
"--out-o5m",
"-v",
in_file, out_file
in_file,
out_file,
],
env=env, stdout=output, stderr=error)
env=env,
stdout=output,
stderr=error,
)
if run_async:
return p
else:
wait_and_raise_if_fail(p)
def osmfilter(name_executable, in_file, out_file, output=subprocess.DEVNULL,
error=subprocess.DEVNULL, run_async=False, **kwargs):
def osmfilter(
name_executable,
in_file,
out_file,
output=subprocess.DEVNULL,
error=subprocess.DEVNULL,
run_async=False,
**kwargs,
):
env = os.environ.copy()
env["PATH"] = f"{settings.OSM_TOOLS_PATH}:{env['PATH']}"
args = ([name_executable, in_file, f"-o={out_file}"] +
[f"--{k.replace('_', '-')}={v}" for k, v in kwargs.items()])
args = [name_executable, in_file, f"-o={out_file}"] + [
f"--{k.replace('_', '-')}={v}" for k, v in kwargs.items()
]
p = subprocess.Popen(args, env=env, stdout=output, stderr=error)
if run_async:
return p

View file

@ -2,20 +2,25 @@ import argparse
import multiprocessing
import os
import sys
from configparser import ConfigParser, ExtendedInterpolation
from configparser import ConfigParser
from configparser import ExtendedInterpolation
from pathlib import Path
from typing import Any
from typing import AnyStr
from ..utils.md5 import md5
from ..utils.system import total_virtual_memory
SETTINGS_PATH = os.path.dirname(os.path.join(os.path.realpath(__file__)))
parser = argparse.ArgumentParser(add_help=False)
opt_config = "--config"
parser.add_argument(opt_config, type=str, default="", help="Path to config")
def get_config_path():
config_path = os.path.join(SETTINGS_PATH, "../var/etc/map_generator.ini")
def get_config_path(config_path: AnyStr):
"""
It tries to get an opt_config value.
If doesn't get the value a function returns config_path.
"""
argv = sys.argv
indexes = (-1, -1)
for i, opt in enumerate(argv):
@ -27,27 +32,76 @@ def get_config_path():
if indexes[1] > len(argv):
return config_path
args = argv[indexes[0]: indexes[1]]
args = argv[indexes[0] : indexes[1]]
return parser.parse_args(args).config if args else config_path
DEBUG = True
HOME_PATH = str(Path.home())
WORK_PATH = HOME_PATH
TMPDIR = os.path.join(HOME_PATH, "tmp")
MAIN_OUT_PATH = os.path.join(WORK_PATH, "generation")
class CfgReader:
"""
Config reader.
There are 3 way of getting an option. In priority order:
1. From system env.
2. From config.
3. From default values.
For using the option from system env you can build an option name as
MM__ + [SECTION_NAME] + _ + [VALUE_NAME].
"""
def __init__(self, default_settings_path: AnyStr):
self.config = ConfigParser(interpolation=ExtendedInterpolation())
self.config.read([get_config_path(default_settings_path)])
def get_opt(self, s: AnyStr, v: AnyStr, default: Any = None):
val = CfgReader._get_env_val(s, v)
if val is not None:
return val
return self.config.get(s, v) if self.config.has_option(s, v) else default
def get_opt_path(self, s: AnyStr, v: AnyStr, default: AnyStr = ""):
return os.path.expanduser(self.get_opt(s, v, default))
@staticmethod
def _get_env_val(s: AnyStr, v: AnyStr):
return os.environ.get(f"MM__{s.upper()}_{v.upper()}")
VERSION_FILE_NAME = "version.txt"
# External resources
DEFAULT_PLANET_URL = "https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf"
DEFAULT_PLANET_MD5_URL = DEFAULT_PLANET_URL + ".md5"
DEFAULT_PLANET_MD5_URL = md5(DEFAULT_PLANET_URL)
# Main section:
# If DEBUG is True, a little special planet is downloaded.
DEBUG = True
_HOME_PATH = str(Path.home())
_WORK_PATH = _HOME_PATH
TMPDIR = os.path.join(_HOME_PATH, "tmp")
MAIN_OUT_PATH = os.path.join(_WORK_PATH, "generation")
# Developer section:
BUILD_PATH = os.path.join(_WORK_PATH, "omim-build-release")
OMIM_PATH = os.path.join(_WORK_PATH, "omim")
# Osm tools section:
OSM_TOOLS_SRC_PATH = os.path.join(OMIM_PATH, "tools", "osmctools")
OSM_TOOLS_PATH = os.path.join(_WORK_PATH, "osmctools")
# Generator tool section:
NODE_STORAGE = "mem" if total_virtual_memory() / 10 ** 9 >= 64 else "map"
USER_RESOURCE_PATH = os.path.join(OMIM_PATH, "data")
# Logging section:
LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log")
# External resources section:
PLANET_URL = DEFAULT_PLANET_URL
PLANET_MD5_URL = DEFAULT_PLANET_MD5_URL
PLANET_COASTS_URL = ""
UGC_URL = ""
HOTELS_URL = ""
PROMO_CATALOG_CITIES_URL = ""
PROMO_CATALOG_COUNTRIES_URL = ""
POPULARITY_URL= ""
POPULARITY_URL = ""
SUBWAY_URL = ""
FOOD_URL = ""
FOOD_TRANSLATIONS_URL = ""
@ -55,86 +109,27 @@ UK_POSTCODES_URL = ""
US_POSTCODES_URL = ""
SRTM_PATH = ""
# Stats section:
STATS_TYPES_CONFIG = ""
# Other variables:
PLANET = "planet"
GEN_TOOL = "generator_tool"
VERSION_FILE_NAME = "version.txt"
BUILD_PATH = os.path.join(WORK_PATH, "omim-build-release")
OMIM_PATH = os.path.join(WORK_PATH, "omim")
# generator_tool
NODE_STORAGE = "mem" if total_virtual_memory() / 10 ** 9 >= 64 else "map"
USER_RESOURCE_PATH = os.path.join(OMIM_PATH, "data")
# osm tools
# Osm tools:
OSM_TOOL_CONVERT = "osmconvert"
OSM_TOOL_FILTER = "osmfilter"
OSM_TOOL_UPDATE = "osmupdate"
OSM_TOOLS_SRC_PATH = os.path.join(OMIM_PATH, "tools", "osmctools")
OSM_TOOLS_PATH = os.path.join(WORK_PATH, "osmctools")
OSM_TOOLS_CC = "cc"
OSM_TOOLS_CC_FLAGS = ["-O3", ]
OSM_TOOLS_CC_FLAGS = [
"-O3",
]
# system
# System:
CPU_COUNT = multiprocessing.cpu_count()
# Try to read a config and to overload default settings
config = ConfigParser(interpolation=ExtendedInterpolation())
config.read([get_config_path()])
def _get_opt(config, s, v, default=None):
return config.get(s, v) if config.has_option(s, v) else default
def _get_opt_path(config, s, v, default=""):
return os.path.expanduser(_get_opt(config, s, v, default))
_DEBUG = _get_opt(config, "Main", "DEBUG")
DEBUG = DEBUG if _DEBUG is None else int(_DEBUG)
MAIN_OUT_PATH = _get_opt_path(config, "Main", "MAIN_OUT_PATH", MAIN_OUT_PATH)
# logging
LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log")
TMPDIR = _get_opt_path(config, "Main", "TMPDIR", TMPDIR)
BUILD_PATH = _get_opt_path(config, "Developer", "BUILD_PATH", BUILD_PATH)
OMIM_PATH = _get_opt_path(config, "Developer", "OMIM_PATH", OMIM_PATH)
USER_RESOURCE_PATH = _get_opt_path(config, "Generator tool",
"USER_RESOURCE_PATH", USER_RESOURCE_PATH)
NODE_STORAGE = _get_opt(config, "Generator tool", "NODE_STORAGE", NODE_STORAGE)
OSM_TOOLS_SRC_PATH = _get_opt_path(config, "Osm tools", "OSM_TOOLS_SRC_PATH", OSM_TOOLS_SRC_PATH)
OSM_TOOLS_PATH = _get_opt_path(config, "Osm tools", "OSM_TOOLS_PATH", OSM_TOOLS_PATH)
LOG_FILE_PATH = _get_opt_path(config, "Logging", "MAIN_LOG", LOG_FILE_PATH)
os.makedirs(os.path.dirname(os.path.abspath(LOG_FILE_PATH)), exist_ok=True)
PLANET_URL = _get_opt_path(config, "External", "PLANET_URL", DEFAULT_PLANET_URL)
PLANET_MD5_URL = _get_opt_path(config, "External", "PLANET_MD5_URL", DEFAULT_PLANET_MD5_URL)
PLANET_COASTS_URL = _get_opt_path(config, "External", "PLANET_COASTS_URL", PLANET_COASTS_URL)
UGC_URL = _get_opt_path(config, "External", "UGC_URL", UGC_URL)
HOTELS_URL = _get_opt_path(config, "External", "HOTELS_URL", HOTELS_URL)
PROMO_CATALOG_CITIES_URL = _get_opt_path(config, "External", "PROMO_CATALOG_CITIES_URL", PROMO_CATALOG_CITIES_URL)
PROMO_CATALOG_COUNTRIES_URL = _get_opt_path(config, "External", "PROMO_CATALOG_COUNTRIES_URL", PROMO_CATALOG_COUNTRIES_URL)
POPULARITY_URL = _get_opt_path(config, "External", "POPULARITY_URL", POPULARITY_URL)
SUBWAY_URL = _get_opt(config, "External", "SUBWAY_URL", SUBWAY_URL)
FOOD_URL = _get_opt(config, "External", "FOOD_URL", FOOD_URL)
UK_POSTCODES_URL = _get_opt(config, "External", "UK_POSTCODES_URL", UK_POSTCODES_URL)
US_POSTCODES_URL = _get_opt(config, "External", "US_POSTCODES_URL", US_POSTCODES_URL)
FOOD_TRANSLATIONS_URL = _get_opt(config, "External", "FOOD_TRANSLATIONS_URL",
FOOD_TRANSLATIONS_URL)
SRTM_PATH = _get_opt_path(config, "External", "SRTM_PATH", SRTM_PATH)
STATS_TYPES_CONFIG = _get_opt_path(config, "Stats", "STATS_TYPES_CONFIG",
STATS_TYPES_CONFIG)
# Planet and coasts:
PLANET_O5M = os.path.join(MAIN_OUT_PATH, PLANET + ".o5m")
PLANET_PBF = os.path.join(MAIN_OUT_PATH, PLANET + ".osm.pbf")
PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom")
@ -149,28 +144,130 @@ LOGGING = {
"version": 1,
"disable_existing_loggers": False,
"formatters": {
"standard": {
"format": "[%(asctime)s] %(levelname)s %(module)s %(message)s"
},
"standard": {"format": "[%(asctime)s] %(levelname)s %(module)s %(message)s"},
},
"handlers": {
"stdout": {
"level": "INFO",
"class": "logging.StreamHandler",
"formatter": "standard"
"formatter": "standard",
},
"file": {
"level": "DEBUG",
"class": "logging.handlers.WatchedFileHandler",
"formatter": "standard",
"filename": LOG_FILE_PATH
}
"filename": LOG_FILE_PATH,
},
},
"loggers": {
"maps_generator": {
"handlers": ["stdout", "file"],
"level": "DEBUG",
"propagate": True
"propagate": True,
}
}
},
}
def init(default_settings_path: AnyStr):
# Try to read a config and to overload default settings
cfg = CfgReader(default_settings_path)
# Main section:
global DEBUG
global MAIN_OUT_PATH
global TMPDIR
_DEBUG = cfg.get_opt("Main", "DEBUG")
DEBUG = DEBUG if _DEBUG is None else int(_DEBUG)
MAIN_OUT_PATH = cfg.get_opt_path("Main", "MAIN_OUT_PATH", MAIN_OUT_PATH)
TMPDIR = cfg.get_opt_path("Main", "TMPDIR", TMPDIR)
# Developer section:
global BUILD_PATH
global OMIM_PATH
BUILD_PATH = cfg.get_opt_path("Developer", "BUILD_PATH", BUILD_PATH)
OMIM_PATH = cfg.get_opt_path("Developer", "OMIM_PATH", OMIM_PATH)
# Osm tools section:
global OSM_TOOLS_SRC_PATH
global OSM_TOOLS_PATH
OSM_TOOLS_SRC_PATH = cfg.get_opt_path(
"Osm tools", "OSM_TOOLS_SRC_PATH", OSM_TOOLS_SRC_PATH
)
OSM_TOOLS_PATH = cfg.get_opt_path("Osm tools", "OSM_TOOLS_PATH", OSM_TOOLS_PATH)
# Generator tool section:
global USER_RESOURCE_PATH
global NODE_STORAGE
USER_RESOURCE_PATH = cfg.get_opt_path(
"Generator tool", "USER_RESOURCE_PATH", USER_RESOURCE_PATH
)
NODE_STORAGE = cfg.get_opt("Generator tool", "NODE_STORAGE", NODE_STORAGE)
# Logging section:
global LOG_FILE_PATH
global LOGGING
LOG_FILE_PATH = os.path.join(MAIN_OUT_PATH, "generation.log")
LOG_FILE_PATH = cfg.get_opt_path("Logging", "MAIN_LOG", LOG_FILE_PATH)
os.makedirs(os.path.dirname(os.path.abspath(LOG_FILE_PATH)), exist_ok=True)
LOGGING["handlers"]["file"]["filename"] = LOG_FILE_PATH
# External sction:
global PLANET_URL
global PLANET_MD5_URL
global PLANET_COASTS_URL
global UGC_URL
global HOTELS_URL
global PROMO_CATALOG_CITIES_URL
global PROMO_CATALOG_COUNTRIES_URL
global POPULARITY_URL
global SUBWAY_URL
global FOOD_URL
global UK_POSTCODES_URL
global US_POSTCODES_URL
global FOOD_TRANSLATIONS_URL
global SRTM_PATH
PLANET_URL = cfg.get_opt_path("External", "PLANET_URL", PLANET_URL)
PLANET_MD5_URL = cfg.get_opt_path("External", "PLANET_MD5_URL", PLANET_MD5_URL)
PLANET_COASTS_URL = cfg.get_opt_path(
"External", "PLANET_COASTS_URL", PLANET_COASTS_URL
)
UGC_URL = cfg.get_opt_path("External", "UGC_URL", UGC_URL)
HOTELS_URL = cfg.get_opt_path("External", "HOTELS_URL", HOTELS_URL)
PROMO_CATALOG_CITIES_URL = cfg.get_opt_path(
"External", "PROMO_CATALOG_CITIES_URL", PROMO_CATALOG_CITIES_URL
)
PROMO_CATALOG_COUNTRIES_URL = cfg.get_opt_path(
"External", "PROMO_CATALOG_COUNTRIES_URL", PROMO_CATALOG_COUNTRIES_URL
)
POPULARITY_URL = cfg.get_opt_path("External", "POPULARITY_URL", POPULARITY_URL)
SUBWAY_URL = cfg.get_opt("External", "SUBWAY_URL", SUBWAY_URL)
FOOD_URL = cfg.get_opt("External", "FOOD_URL", FOOD_URL)
UK_POSTCODES_URL = cfg.get_opt("External", "UK_POSTCODES_URL", UK_POSTCODES_URL)
US_POSTCODES_URL = cfg.get_opt("External", "US_POSTCODES_URL", US_POSTCODES_URL)
FOOD_TRANSLATIONS_URL = cfg.get_opt(
"External", "FOOD_TRANSLATIONS_URL", FOOD_TRANSLATIONS_URL
)
SRTM_PATH = cfg.get_opt_path("External", "SRTM_PATH", SRTM_PATH)
# Stats section:
global STATS_TYPES_CONFIG
STATS_TYPES_CONFIG = cfg.get_opt_path(
"Stats", "STATS_TYPES_CONFIG", STATS_TYPES_CONFIG
)
# Planet and costs:
global PLANET_O5M
global PLANET_PBF
global PLANET_COASTS_GEOM_URL
global PLANET_COASTS_RAWGEOM_URL
PLANET_O5M = os.path.join(MAIN_OUT_PATH, PLANET + ".o5m")
PLANET_PBF = os.path.join(MAIN_OUT_PATH, PLANET + ".osm.pbf")
PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom")
PLANET_COASTS_RAWGEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.rawgeom")
if DEBUG:
PLANET_URL = "http://osmz.ru/mwm/islands/islands.o5m"
PLANET_MD5_URL = "https://cloud.mail.ru/public/5v2F/f7cSaEXBC"

View file

@ -1,239 +1,239 @@
""""
This file contains some decorators that define stages.
There are two main types of stages:
1. stage - a high level stage
2. country_stage - a stage that applies to countries files(*.mwm).
country_stage might be inside stage. There are country stages inside mwm_stage.
mwm_stage is only one stage that contains country_stages.
"""
import datetime
import logging
import os
import shutil
import subprocess
import time
from abc import ABC
from abc import abstractmethod
from typing import AnyStr
from typing import List
from typing import Optional
from typing import Type
from typing import Union
from . import settings
from .env import WORLD_NAME, WORLDS_NAMES
from .gen_tool import run_gen_tool
from .osmtools import osmconvert, osmupdate
from ..utils.file import download_file, is_verified
from ..utils.file import symlink_force
from ..utils.md5 import write_md5sum, md5
from .env import Env
from .status import Status
from ..utils.algo import camel_case_split
from ..utils.log import DummyObject
from ..utils.log import create_file_logger
logger = logging.getLogger("maps_generator")
def download_planet(planet):
download_file(settings.PLANET_URL, planet)
download_file(settings.PLANET_MD5_URL, md5(planet))
class Stage(ABC):
need_planet_lock = False
need_build_lock = False
is_helper = False
is_mwm_stage = False
is_production_only = False
def __init__(self, **args):
self.args = args
def __call__(self, env: Env):
return self.apply(env, **self.args)
@abstractmethod
def apply(self, *args, **kwargs):
pass
def convert_planet(tool, in_planet, out_planet, output=subprocess.DEVNULL,
error=subprocess.DEVNULL):
osmconvert(tool, in_planet, out_planet, output=output, error=error)
write_md5sum(out_planet, md5(out_planet))
def get_stage_name(stage: Union[Type[Stage], Stage]) -> AnyStr:
n = stage.__class__.__name__ if isinstance(stage, Stage) else stage.__name__
return "_".join(w.lower() for w in camel_case_split(n))
def stage_download_and_convert_planet(env, force_download, **kwargs):
if force_download or not is_verified(settings.PLANET_PBF):
download_planet(settings.PLANET_PBF)
class Stages:
"""Stages class is used for storing all stages."""
convert_planet(env[settings.OSM_TOOL_CONVERT],
settings.PLANET_PBF, settings.PLANET_O5M,
output=env.get_subprocess_out(),
error=env.get_subprocess_out())
os.remove(settings.PLANET_PBF)
os.remove(md5(settings.PLANET_PBF))
def __init__(self):
self.mwm_stage: Optional[Type[Stage]] = None
self.countries_stages: List[Type[Stage]] = []
self.stages: List[Type[Stage]] = []
self.helper_stages: List[Type[Stage]] = []
def set_mwm_stage(self, stage: Type[Stage]):
assert self.mwm_stage is None
self.mwm_stage = stage
def add_helper_stage(self, stage: Type[Stage]):
self.helper_stages.append(stage)
def add_country_stage(self, stage: Type[Stage]):
self.countries_stages.append(stage)
def add_stage(self, stage: Type[Stage]):
self.stages.append(stage)
def get_visible_stages_names(self) -> List[AnyStr]:
"""Returns all stages names except helper stages names."""
stages = []
for s in self.stages:
stages.append(get_stage_name(s))
if s == self.mwm_stage:
stages += [get_stage_name(st) for st in self.countries_stages]
return stages
def is_valid_stage_name(self, stage_name) -> bool:
return get_stage_name(self.mwm_stage) == stage_name or any(
any(stage_name == get_stage_name(x) for x in c)
for c in [self.countries_stages, self.stages, self.helper_stages]
)
def stage_update_planet(env, **kwargs):
tmp = settings.PLANET_O5M + ".tmp"
osmupdate(env[settings.OSM_TOOL_UPDATE], settings.PLANET_O5M, tmp,
output=env.get_subprocess_out(),
error=env.get_subprocess_out(),
**kwargs)
os.remove(settings.PLANET_O5M)
os.rename(tmp, settings.PLANET_O5M)
write_md5sum(settings.PLANET_O5M, md5(settings.PLANET_O5M))
# A global variable stage contains all possible stages.
stages = Stages()
def stage_preprocess(env, **kwargs):
run_gen_tool(env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
intermediate_data_path=env.intermediate_path,
osm_file_type="o5m",
osm_file_name=settings.PLANET_O5M,
node_storage=env.node_storage,
user_resource_path=env.user_resource_path,
preprocess=True,
**kwargs)
def outer_stage(stage: Type[Stage]) -> Type[Stage]:
"""It's decorator that defines high level stage."""
if stage.is_helper:
stages.add_helper_stage(stage)
else:
stages.add_stage(stage)
if stage.is_mwm_stage:
stages.set_mwm_stage(stage)
def new_apply(method):
def apply(obj: Stage, env: Env, *args, **kwargs):
name = get_stage_name(obj)
stage_formatted = " ".join(name.split("_")).capitalize()
logfile = os.path.join(env.paths.log_path, f"{name}.log")
log_handler = logging.FileHandler(logfile)
logger.addHandler(log_handler)
if not env.is_skipped_stage_name(name):
logger.info(f"{stage_formatted} was not accepted.")
logger.removeHandler(log_handler)
return
main_status = env.main_status
main_status.init(env.paths.main_status_path, name)
if main_status.need_skip():
logger.warning(f"{stage_formatted} was skipped.")
logger.removeHandler(log_handler)
return
main_status.update_status()
logger.info(f"{stage_formatted}: start ...")
t = time.time()
env.set_subprocess_out(log_handler.stream)
method(obj, env, *args, **kwargs)
d = time.time() - t
logger.info(
f"{stage_formatted}: finished in "
f"{str(datetime.timedelta(seconds=d))}"
)
logger.removeHandler(log_handler)
return apply
stage.apply = new_apply(stage.apply)
return stage
def stage_features(env, **kwargs):
extra = {}
if env.production:
extra["add_ads"] = True
if any(x not in WORLDS_NAMES for x in env.countries):
extra["generate_packed_borders"] = True
if any(x == WORLD_NAME for x in env.countries):
extra["generate_world"] = True
if env.build_all_countries:
extra["have_borders_for_whole_world"] = True
def country_stage_status(stage: Type[Stage]) -> Type[Stage]:
"""It's helper decorator that works with status file."""
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
data_path=env.data_path,
intermediate_data_path=env.intermediate_path,
osm_file_type="o5m",
osm_file_name=settings.PLANET_O5M,
node_storage=env.node_storage,
user_resource_path=env.user_resource_path,
dump_cities_boundaries=True,
cities_boundaries_data=env.cities_boundaries_path,
generate_features=True,
**extra,
**kwargs
)
def new_apply(method):
def apply(obj: Stage, env: Env, country: AnyStr, *args, **kwargs):
name = get_stage_name(obj)
_logger = DummyObject()
countries_meta = env.countries_meta
if "logger" in countries_meta[country]:
_logger, _ = countries_meta[country]["logger"]
stage_formatted = " ".join(name.split("_")).capitalize()
if not env.is_skipped_stage_name(name):
_logger.info(f"{stage_formatted} was not accepted.")
return
if "status" not in countries_meta[country]:
countries_meta[country]["status"] = Status()
status = countries_meta[country]["status"]
status_file = os.path.join(env.paths.status_path, f"{country}.status")
status.init(status_file, name)
if status.need_skip():
_logger.warning(f"{stage_formatted} was skipped.")
return
status.update_status()
method(obj, env, country, *args, **kwargs)
return apply
stage.apply = new_apply(stage.apply)
return stage
def run_gen_tool_with_recovery_country(env, *args, **kwargs):
if "data_path" not in kwargs or "output" not in kwargs:
logger.warning("The call run_gen_tool() will be without recovery.")
run_gen_tool(*args, **kwargs)
prev_data_path = kwargs["data_path"]
mwm = f"{kwargs['output']}.mwm"
osm2ft = f"{mwm}.osm2ft"
kwargs["data_path"] = env.draft_path
symlink_force(os.path.join(prev_data_path, osm2ft),
os.path.join(env.draft_path, osm2ft))
shutil.copy(os.path.join(prev_data_path, mwm),
os.path.join(env.draft_path, mwm))
run_gen_tool(*args, **kwargs)
shutil.move(os.path.join(env.draft_path, mwm),
os.path.join(prev_data_path, mwm))
kwargs["data_path"] = prev_data_path
def country_stage_log(stage: Type[Stage]) -> Type[Stage]:
"""It's helper decorator that works with log file."""
def new_apply(method):
def apply(obj: Stage, env: Env, country: AnyStr, *args, **kwargs):
name = get_stage_name(obj)
log_file = os.path.join(env.paths.log_path, f"{country}.log")
countries_meta = env.countries_meta
if "logger" not in countries_meta[country]:
countries_meta[country]["logger"] = create_file_logger(log_file)
_logger, log_handler = countries_meta[country]["logger"]
stage_formatted = " ".join(name.split("_")).capitalize()
_logger.info(f"{stage_formatted}: start ...")
t = time.time()
env.set_subprocess_out(log_handler.stream, country)
method(obj, env, country, *args, logger=_logger, **kwargs)
d = time.time() - t
_logger.info(
f"{stage_formatted}: finished in "
f"{str(datetime.timedelta(seconds=d))}"
)
return apply
stage.apply = new_apply(stage.apply)
return stage
def _generate_common_index(env, country, **kwargs):
run_gen_tool(env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.mwm_path,
intermediate_data_path=env.intermediate_path,
user_resource_path=env.user_resource_path,
node_storage=env.node_storage,
planet_version=env.planet_version,
generate_geometry=True,
generate_index=True,
output=country,
**kwargs)
def country_stage(stage: Type[Stage]) -> Type[Stage]:
"""It's decorator that defines country stage."""
if stage.is_helper:
stages.add_helper_stage(stage)
else:
stages.add_country_stage(stage)
return country_stage_log(country_stage_status(stage))
def stage_index_world(env, country, **kwargs):
_generate_common_index(env, country,
generate_search_index=True,
cities_boundaries_data=env.cities_boundaries_path,
generate_cities_boundaries=True,
**kwargs)
def mwm_stage(stage: Type[Stage]) -> Type[Stage]:
stage.is_mwm_stage = True
return stage
def stage_cities_ids_world(env, country, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.mwm_path,
user_resource_path=env.user_resource_path,
output=country,
generate_cities_ids=True,
**kwargs
)
def planet_lock(stage: Type[Stage]) -> Type[Stage]:
stage.need_planet_lock = True
return stage
def stage_index(env, country, **kwargs):
_generate_common_index(env, country,
generate_search_index=True,
**kwargs)
def build_lock(stage: Type[Stage]) -> Type[Stage]:
stage.need_build_lock = True
return stage
def stage_coastline_index(env, country, **kwargs):
_generate_common_index(env, country, **kwargs)
def production_only(stage: Type[Stage]) -> Type[Stage]:
stage.is_production_only = True
return stage
def stage_ugc(env, country, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.mwm_path,
intermediate_data_path=env.intermediate_path,
user_resource_path=env.user_resource_path,
ugc_data=env.ugc_path,
output=country,
**kwargs
)
def stage_popularity(env, country, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.mwm_path,
intermediate_data_path=env.intermediate_path,
user_resource_path=env.user_resource_path,
popular_places_data=env.popularity_path,
generate_popular_places=True,
output=country,
**kwargs
)
def stage_srtm(env, country, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.mwm_path,
intermediate_data_path=env.intermediate_path,
user_resource_path=env.user_resource_path,
srtm_path=env.srtm_path,
output=country,
**kwargs
)
def stage_routing(env, country, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.mwm_path,
intermediate_data_path=env.intermediate_path,
user_resource_path=env.user_resource_path,
cities_boundaries_data=env.cities_boundaries_path,
generate_maxspeed=True,
make_city_roads=True,
make_cross_mwm=True,
disable_cross_mwm_progress=True,
generate_cameras=True,
make_routing_index=True,
generate_traffic_keys=True,
output=country,
**kwargs
)
def stage_routing_transit(env, country, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.mwm_path,
intermediate_data_path=env.intermediate_path,
user_resource_path=env.user_resource_path,
transit_path=env.transit_path,
make_transit_cross_mwm=True,
output=country,
**kwargs
)
def helper_stage(stage: Type[Stage]) -> Type[Stage]:
stage.is_helper = True
return stage

View file

@ -0,0 +1,421 @@
""""
This file contains possible stages that maps_generator can run.
Some algorithms suppose a maps genration processes looks like:
stage1, ..., stage_mwm[country_stage_1, ..., country_stage_M], ..., stageN
Only stage_mwm can contain country_
"""
import datetime
import json
import logging
import multiprocessing
import os
import shutil
import tarfile
from collections import defaultdict
from functools import partial
from multiprocessing.pool import ThreadPool
from typing import Type
from descriptions.descriptions_downloader import check_and_get_checker
from descriptions.descriptions_downloader import download_from_wikidata_tags
from descriptions.descriptions_downloader import download_from_wikipedia_tags
from post_generation.hierarchy_to_countries import hierarchy_to_countries
from post_generation.inject_promo_ids import inject_promo_ids
from post_generation.localads_mwm_to_csv import create_csv
from . import coastline
from . import settings
from . import steps
from .env import Env
from .env import WORLDS_NAMES
from .env import WORLD_COASTS_NAME
from .env import WORLD_NAME
from .exceptions import BadExitStatusError
from .gen_tool import run_gen_tool
from .stages import Stage
from .stages import build_lock
from .stages import country_stage
from .stages import get_stage_name
from .stages import helper_stage
from .stages import mwm_stage
from .stages import outer_stage
from .stages import planet_lock
from .stages import production_only
from .statistics import get_stages_info
from .statistics import make_stats
from ..utils.file import download_files
from ..utils.file import is_verified
logger = logging.getLogger("maps_generator")
def is_skipped(env: Env, stage: Type[Stage]) -> bool:
return env.is_skipped_stage_name(get_stage_name(stage))
@outer_stage
class StageDownloadExternal(Stage):
def apply(self, env: Env):
download_files(
{settings.SUBWAY_URL: env.paths.subway_path,}
)
@outer_stage
@production_only
class StageDownloadProductionExternal(Stage):
def apply(self, env: Env):
download_files(
{
settings.UGC_URL: env.paths.ugc_path,
settings.HOTELS_URL: env.paths.hotels_path,
settings.PROMO_CATALOG_CITIES_URL: env.paths.promo_catalog_cities_path,
settings.PROMO_CATALOG_COUNTRIES_URL: env.paths.promo_catalog_countries_path,
settings.POPULARITY_URL: env.paths.popularity_path,
settings.FOOD_URL: env.paths.food_paths,
settings.FOOD_TRANSLATIONS_URL: env.paths.food_translations_path,
settings.UK_POSTCODES_URL: env.paths.uk_postcodes_path,
settings.US_POSTCODES_URL: env.paths.us_postcodes_path,
}
)
@outer_stage
@planet_lock
class StageDownloadAndConvertPlanet(Stage):
def apply(self, env: Env, **kwargs):
force_download = not is_skipped(env, StageUpdatePlanet)
if force_download or not is_verified(settings.PLANET_O5M):
steps.step_download_and_convert_planet(
env, force_download=force_download, **kwargs
)
@outer_stage
@planet_lock
class StageUpdatePlanet(Stage):
def apply(self, env: Env, **kwargs):
if not settings.DEBUG:
steps.step_update_planet(env, **kwargs)
@outer_stage
@build_lock
class StageCoastline(Stage):
def apply(self, env: Env, use_old_if_fail=True):
coasts_geom = "WorldCoasts.geom"
coasts_rawgeom = "WorldCoasts.rawgeom"
try:
coastline.make_coastline(env)
except BadExitStatusError as e:
if not use_old_if_fail:
raise e
logger.info("Build costs failed. Try to download the costs...")
download_files(
{
settings.PLANET_COASTS_GEOM_URL: os.path.join(
env.paths.coastline_path, coasts_geom
),
settings.PLANET_COASTS_RAWGEOM_URL: os.path.join(
env.paths.coastline_path, coasts_rawgeom
),
}
)
for f in [coasts_geom, coasts_rawgeom]:
path = os.path.join(env.paths.coastline_path, f)
shutil.copy2(path, env.paths.intermediate_data_path)
@outer_stage
@build_lock
class StagePreprocess(Stage):
def apply(self, env: Env, **kwargs):
steps.step_preprocess(env, **kwargs)
@outer_stage
@build_lock
class StageFeatures(Stage):
def apply(self, env: Env):
extra = {}
if is_skipped(env, StageDescriptions):
extra["idToWikidata"] = env.paths.id_to_wikidata_path
if is_skipped(env, StageDownloadProductionExternal):
extra["booking_data"] = env.paths.hotels_path
extra["promo_catalog_cities"] = env.paths.promo_catalog_cities_path
extra["popular_places_data"] = env.paths.popularity_path
extra["brands_data"] = env.paths.food_paths
extra["brands_translations_data"] = env.paths.food_translations_path
if is_skipped(env, StageCoastline):
extra["emit_coasts"] = True
steps.step_features(env, **extra)
if os.path.exists(env.paths.packed_polygons_path):
shutil.copy2(env.paths.packed_polygons_path, env.paths.mwm_path)
@outer_stage
@build_lock
@production_only
@helper_stage
class StageDownloadDescriptions(Stage):
def apply(self, env: Env):
if not is_skipped(env, StageDescriptions):
return
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
intermediate_data_path=env.paths.intermediate_data_path,
user_resource_path=env.paths.user_resource_path,
dump_wikipedia_urls=env.paths.wiki_url_path,
idToWikidata=env.paths.id_to_wikidata_path,
)
langs = ("en", "ru", "es", "fr", "de")
checker = check_and_get_checker(env.paths.popularity_path)
download_from_wikipedia_tags(
env.paths.wiki_url_path, env.paths.descriptions_path, langs, checker
)
download_from_wikidata_tags(
env.paths.id_to_wikidata_path, env.paths.descriptions_path, langs, checker
)
@outer_stage
@build_lock
@mwm_stage
class StageMwm(Stage):
def apply(self, env: Env):
def build(country):
StageIndex(country=country)(env)
StageUgc(country=country)(env)
StagePopularity(country=country)(env)
StageSrtm(country=country)(env)
StageDescriptions(country=country)(env)
StageRouting(country=country)(env)
StageRoutingTransit(country=country)(env)
env.finish_mwm(country)
def build_world(country):
StageIndex(country=country)(env)
StageCitiesIdsWorld(country=country)(env)
env.finish_mwm(country)
def build_world_coasts(country):
StageIndex(country=country)(env)
env.finish_mwm(country)
specific = {WORLD_NAME: build_world, WORLD_COASTS_NAME: build_world_coasts}
names = env.get_tmp_mwm_names()
with ThreadPool() as pool:
pool.map(
lambda c: specific[c](c) if c in specific else build(c),
names,
chunksize=1,
)
@country_stage
@build_lock
class StageIndex(Stage):
def apply(self, env: Env, country, **kwargs):
if country == WORLD_NAME:
steps.step_index_world(env, country, **kwargs)
elif country == WORLD_COASTS_NAME:
steps.step_coastline_index(env, country, **kwargs)
else:
extra = {}
if is_skipped(env, StageDownloadProductionExternal):
extra["uk_postcodes_dataset"] = env.paths.uk_postcodes_path
extra["us_postcodes_dataset"] = env.paths.us_postcodes_path
steps.step_index(env, country, **kwargs, **extra)
@country_stage
@build_lock
@production_only
class StageCitiesIdsWorld(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_cities_ids_world(env, country, **kwargs)
@country_stage
@build_lock
@production_only
class StageUgc(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_ugc(env, country, **kwargs)
@country_stage
@build_lock
@production_only
class StagePopularity(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_popularity(env, country, **kwargs)
@country_stage
@build_lock
@production_only
class StageSrtm(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_srtm(env, country, **kwargs)
@country_stage
@build_lock
@production_only
class StageDescriptions(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_description(env, country, **kwargs)
@country_stage
@build_lock
class StageRouting(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_routing(env, country, **kwargs)
@country_stage
@build_lock
class StageRoutingTransit(Stage):
def apply(self, env: Env, country, **kwargs):
steps.step_routing_transit(env, country, **kwargs)
@outer_stage
@build_lock
class StageCountriesTxt(Stage):
def apply(self, env: Env):
countries = hierarchy_to_countries(
env.paths.old_to_new_path,
env.paths.borders_to_osm_path,
env.paths.countries_synonyms_path,
env.paths.hierarchy_path,
env.paths.mwm_path,
env.paths.mwm_version,
)
if is_skipped(env, StageDownloadProductionExternal):
countries_json = json.loads(countries)
inject_promo_ids(
countries_json,
env.paths.promo_catalog_cities_path,
env.paths.promo_catalog_countries_path,
env.paths.mwm_path,
env.paths.types_path,
env.paths.mwm_path,
)
countries = json.dumps(countries_json, ensure_ascii=True, indent=1)
with open(env.paths.counties_txt_path, "w") as f:
f.write(countries)
@outer_stage
@build_lock
class StageExternalResources(Stage):
def apply(self, env: Env):
black_list = {"00_roboto_regular.ttf"}
resources = [
os.path.join(env.paths.user_resource_path, file)
for file in os.listdir(env.paths.user_resource_path)
if file.endswith(".ttf") and file not in black_list
]
for ttf_file in resources:
shutil.copy2(ttf_file, env.paths.mwm_path)
for file in os.listdir(env.paths.mwm_path):
if file.startswith(WORLD_NAME) and file.endswith(".mwm"):
resources.append(os.path.join(env.paths.mwm_path, file))
resources.sort()
with open(env.paths.external_resources_path, "w") as f:
for resource in resources:
fd = os.open(resource, os.O_RDONLY)
f.write(f"{os.path.basename(resource)} {os.fstat(fd).st_size}\n")
@outer_stage
@build_lock
@production_only
class StageLocalads(Stage):
def apply(self, env: Env):
create_csv(
env.paths.localads_path,
env.paths.mwm_path,
env.paths.mwm_path,
env.paths.types_path,
env.mwm_version,
multiprocessing.cpu_count(),
)
with tarfile.open(f"{env.paths.localads_path}.tar.gz", "w:gz") as tar:
for filename in os.listdir(env.paths.localads_path):
tar.add(
os.path.join(env.paths.localads_path, filename), arcname=filename
)
@outer_stage
@build_lock
class StageStatistics(Stage):
def apply(self, env: Env):
result = defaultdict(lambda: defaultdict(dict))
def stage_mwm_statistics(env: Env, country, **kwargs):
stats_tmp = os.path.join(env.paths.draft_path, f"{country}.stat")
with open(os.devnull, "w") as dev_null:
with open(stats_tmp, "w") as f:
steps.run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=f,
err=dev_null,
data_path=env.paths.mwm_path,
user_resource_path=env.paths.user_resource_path,
type_statistics=True,
output=country,
**kwargs,
)
result["countries"][country]["types"] = make_stats(
settings.STATS_TYPES_CONFIG, stats_tmp
)
names = env.get_tmp_mwm_names()
countries = filter(lambda x: x not in WORLDS_NAMES, names)
with ThreadPool() as pool:
pool.map(partial(stage_mwm_statistics, env), countries)
steps_info = get_stages_info(env.paths.log_path, {"statistics"})
result["steps"] = steps_info["steps"]
for c in steps_info["countries"]:
result["countries"][c]["steps"] = steps_info["countries"][c]
def default(o):
if isinstance(o, datetime.timedelta):
return str(o)
with open(os.path.join(env.paths.stats_path, "stats.json"), "w") as f:
json.dump(
result, f, ensure_ascii=False, sort_keys=True, indent=2, default=default
)
@outer_stage
@build_lock
class StageCleanup(Stage):
def apply(self, env: Env):
logger.info(
f"osm2ft files will be moved from {env.paths.build_path} "
f"to {env.paths.osm2ft_path}."
)
for x in os.listdir(env.paths.mwm_path):
p = os.path.join(env.paths.mwm_path, x)
if os.path.isfile(p) and x.endswith(".mwm.osm2ft"):
shutil.move(p, os.path.join(env.paths.osm2ft_path, x))
logger.info(f"{env.paths.draft_path} will be removed.")
shutil.rmtree(env.paths.draft_path)

View file

@ -1,23 +1,26 @@
import re
import os
import datetime
import os
import re
from collections import defaultdict
from .exceptions import ParseError
RE_STAT = re.compile(
r"(?:\d+\. )?([\w:|-]+?)\|: "
r"size = \d+; "
r"count = (\d+); "
r"length = ([0-9.e+-]+) m; "
r"area = ([0-9.e+-]+) m²; "
r"names = (\d+)\s*"
)
RE_STAT = re.compile(r"(?:\d+\. )?([\w:|-]+?)\|: "
r"size = \d+; "
r"count = (\d+); "
r"length = ([0-9.e+-]+) m; "
r"area = ([0-9.e+-]+) m²; "
r"names = (\d+)\s*")
RE_TIME_DELTA = re.compile(r'^(?:(?P<days>-?\d+) (days?, )?)?'
r'((?:(?P<hours>-?\d+):)(?=\d+:\d+))?'
r'(?:(?P<minutes>-?\d+):)?'
r'(?P<seconds>-?\d+)'
r'(?:\.(?P<microseconds>\d{1,6})\d{0,6})?$')
RE_TIME_DELTA = re.compile(
r"^(?:(?P<days>-?\d+) (days?, )?)?"
r"((?:(?P<hours>-?\d+):)(?=\d+:\d+))?"
r"(?:(?P<minutes>-?\d+):)?"
r"(?P<seconds>-?\d+)"
r"(?:\.(?P<microseconds>\d{1,6})\d{0,6})?$"
)
RE_FINISH_STAGE = re.compile(r"(.*)Stage (.+): finished in (.+)$")
@ -26,13 +29,15 @@ def read_stat(f):
stats = []
for line in f:
m = RE_STAT.match(line)
stats.append({
"name": m.group(1).replace("|", "-"),
"cnt": int(m.group(2)),
"len": float(m.group(3)),
"area": float(m.group(4)),
"names": int(m.group(5))
})
stats.append(
{
"name": m.group(1).replace("|", "-"),
"cnt": int(m.group(2)),
"len": float(m.group(3)),
"area": float(m.group(4)),
"names": int(m.group(5)),
}
)
return stats

View file

@ -1,26 +1,29 @@
import os
from typing import AnyStr
class Status:
def __init__(self):
self.stat_path = None
self.stat_next = None
"""Status is used for recovering and continuation maps generation."""
def __init__(self, stat_path: AnyStr = None, stat_next: AnyStr = None):
self.stat_path = stat_path
self.stat_next = stat_next
self.stat_saved = None
self.find = False
def init(self, stat_path, stat_next):
def init(self, stat_path: AnyStr, stat_next: AnyStr):
self.stat_path = stat_path
self.stat_next = stat_next
if os.path.exists(self.stat_path) and os.path.isfile(self.stat_path):
with open(self.stat_path) as status:
self.stat_saved = status.read()
if not self.find:
self.find = self.stat_saved is None or not self.need_skip()
self.find = not self.stat_saved or not self.need_skip()
def need_skip(self):
def need_skip(self) -> bool:
if self.find:
return False
return self.stat_saved is not None and self.stat_next != self.stat_saved
return self.stat_saved and self.stat_next != self.stat_saved
def update_status(self):
with open(self.stat_path, "w") as status:

View file

@ -0,0 +1,286 @@
"""
This file contains basic api for generator_tool and osm tools to generate maps.
"""
import logging
import os
import shutil
import subprocess
from typing import AnyStr
from . import settings
from .env import Env
from .env import PathProvider
from .env import WORLDS_NAMES
from .env import WORLD_NAME
from .env import get_all_countries_list
from .gen_tool import run_gen_tool
from .osmtools import osmconvert
from .osmtools import osmupdate
from ..utils.file import download_file
from ..utils.file import is_verified
from ..utils.file import symlink_force
from ..utils.md5 import md5
from ..utils.md5 import write_md5sum
logger = logging.getLogger("maps_generator")
def download_planet(planet: AnyStr):
download_file(settings.PLANET_URL, planet)
download_file(settings.PLANET_MD5_URL, md5(planet))
def convert_planet(
tool: AnyStr,
in_planet: AnyStr,
out_planet: AnyStr,
output=subprocess.DEVNULL,
error=subprocess.DEVNULL,
):
osmconvert(tool, in_planet, out_planet, output=output, error=error)
write_md5sum(out_planet, md5(out_planet))
def step_download_and_convert_planet(env: Env, force_download: bool, **kwargs):
if force_download or not is_verified(settings.PLANET_PBF):
download_planet(settings.PLANET_PBF)
convert_planet(
env[settings.OSM_TOOL_CONVERT],
settings.PLANET_PBF,
settings.PLANET_O5M,
output=env.get_subprocess_out(),
error=env.get_subprocess_out(),
)
os.remove(settings.PLANET_PBF)
os.remove(md5(settings.PLANET_PBF))
def step_update_planet(env: Env, **kwargs):
tmp = settings.PLANET_O5M + ".tmp"
osmupdate(
env[settings.OSM_TOOL_UPDATE],
settings.PLANET_O5M,
tmp,
output=env.get_subprocess_out(),
error=env.get_subprocess_out(),
**kwargs,
)
os.remove(settings.PLANET_O5M)
os.rename(tmp, settings.PLANET_O5M)
write_md5sum(settings.PLANET_O5M, md5(settings.PLANET_O5M))
def step_preprocess(env: Env, **kwargs):
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
intermediate_data_path=env.paths.intermediate_data_path,
osm_file_type="o5m",
osm_file_name=settings.PLANET_O5M,
node_storage=env.node_storage,
user_resource_path=env.paths.user_resource_path,
preprocess=True,
**kwargs,
)
def step_features(env: Env, **kwargs):
extra = {}
if env.production:
extra["add_ads"] = True
if any(x not in WORLDS_NAMES for x in env.countries):
extra["generate_packed_borders"] = True
if any(x == WORLD_NAME for x in env.countries):
extra["generate_world"] = True
if len(env.countries) == len(get_all_countries_list(PathProvider.borders_path())):
extra["have_borders_for_whole_world"] = True
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
data_path=env.paths.data_path,
intermediate_data_path=env.paths.intermediate_data_path,
osm_file_type="o5m",
osm_file_name=settings.PLANET_O5M,
node_storage=env.node_storage,
user_resource_path=env.paths.user_resource_path,
dump_cities_boundaries=True,
cities_boundaries_data=env.paths.cities_boundaries_path,
generate_features=True,
**extra,
**kwargs,
)
def run_gen_tool_with_recovery_country(env: Env, *args, **kwargs):
if "data_path" not in kwargs or "output" not in kwargs:
logger.warning("The call run_gen_tool() will be without recovery.")
run_gen_tool(*args, **kwargs)
prev_data_path = kwargs["data_path"]
mwm = f"{kwargs['output']}.mwm"
osm2ft = f"{mwm}.osm2ft"
kwargs["data_path"] = env.paths.draft_path
symlink_force(
os.path.join(prev_data_path, osm2ft), os.path.join(env.paths.draft_path, osm2ft)
)
shutil.copy(
os.path.join(prev_data_path, mwm), os.path.join(env.paths.draft_path, mwm)
)
run_gen_tool(*args, **kwargs)
shutil.move(
os.path.join(env.paths.draft_path, mwm), os.path.join(prev_data_path, mwm)
)
kwargs["data_path"] = prev_data_path
def _generate_common_index(env: Env, country: AnyStr, **kwargs):
run_gen_tool(
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
user_resource_path=env.paths.user_resource_path,
node_storage=env.node_storage,
planet_version=env.planet_version,
generate_geometry=True,
generate_index=True,
output=country,
**kwargs,
)
def step_index_world(env: Env, country: AnyStr, **kwargs):
_generate_common_index(
env,
country,
generate_search_index=True,
cities_boundaries_data=env.paths.cities_boundaries_path,
generate_cities_boundaries=True,
**kwargs,
)
def step_cities_ids_world(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
user_resource_path=env.paths.user_resource_path,
output=country,
generate_cities_ids=True,
**kwargs,
)
def step_index(env: Env, country: AnyStr, **kwargs):
_generate_common_index(env, country, generate_search_index=True, **kwargs)
def step_coastline_index(env: Env, country: AnyStr, **kwargs):
_generate_common_index(env, country, **kwargs)
def step_ugc(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
user_resource_path=env.paths.user_resource_path,
ugc_data=env.paths.ugc_path,
output=country,
**kwargs,
)
def step_popularity(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
user_resource_path=env.paths.user_resource_path,
popular_places_data=env.paths.popularity_path,
generate_popular_places=True,
output=country,
**kwargs,
)
def step_srtm(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
user_resource_path=env.paths.user_resource_path,
srtm_path=env.paths.srtm_path(),
output=country,
**kwargs,
)
def step_description(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
user_resource_path=env.paths.user_resource_path,
wikipedia_pages=env.paths.descriptions_path,
idToWikidata=env.paths.id_to_wikidata_path,
output=country,
**kwargs,
)
def step_routing(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
user_resource_path=env.paths.user_resource_path,
cities_boundaries_data=env.paths.cities_boundaries_path,
generate_maxspeed=True,
make_city_roads=True,
make_cross_mwm=True,
disable_cross_mwm_progress=True,
generate_cameras=True,
make_routing_index=True,
generate_traffic_keys=True,
output=country,
**kwargs,
)
def step_routing_transit(env: Env, country: AnyStr, **kwargs):
run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.paths.mwm_path,
intermediate_data_path=env.paths.intermediate_data_path,
user_resource_path=env.paths.user_resource_path,
transit_path=env.paths.transit_path,
make_transit_cross_mwm=True,
output=country,
**kwargs,
)

View file

@ -1,416 +1,51 @@
import datetime
import json
import logging
import multiprocessing
import os
import shutil
import tarfile
from collections import defaultdict
from functools import partial
from multiprocessing.pool import ThreadPool
from typing import AnyStr
from typing import Optional
from descriptions.descriptions_downloader import (check_and_get_checker,
download_from_wikipedia_tags,
download_from_wikidata_tags)
from filelock import FileLock
from post_generation.hierarchy_to_countries import hierarchy_to_countries
from post_generation.inject_promo_ids import inject_promo_ids
from post_generation.localads_mwm_to_csv import create_csv
from .generator import stages
from .generator import coastline
from .generator import settings
from .generator.decorators import stage, country_stage, country_stage_log
from .generator.env import (planet_lock_file, build_lock_file,
WORLD_COASTS_NAME, WORLD_NAME, WORLDS_NAMES)
from .generator.exceptions import ContinueError, BadExitStatusError
from .generator.gen_tool import run_gen_tool
from .generator.statistics import make_stats, get_stages_info
from .utils.file import is_verified, download_file
from .generator import stages_declaration as sd
from .generator.env import Env
from .generator.generation import Generation
logger = logging.getLogger("maps_generator")
def download_external(url_to_path: dict):
for k, v in url_to_path.items():
download_file(k, v)
@stage
def stage_download_and_convert_planet(env, **kwargs):
force_download = not env.is_accepted_stage(stage_update_planet)
if force_download or not is_verified(settings.PLANET_O5M):
stages.stage_download_and_convert_planet(env, force_download=force_download,
**kwargs)
@stage
def stage_update_planet(env, **kwargs):
if not settings.DEBUG:
stages.stage_update_planet(env, **kwargs)
@stage
def stage_download_external(env):
download_external({
settings.SUBWAY_URL: env.subway_path,
})
@stage
def stage_download_production_external(env):
download_external({
settings.UGC_URL: env.ugc_path,
settings.HOTELS_URL: env.hotels_path,
settings.PROMO_CATALOG_CITIES_URL: env.promo_catalog_cities_path,
settings.PROMO_CATALOG_COUNTRIES_URL: env.promo_catalog_countries_path,
settings.POPULARITY_URL: env.popularity_path,
settings.FOOD_URL: env.food_paths,
settings.FOOD_TRANSLATIONS_URL: env.food_translations_path,
settings.UK_POSTCODES_URL: env.uk_postcodes_path,
settings.US_POSTCODES_URL: env.us_postcodes_path,
})
@stage
def stage_preprocess(env, **kwargs):
stages.stage_preprocess(env, **kwargs)
@stage
def stage_features(env):
extra = {}
if env.is_accepted_stage(stage_descriptions):
extra["idToWikidata"] = env.id_to_wikidata_path
if env.is_accepted_stage(stage_download_production_external):
extra["booking_data"] = env.hotels_path
extra["promo_catalog_cities"] = env.promo_catalog_cities_path
extra["popular_places_data"] = env.popularity_path
extra["brands_data"] = env.food_paths
extra["brands_translations_data"] = env.food_translations_path
if env.is_accepted_stage(stage_coastline):
extra["emit_coasts"]=True
stages.stage_features(env, **extra)
if os.path.exists(env.packed_polygons_path):
shutil.copy2(env.packed_polygons_path, env.mwm_path)
@stage
def stage_coastline(env):
coasts_geom = "WorldCoasts.geom"
coasts_rawgeom = "WorldCoasts.rawgeom"
try:
coastline.make_coastline(env)
except BadExitStatusError:
logger.info("Build costs failed. Try to download the costs...")
download_external({
settings.PLANET_COASTS_GEOM_URL:
os.path.join(env.coastline_path, coasts_geom),
settings.PLANET_COASTS_RAWGEOM_URL:
os.path.join(env.coastline_path, coasts_rawgeom)
})
for f in [coasts_geom, coasts_rawgeom]:
path = os.path.join(env.coastline_path, f)
shutil.copy2(path, env.intermediate_path)
@country_stage
def stage_index(env, country, **kwargs):
if country == WORLD_NAME:
stages.stage_index_world(env, country, **kwargs)
elif country == WORLD_COASTS_NAME:
stages.stage_coastline_index(env, country, **kwargs)
else:
extra = {}
if env.is_accepted_stage(stage_download_production_external):
extra["uk_postcodes_dataset"] = env.uk_postcodes_path
extra["us_postcodes_dataset"] = env.us_postcodes_path
stages.stage_index(env, country, **kwargs, **extra)
@country_stage
def stage_cities_ids_world(env, country, **kwargs):
stages.stage_cities_ids_world(env, country, **kwargs)
@country_stage
def stage_ugc(env, country, **kwargs):
stages.stage_ugc(env, country, **kwargs)
@country_stage
def stage_popularity(env, country, **kwargs):
stages.stage_popularity(env, country, **kwargs)
@country_stage
def stage_srtm(env, country, **kwargs):
stages.stage_srtm(env, country, **kwargs)
@country_stage
def stage_routing(env, country, **kwargs):
stages.stage_routing(env, country, **kwargs)
@country_stage
def stage_routing_transit(env, country, **kwargs):
stages.stage_routing_transit(env, country, **kwargs)
@stage
def stage_mwm(env):
def build(country):
stage_index(env, country)
stage_ugc(env, country)
stage_popularity(env, country)
stage_srtm(env, country)
stage_routing(env, country)
stage_routing_transit(env, country)
env.finish_mwm(country)
def build_world(country):
stage_index(env, country)
stage_cities_ids_world(env, country)
env.finish_mwm(country)
def build_world_coasts(country):
stage_index(env, country)
env.finish_mwm(country)
specific = {
WORLD_NAME: build_world,
WORLD_COASTS_NAME: build_world_coasts
}
mwms = env.get_mwm_names()
with ThreadPool() as pool:
pool.map(lambda c: specific[c](c) if c in specific else build(c), mwms,
chunksize=1)
@stage
def stage_descriptions(env):
run_gen_tool(env.gen_tool,
out=env.get_subprocess_out(),
err=env.get_subprocess_out(),
intermediate_data_path=env.intermediate_path,
user_resource_path=env.user_resource_path,
dump_wikipedia_urls=env.wiki_url_path,
idToWikidata=env.id_to_wikidata_path)
langs = ("en", "ru", "es", "fr", "de")
checker = check_and_get_checker(env.popularity_path)
download_from_wikipedia_tags(env.wiki_url_path, env.descriptions_path,
langs, checker)
download_from_wikidata_tags(env.id_to_wikidata_path, env.descriptions_path,
langs, checker)
@country_stage_log
def stage_write_descriptions(env, country, **kwargs):
stages.run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=env.get_subprocess_out(country),
err=env.get_subprocess_out(country),
data_path=env.mwm_path,
user_resource_path=env.user_resource_path,
wikipedia_pages=env.descriptions_path,
idToWikidata=env.id_to_wikidata_path,
output=country,
**kwargs
)
mwms = env.get_mwm_names()
countries = filter(lambda x: x not in WORLDS_NAMES, mwms)
with ThreadPool() as pool:
pool.map(partial(stage_write_descriptions, env), countries)
@stage
def stage_countries_txt(env):
countries = hierarchy_to_countries(env.old_to_new_path,
env.borders_to_osm_path,
env.countries_synonyms_path,
env.hierarchy_path, env.mwm_path,
env.mwm_version)
if env.is_accepted_stage(stage_download_production_external):
countries_json = json.loads(countries)
inject_promo_ids(countries_json, env.promo_catalog_cities_path,
env.promo_catalog_countries_path, env.mwm_path,
env.types_path, env.mwm_path)
countries = json.dumps(countries_json, ensure_ascii=True, indent=1)
with open(env.counties_txt_path, "w") as f:
f.write(countries)
@stage
def stage_external_resources(env):
black_list = {"00_roboto_regular.ttf"}
resources = [os.path.join(env.user_resource_path, file)
for file in os.listdir(env.user_resource_path)
if file.endswith(".ttf") and file not in black_list]
for ttf_file in resources:
shutil.copy2(ttf_file, env.mwm_path)
for file in os.listdir(env.mwm_path):
if file.startswith(WORLD_NAME) and file.endswith(".mwm"):
resources.append(os.path.join(env.mwm_path, file))
resources.sort()
with open(env.external_resources_path, "w") as f:
for resource in resources:
fd = os.open(resource, os.O_RDONLY)
f.write(f"{os.path.basename(resource)} {os.fstat(fd).st_size}\n")
@stage
def stage_localads(env):
create_csv(env.localads_path, env.mwm_path, env.mwm_path, env.types_path,
env.mwm_version, multiprocessing.cpu_count())
with tarfile.open(f"{env.localads_path}.tar.gz", "w:gz") as tar:
for filename in os.listdir(env.localads_path):
tar.add(os.path.join(env.localads_path, filename), arcname=filename)
@stage
def stage_statistics(env):
result = defaultdict(lambda: defaultdict(dict))
@country_stage_log
def stage_mwm_statistics(env, country, **kwargs):
stats_tmp = os.path.join(env.draft_path, f"{country}.stat")
with open(stats_tmp, "w") as f:
stages.run_gen_tool_with_recovery_country(
env,
env.gen_tool,
out=f,
err=env.get_subprocess_out(country),
data_path=env.mwm_path,
user_resource_path=env.user_resource_path,
type_statistics=True,
output=country,
**kwargs
)
result["countries"][country]["types"] = \
make_stats(settings.STATS_TYPES_CONFIG, stats_tmp)
mwms = env.get_mwm_names()
countries = filter(lambda x: x not in WORLDS_NAMES, mwms)
with ThreadPool() as pool:
pool.map(partial(stage_mwm_statistics, env), countries)
stages_info = get_stages_info(env.log_path, {"statistics"})
result["stages"] = stages_info["stages"]
for c in stages_info["countries"]:
result["countries"][c]["stages"] = stages_info["countries"][c]
def default(o):
if isinstance(o, datetime.timedelta):
return str(o)
with open(os.path.join(env.stats_path, "stats.json"), "w") as f:
json.dump(result, f, ensure_ascii=False, sort_keys=True,
indent=2, default=default)
@stage
def stage_cleanup(env):
logger.info(f"osm2ft files will be moved from {env.out_path} "
f"to {env.osm2ft_path}.")
for x in os.listdir(env.mwm_path):
p = os.path.join(env.mwm_path, x)
if os.path.isfile(p) and x.endswith(".mwm.osm2ft"):
shutil.move(p, os.path.join(env.osm2ft_path, x))
logger.info(f"{env.draft_path} will be removed.")
shutil.rmtree(env.draft_path)
MWM_STAGE = stage_mwm.__name__
COUNTRIES_STAGES = [s.__name__ for s in
(stage_index, stage_ugc, stage_popularity, stage_srtm,
stage_routing, stage_routing_transit)]
STAGES = [s.__name__ for s in
(stage_download_external, stage_download_production_external,
stage_download_and_convert_planet, stage_update_planet,
stage_coastline, stage_preprocess, stage_features, stage_mwm,
stage_descriptions, stage_countries_txt, stage_external_resources,
stage_localads, stage_statistics, stage_cleanup)]
ALL_STAGES = STAGES + COUNTRIES_STAGES
def stages_as_string(*args):
return [x.__name__ for x in args]
def stage_as_string(stage):
return stage.__name__
def reset_to_stage(stage_name, env):
def set_countries_stage(n):
statuses = [os.path.join(env.status_path, f)
for f in os.listdir(env.status_path)
if os.path.isfile(os.path.join(env.status_path, f)) and
os.path.join(env.status_path, f) != env.main_status_path]
for s in statuses:
with open(s, "w") as f:
f.write(n)
_stage = f"stage_{stage_name}"
stage_mwm_index = STAGES.index(MWM_STAGE)
if _stage not in ALL_STAGES:
raise ContinueError(
f"Stage {stage_name} not in {', '.join(ALL_STAGES)}.")
if not os.path.exists(env.main_status_path):
raise ContinueError(f"Status file {env.main_status_path} not found.")
if not os.path.exists(env.status_path):
raise ContinueError(f"Status path {env.status_path} not found.")
main_status = None
if _stage in STAGES[:stage_mwm_index + 1]:
main_status = _stage
set_countries_stage(COUNTRIES_STAGES[0])
elif _stage in STAGES[stage_mwm_index + 1:]:
main_status = _stage
elif _stage in COUNTRIES_STAGES:
main_status = MWM_STAGE
set_countries_stage(_stage)
logger.info(f"New active status is {main_status}.")
with open(env.main_status_path, "w") as f:
f.write(main_status)
def generate_maps(env):
stage_download_external(env)
stage_download_production_external(env)
with FileLock(planet_lock_file(), timeout=1) as planet_lock:
stage_download_and_convert_planet(env)
stage_update_planet(env)
with FileLock(build_lock_file(env.out_path), timeout=1):
stage_coastline(env)
stage_preprocess(env)
stage_features(env)
planet_lock.release()
stage_mwm(env)
stage_descriptions(env)
stage_countries_txt(env)
stage_external_resources(env)
stage_localads(env)
stage_statistics(env)
stage_cleanup(env)
def generate_coasts(env):
with FileLock(planet_lock_file(), timeout=1) as planet_lock:
stage_download_and_convert_planet(env)
stage_update_planet(env)
with FileLock(build_lock_file(env.out_path), timeout=1):
stage_coastline(env)
planet_lock.release()
stage_cleanup(env)
def run_generation(env, stages, from_stage: Optional[AnyStr] = None):
generation = Generation(env)
for s in stages:
generation.add_stage(s)
generation.run(from_stage)
def generate_maps(env: Env, from_stage: Optional[AnyStr] = None):
""""Runs maps generation."""
stages = [
sd.StageDownloadExternal(),
sd.StageDownloadProductionExternal(),
sd.StageDownloadAndConvertPlanet(),
sd.StageUpdatePlanet(),
sd.StageCoastline(),
sd.StagePreprocess(),
sd.StageFeatures(),
sd.StageDownloadDescriptions(),
sd.StageMwm(),
sd.StageCountriesTxt(),
sd.StageExternalResources(),
sd.StageLocalads(),
sd.StageStatistics(),
sd.StageCleanup(),
]
run_generation(env, stages, from_stage)
def generate_coasts(env: Env, from_stage: Optional[AnyStr] = None):
"""Runs coasts generation."""
stages = [
sd.StageDownloadAndConvertPlanet(),
sd.StageUpdatePlanet(),
sd.StageCoastline(),
sd.StageCleanup(),
]
run_generation(env, stages, from_stage)

View file

@ -0,0 +1,14 @@
from re import finditer
def unique(s):
seen = set()
seen_add = seen.add
return [x for x in s if not (x in seen or seen_add(x))]
def camel_case_split(identifier):
matches = finditer(
".+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)", identifier
)
return [m.group(0) for m in matches]

View file

@ -1,4 +0,0 @@
def unique(s):
seen = set()
seen_add = seen.add
return [x for x in s if not (x in seen or seen_add(x))]

View file

@ -5,18 +5,22 @@ import logging
import os
import shutil
import urllib.request
from typing import AnyStr
from typing import Dict
from typing import Optional
from .md5 import md5, check_md5
from .md5 import check_md5
from .md5 import md5
logger = logging.getLogger("maps_generator")
def is_executable(fpath):
def is_executable(fpath: AnyStr) -> bool:
return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
@functools.lru_cache()
def find_executable(path, exe=None):
def find_executable(path: AnyStr, exe: Optional[AnyStr] = None) -> AnyStr:
if exe is None:
if is_executable(path):
return path
@ -29,27 +33,32 @@ def find_executable(path, exe=None):
raise FileNotFoundError(f"{exe} not found in {path}")
def download_file(url, name):
def download_file(url: AnyStr, name: AnyStr):
logger.info(f"Trying to download {name} from {url}.")
urllib.request.urlretrieve(url, name)
logger.info(f"File {name} was downloaded from {url}.")
def is_exists_file_and_md5(name):
def download_files(url_to_path: Dict[AnyStr, AnyStr]):
for k, v in url_to_path.items():
download_file(k, v)
def is_exists_file_and_md5(name: AnyStr) -> bool:
return os.path.isfile(name) and os.path.isfile(md5(name))
def is_verified(name):
def is_verified(name: AnyStr) -> bool:
return is_exists_file_and_md5(name) and check_md5(name, md5(name))
def copy_overwrite(from_path, to_path):
def copy_overwrite(from_path: AnyStr, to_path: AnyStr):
if os.path.exists(to_path):
shutil.rmtree(to_path)
shutil.copytree(from_path, to_path)
def symlink_force(target, link_name):
def symlink_force(target: AnyStr, link_name: AnyStr):
try:
os.symlink(target, link_name)
except OSError as e:

View file

@ -6,8 +6,11 @@ class DummyObject:
return lambda *args: None
def create_file_logger(file, level=logging.DEBUG,
format="[%(asctime)s] %(levelname)s %(module)s %(message)s"):
def create_file_logger(
file,
level=logging.DEBUG,
format="[%(asctime)s] %(levelname)s %(module)s %(message)s",
):
logger = logging.getLogger(file)
logger.setLevel(level)
formatter = logging.Formatter(format)