forked from organicmaps/organicmaps
[python] Added statistics.
This commit is contained in:
parent
f2edf6d870
commit
2a97d57399
8 changed files with 255 additions and 15 deletions
|
@ -8,7 +8,8 @@ from .generator.exceptions import ContinueError, SkipError, ValidationError
|
|||
from .maps_generator import (generate_maps, generate_coasts, reset_to_stage,
|
||||
ALL_STAGES, stage_download_production_external,
|
||||
stage_descriptions, stage_ugc, stage_popularity,
|
||||
stage_localads, stages_as_string)
|
||||
stage_localads, stage_statistics,
|
||||
stages_as_string)
|
||||
from .utils.collections import unique
|
||||
|
||||
logger = logging.getLogger("maps_generator")
|
||||
|
@ -25,16 +26,16 @@ def parse_options():
|
|||
nargs="?",
|
||||
type=str,
|
||||
help="Continue the last build or specified in CONTINUE from the "
|
||||
"last stopped stage.")
|
||||
"last stopped stage.")
|
||||
parser.add_argument(
|
||||
"--countries",
|
||||
type=str,
|
||||
default="",
|
||||
help="List of regions, separated by a comma or a semicolon, or path to "
|
||||
"file with regions, separated by a line break, for which maps"
|
||||
" will be built. The names of the regions can be seen "
|
||||
"in omim/data/borders. It is necessary to set names without "
|
||||
"any extension.")
|
||||
"file with regions, separated by a line break, for which maps"
|
||||
" will be built. The names of the regions can be seen "
|
||||
"in omim/data/borders. It is necessary to set names without "
|
||||
"any extension.")
|
||||
parser.add_argument(
|
||||
"--skip",
|
||||
type=str,
|
||||
|
@ -58,7 +59,7 @@ def parse_options():
|
|||
default=False,
|
||||
action="store_true",
|
||||
help="Build production maps. In another case, 'osm only maps' are built"
|
||||
" - maps without additional data and advertising.")
|
||||
" - maps without additional data and advertising.")
|
||||
return vars(parser.parse_args())
|
||||
|
||||
|
||||
|
@ -131,10 +132,13 @@ def main():
|
|||
]
|
||||
options["skip"] = options_skip
|
||||
if not options["production"]:
|
||||
options["skip"] += stages_as_string(stage_download_production_external,
|
||||
stage_ugc, stage_popularity,
|
||||
stage_descriptions,
|
||||
stage_localads)
|
||||
options["skip"] += stages_as_string(
|
||||
stage_download_production_external,
|
||||
stage_ugc, stage_popularity,
|
||||
stage_descriptions,
|
||||
stage_localads,
|
||||
stage_statistics
|
||||
)
|
||||
if not all(s in ALL_STAGES for s in options["skip"]):
|
||||
raise SkipError(f"Stages {set(options['skip']) - set(ALL_STAGES)} "
|
||||
f"not found.")
|
||||
|
|
|
@ -152,6 +152,12 @@ class Env:
|
|||
self._create_if_not_exist(path)
|
||||
return path
|
||||
|
||||
@property
def stats_path(self):
    """Return the path of the "stats" directory located under ``out_path``.

    The path is handed to ``self._create_if_not_exist`` before it is
    returned.
    """
    stats_dir = os.path.join(self.out_path, "stats")
    self._create_if_not_exist(stats_dir)
    return stats_dir
|
||||
|
||||
@property
|
||||
def types_path(self):
|
||||
return os.path.join(self.user_resource_path, "types.txt")
|
||||
|
|
|
@ -37,6 +37,7 @@ class GenTool:
|
|||
"no_ads": bool,
|
||||
"preprocess": bool,
|
||||
"split_by_polygons": bool,
|
||||
"type_statistics": bool,
|
||||
"planet_version": int,
|
||||
"booking_data": str,
|
||||
"brands_data": str,
|
||||
|
|
|
@ -50,6 +50,8 @@ SUBWAY_URL = ""
|
|||
FOOD_URL = ""
|
||||
FOOD_TRANSLATIONS_URL = ""
|
||||
|
||||
STATS_TYPES_CONFIG = ""
|
||||
|
||||
PLANET = "planet"
|
||||
|
||||
GEN_TOOL = "generator_tool"
|
||||
|
@ -120,6 +122,9 @@ FOOD_URL = _get_opt(config, "External", "FOOD_URL", FOOD_URL)
|
|||
FOOD_TRANSLATIONS_URL = _get_opt(config, "External", "FOOD_TRANSLATIONS_URL",
|
||||
FOOD_TRANSLATIONS_URL)
|
||||
|
||||
STATS_TYPES_CONFIG = _get_opt_path(config, "Stats", "STATS_TYPES_CONFIG",
|
||||
STATS_TYPES_CONFIG)
|
||||
|
||||
PLANET_O5M = os.path.join(MAIN_OUT_PATH, PLANET + ".o5m")
|
||||
PLANET_PBF = os.path.join(MAIN_OUT_PATH, PLANET + ".osm.pbf")
|
||||
PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom")
|
||||
|
|
116
tools/python/maps_generator/generator/statistics.py
Normal file
116
tools/python/maps_generator/generator/statistics.py
Normal file
|
@ -0,0 +1,116 @@
|
|||
import re
|
||||
import os
|
||||
import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
# One record of the type-statistics text parsed by read_stat().
# Capturing groups, in order:
#   1 - type name, its components joined with "|" (the trailing "|" is dropped)
#   2 - "count" value
#   3 - "length" value (followed by " m" in the text)
#   4 - "area" value (followed by " m²" in the text)
#   5 - "names" value
# An optional "<number>. " prefix and the "size" value are matched but
# not captured.
RE_STAT = re.compile(
    r"(?:\d+\. )?([\w:|-]+?)\|: "
    r"size = \d+; "
    r"count = (\d+); "
    r"length = ([0-9.e+-]+) m; "
    r"area = ([0-9.e+-]+) m²; "
    r"names = (\d+)\s*"
)

# Textual duration such as "1 day, 2:03:04.5" or "0:01:02".
# Named groups: days, hours, minutes, seconds, microseconds; every group
# except "seconds" is optional.
RE_TIME_DELTA = re.compile(
    r"^(?:(?P<days>-?\d+) (days?, )?)?"
    r"((?:(?P<hours>-?\d+):)(?=\d+:\d+))?"
    r"(?:(?P<minutes>-?\d+):)?"
    r"(?P<seconds>-?\d+)"
    r"(?:\.(?P<microseconds>\d{1,6})\d{0,6})?$"
)

# Log line reporting a finished stage.
# Groups: 1 - anything before "Stage", 2 - stage name, 3 - duration text.
RE_FINISH_STAGE = re.compile(r"(.*)Stage (\w+): finished in (.+)$")
|
||||
|
||||
|
||||
def read_stat(f):
    """Parse type-statistics records from an iterable of text lines.

    Args:
        f: An iterable of lines (for example an open text file).

    Returns:
        A list of dicts, one per line matched by ``RE_STAT``, with keys
        ``"name"`` (type name with "|" replaced by "-"), ``"cnt"`` (int),
        ``"len"`` (float), ``"area"`` (float) and ``"names"`` (int).
    """
    stats = []
    for line in f:
        m = RE_STAT.match(line)
        if m is None:
            # Lines that are not statistics records (blank lines, headers,
            # other tool output) are skipped instead of crashing on
            # ``None.group``.
            continue
        name, cnt, length, area, names = m.groups()
        stats.append({
            "name": name.replace("|", "-"),
            "cnt": int(cnt),
            "len": float(length),
            "area": float(area),
            "names": int(names),
        })
    return stats
|
||||
|
||||
|
||||
def read_config(f):
    """Read the statistics configuration from an iterable of text lines.

    Every non-blank line has the form ``<regex>;<kind>;<label>``. The line
    is split on the first two semicolons only, so the label itself may
    contain ";". Surrounding whitespace of each column is stripped.

    Args:
        f: An iterable of lines (for example an open text file).

    Returns:
        A list of ``[compiled_regex, kind_in_lower_case, label]`` lists,
        in file order.
    """
    config = []
    for line in f:
        if not line.strip():
            # A blank line (e.g. a trailing newline at the end of the file)
            # carries no rule; without this guard it raised IndexError.
            continue
        columns = [c.strip() for c in line.split(";", 2)]
        columns[0] = re.compile(columns[0])
        columns[1] = columns[1].lower()
        config.append(columns)
    return config
|
||||
|
||||
|
||||
def process_stat(config, stats):
    """Aggregate parsed statistics according to the configuration rules.

    For each rule, every stats entry whose ``"name"`` is matched by the
    rule's regex (``re.match``, i.e. anchored at the start only) adds one
    of its fields to the rule's total: ``"len"`` for kind "len", ``"area"``
    for kind "area", ``"names"`` for kind "cnt_names", and ``"cnt"`` for
    any other kind.

    Args:
        config: Rules as ``[compiled_regex, kind, ...]`` sequences.
        stats: Dicts with keys "name", "cnt", "len", "area" and "names".

    Returns:
        A dict mapping ``str(compiled_regex) + kind`` to the total.
    """
    field_by_kind = {"len": "len", "area": "area", "cnt_names": "names"}
    totals = {}
    for pattern, kind, *_ in config:
        field = field_by_kind.get(kind, "cnt")
        total = 0
        for entry in stats:
            if pattern.match(entry["name"]):
                total += entry[field]
        totals[str(pattern) + kind] = total
    return totals
|
||||
|
||||
|
||||
def format_res(res, typ):
    """Pair a value with the unit label that belongs to its kind.

    Args:
        res: The value; it is returned unchanged.
        typ: The kind of the value: "len", "area", or anything else.

    Returns:
        A ``(res, unit)`` tuple, where unit is "м" for "len", "м²" for
        "area" and "шт." for every other kind.
    """
    for kind, unit in (("len", "м"), ("area", "м²")):
        if typ == kind:
            return res, unit
    return res, "шт."
|
||||
|
||||
|
||||
def make_stats(config_path, stats_path):
    """Build the per-type statistics report for one statistics file.

    Args:
        config_path: Path to the rules file understood by ``read_config``.
        stats_path: Path to the statistics text understood by ``read_stat``.

    Returns:
        A list with one dict per configuration rule, in configuration
        order, with keys ``"type"`` (the rule's label), ``"quantity"`` and
        ``"unit"``.
    """
    # Both files carry non-ASCII text (labels in the config, "m²" in the
    # statistics), so decode them as UTF-8 explicitly instead of relying on
    # the locale's preferred encoding.
    with open(config_path, encoding="utf-8") as f:
        config = read_config(f)
    with open(stats_path, encoding="utf-8") as f:
        stats = process_stat(config, read_stat(f))

    lines = []
    for param in config:
        # Same key that process_stat() builds for this rule.
        key = str(param[0]) + param[1]
        quantity, unit = format_res(stats[key], param[1])
        lines.append({"type": param[2], "quantity": quantity, "unit": unit})
    return lines
|
||||
|
||||
|
||||
def parse_time(time_str):
    """Convert a textual duration into a ``datetime.timedelta``.

    Args:
        time_str: Text matched against ``RE_TIME_DELTA``, for example
            "1 day, 2:03:04.5" or "0:01:02".

    Returns:
        The parsed ``datetime.timedelta``, or ``None`` when ``time_str``
        does not match ``RE_TIME_DELTA``.
    """
    match = RE_TIME_DELTA.match(time_str)
    if not match:
        return None

    time_params = {}
    for name, value in match.groupdict().items():
        if not value:
            continue
        if name == "microseconds":
            # The group holds the digits after the decimal point; ".5"
            # means 500000 microseconds, so pad on the right to six digits
            # before converting.
            value = value.ljust(6, "0")
        time_params[name] = int(value)
    return datetime.timedelta(**time_params)
|
||||
|
||||
|
||||
def get_stages_info(log_path):
    """Collect stage durations from the log files in a directory.

    Every regular file directly inside ``log_path`` is scanned for lines
    matched by ``RE_FINISH_STAGE``. Durations found in files whose name
    starts with "stage_" are stored under ``result["stages"][stage]``;
    durations from any other file are stored under
    ``result["countries"][country][stage]``, where ``country`` is the file
    name up to its first ".".

    Args:
        log_path: Directory containing the log files.

    Returns:
        A nested ``defaultdict`` with the layout described above. Values
        are whatever ``parse_time`` returns for the duration text.
    """
    result = defaultdict(lambda: defaultdict(dict))
    # Sorted so that, if the same stage is reported more than once, the
    # entry that wins does not depend on the directory listing order.
    for file_name in sorted(os.listdir(log_path)):
        path = os.path.join(log_path, file_name)
        if not os.path.isfile(path):
            # Subdirectories and other non-regular entries cannot be read
            # as logs.
            continue
        # Undecodable bytes are replaced rather than aborting the scan
        # with UnicodeDecodeError.
        with open(path, encoding="utf-8", errors="replace") as f:
            for line in f:
                m = RE_FINISH_STAGE.match(line)
                if not m:
                    continue
                stage_name = m.group(2)
                dt = parse_time(m.group(3))
                if file_name.startswith("stage_"):
                    result["stages"][stage_name] = dt
                else:
                    country = file_name.split(".")[0]
                    result["countries"][country][stage_name] = dt
    return result
|
|
@ -3,7 +3,10 @@ import os
|
|||
import shutil
|
||||
from functools import partial
|
||||
from multiprocessing.pool import ThreadPool
|
||||
from collections import defaultdict
|
||||
import multiprocessing
|
||||
import json
|
||||
import datetime
|
||||
|
||||
from descriptions.descriptions_downloader import (check_and_get_checker,
|
||||
download_from_wikipedia_tags,
|
||||
|
@ -22,6 +25,7 @@ from .generator.env import (planet_lock_file, build_lock_file,
|
|||
from .generator.exceptions import (ContinueError, BadExitStatusError,
|
||||
wait_and_raise_if_fail)
|
||||
from .generator.gen_tool import run_gen_tool
|
||||
from .generator.statistics import make_stats, get_stages_info
|
||||
from .utils.file import is_verified, download_file, make_tarfile
|
||||
|
||||
logger = logging.getLogger("maps_generator")
|
||||
|
@ -238,8 +242,9 @@ def stage_external_resources(env):
|
|||
for ttf_file in resources:
|
||||
shutil.copy2(ttf_file, env.intermediate_path)
|
||||
|
||||
shutil.copy2(os.path.join(env.user_resource_path, "WorldCoasts_obsolete.mwm"),
|
||||
env.mwm_path)
|
||||
shutil.copy2(
|
||||
os.path.join(env.user_resource_path, "WorldCoasts_obsolete.mwm"),
|
||||
env.mwm_path)
|
||||
|
||||
for file in os.listdir(env.mwm_path):
|
||||
if file.startswith(WORLD_NAME) and file.endswith(".mwm"):
|
||||
|
@ -259,6 +264,46 @@ def stage_localads(env):
|
|||
make_tarfile(f"{env.localads_path}.tar.gz", env.localads_path)
|
||||
|
||||
|
||||
@stage
def stage_statistics(env):
    """Collect type statistics and stage timings into ``stats.json``.

    For every mwm name returned by ``env.get_mwm_names()`` that is not in
    ``WORLDS_NAMES``, the generator tool is run with ``type_statistics=True``
    and its output is turned into a report by ``make_stats``. Stage
    durations are then read from ``env.log_path`` with ``get_stages_info``.
    The combined result is written to ``<env.stats_path>/stats.json``.
    """
    result = defaultdict(lambda: defaultdict(dict))

    @country_stage_log
    def stage_mwm_statistics(env, country, **kwargs):
        stats_tmp = os.path.join(env.draft_path, f"{country}.stat")
        # The file only serves as the tool's stdout target here.
        with open(stats_tmp, "w") as f:
            maps_stages.run_gen_tool_with_recovery_country(
                env,
                env.gen_tool,
                out=f,
                err=env.get_subprocess_out(country),
                data_path=env.mwm_path,
                user_resource_path=env.user_resource_path,
                type_statistics=True,
                output=country,
                **kwargs
            )
        # Parsed after the file has been closed.
        result["countries"][country]["types"] = \
            make_stats(settings.STATS_TYPES_CONFIG, stats_tmp)

    mwms = env.get_mwm_names()
    countries = filter(lambda x: x not in WORLDS_NAMES, mwms)
    with ThreadPool() as pool:
        pool.map(partial(stage_mwm_statistics, env), countries)

    stages_info = get_stages_info(env.log_path)
    result["stages"] = stages_info["stages"]
    for c in stages_info["countries"]:
        result["countries"][c]["stages"] = stages_info["countries"][c]

    def default(o):
        # json calls this hook for objects it cannot serialize itself.
        if isinstance(o, datetime.timedelta):
            return str(o)
        # Fail loudly on anything unexpected instead of silently emitting
        # null for it.
        raise TypeError(
            f"Object of type {type(o).__name__} is not JSON serializable")

    # ensure_ascii=False writes non-ASCII labels and units verbatim, so the
    # file encoding must not depend on the locale.
    with open(os.path.join(env.stats_path, "stats.json"), "w",
              encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, sort_keys=True,
                  indent=2, default=default)
|
||||
|
||||
|
||||
@stage
|
||||
def stage_cleanup(env):
|
||||
osm2ft_path = os.path.join(env.out_path, "osm2ft")
|
||||
|
@ -283,7 +328,7 @@ STAGES = [s.__name__ for s in
|
|||
stage_download_and_convert_planet, stage_update_planet,
|
||||
stage_coastline, stage_preprocess, stage_features, stage_mwm,
|
||||
stage_descriptions, stage_countries_txt, stage_external_resources,
|
||||
stage_localads, stage_cleanup)]
|
||||
stage_localads, stage_statistics, stage_cleanup)]
|
||||
|
||||
ALL_STAGES = STAGES + COUNTRIES_STAGES
|
||||
|
||||
|
@ -343,6 +388,7 @@ def generate_maps(env):
|
|||
stage_countries_txt(env)
|
||||
stage_external_resources(env)
|
||||
stage_localads(env)
|
||||
stage_statistics(env)
|
||||
stage_cleanup(env)
|
||||
|
||||
|
||||
|
|
|
@ -32,5 +32,8 @@ OSM_TOOLS_PATH: ~/osmctools
|
|||
# POPULARITY_URL:
|
||||
# SUBWAY_URL:
|
||||
# FOOD_URL:
|
||||
# FOOD_TRANSLATIONS_URL:
|
||||
# FOOD_TRANSLATIONS_URL:
|
||||
|
||||
[Stats]
|
||||
STATS_TYPES_CONFIG: ${Developer:OMIM_PATH}/tools/python/maps_generator/var/etc/stats_types_config.txt
|
||||
|
||||
|
|
59
tools/python/maps_generator/var/etc/stats_types_config.txt
Normal file
59
tools/python/maps_generator/var/etc/stats_types_config.txt
Normal file
|
@ -0,0 +1,59 @@
|
|||
barrier-(fence|gate);len;Заборы
|
||||
building;cnt;Здания
|
||||
(amenity|shop|historic)-.*;cnt;POI
|
||||
(amenity|shop|historic)-.*;cnt_names;POI c именами
|
||||
amenity-(cafe|restaurant|fast_food).*;cnt;Кафе и рестораны
|
||||
amenity-(pub|bar);cnt;Бары и пабы
|
||||
amenity-kindergarten;cnt;Детские сады
|
||||
amenity-(school|university|college);cnt;Школы и университеты
|
||||
amenity-parking.*;cnt;Автостоянки
|
||||
amenity-parking.*;area;Автостоянки
|
||||
amenity-pharmacy;cnt;Аптеки
|
||||
amenity-place_of_worship.*;cnt;Храмы
|
||||
amenity-(hospital|doctors);cnt;Больницы и поликлиники
|
||||
amenity-toilets;cnt;Туалеты
|
||||
amenity-(waste_disposal|recycling);cnt;Мусорные баки
|
||||
highway-(motorway|trunk|primary|secondary|tertiary|residential|unclassified|service|track|living_street)(_link)?(-.*)?;len;Автодорожная сеть
|
||||
highway-(footway|path|pedestrian|steps).*;len;Пешеходные дорожки
|
||||
highway-.*-bridge;len;Мосты
|
||||
highway-.*-tunnel;len;Туннели
|
||||
highway-(footway|path|steps)-bridge;len;Пешеходные мосты
|
||||
highway-(footway|path|steps)-tunnel;len;Пешеходные туннели
|
||||
highway-steps.*;len;Лестницы
|
||||
highway-speed_camera;cnt;Камеры контроля скорости
|
||||
internet_access-wlan;cnt;Точки доступа Wi-Fi
|
||||
leisure-(pitch|stadium|playing_fields|track|sports_centre).*;cnt;Спортплощадки и комплексы
|
||||
leisure-playground;cnt;Детские площадки
|
||||
man_made-lighthouse;cnt;Маяки
|
||||
man_made-windmill;cnt;Ветряные мельницы
|
||||
man_made-pipeline.*;len;Трубопроводы
|
||||
natural-beach;cnt;Пляжи
|
||||
natural-tree;cnt;Отдельностоящие деревья
|
||||
natural-waterfall;cnt;Водопады
|
||||
piste:type.*;len;Лыжни
|
||||
place-(city.*|town|village|hamlet);cnt;Населённые пункты
|
||||
place-island;cnt;Острова
|
||||
power-(minor_)?line.*;len;Линии электропередачи
|
||||
power-(pole|tower);cnt;Опоры ЛЭП
|
||||
railway-(rail|monorail|light_rail|narrow_gauge|preserved|siding|spur|yard|disused|incline).*;len;Железные дороги
|
||||
railway-.*-(bridge|tunnel);len;Железнодорожные мосты и туннели
|
||||
railway-(razed|abandoned).*;len;Снятые ветки ж/д
|
||||
railway-narrow_gauge.*;len;Узкоколейные ж/д
|
||||
railway-tram(-.*)?;len;Трамвайные пути
|
||||
railway-(halt|station);cnt;Станции железной дороги
|
||||
railway-subway.*;len;Линии метро
|
||||
highway-bus_stop|railway-tram_stop;cnt;Остановки наземного транспорта
|
||||
shop-bakery;cnt;Пекарни
|
||||
shop-books;cnt;Книжные магазины
|
||||
shop-clothes;cnt;Магазины одежды
|
||||
shop-shoes;cnt;Магазины обуви
|
||||
shop-(convenience|supermarket);cnt;Продуктовые магазины
|
||||
shop-florist;cnt;Цветочные салоны
|
||||
shop-(hairdresser|beauty);cnt;Парикмахерские и салоны красоты
|
||||
tourism-(guest_house|hos?tel|motel);cnt;Гостиницы и хостелы
|
||||
tourism-(attraction|viewpoint);cnt;Достопримечательности и точки обзора
|
||||
waterway-(canal|river|stream)(-.*)?;len;Реки, каналы и ручьи
|
||||
landuse-cemetery.*;area;Кладбища
|
||||
leisure-park.*;area;Парки
|
||||
natural-beach;area;Пляжи
|
||||
sponsored-booking;cnt;Booking отели
|
Loading…
Add table
Reference in a new issue