[python] Added statistics.

This commit is contained in:
Maksim Andrianov 2019-05-24 15:58:48 +03:00 committed by mpimenov
parent f2edf6d870
commit 2a97d57399
8 changed files with 255 additions and 15 deletions

View file

@ -8,7 +8,8 @@ from .generator.exceptions import ContinueError, SkipError, ValidationError
from .maps_generator import (generate_maps, generate_coasts, reset_to_stage,
ALL_STAGES, stage_download_production_external,
stage_descriptions, stage_ugc, stage_popularity,
stage_localads, stages_as_string)
stage_localads, stage_statistics,
stages_as_string)
from .utils.collections import unique
logger = logging.getLogger("maps_generator")
@ -25,16 +26,16 @@ def parse_options():
nargs="?",
type=str,
help="Continue the last build or specified in CONTINUE from the "
"last stopped stage.")
"last stopped stage.")
parser.add_argument(
"--countries",
type=str,
default="",
help="List of regions, separated by a comma or a semicolon, or path to "
"file with regions, separated by a line break, for which maps"
" will be built. The names of the regions can be seen "
"in omim/data/borders. It is necessary to set names without "
"any extension.")
"file with regions, separated by a line break, for which maps"
" will be built. The names of the regions can be seen "
"in omim/data/borders. It is necessary to set names without "
"any extension.")
parser.add_argument(
"--skip",
type=str,
@ -58,7 +59,7 @@ def parse_options():
default=False,
action="store_true",
help="Build production maps. In another case, 'osm only maps' are built"
" - maps without additional data and advertising.")
" - maps without additional data and advertising.")
return vars(parser.parse_args())
@ -131,10 +132,13 @@ def main():
]
options["skip"] = options_skip
if not options["production"]:
options["skip"] += stages_as_string(stage_download_production_external,
stage_ugc, stage_popularity,
stage_descriptions,
stage_localads)
options["skip"] += stages_as_string(
stage_download_production_external,
stage_ugc, stage_popularity,
stage_descriptions,
stage_localads,
stage_statistics
)
if not all(s in ALL_STAGES for s in options["skip"]):
raise SkipError(f"Stages {set(options['skip']) - set(ALL_STAGES)} "
f"not found.")

View file

@ -152,6 +152,12 @@ class Env:
self._create_if_not_exist(path)
return path
@property
def stats_path(self):
    """Return the ``stats`` directory located under ``self.out_path``.

    The path is handed to ``self._create_if_not_exist`` before it is
    returned.
    """
    stats_dir = os.path.join(self.out_path, "stats")
    self._create_if_not_exist(stats_dir)
    return stats_dir
@property
def types_path(self):
return os.path.join(self.user_resource_path, "types.txt")

View file

@ -37,6 +37,7 @@ class GenTool:
"no_ads": bool,
"preprocess": bool,
"split_by_polygons": bool,
"type_statistics": bool,
"planet_version": int,
"booking_data": str,
"brands_data": str,

View file

@ -50,6 +50,8 @@ SUBWAY_URL = ""
FOOD_URL = ""
FOOD_TRANSLATIONS_URL = ""
STATS_TYPES_CONFIG = ""
PLANET = "planet"
GEN_TOOL = "generator_tool"
@ -120,6 +122,9 @@ FOOD_URL = _get_opt(config, "External", "FOOD_URL", FOOD_URL)
FOOD_TRANSLATIONS_URL = _get_opt(config, "External", "FOOD_TRANSLATIONS_URL",
FOOD_TRANSLATIONS_URL)
STATS_TYPES_CONFIG = _get_opt_path(config, "Stats", "STATS_TYPES_CONFIG",
STATS_TYPES_CONFIG)
PLANET_O5M = os.path.join(MAIN_OUT_PATH, PLANET + ".o5m")
PLANET_PBF = os.path.join(MAIN_OUT_PATH, PLANET + ".osm.pbf")
PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom")

View file

@ -0,0 +1,116 @@
import re
import os
import datetime
from collections import defaultdict
# Matches one per-type line of the type statistics text, for example:
#   "1. amenity|cafe|: size = 10; count = 5; length = 0 m; area = 0 m²; names = 3"
# The leading "N. " index is optional. Captured groups:
#   1 - type name with '|' separators (the trailing '|' is not captured)
#   2 - count, 3 - length (m), 4 - area (m²), 5 - names
# The "size" value is matched but not captured.
RE_STAT = re.compile(r"(?:\d+\. )?([\w:|-]+?)\|: "
r"size = \d+; "
r"count = (\d+); "
r"length = ([0-9.e+-]+) m; "
r"area = ([0-9.e+-]+) m²; "
r"names = (\d+)\s*")
# Matches a textual duration such as "1 day, 2:03:04.000005" or "0:00:12"
# (this looks like the str(datetime.timedelta) format - confirm against the
# code that writes the logs). Named groups: days, hours, minutes, seconds,
# microseconds; the microseconds group keeps at most the first six
# fractional digits.
RE_TIME_DELTA = re.compile(r'^(?:(?P<days>-?\d+) (days?, )?)?'
r'((?:(?P<hours>-?\d+):)(?=\d+:\d+))?'
r'(?:(?P<minutes>-?\d+):)?'
r'(?P<seconds>-?\d+)'
r'(?:\.(?P<microseconds>\d{1,6})\d{0,6})?$')
# Matches a log line of the form "<prefix>Stage <name>: finished in <duration>".
# Groups: 1 - arbitrary prefix, 2 - stage name, 3 - duration text.
RE_FINISH_STAGE = re.compile(r"(.*)Stage (\w+): finished in (.+)$")
def read_stat(f):
    """Parse per-type statistics lines into a list of dicts.

    Args:
        f: Iterable of text lines (for example an open text file).

    Returns:
        A list with one dict per line that matches ``RE_STAT``. Each dict
        has the keys ``name`` (type name with ``|`` replaced by ``-``),
        ``cnt`` (int), ``len`` (float), ``area`` (float) and ``names``
        (int).
    """
    stats = []
    for line in f:
        m = RE_STAT.match(line)
        if m is None:
            # Blank lines, headers and any other text that is not a
            # per-type record used to crash with AttributeError on
            # ``m.group``; skip them instead.
            continue
        stats.append({
            "name": m.group(1).replace("|", "-"),
            "cnt": int(m.group(2)),
            "len": float(m.group(3)),
            "area": float(m.group(4)),
            "names": int(m.group(5)),
        })
    return stats
def read_config(f):
    """Parse the statistics types config.

    Every non-blank line has the form ``<regex>;<kind>;<description>``.
    Only the first two ``;`` are separators, so the description may itself
    contain semicolons.

    Args:
        f: Iterable of text lines (for example an open text file).

    Returns:
        A list of ``[compiled_regex, kind, description]`` lists, in file
        order. ``kind`` is lower-cased; all three columns are stripped of
        surrounding whitespace.

    Raises:
        ValueError: If a non-blank line has fewer than three columns.
        re.error: If the first column is not a valid regular expression.
    """
    config = []
    for line_no, line in enumerate(f, start=1):
        if not line.strip():
            # A blank (typically trailing) line is not an entry; without
            # this guard it failed with IndexError below.
            continue
        columns = [c.strip() for c in line.split(";", 2)]
        if len(columns) != 3:
            raise ValueError(
                f"Line {line_no}: expected '<regex>;<kind>;<description>', "
                f"got {line.rstrip()!r}")
        columns[0] = re.compile(columns[0])
        columns[1] = columns[1].lower()
        config.append(columns)
    return config
def process_stat(config, stats):
    """Aggregate per-type statistics according to the config entries.

    Args:
        config: Sequence of entries whose first item is a compiled regex
            and whose second item is the kind (``len``, ``area``,
            ``cnt_names`` or anything else, which means plain count).
        stats: Sequence of dicts with the keys ``name``, ``cnt``, ``len``,
            ``area`` and ``names``.

    Returns:
        A dict mapping ``str(regex) + kind`` to the total of the selected
        field over every type whose name the regex matches. ``match`` is
        used, so the pattern is anchored at the start of the name only.
    """
    # Which field of a statistics record each kind accumulates; kinds that
    # are not listed fall back to the plain count.
    field_by_kind = {"len": "len", "area": "area", "cnt_names": "names"}
    totals = {}
    for pattern, kind, *_ in config:
        field = field_by_kind.get(kind, "cnt")
        total = 0
        for record in stats:
            if pattern.match(record["name"]):
                total += record[field]
        totals[str(pattern) + kind] = total
    return totals
def format_res(res, typ):
    """Pair a value with the unit label for its kind.

    Args:
        res: The aggregated value; returned unchanged.
        typ: The kind of statistic: ``len``, ``area`` or anything else.

    Returns:
        A ``(res, unit)`` tuple. The unit is a Russian abbreviation:
        metres for ``len``, square metres for ``area`` and "pieces" for
        every other kind.
    """
    if typ == "len":
        return res, "м"
    if typ == "area":
        return res, "м²"
    return res, "шт."
def make_stats(config_path, stats_path):
    """Build the aggregated statistics rows for one statistics file.

    Args:
        config_path: Path to the types config read by ``read_config``.
        stats_path: Path to the statistics text read by ``read_stat``.

    Returns:
        A list of dicts, one per config entry in config order, with the
        keys ``type`` (the entry's description), ``quantity`` and ``unit``.
    """
    # Both files contain non-ASCII text (Cyrillic descriptions, "m²"), so
    # the encoding is fixed explicitly instead of depending on the locale.
    with open(config_path, encoding="utf-8") as f:
        config = read_config(f)
    with open(stats_path, encoding="utf-8") as f:
        stats = process_stat(config, read_stat(f))

    lines = []
    for param in config:
        # Must be the same key that process_stat builds for this entry.
        key = str(param[0]) + param[1]
        quantity, unit = format_res(stats[key], param[1])
        lines.append({"type": param[2], "quantity": quantity, "unit": unit})
    return lines
def parse_time(time_str):
    """Convert a textual duration into a ``datetime.timedelta``.

    Args:
        time_str: Text accepted by ``RE_TIME_DELTA``, for example
            ``"1 day, 2:03:04.000005"`` or ``"0:00:12"``.

    Returns:
        The parsed ``datetime.timedelta``, or ``None`` when ``time_str``
        does not match ``RE_TIME_DELTA``.
    """
    match = RE_TIME_DELTA.match(time_str)
    if not match:
        return None

    time_params = {}
    for name, value in match.groupdict().items():
        if not value:
            continue
        if name == "microseconds":
            # The group holds the digits after the decimal point, so it
            # must be right-padded to six digits: ".5" is 500000 µs, not
            # 5 µs.
            value = value.ljust(6, "0")
        time_params[name] = int(value)
    return datetime.timedelta(**time_params)
def get_stages_info(log_path):
    """Collect stage durations from the log files in a directory.

    Every regular file directly inside ``log_path`` is scanned for lines
    matching ``RE_FINISH_STAGE``. Files whose name starts with ``stage_``
    contribute to ``result["stages"]``; any other file is attributed to
    the country named by the part of its file name before the first dot
    and contributes to ``result["countries"][country]``.

    Args:
        log_path: Directory containing the log files.

    Returns:
        A nested ``defaultdict`` with
        ``result["stages"][stage_name] = duration`` and
        ``result["countries"][country][stage_name] = duration``, where
        ``duration`` is whatever ``parse_time`` returns for the logged
        text.
    """
    result = defaultdict(lambda: defaultdict(dict))
    for file in os.listdir(log_path):
        path = os.path.join(log_path, file)
        if not os.path.isfile(path):
            # Sub-directories cannot be opened as text files; skip them
            # instead of failing with IsADirectoryError.
            continue
        is_stage_log = file.startswith("stage_")
        country = file.split(".")[0]
        with open(path) as f:
            for line in f:
                m = RE_FINISH_STAGE.match(line)
                if not m:
                    continue
                stage_name = m.group(2)
                dt = parse_time(m.group(3))
                if is_stage_log:
                    result["stages"][stage_name] = dt
                else:
                    result["countries"][country][stage_name] = dt
    return result

View file

@ -3,7 +3,10 @@ import os
import shutil
from functools import partial
from multiprocessing.pool import ThreadPool
from collections import defaultdict
import multiprocessing
import json
import datetime
from descriptions.descriptions_downloader import (check_and_get_checker,
download_from_wikipedia_tags,
@ -22,6 +25,7 @@ from .generator.env import (planet_lock_file, build_lock_file,
from .generator.exceptions import (ContinueError, BadExitStatusError,
wait_and_raise_if_fail)
from .generator.gen_tool import run_gen_tool
from .generator.statistics import make_stats, get_stages_info
from .utils.file import is_verified, download_file, make_tarfile
logger = logging.getLogger("maps_generator")
@ -238,8 +242,9 @@ def stage_external_resources(env):
for ttf_file in resources:
shutil.copy2(ttf_file, env.intermediate_path)
shutil.copy2(os.path.join(env.user_resource_path, "WorldCoasts_obsolete.mwm"),
env.mwm_path)
shutil.copy2(
os.path.join(env.user_resource_path, "WorldCoasts_obsolete.mwm"),
env.mwm_path)
for file in os.listdir(env.mwm_path):
if file.startswith(WORLD_NAME) and file.endswith(".mwm"):
@ -259,6 +264,46 @@ def stage_localads(env):
make_tarfile(f"{env.localads_path}.tar.gz", env.localads_path)
@stage
def stage_statistics(env):
    """Collect type and stage statistics and write them to ``stats.json``.

    For every mwm name returned by ``env.get_mwm_names()`` that is not in
    ``WORLDS_NAMES``, the generator tool is run with
    ``type_statistics=True`` and its output is written to
    ``<draft_path>/<country>.stat``, then aggregated with ``make_stats``.
    Stage durations are read from the logs in ``env.log_path`` with
    ``get_stages_info``. The combined result is dumped as JSON into
    ``<stats_path>/stats.json``.

    Args:
        env: The build environment object.
    """
    result = defaultdict(lambda: defaultdict(dict))
    # Create the "countries" mapping up front. The worker threads below
    # would otherwise create it lazily through defaultdict.__missing__,
    # and two threads doing that at once can replace each other's mapping
    # and lose entries.
    countries_result = result["countries"]

    @country_stage_log
    def stage_mwm_statistics(env, country, **kwargs):
        stats_tmp = os.path.join(env.draft_path, f"{country}.stat")
        with open(stats_tmp, "w") as f:
            maps_stages.run_gen_tool_with_recovery_country(
                env,
                env.gen_tool,
                out=f,
                err=env.get_subprocess_out(country),
                data_path=env.mwm_path,
                user_resource_path=env.user_resource_path,
                type_statistics=True,
                output=country,
                **kwargs
            )
        # Read the file only after it has been closed by the with-block.
        countries_result[country]["types"] = \
            make_stats(settings.STATS_TYPES_CONFIG, stats_tmp)

    mwms = env.get_mwm_names()
    countries = [name for name in mwms if name not in WORLDS_NAMES]
    with ThreadPool() as pool:
        pool.map(partial(stage_mwm_statistics, env), countries)

    stages_info = get_stages_info(env.log_path)
    result["stages"] = stages_info["stages"]
    for c in stages_info["countries"]:
        countries_result[c]["stages"] = stages_info["countries"][c]

    def default(o):
        # Durations are stored as their string form. Anything else that
        # json cannot encode must fail loudly rather than be written as
        # null, which is what an implicit ``return None`` would do.
        if isinstance(o, datetime.timedelta):
            return str(o)
        raise TypeError(
            f"Object of type {type(o).__name__} is not JSON serializable")

    # ensure_ascii=False emits non-ASCII descriptions verbatim, so the
    # file encoding must not depend on the locale.
    with open(os.path.join(env.stats_path, "stats.json"), "w",
              encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, sort_keys=True,
                  indent=2, default=default)
@stage
def stage_cleanup(env):
osm2ft_path = os.path.join(env.out_path, "osm2ft")
@ -283,7 +328,7 @@ STAGES = [s.__name__ for s in
stage_download_and_convert_planet, stage_update_planet,
stage_coastline, stage_preprocess, stage_features, stage_mwm,
stage_descriptions, stage_countries_txt, stage_external_resources,
stage_localads, stage_cleanup)]
stage_localads, stage_statistics, stage_cleanup)]
ALL_STAGES = STAGES + COUNTRIES_STAGES
@ -343,6 +388,7 @@ def generate_maps(env):
stage_countries_txt(env)
stage_external_resources(env)
stage_localads(env)
stage_statistics(env)
stage_cleanup(env)

View file

@ -32,5 +32,8 @@ OSM_TOOLS_PATH: ~/osmctools
# POPULARITY_URL:
# SUBWAY_URL:
# FOOD_URL:
# FOOD_TRANSLATIONS_URL:
# FOOD_TRANSLATIONS_URL:
[Stats]
STATS_TYPES_CONFIG: ${Developer:OMIM_PATH}/tools/python/maps_generator/var/etc/stats_types_config.txt

View file

@ -0,0 +1,59 @@
barrier-(fence|gate);len;Заборы
building;cnt;Здания
(amenity|shop|historic)-.*;cnt;POI
(amenity|shop|historic)-.*;cnt_names;POI c именами
amenity-(cafe|restaurant|fast_food).*;cnt;Кафе и рестораны
amenity-(pub|bar);cnt;Бары и пабы
amenity-kindergarten;cnt;Детские сады
amenity-(school|university|college);cnt;Школы и университеты
amenity-parking.*;cnt;Автостоянки
amenity-parking.*;area;Автостоянки
amenity-pharmacy;cnt;Аптеки
amenity-place_of_worship.*;cnt;Храмы
amenity-(hospital|doctors);cnt;Больницы и поликлиники
amenity-toilets;cnt;Туалеты
amenity-(waste_disposal|recycling);cnt;Мусорные баки
highway-(motorway|trunk|primary|secondary|tertiary|residential|unclassified|service|track|living_street)(_link)?(-.*)?;len;Автодорожная сеть
highway-(footway|path|pedestrian|steps).*;len;Пешеходные дорожки
highway-.*-bridge;len;Мосты
highway-.*-tunnel;len;Туннели
highway-(footway|path|steps)-bridge;len;Пешеходные мосты
highway-(footway|path|steps)-tunnel;len;Пешеходные туннели
highway-steps.*;len;Лестницы
highway-speed_camera;cnt;Камеры контроля скорости
internet_access-wlan;cnt;Точки доступа Wi-Fi
leisure-(pitch|stadium|playing_fields|track|sports_centre).*;cnt;Спортплощадки и комплексы
leisure-playground;cnt;Детские площадки
man_made-lighthouse;cnt;Маяки
man_made-windmill;cnt;Ветряные мельницы
man_made-pipeline.*;len;Трубопроводы
natural-beach;cnt;Пляжи
natural-tree;cnt;Отдельностоящие деревья
natural-waterfall;cnt;Водопады
piste:type.*;len;Лыжни
place-(city.*|town|village|hamlet);cnt;Населённые пункты
place-island;cnt;Острова
power-(minor_)?line.*;len;Линии электропередачи
power-(pole|tower);cnt;Опоры ЛЭП
railway-(rail|monorail|light_rail|narrow_gauge|preserved|siding|spur|yard|disused|incline).*;len;Железные дороги
railway-.*-(bridge|tunnel);len;Железнодорожные мосты и туннели
railway-(razed|abandoned).*;len;Снятые ветки ж/д
railway-narrow_gauge.*;len;Узкоколейные ж/д
railway-tram(-.*)?;len;Трамвайные пути
railway-(halt|station);cnt;Станции железной дороги
railway-subway.*;len;Линии метро
highway-bus_stop|railway-tram_stop;cnt;Остановки наземного транспорта
shop-bakery;cnt;Пекарни
shop-books;cnt;Книжные магазины
shop-clothes;cnt;Магазины одежды
shop-shoes;cnt;Магазины обуви
shop-(convenience|supermarket);cnt;Продуктовые магазины
shop-florist;cnt;Цветочные салоны
shop-(hairdresser|beauty);cnt;Парикмахерские и салоны красоты
tourism-(guest_house|hos?tel|motel);cnt;Гостиницы и хостелы
tourism-(attraction|viewpoint);cnt;Достопримечательности и точки обзора
waterway-(canal|river|stream)(-.*)?;len;Реки, каналы и ручьи
landuse-cemetery.*;area;Кладбища
leisure-park.*;area;Парки
natural-beach;area;Пляжи
sponsored-booking;cnt;Booking отели