forked from organicmaps/organicmaps
[generator][promo] inject countries osm ids into countries.txt
This commit is contained in:
parent
3211f04247
commit
334c5d5534
6 changed files with 114 additions and 86 deletions
|
@ -202,6 +202,11 @@ class Env:
|
|||
def promo_catalog_cities_path(self):
|
||||
return os.path.join(self.intermediate_path, "promo_catalog_cities.json")
|
||||
|
||||
@property
|
||||
def promo_catalog_countries_path(self):
|
||||
return os.path.join(self.intermediate_path,
|
||||
"promo_catalog_countries.json")
|
||||
|
||||
@property
|
||||
def popularity_path(self):
|
||||
return os.path.join(self.intermediate_path, "popular_places.csv")
|
||||
|
|
|
@ -46,6 +46,7 @@ PLANET_COASTS_URL = ""
|
|||
UGC_URL = ""
|
||||
HOTELS_URL = ""
|
||||
PROMO_CATALOG_CITIES_URL = ""
|
||||
PROMO_CATALOG_COUNTRIES_URL = ""
|
||||
POPULARITY_URL= ""
|
||||
SUBWAY_URL = ""
|
||||
FOOD_URL = ""
|
||||
|
@ -120,6 +121,7 @@ PLANET_COASTS_URL = _get_opt_path(config, "External", "PLANET_COASTS_URL", PLANE
|
|||
UGC_URL = _get_opt_path(config, "External", "UGC_URL", UGC_URL)
|
||||
HOTELS_URL = _get_opt_path(config, "External", "HOTELS_URL", HOTELS_URL)
|
||||
PROMO_CATALOG_CITIES_URL = _get_opt_path(config, "External", "PROMO_CATALOG_CITIES_URL", PROMO_CATALOG_CITIES_URL)
|
||||
PROMO_CATALOG_COUNTRIES_URL = _get_opt_path(config, "External", "PROMO_CATALOG_COUNTRIES_URL", PROMO_CATALOG_COUNTRIES_URL)
|
||||
POPULARITY_URL = _get_opt_path(config, "External", "POPULARITY_URL", POPULARITY_URL)
|
||||
SUBWAY_URL = _get_opt(config, "External", "SUBWAY_URL", SUBWAY_URL)
|
||||
FOOD_URL = _get_opt(config, "External", "FOOD_URL", FOOD_URL)
|
||||
|
|
|
@ -14,7 +14,7 @@ from descriptions.descriptions_downloader import (check_and_get_checker,
|
|||
download_from_wikidata_tags)
|
||||
from filelock import FileLock
|
||||
from post_generation.hierarchy_to_countries import hierarchy_to_countries
|
||||
from post_generation.inject_promo_cities import inject_promo_cities
|
||||
from post_generation.inject_promo_ids import inject_promo_ids
|
||||
from post_generation.localads_mwm_to_csv import create_csv
|
||||
|
||||
from .generator import stages
|
||||
|
@ -63,6 +63,7 @@ def stage_download_production_external(env):
|
|||
settings.UGC_URL: env.ugc_path,
|
||||
settings.HOTELS_URL: env.hotels_path,
|
||||
settings.PROMO_CATALOG_CITIES_URL: env.promo_catalog_cities_path,
|
||||
settings.PROMO_CATALOG_COUNTRIES_URL: env.promo_catalog_countries_path,
|
||||
settings.POPULARITY_URL: env.popularity_path,
|
||||
settings.FOOD_URL: env.food_paths,
|
||||
settings.FOOD_TRANSLATIONS_URL: env.food_translations_path,
|
||||
|
@ -235,8 +236,9 @@ def stage_countries_txt(env):
|
|||
env.mwm_version)
|
||||
if env.is_accepted_stage(stage_download_production_external):
|
||||
countries_json = json.loads(countries)
|
||||
inject_promo_cities(countries_json, env.promo_catalog_cities_path,
|
||||
env.mwm_path, env.types_path, env.mwm_path)
|
||||
inject_promo_ids(countries_json, env.promo_catalog_cities_path,
|
||||
env.promo_catalog_countries_path, env.mwm_path,
|
||||
env.types_path, env.mwm_path)
|
||||
countries = json.dumps(countries_json, ensure_ascii=True, indent=1)
|
||||
|
||||
with open(env.counties_txt_path, "w") as f:
|
||||
|
|
|
@ -31,6 +31,7 @@ SUBWAY_URL: http://osm-subway.maps.me/mapsme/latest.json
|
|||
# UGC_URL:
|
||||
# HOTELS_URL:
|
||||
# PROMO_CATALOG_CITIES_URL:
|
||||
# PROMO_CATALOG_COUNTRIES_URL:
|
||||
# POPULARITY_URL:
|
||||
# FOOD_URL:
|
||||
# FOOD_TRANSLATIONS_URL:
|
||||
|
|
|
@ -4,7 +4,7 @@ import os
|
|||
import sys
|
||||
|
||||
from .hierarchy_to_countries import hierarchy_to_countries as hierarchy_to_countries_
|
||||
from .inject_promo_ids import inject_promo_cities
|
||||
from .inject_promo_ids import inject_promo_ids
|
||||
from .localads_mwm_to_csv import create_csv
|
||||
|
||||
|
||||
|
@ -16,7 +16,7 @@ class PostGeneration:
|
|||
The post_generation commands are:
|
||||
localads_mwm_to_csv Prepares CSV files for uploading to localads database from mwm files.
|
||||
hierarchy_to_countries Produces countries.txt from hierarchy.txt.
|
||||
inject_promo_cities Injects promo cities osm ids into countries.txt
|
||||
inject_promo_ids Injects promo osm ids into countries.txt
|
||||
""")
|
||||
parser.add_argument("command", help="Subcommand to run")
|
||||
args = parser.parse_args(sys.argv[1:2])
|
||||
|
@ -88,7 +88,7 @@ The post_generation commands are:
|
|||
print(countries_json)
|
||||
|
||||
@staticmethod
|
||||
def inject_promo_cities():
|
||||
def inject_promo_ids():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Injects promo cities osm ids into countries.txt")
|
||||
parser.add_argument("--mwm", required=True, help="path to mwm files")
|
||||
|
@ -96,6 +96,8 @@ The post_generation commands are:
|
|||
help="path to omim/data/types.txt")
|
||||
parser.add_argument("--promo_cities", required=True,
|
||||
help="Path to promo cities file")
|
||||
parser.add_argument("--promo_countries", required=True,
|
||||
help="Path to promo countries file")
|
||||
parser.add_argument("--osm2ft",
|
||||
help="path to osm2ft files (default is the same as mwm)")
|
||||
parser.add_argument("--countries",
|
||||
|
@ -114,8 +116,8 @@ The post_generation commands are:
|
|||
with open(args.countries) as f:
|
||||
countries = json.load(f)
|
||||
|
||||
inject_promo_cities(countries, args.promo_cities, args.mwm, args.types,
|
||||
args.osm2ft)
|
||||
inject_promo_ids(countries, args.promo_cities, args.promo_countries,
|
||||
args.mwm, args.types, args.osm2ft)
|
||||
|
||||
with open(args.output, "w") as f:
|
||||
json.dump(countries, f, indent=1)
|
||||
|
|
|
@ -4,20 +4,45 @@ import os
|
|||
import re
|
||||
import sys
|
||||
|
||||
from collections import defaultdict
|
||||
from multiprocessing import Pool
|
||||
|
||||
from mwm import mwm
|
||||
|
||||
|
||||
class PromoCities(object):
|
||||
def __init__(self, cities, mwm_path, types_path, osm2ft_path):
|
||||
class PromoIds(object):
|
||||
def __init__(self, countries, cities, mwm_path, types_path, osm2ft_path):
|
||||
self.countries = countries
|
||||
self.cities = cities
|
||||
self.mwm_path = mwm_path
|
||||
self.types_path = types_path
|
||||
self.osm2ft_path = osm2ft_path
|
||||
|
||||
def find(self, leaf_id):
|
||||
result = []
|
||||
def inject_into_country(self, country):
|
||||
nodes = self._get_nodes(country)
|
||||
with Pool() as pool:
|
||||
proposed_ids = pool.map(self._find, (n["id"] for n in nodes),
|
||||
chunksize=1)
|
||||
|
||||
countries_ids = [ids for node_ids in proposed_ids for ids in
|
||||
node_ids["countries"]]
|
||||
if countries_ids:
|
||||
country["top_countries_geo_ids"] = countries_ids
|
||||
|
||||
for idx, node_ids in enumerate(proposed_ids):
|
||||
if not node_ids["cities"]:
|
||||
continue
|
||||
node = nodes[idx]
|
||||
best = self._choose_best_city(node_ids["cities"])
|
||||
node["top_city_geo_id"] = best["id"]
|
||||
if best["id"] < 0:
|
||||
node["top_city_geo_id"] += (1 << 64)
|
||||
|
||||
def _find(self, leaf_id):
|
||||
result = {
|
||||
"countries": [],
|
||||
"cities": []
|
||||
}
|
||||
ft2osm = load_osm2ft(self.osm2ft_path, leaf_id)
|
||||
with open(os.path.join(self.mwm_path, leaf_id + ".mwm"), "rb") as f:
|
||||
mwm_file = mwm.MWM(f)
|
||||
|
@ -27,63 +52,80 @@ class PromoCities(object):
|
|||
osm_id = ft2osm.get(feature["id"], None)
|
||||
types = feature["header"]["types"]
|
||||
|
||||
if "sponsored-promo_catalog" not in types or osm_id not in self.cities:
|
||||
continue
|
||||
if "sponsored-promo_catalog" in types and osm_id in self.cities:
|
||||
city = self._get_city(osm_id, types)
|
||||
result["cities"].append(city)
|
||||
|
||||
city = {
|
||||
"id": osm_id,
|
||||
"count_of_guides": self.cities[osm_id],
|
||||
"types": []
|
||||
}
|
||||
|
||||
for t in types:
|
||||
if t.startswith("place"):
|
||||
city["types"].append(t)
|
||||
|
||||
if not city["types"]:
|
||||
logging.error(f"Incorrect types for sponsored-promo_catalog "
|
||||
f"feature osm_id {osm_id}")
|
||||
sys.exit(3)
|
||||
|
||||
result.append(city)
|
||||
if "place-country" in types and osm_id in self.countries:
|
||||
result["countries"].append(osm_id)
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def choose_best_city(proposed_cities):
|
||||
def _get_nodes(root):
|
||||
def __get_nodes(node, mwm_nodes):
|
||||
if "g" in node:
|
||||
for item in node["g"]:
|
||||
__get_nodes(item, mwm_nodes)
|
||||
else:
|
||||
mwm_nodes.append(node)
|
||||
|
||||
mwm_nodes = []
|
||||
__get_nodes(root, mwm_nodes)
|
||||
return mwm_nodes
|
||||
|
||||
def _get_city(self, osm_id, types):
|
||||
city = {
|
||||
"id": osm_id,
|
||||
"count_of_guides": self.cities[osm_id],
|
||||
"types": []
|
||||
}
|
||||
|
||||
for t in types:
|
||||
if t.startswith("place"):
|
||||
city["types"].append(t)
|
||||
|
||||
if not city["types"]:
|
||||
logging.error(f"Incorrect types for sponsored-promo_catalog "
|
||||
f"feature osm_id {osm_id}")
|
||||
sys.exit(3)
|
||||
|
||||
return city
|
||||
|
||||
def _choose_best_city(self, proposed_cities):
|
||||
def key_compare(city):
|
||||
return city["count_of_guides"], score_types(city["types"])
|
||||
return city["count_of_guides"], self._score_city_types(
|
||||
city["types"])
|
||||
|
||||
return max(proposed_cities, key=key_compare)
|
||||
|
||||
def _score_city_types(self, types):
|
||||
return max([self._city_type_to_int(t) for t in types])
|
||||
|
||||
def place_type_to_int(t):
|
||||
if t == "place-town":
|
||||
return 1
|
||||
if t == "place-city":
|
||||
return 2
|
||||
@staticmethod
|
||||
def _city_type_to_int(t):
|
||||
if t == "place-town":
|
||||
return 1
|
||||
if t == "place-city":
|
||||
return 2
|
||||
|
||||
m = re.match(r"^place-city-capital?(-(?P<admin_level>\d+)|)$", t)
|
||||
if m:
|
||||
admin_level = int(m.groupdict("1")["admin_level"])
|
||||
if 1 <= admin_level <= 12:
|
||||
return 14 - admin_level
|
||||
return 0
|
||||
m = re.match(r"^place-city-capital?(-(?P<admin_level>\d+)|)$", t)
|
||||
if m:
|
||||
admin_level = int(m.groupdict("1")["admin_level"])
|
||||
if 1 <= admin_level <= 12:
|
||||
return 14 - admin_level
|
||||
return 0
|
||||
|
||||
|
||||
def score_types(types):
|
||||
return max([place_type_to_int(t) for t in types])
|
||||
|
||||
|
||||
def load_cities(path):
|
||||
def load_promo_ids(path):
|
||||
with open(path) as f:
|
||||
cities_list = json.load(f)
|
||||
root = json.load(f)
|
||||
|
||||
cities = {}
|
||||
for city in cities_list["data"]:
|
||||
cities[city["osmid"]] = city["paid_bundles_count"]
|
||||
ids = {}
|
||||
for item in root["data"]:
|
||||
ids[item["osmid"]] = item["paid_bundles_count"]
|
||||
|
||||
return cities
|
||||
return ids
|
||||
|
||||
|
||||
def load_osm2ft(osm2ft_path, mwm_id):
|
||||
|
@ -95,36 +137,10 @@ def load_osm2ft(osm2ft_path, mwm_id):
|
|||
return mwm.read_osm2ft(f, ft2osm=True, tuples=False)
|
||||
|
||||
|
||||
def get_nodes(node):
|
||||
def _get_nodes(node, mwm_nodes):
|
||||
if "g" in node:
|
||||
for item in node["g"]:
|
||||
_get_nodes(item, mwm_nodes)
|
||||
else:
|
||||
mwm_nodes.append(node)
|
||||
|
||||
mwm_nodes = []
|
||||
_get_nodes(node, mwm_nodes)
|
||||
return mwm_nodes
|
||||
|
||||
|
||||
def inject_into_leafs(node, cities):
|
||||
nodes = get_nodes(node)
|
||||
with Pool() as pool:
|
||||
proposed_cities_list = pool.map(cities.find, (n["id"] for n in nodes),
|
||||
chunksize=1)
|
||||
for idx, proposed_cities in enumerate(proposed_cities_list):
|
||||
if not proposed_cities:
|
||||
continue
|
||||
node = nodes[idx]
|
||||
best = cities.choose_best_city(proposed_cities)
|
||||
node["top_city_geo_id"] = best["id"]
|
||||
if best["id"] < 0:
|
||||
node["top_city_geo_id"] += (1 << 64)
|
||||
|
||||
|
||||
def inject_promo_cities(countries_json, promo_cities_path, mwm_path, types_path,
|
||||
osm2ft_path):
|
||||
cities = PromoCities(load_cities(promo_cities_path), mwm_path, types_path,
|
||||
osm2ft_path)
|
||||
inject_into_leafs(countries_json, cities)
|
||||
def inject_promo_ids(countries_json, promo_cities_path, promo_countries_path,
|
||||
mwm_path, types_path, osm2ft_path):
|
||||
promo_ids = PromoIds(load_promo_ids(promo_countries_path),
|
||||
load_promo_ids(promo_cities_path), mwm_path,
|
||||
types_path, osm2ft_path)
|
||||
for country in countries_json["g"]:
|
||||
promo_ids.inject_into_country(country)
|
||||
|
|
Loading…
Add table
Reference in a new issue