[countries][promo] osm ids are injected into countries.txt

This commit is contained in:
Arsentiy Milchakov 2019-06-07 16:17:04 +03:00 committed by Maksim Andrianov
parent 7817da0d7f
commit bed9f4f01e
3 changed files with 178 additions and 1 deletions

View file

@ -13,6 +13,7 @@ from descriptions.descriptions_downloader import (check_and_get_checker,
download_from_wikidata_tags)
from filelock import FileLock
from post_generation.hierarchy_to_countries import hierarchy_to_countries
from post_generation.inject_promo_cities import inject_promo_cities
from post_generation.localads_mwm_to_csv import create_csv
from .generator import basic_stages
@ -236,8 +237,11 @@ def stage_countries_txt(env):
env.countries_synonyms_path,
env.hierarchy_path, env.mwm_path,
env.mwm_version)
countries_json = json.loads(countries)
inject_promo_cities(countries_json, env.promo_catalog_cities_path,
env.mwm_path, env.types_path, env.mwm_path)
with open(env.counties_txt_path, "w") as f:
f.write(countries)
json.dump(countries_json, f, indent=1)
@stage

View file

@ -1,8 +1,10 @@
import argparse
import json
import os
import sys
from .hierarchy_to_countries import hierarchy_to_countries as hierarchy_to_countries_
from .inject_promo_cities import inject_promo_cities
from .localads_mwm_to_csv import create_csv
@ -14,6 +16,7 @@ class PostGeneration:
The post_generation commands are:
localads_mwm_to_csv Prepares CSV files for uploading to localads database from mwm files.
hierarchy_to_countries Produces countries.txt from hierarchy.txt.
inject_promo_cities Injects promo cities osm ids into countries.txt
""")
parser.add_argument("command", help="Subcommand to run")
args = parser.parse_args(sys.argv[1:2])
@ -84,5 +87,38 @@ The post_generation commands are:
else:
print(countries_json)
@staticmethod
def inject_promo_cities():
    """Run the ``inject_promo_cities`` subcommand.

    Parses the subcommand options from ``sys.argv[2:]``, loads the countries
    JSON file, injects promo cities osm ids into it and writes the result
    back as JSON with an indent of 1.

    ``--osm2ft`` falls back to the ``--mwm`` directory; ``--countries`` and
    ``--output`` both fall back to ``countries.txt`` inside ``--mwm``.
    """
    parser = argparse.ArgumentParser(
        description="Injects promo cities osm ids into countries.txt")
    parser.add_argument("--mwm", required=True, help="path to mwm files")
    parser.add_argument("--types", required=True,
                        help="path to omim/data/types.txt")
    parser.add_argument("--promo_cities", required=True,
                        help="Path to promo cities file")
    parser.add_argument(
        "--osm2ft",
        help="path to osm2ft files (default is the same as mwm)")
    parser.add_argument(
        "--countries",
        help="path to countries.txt file (default is countries.txt file into mwm directory)")
    parser.add_argument(
        "--output",
        help="Output countries.txt file (default is countries.txt file into mwm directory)")
    args = parser.parse_args(sys.argv[2:])

    # Every optional path defaults to something derived from the mwm directory.
    default_countries = os.path.join(args.mwm, "countries.txt")
    osm2ft_dir = args.osm2ft or args.mwm
    countries_file = args.countries or default_countries
    output_file = args.output or default_countries

    with open(countries_file, "r") as src:
        countries = json.load(src)

    # The module-level function mutates ``countries`` in place.
    inject_promo_cities(countries, args.promo_cities, args.mwm, args.types,
                        osm2ft_dir)

    with open(output_file, "w") as dst:
        json.dump(countries, dst, indent=1)
# Script entry point: instantiate the command dispatcher.
# NOTE(review): presumably the constructor reads the subcommand from sys.argv
# and invokes the matching static method — confirm in PostGeneration.
PostGeneration()

View file

@ -0,0 +1,137 @@
import json
import logging
import os
import sys
from mwm import mwm
class PromoCities(object):
    """Finds promo-catalog city features inside individual mwm files.

    Holds the promo cities mapping (osm id -> number of guides) together with
    the locations needed to read an mwm file, its types and its osm2ft
    mapping.
    """

    def __init__(self, cities, mwm_path, types_path, osm2ft_path):
        """Remember the promo cities and the locations of the input files.

        :param cities: dict mapping a city's osm id to its guides count.
        :param mwm_path: directory containing the ``<leaf_id>.mwm`` files.
        :param types_path: path handed to ``MWM.read_types``.
        :param osm2ft_path: directory containing the ``.mwm.osm2ft`` files.
        """
        self.cities = cities
        self.mwm_path = mwm_path
        self.types_path = types_path
        self.osm2ft_path = osm2ft_path

    def find(self, leaf_id):
        """Collect the promo cities present in the mwm file of one leaf.

        A feature qualifies when it carries the "sponsored-promo_catalog"
        type and its osm id is a key of ``self.cities``.

        Exits the process with status 3 when a qualifying feature has no type
        starting with "place".

        :param leaf_id: mwm name without the ".mwm" extension.
        :return: list of dicts with the keys "id" (osm id), "count_of_guides"
            and "types" (the feature's types that start with "place").
        """
        result = []
        ft2osm = load_osm2ft(self.osm2ft_path, leaf_id)
        with open(os.path.join(self.mwm_path, leaf_id + ".mwm"), "rb") as f:
            mwm_file = mwm.MWM(f)
            mwm_file.read_header()
            mwm_file.read_types(self.types_path)
            for feature in mwm_file.iter_features(metadata=True):
                # Features without an osm2ft entry map to None, which is
                # rejected by the membership test below unless None is a key.
                osm_id = ft2osm.get(feature["id"])
                types = feature["header"]["types"]
                if ("sponsored-promo_catalog" not in types
                        or osm_id not in self.cities):
                    continue

                place_types = [t for t in types if t.startswith("place")]
                if not place_types:
                    logging.error("Incorrect types for sponsored-promo_catalog "
                                  "feature osm_id %s", osm_id)
                    sys.exit(3)

                result.append({
                    "id": osm_id,
                    "count_of_guides": self.cities[osm_id],
                    "types": place_types,
                })
        return result

    @staticmethod
    def choose_best_city(proposed_cities):
        """Pick the city with the most guides, breaking ties by place type.

        Cities are compared by ``(count_of_guides, score_types(types))``;
        when several cities share the highest key, the earliest one in
        ``proposed_cities`` wins.

        :param proposed_cities: non-empty list of dicts as returned by
            :meth:`find`.
        :return: the best city dict.
        :raises ValueError: if ``proposed_cities`` is empty.
        """
        def key_compare(city):
            return city["count_of_guides"], score_types(city["types"])

        # max() needs a single pass and, unlike a debug print, keeps stdout
        # clean for callers that emit their own output there.
        return max(proposed_cities, key=key_compare)
# Known place types ordered from the lowest rank to the highest; the rank of a
# type is its 1-based position in this tuple.
_PLACE_TYPES_BY_RANK = (
    "place-town",
    "place-city",
    "place-city-capital-11",
    "place-city-capital-10",
    "place-city-capital-9",
    "place-city-capital-8",
    "place-city-capital-7",
    "place-city-capital-6",
    "place-city-capital-5",
    "place-city-capital-4",
    "place-city-capital-3",
    "place-city-capital-2",
    "place-city-capital",
)


def place_type_to_int(t):
    """Return the numeric rank of a place type.

    :param t: place type name, e.g. "place-city".
    :return: 1 for "place-town" up to 13 for "place-city-capital";
        0 for any other value.
    """
    for rank, known_type in enumerate(_PLACE_TYPES_BY_RANK, 1):
        if t == known_type:
            return rank
    return 0
def score_types(types):
    """Return the highest place-type rank found among ``types``.

    :param types: iterable of place type names.
    :return: the largest ``place_type_to_int`` value of the given types, or 0
        (the rank of an unknown type) when ``types`` is empty.
    """
    ranks = [place_type_to_int(t) for t in types]
    # Only the maximum is needed, so there is no reason to sort; an empty
    # input is scored like an unknown type instead of raising.
    return max(ranks) if ranks else 0
def load_cities(path):
    """Read the promo cities file into an ``osm id -> guides count`` dict.

    The file must hold a JSON object whose "data" entry is a list of objects,
    each providing "osmid" and "paid_bundles_count". If an osm id occurs more
    than once, the last occurrence wins.

    :param path: path to the promo cities JSON file.
    :return: dict mapping every "osmid" to its "paid_bundles_count".
    """
    with open(path, "r") as source:
        payload = json.load(source)
    return {
        entry["osmid"]: entry["paid_bundles_count"]
        for entry in payload["data"]
    }
def load_osm2ft(osm2ft_path, mwm_id):
    """Load the osm2ft mapping that accompanies one mwm file.

    Reads ``<osm2ft_path>/<mwm_id>.mwm.osm2ft`` through ``mwm.read_osm2ft``
    with ``ft2osm=True`` and ``tuples=False``. Logs an error and exits the
    process with status 3 when that file does not exist.

    :param osm2ft_path: directory containing the osm2ft files.
    :param mwm_id: mwm name without the ".mwm" extension.
    :return: whatever ``mwm.read_osm2ft`` returns for the file.
    """
    mapping_file = os.path.join(osm2ft_path, mwm_id + ".mwm.osm2ft")
    if os.path.exists(mapping_file):
        with open(mapping_file, "rb") as stream:
            return mwm.read_osm2ft(stream, ft2osm=True, tuples=False)

    logging.error("Cannot find %s", mapping_file)
    sys.exit(3)
def inject_into_leafs(node, cities):
    """Write the best promo city id into every leaf below ``node``.

    A node with a "g" key is treated as a group and each item of ``node["g"]``
    is processed recursively. Any other node is a leaf: the promo cities found
    for ``node["id"]`` are ranked and the winner's id is stored under "pc".
    Leaves without promo cities are left untouched.

    :param node: countries tree node (dict), modified in place.
    :param cities: object providing ``find(leaf_id)`` and
        ``choose_best_city(proposed_cities)``.
    """
    if "g" not in node:
        candidates = cities.find(node["id"])
        if candidates:
            winner_id = cities.choose_best_city(candidates)["id"]
            # Negative ids are stored shifted by 2**64, i.e. as the unsigned
            # 64-bit reading of the same value.
            node["pc"] = winner_id + (1 << 64) if winner_id < 0 else winner_id
        return

    for child in node["g"]:
        inject_into_leafs(child, cities)
def inject_promo_cities(countries_json, promo_cities_path, mwm_path, types_path,
                        osm2ft_path):
    """Inject promo cities osm ids into a countries tree, in place.

    :param countries_json: root node of the countries tree (dict); its leaves
        receive a "pc" entry when a promo city is found for them.
    :param promo_cities_path: path to the promo cities JSON file.
    :param mwm_path: directory containing the mwm files.
    :param types_path: path to the types file used to read mwm types.
    :param osm2ft_path: directory containing the osm2ft files.
    """
    guides_by_osm_id = load_cities(promo_cities_path)
    promo_cities = PromoCities(guides_by_osm_id, mwm_path, types_path,
                               osm2ft_path)
    inject_into_leafs(countries_json, promo_cities)