From bed9f4f01efd7adaff19f0a929bfc767dc6731a8 Mon Sep 17 00:00:00 2001 From: Arsentiy Milchakov Date: Fri, 7 Jun 2019 16:17:04 +0300 Subject: [PATCH] [countries][promo] osm ids are injected into countries.txt --- tools/python/maps_generator/maps_generator.py | 6 +- tools/python/post_generation/__main__.py | 36 +++++ .../post_generation/inject_promo_cities.py | 137 ++++++++++++++++++ 3 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 tools/python/post_generation/inject_promo_cities.py diff --git a/tools/python/maps_generator/maps_generator.py b/tools/python/maps_generator/maps_generator.py index 981e776449..4a57f7e8be 100644 --- a/tools/python/maps_generator/maps_generator.py +++ b/tools/python/maps_generator/maps_generator.py @@ -13,6 +13,7 @@ from descriptions.descriptions_downloader import (check_and_get_checker, download_from_wikidata_tags) from filelock import FileLock from post_generation.hierarchy_to_countries import hierarchy_to_countries +from post_generation.inject_promo_cities import inject_promo_cities from post_generation.localads_mwm_to_csv import create_csv from .generator import basic_stages @@ -236,8 +237,11 @@ def stage_countries_txt(env): env.countries_synonyms_path, env.hierarchy_path, env.mwm_path, env.mwm_version) + countries_json = json.loads(countries) + inject_promo_cities(countries_json, env.promo_catalog_cities_path, + env.mwm_path, env.types_path, env.mwm_path) with open(env.counties_txt_path, "w") as f: - f.write(countries) + json.dump(countries_json, f, indent=1) @stage diff --git a/tools/python/post_generation/__main__.py b/tools/python/post_generation/__main__.py index deb526186c..b8645d75ee 100644 --- a/tools/python/post_generation/__main__.py +++ b/tools/python/post_generation/__main__.py @@ -1,8 +1,10 @@ import argparse +import json import os import sys from .hierarchy_to_countries import hierarchy_to_countries as hierarchy_to_countries_ +from .inject_promo_cities import inject_promo_cities from 
.localads_mwm_to_csv import create_csv @@ -14,6 +16,7 @@ class PostGeneration: The post_generation commands are: localads_mwm_to_csv Prepares CSV files for uploading to localads database from mwm files. hierarchy_to_countries Produces countries.txt from hierarchy.txt. + inject_promo_cities Injects promo cities osm ids into countries.txt """) parser.add_argument("command", help="Subcommand to run") args = parser.parse_args(sys.argv[1:2]) @@ -84,5 +87,38 @@ The post_generation commands are: else: print(countries_json) + @staticmethod + def inject_promo_cities(): + parser = argparse.ArgumentParser( + description="Injects promo cities osm ids into countries.txt") + parser.add_argument("--mwm", required=True, help="path to mwm files") + parser.add_argument("--types", required=True, + help="path to omim/data/types.txt") + parser.add_argument("--promo_cities", required=True, + help="Path to promo cities file") + parser.add_argument("--osm2ft", + help="path to osm2ft files (default is the same as mwm)") + parser.add_argument("--countries", + help="path to countries.txt file (default is countries.txt file into mwm directory)") + parser.add_argument("--output", + help="Output countries.txt file (default is countries.txt file into mwm directory)") + args = parser.parse_args(sys.argv[2:]) + + if not args.osm2ft: + args.osm2ft = args.mwm + if not args.countries: + args.countries = os.path.join(args.mwm, "countries.txt") + if not args.output: + args.output = os.path.join(args.mwm, "countries.txt") + + with open(args.countries, "r") as f: + countries = json.load(f) + + inject_promo_cities(countries, args.promo_cities, args.mwm, args.types, + args.osm2ft) + + with open(args.output, "w") as f: + json.dump(countries, f, indent=1) + PostGeneration() diff --git a/tools/python/post_generation/inject_promo_cities.py b/tools/python/post_generation/inject_promo_cities.py new file mode 100644 index 0000000000..c4d37e2b01 --- /dev/null +++ 
"""Inject promo-catalog city ids into the ``countries.txt`` tree.

Source path: tools/python/post_generation/inject_promo_cities.py

For every leaf of the countries tree the matching ``<leaf id>.mwm`` file is
scanned for ``sponsored-promo_catalog`` features whose OSM id is listed in the
promo cities file; the best-ranked candidate is stored on the leaf under the
``"pc"`` key.
"""
import io
import json
import logging
import os
import sys

logger = logging.getLogger(__name__)

# Feature type that marks a promo-catalog city inside an mwm file.
_PROMO_CATALOG_TYPE = "sponsored-promo_catalog"

# Rank of every known place type; a higher value wins when two candidate
# cities have the same number of guides.  Unknown types rank as 0.
_PLACE_TYPE_RANKS = {
    "place-town": 1,
    "place-city": 2,
    "place-city-capital-11": 3,
    "place-city-capital-10": 4,
    "place-city-capital-9": 5,
    "place-city-capital-8": 6,
    "place-city-capital-7": 7,
    "place-city-capital-6": 8,
    "place-city-capital-5": 9,
    "place-city-capital-4": 10,
    "place-city-capital-3": 11,
    "place-city-capital-2": 12,
    "place-city-capital": 13,
}


def _load_mwm_reader():
    """Return the project's ``mwm`` reader module, imported on first use.

    The import is deferred so that the pure helpers of this module (ranking,
    promo file loading, tree traversal) stay importable where the mwm reader
    package is not on the path.
    """
    from mwm import mwm
    return mwm


class PromoCities(object):
    """Looks up promo-catalog cities inside per-leaf mwm files.

    :param cities: mapping ``osm id -> number of paid guides`` (see
        :func:`load_cities`).
    :param mwm_path: directory that contains the ``<leaf id>.mwm`` files.
    :param types_path: path handed to the mwm reader's ``read_types``.
    :param osm2ft_path: directory that contains ``<leaf id>.mwm.osm2ft`` files.
    """

    def __init__(self, cities, mwm_path, types_path, osm2ft_path):
        self.cities = cities
        self.mwm_path = mwm_path
        self.types_path = types_path
        self.osm2ft_path = osm2ft_path

    def find(self, leaf_id):
        """Return every promo city found in the mwm file of ``leaf_id``.

        Each entry is a dict with the keys ``"id"`` (OSM id),
        ``"count_of_guides"`` and ``"types"`` (the feature's ``place*`` types).

        Terminates the process with exit code 3 when the osm2ft file is
        missing or when a promo feature carries no ``place*`` type.
        """
        mwm = _load_mwm_reader()
        # Loaded first: a missing osm2ft file aborts before the mwm is opened.
        ft2osm = load_osm2ft(self.osm2ft_path, leaf_id)
        found = []
        with open(os.path.join(self.mwm_path, leaf_id + ".mwm"), "rb") as f:
            mwm_file = mwm.MWM(f)
            mwm_file.read_header()
            mwm_file.read_types(self.types_path)
            for feature in mwm_file.iter_features(metadata=True):
                types = feature["header"]["types"]
                if _PROMO_CATALOG_TYPE not in types:
                    continue

                # Features without an osm id map to None, which is never a
                # key of ``self.cities`` and is therefore skipped as well.
                osm_id = ft2osm.get(feature["id"])
                if osm_id not in self.cities:
                    continue

                place_types = [t for t in types if t.startswith("place")]
                if not place_types:
                    logger.error("Incorrect types for sponsored-promo_catalog "
                                 "feature osm_id %s", osm_id)
                    sys.exit(3)

                found.append({
                    "id": osm_id,
                    "count_of_guides": self.cities[osm_id],
                    "types": place_types,
                })

        return found

    @staticmethod
    def choose_best_city(proposed_cities):
        """Return the candidate with the most guides.

        Ties are broken by the best place type rank (see
        :func:`score_types`); if candidates are still equal the earliest one
        in ``proposed_cities`` wins.

        :raises IndexError: if ``proposed_cities`` is an empty sequence.
        """
        if not proposed_cities:
            raise IndexError("no candidate promo cities to choose from")

        logger.debug("Promo city candidates: %s", proposed_cities)
        return max(
            proposed_cities,
            key=lambda city: (city["count_of_guides"],
                              score_types(city["types"])),
        )


def place_type_to_int(t):
    """Return the rank of place type ``t``; 0 for an unknown type."""
    return _PLACE_TYPE_RANKS.get(t, 0)


def score_types(types):
    """Return the highest rank among ``types``; 0 when ``types`` is empty."""
    return max((place_type_to_int(t) for t in types), default=0)


def load_cities(path):
    """Read the promo cities JSON file at ``path``.

    The file must hold an object whose ``"data"`` list contains objects with
    ``"osmid"`` and ``"paid_bundles_count"`` fields.

    :return: dict mapping ``osmid`` to ``paid_bundles_count``; when an id
        occurs more than once the last entry wins.
    """
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with io.open(path, "r", encoding="utf-8") as f:
        payload = json.load(f)

    return {city["osmid"]: city["paid_bundles_count"]
            for city in payload["data"]}


def load_osm2ft(osm2ft_path, mwm_id):
    """Load the feature-id -> osm-id mapping for ``mwm_id``.

    Reads ``<osm2ft_path>/<mwm_id>.mwm.osm2ft`` through the mwm reader's
    ``read_osm2ft(f, ft2osm=True, tuples=False)``.  Terminates the process
    with exit code 3 when the file does not exist.
    """
    osm2ft_name = os.path.join(osm2ft_path, mwm_id + ".mwm.osm2ft")
    if not os.path.exists(osm2ft_name):
        logger.error("Cannot find %s", osm2ft_name)
        sys.exit(3)

    mwm = _load_mwm_reader()
    with open(osm2ft_name, "rb") as f:
        return mwm.read_osm2ft(f, ft2osm=True, tuples=False)


def inject_into_leafs(node, cities):
    """Store the best promo city id on every leaf below ``node`` (in place).

    A node with a ``"g"`` key is treated as a group and its children in
    ``node["g"]`` are visited recursively; any other node is a leaf that is
    looked up by ``node["id"]``.  Leaves without promo cities are left
    untouched.

    :param node: dict node of the countries tree.
    :param cities: object providing ``find(leaf_id)`` and
        ``choose_best_city(candidates)``, e.g. :class:`PromoCities`.
    """
    if "g" in node:
        for child in node["g"]:
            inject_into_leafs(child, cities)
        return

    proposed_cities = cities.find(node["id"])
    if not proposed_cities:
        return

    osm_id = cities.choose_best_city(proposed_cities)["id"]
    # Negative ids are shifted by 2**64 so that the stored value is always
    # non-negative (presumably the osm2ft reader yields signed 64-bit values
    # and countries.txt expects the unsigned form -- confirm).
    node["pc"] = osm_id + (1 << 64) if osm_id < 0 else osm_id


def inject_promo_cities(countries_json, promo_cities_path, mwm_path, types_path,
                        osm2ft_path):
    """Add ``"pc"`` promo city ids to the leaves of ``countries_json``.

    :param countries_json: parsed countries.txt tree; modified in place.
    :param promo_cities_path: path to the promo cities JSON file.
    :param mwm_path: directory with the mwm files.
    :param types_path: path to the types file used by the mwm reader.
    :param osm2ft_path: directory with the ``.mwm.osm2ft`` files.
    """
    cities = PromoCities(load_cities(promo_cities_path), mwm_path, types_path,
                         osm2ft_path)
    inject_into_leafs(countries_json, cities)