diff --git a/tools/python/maps_generator/generator/stages_declaration.py b/tools/python/maps_generator/generator/stages_declaration.py index ba04b21fff..50943e904e 100644 --- a/tools/python/maps_generator/generator/stages_declaration.py +++ b/tools/python/maps_generator/generator/stages_declaration.py @@ -359,7 +359,6 @@ class StageLocalAds(Stage): env.paths.localads_path, env.paths.mwm_path, env.paths.mwm_path, - env.paths.types_path, env.mwm_version, multiprocessing.cpu_count(), ) diff --git a/tools/python/mwm/__init__.py b/tools/python/mwm/__init__.py index e1ea1ddf8a..0aa6e12691 100644 --- a/tools/python/mwm/__init__.py +++ b/tools/python/mwm/__init__.py @@ -9,8 +9,8 @@ try: from mwm.mwm_pygen import MwmPygen as Mwm from mwm.mwm_pygen import FeaturePygen as Feature except ImportError: - from mwm.mwm_native import MwmNative as Mwm - from mwm.mwm_native import FeatureNative as Feature + from mwm.mwm_python import MwmPython as Mwm + from mwm.mwm_python import FeaturePython as Feature from mwm.mwm_interface import GeomType from mwm.mwm_interface import MapType @@ -19,7 +19,7 @@ from mwm.mwm_interface import Point from mwm.mwm_interface import Rect from mwm.mwm_interface import RegionDataField from mwm.mwm_interface import Triangle -from mwm.mwm_native import get_crossmwm -from mwm.mwm_native import get_region_info +from mwm.mwm_python import get_region_info from mwm.types import readable_type +from mwm.types import type_index from mwm.utils import EnumAsStrEncoder diff --git a/tools/python/mwm/decode_id.py b/tools/python/mwm/decode_id.py index a2f7f4d019..9763d2daa9 100755 --- a/tools/python/mwm/decode_id.py +++ b/tools/python/mwm/decode_id.py @@ -12,12 +12,13 @@ def decode_id(id): else: m = re.search(r"/(node|way|relation)/(\d+)", id) if m: + type_name = m.group(1) oid = int(m.group(2)) - if m.group(1) == "node": + if type_name == "node": oid |= OsmIdCode.NODE - elif m.group(1) == "way": + elif type_name == "way": oid |= OsmIdCode.WAY - elif m.group(1) == 
"relation": + elif type_name == "relation": oid |= OsmIdCode.RELATION return oid else: diff --git a/tools/python/mwm/find_feature.py b/tools/python/mwm/find_feature.py index d14e091890..af94cd9129 100755 --- a/tools/python/mwm/find_feature.py +++ b/tools/python/mwm/find_feature.py @@ -9,6 +9,7 @@ from mwm import readable_type def find_features(path: str, typ: str, string: str) -> List[Feature]: features = [] + index = int(string) if typ == "id" else None for feature in Mwm(path): found = False if typ == "n": @@ -30,7 +31,7 @@ def find_features(path: str, typ: str, string: str) -> List[Feature]: if string in f.name: found = True break - elif typ == "id" and int(string) == feature.index(): + elif typ == "id" and index == feature.index(): found = True if found: diff --git a/tools/python/mwm/ft2osm.py b/tools/python/mwm/ft2osm.py index bd58f83a27..8331fe5f5a 100755 --- a/tools/python/mwm/ft2osm.py +++ b/tools/python/mwm/ft2osm.py @@ -1,24 +1,27 @@ -from mwm.mwm_native import read_uint -from mwm.mwm_native import read_varuint +from mwm.mwm_python import read_uint +from mwm.mwm_python import read_varuint class OsmIdCode: + # We use here obsolete types. If we change this types to new types, + # we must support it here. See base/geo_object_id.hpp. 
NODE = 0x4000000000000000 WAY = 0x8000000000000000 RELATION = 0xC000000000000000 - RESET = ~(NODE | WAY | RELATION) + FULL_MASK = NODE | WAY | RELATION + RESET = ~FULL_MASK @staticmethod def is_node(code): - return code & OsmIdCode.NODE == OsmIdCode.NODE + return code & OsmIdCode.FULL_MASK == OsmIdCode.NODE @staticmethod def is_way(code): - return code & OsmIdCode.WAY == OsmIdCode.WAY + return code & OsmIdCode.FULL_MASK == OsmIdCode.WAY @staticmethod def is_relation(code): - return code & OsmIdCode.RELATION == OsmIdCode.RELATION + return code & OsmIdCode.FULL_MASK == OsmIdCode.RELATION @staticmethod def get_type(code): @@ -64,6 +67,8 @@ def _read_osm2ft_v1(f, ft2osm, tuples): result = {} for i in range(count): osmid = read_uint(f, 8) + # V1 uses complex ids. Here we skip the second part of the complex id + # to keep the old osm2ft interface. read_uint(f, 8) if tuples: osmid = unpack_osmid(osmid) diff --git a/tools/python/mwm/mwm_feature_compare.py b/tools/python/mwm/mwm_feature_compare.py index 5c810bc4a1..3dde3c29f3 100755 --- a/tools/python/mwm/mwm_feature_compare.py +++ b/tools/python/mwm/mwm_feature_compare.py @@ -4,23 +4,23 @@ import os from mwm.find_feature import find_features -def compare_feature_num(old_mwm, new_mwm, name, threshold): - old_count = len(find_features(old_mwm, "et", name)) - new_count = len(find_features(new_mwm, "et", name)) +def compare_feature_num(old_mwm, new_mwm, type_name, threshold): + old_count = len(find_features(old_mwm, "et", type_name)) + new_count = len(find_features(new_mwm, "et", type_name)) delta = new_count - old_count if delta < 0: p_change = float(abs(delta)) / old_count * 100 if p_change > threshold: print( - f'In "{os.path.basename(new_mwm)}" number of "{name}" ' + f'In "{os.path.basename(new_mwm)}" number of "{type_name}" ' f"decreased by {round(p_change)} ({old_count} → {new_count})" ) return False return True -def compare_mwm(old_mwm_path, new_mwm_path, name, threshold): +def compare_mwm(old_mwm_path, new_mwm_path, 
type_name, threshold): def generate_names(path): return { file_name: os.path.abspath(os.path.join(path, file_name)) @@ -31,8 +31,8 @@ def compare_mwm(old_mwm_path, new_mwm_path, name, threshold): old_mwms = generate_names(old_mwm_path) new_mwms = generate_names(new_mwm_path) - same_mwms = set(new_mwms) & set(new_mwms) - args = ((old_mwms[mwm], new_mwms[mwm], name, threshold) for mwm in same_mwms) + same_mwms = set(new_mwms) & set(old_mwms) + args = ((old_mwms[mwm], new_mwms[mwm], type_name, threshold) for mwm in same_mwms) pool = multiprocessing.Pool() - return all(pool.imap(compare_feature_num, args)) + return all(pool.starmap(compare_feature_num, args)) diff --git a/tools/python/mwm/mwm_interface.py b/tools/python/mwm/mwm_interface.py index 4aadbae239..f0afd10450 100644 --- a/tools/python/mwm/mwm_interface.py +++ b/tools/python/mwm/mwm_interface.py @@ -9,6 +9,7 @@ from typing import Union from mwm.types import readable_type +# See coding/string_utf8_multilang.cpp to synchronize languages. LANGS = ( "default", "en", @@ -39,35 +40,35 @@ LANGS = ( "uk", "ca", "hu", - "hsb", + "reserved (earlier hsb)", "eu", "fa", - "br", + "reserved (earlier br)", "pl", "hy", - "kn", + "reserved (earlier kn)", "sl", "ro", "sq", "am", - "fy", + "reserved (earlier fy)", "cs", - "gd", + "reserved (earlier gd)", "sk", "af", "ja_kana", - "lb", + "reserved (earlier lb)", "pt", "hr", - "fur", + "reserved (earlier fur)", "vi", "tr", "bg", - "eo", + "reserved (earlier eo)", "lt", - "la", + "reserved (earlier la)", "kk", - "gsw", + "reserved (earlier gsw)", "et", "ku", "mn", @@ -310,6 +311,9 @@ class Feature(ABC): def types(self) -> List[int]: pass + def readable_types(self) -> List[str]: + return [readable_type(i) for i in self.types()] + @abstractmethod def metadata(self) -> Dict[MetadataField, str]: pass @@ -371,7 +375,7 @@ class Feature(ABC): f"Feature[\n" f" index: {self.index()}\n" f" readable name: {self.readable_name()}\n" - f" types: {[readable_type(t) for t in self.types()]}\n" + f" types: {self.readable_types()}\n" f" names: 
{self.names()}\n" f" metadata: {self.metadata()}\n" f" geom_type: {self.geom_type()}\n" diff --git a/tools/python/mwm/mwm_pygen.py b/tools/python/mwm/mwm_pygen.py index 4142f51f5c..57d5246be0 100644 --- a/tools/python/mwm/mwm_pygen.py +++ b/tools/python/mwm/mwm_pygen.py @@ -34,7 +34,7 @@ class MwmPygen(mi.Mwm): def sections_info(self) -> Dict[str, mi.SectionInfo]: si = self.mwm.sections_info() return { - k: mi.SectionInfo(name=v.tag, offset=v.offset, size=v.offset) + k: mi.SectionInfo(name=v.tag, offset=v.offset, size=v.size) for k, v in si.items() } diff --git a/tools/python/mwm/mwm_native.py b/tools/python/mwm/mwm_python.py similarity index 82% rename from tools/python/mwm/mwm_native.py rename to tools/python/mwm/mwm_python.py index cf4158204f..c8b1d57c8c 100644 --- a/tools/python/mwm/mwm_native.py +++ b/tools/python/mwm/mwm_python.py @@ -15,12 +15,12 @@ from mwm import mwm_interface as mi logger = logging.getLogger(__name__) -class MwmNative(mi.Mwm): +class MwmPython(mi.Mwm): def __init__(self, filename: str, parse: bool = False): super().__init__(filename) - self.f = open(filename, "r+b") - self.file = mmap.mmap(self.f.fileno(), 0) + self.f = open(filename, "rb") + self.file = mmap.mmap(self.f.fileno(), 0, access=mmap.ACCESS_READ) self.tags = self._read_sections_info() @@ -28,7 +28,6 @@ class MwmNative(mi.Mwm): coord_bits = read_varuint(self.file) self.coord_size = (1 << coord_bits) - 1 self.base_point = mwm_bitwise_split(read_varuint(self.file)) - self.bp = to_4326(self.coord_size, self.base_point) self.bounds_ = read_bounds(self.file, self.coord_size) self.scales = read_uint_array(self.file) self.langs = [mi.LANGS[code] for code in read_uint_array(self.file)] @@ -66,7 +65,7 @@ class MwmNative(mi.Mwm): def __iter__(self) -> Iterable: assert self.has_tag("dat") - return MwmNativeIter(self) + return MwmPythonIter(self) def get_tag(self, name: str) -> mi.SectionInfo: return self.tags[name] @@ -113,25 +112,25 @@ class MwmNative(mi.Mwm): ) -class MwmNativeIter: - 
def __init__(self, mwm: MwmNative): +class MwmPythonIter: + def __init__(self, mwm: MwmPython): self.mwm = mwm self.index = 0 tag_info = self.mwm.get_tag("dat") self.pos = tag_info.offset self.end = self.pos + tag_info.size - def __iter__(self) -> "MwmNativeIter": + def __iter__(self) -> "MwmPythonIter": return self - def __next__(self) -> "FeatureNative": - if self.end < self.pos: + def __next__(self) -> "FeaturePython": + if self.end <= self.pos: raise StopIteration self.mwm.file.seek(self.pos) feature_size = read_varuint(self.mwm.file) self.pos = self.mwm.file.tell() + feature_size - feature = FeatureNative(self.mwm, self.index) + feature = FeaturePython(self.mwm, self.index) self.index += 1 return feature @@ -143,8 +142,8 @@ class GeomType: POINT_EX = 3 << 5 -class FeatureNative(mi.Feature): - def __init__(self, mwm: MwmNative, index: int): +class FeaturePython(mi.Feature): + def __init__(self, mwm: MwmPython, index: int): self.mwm = mwm self._index = index @@ -171,7 +170,18 @@ class FeatureNative(mi.Feature): elif geom_type == GeomType.AREA or geom_type == GeomType.POINT_EX: self._house_number = read_numeric_string(self.mwm.file) - self._geom_type, self._geometry = self._init_geom(geom_type) + self._geom_type = mi.GeomType.undefined + self._geometry = [] + + if geom_type == GeomType.POINT or geom_type == GeomType.POINT_EX: + self._geom_type = mi.GeomType.point + self._geometry = [ + read_coord(self.mwm.file, self.mwm.base_point, self.mwm.coord_size) + ] + elif geom_type == GeomType.LINE: + self._geom_type = mi.GeomType.line + elif geom_type == GeomType.AREA: + self._geom_type = mi.GeomType.area def readable_name(self) -> str: if "default" in self._names: @@ -249,33 +259,19 @@ class FeatureNative(mi.Feature): return self._geom_type def geometry(self) -> Union[List[mi.Point], List[mi.Triangle]]: + if self._geom_type == mi.GeomType.line: + logger.warn("Method geometry() does not have an implementation for line.") + elif self._geom_type == mi.GeomType.area: + 
logger.warn("Method geometry() does not have an implementation for area.") + return self._geometry def parse(self): pass - def _init_geom(self, t): - geom_type = None - geometry = [] - if t == GeomType.POINT or t == GeomType.POINT_EX: - geom_type = mi.GeomType.point - geometry = [ - read_coord(self.mwm.file, self.mwm.base_point, self.mwm.coord_size) - ] - elif t == GeomType.LINE: - geom_type = mi.GeomType.line - logger.warn("Method geometry() does not have an implementation for line.") - elif t == GeomType.AREA: - geom_type = mi.GeomType.area - logger.warn("Method geometry() does not have an implementation for area.") - else: - geom_type = mi.GeomType.undefined - - return geom_type, geometry - def get_region_info(path): - m = MwmNative(path) + m = MwmPython(path) if not m.has_tag("rgninfo"): return {} @@ -284,48 +280,13 @@ def get_region_info(path): sz = read_varuint(m.file) for _ in range(sz): t = read_varuint(m.file) - filed = mi.RegionDataField(t) - region_info[filed] = read_string(m.file) + field = mi.RegionDataField(t) + region_info[field] = read_string(m.file) if t == mi.RegionDataField.languages: - region_info[filed] = [mi.LANGS[ord(x)] for x in region_info[filed]] + region_info[field] = [mi.LANGS[ord(x)] for x in region_info[field]] return region_info -def get_crossmwm(path): - m = MwmNative(path) - if not m.has_tag("chrysler"): - return {} - - m.seek_tag("chrysler") - # Ingoing nodes: array of (nodeId, coord) tuples - incomingCount = read_uint(m.file, 4) - incoming = [] - for _ in range(incomingCount): - nodeId = read_uint(m.file, 4) - point = read_coord(m.file, m.base_point, m.coord_size, False) - incoming.append((nodeId, point)) - # Outgoing nodes: array of (nodeId, coord, outIndex) tuples - # outIndex is an index in neighbours array - outgoingCount = read_uint(m.file, 4) - outgoing = [] - for _ in range(outgoingCount): - nodeId = read_uint(m.file, 4) - point = read_coord(m.file, m.base_point, m.coord_size, False) - outIndex = read_uint(m.file, 1) - 
outgoing.append((nodeId, point, outIndex)) - # Adjacency matrix: costs of routes for each (incoming, outgoing) tuple - matrix = [] - for _ in range(incomingCount): - matrix.append([read_uint(m.file, 4) for _ in range(outgoingCount)]) - # List of mwms to which leads each outgoing node - neighboursCount = read_uint(m.file, 4) - neighbours = [] - for _ in range(neighboursCount): - size = read_uint(m.file, 4) - neighbours.append(m.file.read(size).decode("utf-8")) - return {"in": incoming, "out": outgoing, "matrix": matrix, "neighbours": neighbours} - - def read_point(f, base_point: mi.Point, packed: bool = True) -> mi.Point: """Reads an unsigned point, returns (x, y).""" u = read_varuint(f) if packed else read_uint(f, 8) @@ -414,7 +375,10 @@ def read_multilang(f) -> Dict[str, str]: except TypeError: lng = s[i] & 0x3F if lng < len(mi.LANGS): - langs[mi.LANGS[lng]] = s[i + 1 : n].decode("utf-8") + try: + langs[mi.LANGS[lng]] = s[i + 1: n].decode("utf-8") + except UnicodeDecodeError: + logger.warning("Cannot decode name %r as UTF-8.", s[i + 1: n]) i = n return langs diff --git a/tools/python/mwm/types.py b/tools/python/mwm/types.py index 7653a66871..6f62c41ee7 100644 --- a/tools/python/mwm/types.py +++ b/tools/python/mwm/types.py @@ -1,23 +1,37 @@ import os from typing import Dict +from typing import Tuple -def read_types_mappings() -> Dict[int, str]: +def read_types_mappings() -> Tuple[Dict[int, str], Dict[str, int]]: resources_path = os.environ.get("MWM_RESOURCES_DIR") - types = {} + name_to_index = {} + index_to_name = {} with open(os.path.join(resources_path, "types.txt")) as f: for i, line in enumerate(f): - if line.startswith("*"): - types[i] = line[1:].strip().replace("|", "-") + s = line.strip() + name = s.replace("|", "-") + if s.startswith("*"): + name = name[1:] + name_to_index[name] = i - return types + index_to_name[i] = name + + return index_to_name, name_to_index -TYPES_MAPPING = read_types_mappings() +INDEX_TO_NAME_TYPE_MAPPING, NAME_TO_INDEX_TYPE_MAPPING = read_types_mappings() -def readable_type(type: int) -> str: 
+def readable_type(index: int) -> str: try: - return TYPES_MAPPING[type] + return INDEX_TO_NAME_TYPE_MAPPING[index] except KeyError: return "unknown" + + +def type_index(type_name: str) -> int: + try: + return NAME_TO_INDEX_TYPE_MAPPING[type_name] + except KeyError: + return -1 diff --git a/tools/python/post_generation/inject_promo_ids.py b/tools/python/post_generation/inject_promo_ids.py index f76ba56ad9..056676e18c 100644 --- a/tools/python/post_generation/inject_promo_ids.py +++ b/tools/python/post_generation/inject_promo_ids.py @@ -3,11 +3,10 @@ import logging import os import re import sys - -from collections import defaultdict from multiprocessing import Pool -from mwm import mwm +from mwm import Mwm +from mwm.ft2osm import read_osm2ft class PromoIds(object): @@ -44,20 +43,17 @@ class PromoIds(object): "cities": [] } ft2osm = load_osm2ft(self.osm2ft_path, leaf_id) - with open(os.path.join(self.mwm_path, leaf_id + ".mwm"), "rb") as f: - mwm_file = mwm.MWM(f) - mwm_file.read_header() - mwm_file.read_types(self.types_path) - for feature in mwm_file.iter_features(): - osm_id = ft2osm.get(feature["id"], None) - types = feature["header"]["types"] - if "sponsored-promo_catalog" in types and osm_id in self.cities: - city = self._get_city(osm_id, types) - result["cities"].append(city) + for feature in Mwm(os.path.join(self.mwm_path, leaf_id + ".mwm")): + osm_id = ft2osm.get(feature.index(), None) + types = feature.readable_types() - if "place-country" in types and osm_id in self.countries: - result["countries"].append(osm_id) + if "sponsored-promo_catalog" in types and osm_id in self.cities: + city = self._get_city(osm_id, types) + result["cities"].append(city) + + if "place-country" in types and osm_id in self.countries: + result["countries"].append(osm_id) return result @@ -100,7 +96,7 @@ class PromoIds(object): return max(proposed_cities, key=key_compare) def _score_city_types(self, types): - return max([self._city_type_to_int(t) for t in types]) + return 
max(self._city_type_to_int(t) for t in types) @staticmethod def _city_type_to_int(t): @@ -134,7 +130,7 @@ def load_osm2ft(osm2ft_path, mwm_id): logging.error(f"Cannot find {osm2ft_name}") sys.exit(3) with open(osm2ft_name, "rb") as f: - return mwm.read_osm2ft(f, ft2osm=True, tuples=False) + return read_osm2ft(f, ft2osm=True, tuples=False) def inject_promo_ids(countries_json, promo_cities_path, promo_countries_path, diff --git a/tools/python/post_generation/localads_mwm_to_csv.py b/tools/python/post_generation/localads_mwm_to_csv.py index 79a3aef0f1..828098aa15 100755 --- a/tools/python/post_generation/localads_mwm_to_csv.py +++ b/tools/python/post_generation/localads_mwm_to_csv.py @@ -6,7 +6,9 @@ import sys from multiprocessing import Pool, Queue, Process from zlib import adler32 -from mwm import mwm +from mwm import MetadataField +from mwm import Mwm +from mwm.ft2osm import read_osm2ft HEADERS = { "mapping": "osmid fid mwm_id mwm_version source_type".split(), @@ -27,39 +29,39 @@ def generate_id_from_name_and_version(name, version): return ctypes.c_long((adler32(bytes(name, "utf-8")) << 32) | version).value -def parse_mwm(mwm_name, osm2ft_name, override_version, types_name): +def parse_mwm(mwm_name, osm2ft_name, override_version): region_name = os.path.splitext(os.path.basename(mwm_name))[0] logging.info(region_name) with open(osm2ft_name, "rb") as f: - ft2osm = mwm.read_osm2ft(f, ft2osm=True, tuples=False) - with open(mwm_name, "rb") as f: - mwm_file = mwm.MWM(f) - version = override_version or mwm_file.read_version()["version"] - mwm_id = generate_id_from_name_and_version(region_name, version) - QUEUES["mwm"].put((mwm_id, region_name, version)) - mwm_file.read_header() - mwm_file.read_types(types_name) - for feature in mwm_file.iter_features(metadata=True): - osm_id = ft2osm.get(feature["id"], None) - if osm_id is None: - if "metadata" in feature and "ref:sponsored" in feature["metadata"]: - for t in feature["header"]["types"]: - if t.startswith("sponsored-"): - 
QUEUES["sponsored"].put((feature["metadata"]["ref:sponsored"], - feature["id"], - mwm_id, - version, - SOURCE_TYPES[t[t.find("-") + 1:]])) - break - else: - for t in feature["header"]["types"]: - if t.startswith(GOOD_TYPES): - QUEUES["mapping"].put((ctypes.c_long(osm_id).value, - feature["id"], - mwm_id, - version, - SOURCE_TYPES["osm"])) + ft2osm = read_osm2ft(f, ft2osm=True, tuples=False) + + mwm_file = Mwm(mwm_name) + version = override_version or mwm_file.version().version + mwm_id = generate_id_from_name_and_version(region_name, version) + QUEUES["mwm"].put((mwm_id, region_name, version)) + for feature in mwm_file: + osm_id = ft2osm.get(feature.index(), None) + readable_types = feature.readable_types() + if osm_id is None: + metadata = feature.metadata() + if metadata is not None and MetadataField.sponsored_id in metadata: + for t in readable_types: + if t.startswith("sponsored-"): + QUEUES["sponsored"].put((metadata[MetadataField.sponsored_id], + feature.index(), + mwm_id, + version, + SOURCE_TYPES[t[t.find("-") + 1:]])) break + else: + for t in readable_types: + if t.startswith(GOOD_TYPES): + QUEUES["mapping"].put((ctypes.c_long(osm_id).value, + feature.index(), + mwm_id, + version, + SOURCE_TYPES["osm"])) + break def write_csv(output_dir, qtype): @@ -72,7 +74,7 @@ def write_csv(output_dir, qtype): mapping = QUEUES[qtype].get() -def create_csv(output, mwm_path, osm2ft_path, types, version, threads): +def create_csv(output, mwm_path, osm2ft_path, version, threads): if not os.path.isdir(output): os.mkdir(output) @@ -89,7 +91,7 @@ def create_csv(output, mwm_path, osm2ft_path, types, version, threads): if not os.path.exists(osm2ft_name): logging.error("Cannot find %s", osm2ft_name) sys.exit(2) - parse_mwm_args = (os.path.join(mwm_path, mwm_name), osm2ft_name, int(version), types) + parse_mwm_args = (os.path.join(mwm_path, mwm_name), osm2ft_name, int(version)) pool.apply_async(parse_mwm, parse_mwm_args) pool.close() pool.join()