# mwm.py/mwm/mwm.py
# Ilya Zverev 345da7d16d 0.10.1
# 2018-06-20 17:11:53 +03:00

# 275 lines
# 10 KiB
# Python

# MWM Reader Module
from .mwmfile import MWMFile
from datetime import datetime
import os
# Unprocessed sections: geomN, trgN, idx, sdx (search index),
# addr (search address), offs (feature offsets - succinct)
# TODO:
# - Predictive reading of LineStrings
# - Find why polygon geometry is incorrect in iter_features()
class MWM(MWMFile):
    """Reader for the individual sections of an MWM file.

    The low-level primitives used here (tag lookup and seeking, varint,
    string and coordinate readers, the ``languages`` table and the
    ``GeomType`` constants) are not defined in this class; they are expected
    to come from MWMFile.
    """

    # Names of metadata fields, indexed by the numeric key stored in the
    # file (see indexer/feature_meta.hpp). Keys beyond the end of the list
    # are reported as their decimal string.
    metadata = ["0",
                "cuisine", "open_hours", "phone_number", "fax_number", "stars",
                "operator", "url", "website", "internet", "ele",
                "turn_lanes", "turn_lanes_forward", "turn_lanes_backward", "email", "postcode",
                "wikipedia", "maxspeed", "flats", "height", "min_height",
                "denomination", "building_levels", "test_id", "ref:sponsored", "price_rate",
                "rating", "banner_url", "level"]

    # Names of region info fields, indexed the same way as ``metadata``.
    regiondata = ["languages", "driving", "timezone", "addr_fmt", "phone_fmt",
                  "postcode_fmt", "holidays", "housenames"]

    def __init__(self, f):
        """Initialise the reader and load tags, header and type names.

        ``f`` is handed to ``MWMFile.__init__`` unchanged (presumably the
        open map file -- confirm against MWMFile). Type names are looked up
        in a ``types.txt`` file located next to this module; when that file
        is missing ``type_mapping`` stays empty.
        """
        MWMFile.__init__(self, f)
        self.read_tags()
        self.read_header()
        self.type_mapping = []
        self.read_types(os.path.join(
            os.getcwd(), os.path.dirname(__file__), 'types.txt'))

    def read_types(self, filename):
        """Load feature type names from a text file into ``type_mapping``.

        Every non-blank line becomes one entry, stripped of surrounding
        whitespace and with each ``|`` replaced by ``-``. A missing file is
        silently ignored and leaves ``type_mapping`` untouched.
        """
        if not os.path.exists(filename):
            return
        self.type_mapping = []
        with open(filename, 'r') as ft:
            for line in ft:
                name = line.strip()
                if name:
                    self.type_mapping.append(name.replace('|', '-'))

    def read_version(self):
        """Reads 'version' section.

        Returns a dict with three keys: ``fmt`` (the stored format number
        plus one), ``version`` (an integer in YYMMDD form) and ``date`` (the
        matching ``datetime``).
        """
        self.seek_tag('version')
        self.f.read(4)  # skip prolog
        fmt = self.read_varuint() + 1
        version = self.read_varuint()
        if version < 161231:
            # Small values are already a YYMMDD number.
            vdate = datetime(2000 + version // 10000,
                             version // 100 % 100,
                             version % 100)
        else:
            # Larger values are treated as a Unix timestamp.
            # NOTE(review): fromtimestamp() converts using the local time
            # zone, so the resulting day can differ between machines.
            vdate = datetime.fromtimestamp(version)
            version = int(vdate.strftime('%y%m%d'))
        return {'fmt': fmt, 'version': version, 'date': vdate}

    def read_header(self):
        """Reads 'header' section.

        Sets ``coord_size`` (and ``base_point`` when the section exists) as a
        side effect. Returns a dict with the keys ``basePoint``, ``bounds``,
        ``scales``, ``langs`` and ``mapType``, or an empty dict when the file
        has no 'header' tag.
        """
        if not self.has_tag('header'):
            # Stub for routing files
            self.coord_size = (1 << 30) - 1
            return {}
        self.seek_tag('header')
        result = {}
        coord_bits = self.read_varuint()
        self.coord_size = (1 << coord_bits) - 1
        self.base_point = self.mwm_bitwise_split(self.read_varuint())
        result['basePoint'] = self.to_4326(self.base_point)
        result['bounds'] = self.read_bounds()
        result['scales'] = self.read_uint_array()
        # Translate language codes into names. The bound check has to be on
        # the code itself, not on its position in the array; codes that are
        # not in the table are kept as plain numbers.
        known = len(self.languages)
        result['langs'] = [self.languages[code] if code < known else code
                           for code in self.read_uint_array()]
        map_type = self.read_varint()
        map_type_names = {0: 'world', 1: 'worldcoasts', 2: 'country'}
        if map_type in map_type_names:
            result['mapType'] = map_type_names[map_type]
        else:
            result['mapType'] = 'unknown: {0}'.format(map_type)
        return result

    # COMPLEX READERS

    def read_region_info(self):
        """Reads 'rgninfo' section.

        Returns a dict of field name -> string value. Field keys not present
        in ``regiondata`` are reported as their decimal string. The
        ``languages`` value is expanded into a list with one ``languages``
        table entry per character of the stored string. Returns an empty
        dict when the section is absent.
        """
        if not self.has_tag('rgninfo'):
            return {}
        fields = {}
        self.seek_tag('rgninfo')
        sz = self.read_varuint()
        for _ in range(sz):
            t = self.read_varuint()
            t = self.regiondata[t] if t < len(self.regiondata) else str(t)
            fields[t] = self.read_string()
            if t == 'languages':
                fields[t] = [self.languages[ord(x)] for x in fields[t]]
        return fields

    def read_metadata(self):
        """Reads 'meta' and 'metaidx' sections.

        Returns a dict mapping feature id -> dict of metadata fields. Only
        features listed in 'metaidx' whose offset coincides with the start
        of a non-empty record in 'meta' are included. Returns an empty dict
        when there is no 'metaidx' section.
        """
        if not self.has_tag('metaidx'):
            return {}
        # Metadata format is different since v8
        fmt = self.read_version()['fmt']

        # First, read metaidx, to match featureId <-> metadata
        self.seek_tag('metaidx')
        ftid_meta = []
        while self.inside_tag('metaidx'):
            ftid = self.read_uint(4)
            moffs = self.read_uint(4)
            ftid_meta.append((moffs, ftid))
        # Sort by offset so the index can be walked in step with 'meta'.
        ftid_meta.sort(key=lambda x: x[0])
        ftpos = 0

        # Now read metadata
        self.seek_tag('meta')
        metadatar = {}
        while self.inside_tag('meta'):
            # Offset of this record, taken before any of it is consumed.
            tag_pos = self.tag_offset('meta')
            fields = {}
            if fmt >= 8:
                # Count-prefixed list of (varuint key, string value) pairs.
                sz = self.read_varuint()
                for _ in range(sz):
                    t = self.read_varuint()
                    t = self.metadata[t] if t < len(self.metadata) else str(t)
                    fields[t] = self.read_string()
                    if t == 'fuel':
                        fields[t] = fields[t].split('\x01')
            else:
                # One-byte key whose high bit flags the last pair, followed
                # by a one-byte length and the UTF-8 encoded value.
                while True:
                    t = self.read_uint(1)
                    is_last = (t & 0x80) > 0
                    t = t & 0x7f
                    t = self.metadata[t] if t < len(self.metadata) else str(t)
                    length = self.read_uint(1)
                    fields[t] = self.f.read(length).decode('utf-8')
                    if is_last:
                        break

            if fields:
                # Skip index entries that point before this record.
                while ftpos < len(ftid_meta) and ftid_meta[ftpos][0] < tag_pos:
                    ftpos += 1
                if ftpos < len(ftid_meta) and ftid_meta[ftpos][0] == tag_pos:
                    metadatar[ftid_meta[ftpos][1]] = fields
        return metadatar

    def read_crossmwm(self):
        """Reads 'chrysler' section (cross-mwm routing table).

        Returns a dict with the keys ``in`` (list of (nodeId, point)),
        ``out`` (list of (nodeId, point, outIndex)), ``matrix`` (one row per
        incoming node, one value per outgoing node) and ``neighbours`` (list
        of strings). Returns an empty dict when the section is absent.
        """
        if not self.has_tag('chrysler'):
            return {}
        self.seek_tag('chrysler')

        # Ingoing nodes: array of (nodeId, coord) tuples
        incoming_count = self.read_uint(4)
        incoming = []
        for _ in range(incoming_count):
            node_id = self.read_uint(4)
            point = self.read_coord(False)
            incoming.append((node_id, point))

        # Outgoing nodes: array of (nodeId, coord, outIndex) tuples
        # outIndex is an index in neighbours array
        outgoing_count = self.read_uint(4)
        outgoing = []
        for _ in range(outgoing_count):
            node_id = self.read_uint(4)
            point = self.read_coord(False)
            out_index = self.read_uint(1)
            outgoing.append((node_id, point, out_index))

        # Adjacency matrix: costs of routes for each (incoming, outgoing) tuple
        matrix = [[self.read_uint(4) for _ in range(outgoing_count)]
                  for _ in range(incoming_count)]

        # List of mwms to which leads each outgoing node
        neighbours_count = self.read_uint(4)
        neighbours = []
        for _ in range(neighbours_count):
            size = self.read_uint(4)
            neighbours.append(self.f.read(size).decode('utf-8'))

        return {'in': incoming, 'out': outgoing,
                'matrix': matrix, 'neighbours': neighbours}

    def iter_features(self, metadata=False):
        """Reads 'dat' section.

        Generator yielding one dict per feature with the keys ``id``
        (zero-based sequence number), ``size`` (declared length of the
        record), ``header``, ``geometry`` and, when ``metadata`` is true and
        an entry exists for the feature, ``metadata``. Coordinates are only
        decoded for plain points; other geometries carry just their type.

        Yields nothing when the file has no 'dat' tag. Raises ``Exception``
        when parsing a feature consumed more bytes than its declared size.
        """
        if not self.has_tag('dat'):
            return
        # TODO: read 'offs'?
        md = {}
        if metadata:
            md = self.read_metadata()
        self.seek_tag('dat')
        ftid = -1
        while self.inside_tag('dat'):
            ftid += 1
            feature = {'id': ftid}
            feature_size = self.read_varuint()
            # Every record is length-prefixed, so the start of the next one
            # is known no matter how much of this one gets parsed.
            next_feature = self.f.tell() + feature_size
            feature['size'] = feature_size

            # Header
            header = {}
            header_bits = self.read_uint(1)
            types_count = (header_bits & 0x07) + 1
            has_name = (header_bits & 0x08) > 0
            has_layer = (header_bits & 0x10) > 0
            has_addinfo = (header_bits & 0x80) > 0
            geom_type = header_bits & 0x60
            types = []
            for _ in range(types_count):
                type_id = self.read_varuint()
                if type_id < len(self.type_mapping):
                    types.append(self.type_mapping[type_id])
                else:
                    # So the numbers match with mapcss-mapping.csv
                    types.append(str(type_id + 1))
            header['types'] = types
            if has_name:
                header['name'] = self.read_multilang()
            if has_layer:
                header['layer'] = self.read_uint(1)
            if has_addinfo:
                # The meaning of the extra field depends on the geometry type.
                if geom_type == MWM.GeomType.POINT:
                    header['rank'] = self.read_uint(1)
                elif geom_type == MWM.GeomType.LINE:
                    header['ref'] = self.read_string()
                elif geom_type == MWM.GeomType.AREA or geom_type == MWM.GeomType.POINT_EX:
                    header['house'] = self.read_numeric_string()
            feature['header'] = header

            # Metadata
            if ftid in md:
                feature['metadata'] = md[ftid]

            # Geometry
            geometry = {}
            if geom_type == MWM.GeomType.POINT or geom_type == MWM.GeomType.POINT_EX:
                geometry['type'] = 'Point'
            elif geom_type == MWM.GeomType.LINE:
                geometry['type'] = 'LineString'
            elif geom_type == MWM.GeomType.AREA:
                geometry['type'] = 'Polygon'
            if geom_type == MWM.GeomType.POINT:
                geometry['coordinates'] = list(self.read_coord())

            # (flipping table emoticon)
            feature['geometry'] = geometry
            # Deliberately disabled: this never runs. It is kept as a
            # starting point for decoding line/area geometry (see the TODO
            # list at the top of the module).
            if False:
                if geom_type != MWM.GeomType.POINT:
                    polygon_count = self.read_varuint()
                    polygons = []
                    for i in range(polygon_count):
                        count = self.read_varuint()
                        buf = self.f.read(count)
                        # TODO: decode
                    geometry['coordinates'] = polygons
                    feature['coastCell'] = self.read_varint()

                # OSM IDs
                count = self.read_varuint()
                osmids = []
                for i in range(count):
                    osmid = self.read_osmid()
                    osmids.append('{0}{1}'.format(osmid[0], osmid[1]))
                feature['osmIds'] = osmids

            if self.f.tell() > next_feature:
                raise Exception('Feature parsing error, read too much')
            yield feature
            # Skip whatever part of the record was left unparsed.
            self.f.seek(next_feature)