Initial commit, transferred from the omim repository
This commit is contained in:
commit
49b5652213
10 changed files with 865 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
build/
|
||||
__pycache__/
|
||||
*.pyc
|
175
LICENSE
Normal file
175
LICENSE
Normal file
|
@ -0,0 +1,175 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
31
README.md
Normal file
31
README.md
Normal file
|
@ -0,0 +1,31 @@
|
|||
# mwm.py
|
||||
|
||||
This is a Python library for reading the contents of MAPS.ME mwm files. Not everything
|
||||
is supported, but you can at least get all the features and their attributes.
|
||||
We at MAPS.ME use this script to do analytics and maintenance.
|
||||
|
||||
## Installation
|
||||
|
||||
pip install mwm
|
||||
|
||||
## Usage
|
||||
|
||||
Just add `import mwm` to your script, and read an mwm file with:
|
||||
|
||||
```python
|
||||
with open('file.mwm', 'rb') as f:
|
||||
data = mwm.MWM(f)
|
||||
```
|
||||
|
||||
## Tools
|
||||
|
||||
There are some useful tools in the `tools` directory, which can serve as
|
||||
the library usage examples:
|
||||
|
||||
* `dump_mwm.py` prints the header and some statistics on an mwm file.
|
||||
* `find_feature.py` can find features inside an mwm by type or name.
|
||||
* `ft2osm.py` converts a feature id to an OSM website link.
|
||||
|
||||
## License
|
||||
|
||||
Written by Ilya Zverev for MAPS.ME. Published under the Apache License 2.0.
|
2
mwm/__init__.py
Normal file
2
mwm/__init__.py
Normal file
|
@ -0,0 +1,2 @@
|
|||
from .mwm import MWM, Osm2Ft
|
||||
from .mwmfile import MWMFile
|
305
mwm/mwm.py
Normal file
305
mwm/mwm.py
Normal file
|
@ -0,0 +1,305 @@
|
|||
# MWM Reader Module
|
||||
from .mwmfile import MWMFile
|
||||
from datetime import datetime
|
||||
|
||||
# Unprocessed sections: geomN, trgN, idx, sdx (search index), addr (search address), offs (feature offsets - succinct)
|
||||
# Routing sections: mercedes (matrix), daewoo (edge data), infinity (edge id), skoda (shortcuts), chrysler (cross context), ftseg, node2ftseg
|
||||
# (these mostly are succinct structures, except chrysler and node2ftseg, so no use trying to load them here)
|
||||
|
||||
# TODO:
|
||||
# - Predictive reading of LineStrings
|
||||
# - Find why polygon geometry is incorrect in iter_features()
|
||||
# - Find feature ids in the 'dat' section, or find a way to read the 'offs' section
|
||||
|
||||
|
||||
class MWM(MWMFile):
    """Reader for the sections of a MAPS.ME ``.mwm`` file.

    The constructor reads the tag (section) table and the 'header' section
    from the supplied binary file object. The remaining ``read_*`` methods
    and ``iter_features()`` seek to and parse individual sections on demand.
    """

    # indexer/feature_meta.hpp
    metadata = ["0",
                "cuisine", "open_hours", "phone_number", "fax_number", "stars",
                "operator", "url", "website", "internet", "ele",
                "turn_lanes", "turn_lanes_forward", "turn_lanes_backward", "email", "postcode",
                "wikipedia", "maxspeed", "flats", "height", "min_height",
                "denomination", "building_levels", "test_id", "ref:sponsored", "price_rate",
                "rating", "fuel", "routes"]

    regiondata = ["languages", "driving", "timezone", "addr_fmt", "phone_fmt", "postcode_fmt", "holidays", "housenames"]

    def __init__(self, f):
        """Read the tag table and header from the binary file object ``f``."""
        MWMFile.__init__(self, f)
        self.read_tags()
        self.read_header()
        # Filled by read_types(); feature types are printed as numbers until then.
        self.type_mapping = []

    def read_types(self, filename):
        """Append type names from a text file with one type per line.

        Blank lines are skipped and every '|' in a name is replaced by '-'.
        """
        with open(filename, 'r') as ft:
            for line in ft:
                name = line.strip()
                if name:
                    self.type_mapping.append(name.replace('|', '-'))

    def read_version(self):
        """Reads 'version' section.

        Returns a dict with 'fmt' (stored format number plus one) and
        'version' (a datetime).
        """
        self.seek_tag('version')
        self.f.read(4)  # skip prolog
        fmt = self.read_varuint() + 1
        version = self.read_varuint()
        if version < 161231:
            # Small values are treated as a YYMMDD date. Integer division keeps
            # the arithmetic exact on Python 3, where "/" yields a float.
            version = datetime(2000 + version // 10000, (version // 100) % 100, version % 100)
        else:
            # Anything else is treated as a UNIX timestamp (local time).
            version = datetime.fromtimestamp(version)
        return {'fmt': fmt, 'version': version}

    def read_header(self):
        """Reads 'header' section.

        Sets ``self.coord_size`` (and ``self.base_point`` when the section is
        present) and returns a dict with 'basePoint', 'bounds', 'scales',
        'langs' and 'mapType'. Returns an empty dict when there is no header.
        """
        if not self.has_tag('header'):
            # Stub for routing files
            self.coord_size = (1 << 30) - 1
            return {}
        self.seek_tag('header')
        result = {}
        coord_bits = self.read_varuint()
        self.coord_size = (1 << coord_bits) - 1
        self.base_point = self.mwm_bitwise_split(self.read_varuint())
        result['basePoint'] = self.to_4326(self.base_point)
        result['bounds'] = self.read_bounds()
        result['scales'] = self.read_uint_array()
        langs = self.read_uint_array()
        for i, code in enumerate(langs):
            # Check the language code itself (not its position in the array)
            # against the table; unknown codes stay numeric.
            if code < len(self.languages):
                langs[i] = self.languages[code]
        result['langs'] = langs
        map_type = self.read_varint()
        map_types = {0: 'world', 1: 'worldcoasts', 2: 'country'}
        if map_type in map_types:
            result['mapType'] = map_types[map_type]
        else:
            result['mapType'] = 'unknown: {0}'.format(map_type)
        return result

    # COMPLEX READERS

    def read_region_info(self):
        """Reads 'rgninfo' section into a dict; empty dict when it is absent.

        Keys are names from ``regiondata`` (or the numeric id as a string).
        The 'languages' value is converted to a list of language names.
        """
        if not self.has_tag('rgninfo'):
            return {}
        fields = {}
        self.seek_tag('rgninfo')
        sz = self.read_varuint()
        for _ in range(sz):
            t = self.read_varuint()
            t = self.regiondata[t] if t < len(self.regiondata) else str(t)
            fields[t] = self.read_string()
            if t == 'languages':
                # Each character of the string is an index into the language table.
                fields[t] = [self.languages[ord(x)] for x in fields[t]]
        return fields

    def read_metadata(self):
        """Reads 'meta' and 'metaidx' sections.

        Returns a dict mapping feature id to a dict of metadata fields, or an
        empty dict when there is no 'metaidx' section.
        """
        if not self.has_tag('metaidx'):
            return {}
        # Metadata format is different since v8
        fmt = self.read_version()['fmt']
        # First, read metaidx, to match featureId <-> metadata
        self.seek_tag('metaidx')
        ftid_meta = []
        while self.inside_tag('metaidx'):
            ftid = self.read_uint(4)
            moffs = self.read_uint(4)
            ftid_meta.append((moffs, ftid))
        # Sort ftid_meta array by offset, so it can be walked in step with 'meta'
        ftid_meta.sort(key=lambda x: x[0])
        ftpos = 0
        # Now read metadata
        self.seek_tag('meta')
        metadatar = {}
        while self.inside_tag('meta'):
            # Offset of this record inside 'meta'; matched against metaidx offsets below.
            tag_pos = self.tag_offset('meta')
            fields = {}
            if fmt >= 8:
                sz = self.read_varuint()
                for _ in range(sz):
                    t = self.read_varuint()
                    t = self.metadata[t] if t < len(self.metadata) else str(t)
                    fields[t] = self.read_string()
                    if t == 'fuel':
                        fields[t] = fields[t].split('\x01')
            else:
                while True:
                    t = self.read_uint(1)
                    # The high bit marks the last field of the record.
                    is_last = (t & 0x80) > 0
                    t = t & 0x7f
                    t = self.metadata[t] if t < len(self.metadata) else str(t)
                    length = self.read_uint(1)
                    fields[t] = self.f.read(length).decode('utf-8')
                    if is_last:
                        break

            if fields:
                while ftpos < len(ftid_meta) and ftid_meta[ftpos][0] < tag_pos:
                    ftpos += 1
                if ftpos < len(ftid_meta) and ftid_meta[ftpos][0] == tag_pos:
                    metadatar[ftid_meta[ftpos][1]] = fields
        return metadatar

    def read_crossmwm(self):
        """Reads 'chrysler' section (cross-mwm routing table).

        Returns a dict with keys 'in', 'out', 'matrix' and 'neighbours', or an
        empty dict when the section is absent.
        """
        if not self.has_tag('chrysler'):
            return {}
        self.seek_tag('chrysler')
        # Ingoing nodes: array of (nodeId, coord) tuples
        incomingCount = self.read_uint(4)
        incoming = []
        for _ in range(incomingCount):
            nodeId = self.read_uint(4)
            point = self.read_coord(False)
            incoming.append((nodeId, point))
        # Outgoing nodes: array of (nodeId, coord, outIndex) tuples
        # outIndex is an index in neighbours array
        outgoingCount = self.read_uint(4)
        outgoing = []
        for _ in range(outgoingCount):
            nodeId = self.read_uint(4)
            point = self.read_coord(False)
            outIndex = self.read_uint(1)
            outgoing.append((nodeId, point, outIndex))
        # Adjacency matrix: costs of routes for each (incoming, outgoing) tuple
        matrix = []
        for _ in range(incomingCount):
            matrix.append([self.read_uint(4) for _ in range(outgoingCount)])
        # List of mwms to which leads each outgoing node
        neighboursCount = self.read_uint(4)
        neighbours = []
        for _ in range(neighboursCount):
            size = self.read_uint(4)
            neighbours.append(self.f.read(size).decode('utf-8'))
        return {'in': incoming, 'out': outgoing, 'matrix': matrix, 'neighbours': neighbours}

    def iter_features(self, metadata=False):
        """Reads 'dat' section, yielding one dict per feature.

        Each dict has 'id', 'size', 'header' and 'geometry', plus 'metadata'
        when ``metadata`` is true and the feature has any. Only point
        geometries get coordinates. Yields nothing when 'dat' is absent.
        """
        if not self.has_tag('dat'):
            return
        # TODO: read 'offs'?
        md = {}
        if metadata:
            md = self.read_metadata()
        self.seek_tag('dat')
        ftid = -1
        while self.inside_tag('dat'):
            ftid += 1
            feature = {'id': ftid}
            feature_size = self.read_varuint()
            # Remembered so the reader can skip whatever part of the feature is not parsed.
            next_feature = self.f.tell() + feature_size
            feature['size'] = feature_size

            # Header
            header = {}
            header_bits = self.read_uint(1)
            types_count = (header_bits & 0x07) + 1
            has_name = (header_bits & 0x08) > 0
            has_layer = (header_bits & 0x10) > 0
            has_addinfo = (header_bits & 0x80) > 0
            geom_type = header_bits & 0x60
            types = []
            for _ in range(types_count):
                type_id = self.read_varuint()
                if type_id < len(self.type_mapping):
                    types.append(self.type_mapping[type_id])
                else:
                    types.append(str(type_id + 1))  # So the numbers match with mapcss-mapping.csv
            header['types'] = types
            if has_name:
                header['name'] = self.read_multilang()
            if has_layer:
                header['layer'] = self.read_uint(1)
            if has_addinfo:
                if geom_type == MWM.GeomType.POINT:
                    header['rank'] = self.read_uint(1)
                elif geom_type == MWM.GeomType.LINE:
                    header['ref'] = self.read_string()
                elif geom_type == MWM.GeomType.AREA or geom_type == MWM.GeomType.POINT_EX:
                    header['house'] = self.read_numeric_string()
            feature['header'] = header

            # Metadata
            if ftid in md:
                feature['metadata'] = md[ftid]

            # Geometry
            geometry = {}
            if geom_type == MWM.GeomType.POINT or geom_type == MWM.GeomType.POINT_EX:
                geometry['type'] = 'Point'
            elif geom_type == MWM.GeomType.LINE:
                geometry['type'] = 'LineString'
            elif geom_type == MWM.GeomType.AREA:
                geometry['type'] = 'Polygon'
            if geom_type == MWM.GeomType.POINT:
                geometry['coordinates'] = list(self.read_coord())

            # (flipping table emoticon)
            feature['geometry'] = geometry
            # Deliberately disabled: line/area geometry decoding is unfinished.
            # NOTE(review): the nesting here was reconstructed; the OSM id
            # parsing is assumed to belong to the disabled section, since it
            # can only be reached after the geometry has been consumed -- confirm.
            if False:
                if geom_type != MWM.GeomType.POINT:
                    polygon_count = self.read_varuint()
                    polygons = []
                    for i in range(polygon_count):
                        count = self.read_varuint()
                        buf = self.f.read(count)
                        # TODO: decode
                    geometry['coordinates'] = polygons
                    feature['coastCell'] = self.read_varint()

                # OSM IDs
                count = self.read_varuint()
                osmids = []
                for i in range(count):
                    osmid = self.read_osmid()
                    osmids.append('{0}{1}'.format(osmid[0], osmid[1]))
                feature['osmIds'] = osmids

            if self.f.tell() > next_feature:
                raise Exception('Feature parsing error, read too much')
            yield feature
            self.f.seek(next_feature)
|
||||
|
||||
|
||||
class Osm2Ft(MWMFile):
    """In-memory lookup table loaded from an ``mwm.osm2ft`` file.

    Maps OSM ids to feature ids by default, or feature ids to OSM ids when
    ``ft2osm`` is true. Supports ``in``, ``len()``, iteration over keys and
    indexing (a missing key gives ``None``).
    """

    def __init__(self, f, ft2osm=False, tuples=True):
        MWMFile.__init__(self, f)
        self.read(ft2osm, tuples)

    def read(self, ft2osm=False, tuples=True):
        """Reads mwm.osm2ft file, returning a dict of feature id <-> osm way id."""
        total = self.read_varuint()
        self.data = {}
        self.ft2osm = ft2osm
        for _ in range(total):
            osm_ref = self.read_osmid(tuples)
            feature_id = self.read_uint(4)
            self.read_uint(4)  # filler
            if osm_ref is None:
                # OSM id of an unrecognised type: nothing to store.
                continue
            if ft2osm:
                key, value = feature_id, osm_ref
            else:
                key, value = osm_ref, feature_id
            self.data[key] = value

    def __getitem__(self, k):
        return self.data.get(k)

    def __repr__(self):
        direction = 'ft2osm' if self.ft2osm else 'osm2ft'
        return '{} with {} items'.format(direction, len(self.data))

    def __len__(self):
        return len(self.data)

    def __contains__(self, k):
        return k in self.data

    def __iter__(self):
        return iter(self.data)
|
218
mwm/mwmfile.py
Normal file
218
mwm/mwmfile.py
Normal file
|
@ -0,0 +1,218 @@
|
|||
# MWM Reader Module
|
||||
import struct
|
||||
import math
|
||||
|
||||
|
||||
class MWMFile(object):
    """Low-level primitives for reading MAPS.ME binary files.

    Wraps a binary file object and provides readers for fixed-width and
    variable-length integers, strings, packed points and the tag (section)
    table. Subclasses build the section parsers on top of these.
    """

    # coding/multilang_utf8_string.cpp
    languages = ["default",
                 "en", "ja", "fr", "ko_rm", "ar", "de", "int_name", "ru", "sv", "zh", "fi", "be", "ka", "ko",
                 "he", "nl", "ga", "ja_rm", "el", "it", "es", "zh_pinyin", "th", "cy", "sr", "uk", "ca", "hu",
                 "hsb", "eu", "fa", "br", "pl", "hy", "kn", "sl", "ro", "sq", "am", "fy", "cs", "gd", "sk",
                 "af", "ja_kana", "lb", "pt", "hr", "fur", "vi", "tr", "bg", "eo", "lt", "la", "kk", "gsw",
                 "et", "ku", "mn", "mk", "lv", "hi"]

    # struct formats for read_uint(). The '<' prefix selects little-endian,
    # standard-size fields, so results do not depend on the host platform.
    # NOTE(review): assumes the on-disk integers are little-endian -- confirm.
    _UINT_FORMATS = {1: '<B', 2: '<H', 4: '<I', 8: '<Q'}

    def __init__(self, f):
        """Wrap ``f``, a seekable file object opened in binary mode."""
        self.f = f
        self.tags = {}            # section name -> (offset, length)
        self.coord_size = None    # set by read_header() in subclasses
        self.base_point = (0, 0)

    def read_tags(self):
        """Read the tag table into ``self.tags``.

        The first 8 bytes of the file hold the offset of the table.
        """
        self.f.seek(0)
        self.f.seek(self.read_uint(8))
        cnt = self.read_varuint()
        for _ in range(cnt):
            name = self.read_string(plain=True)
            offset = self.read_varuint()
            length = self.read_varuint()
            self.tags[name] = (offset, length)

    def has_tag(self, tag):
        """Return True if the section exists and is not empty."""
        return tag in self.tags and self.tags[tag][1] > 0

    def seek_tag(self, tag):
        """Move the file position to the start of the section."""
        self.f.seek(self.tags[tag][0])

    def tag_offset(self, tag):
        """Return the current file position relative to the section start."""
        return self.f.tell() - self.tags[tag][0]

    def inside_tag(self, tag):
        """Return True while the file position is within the section."""
        pos = self.tag_offset(tag)
        return 0 <= pos < self.tags[tag][1]

    def read_uint(self, bytelen=1):
        """Read an unsigned integer of 1, 2, 4 or 8 bytes.

        Raises ValueError (an Exception subclass) for any other width.
        """
        try:
            fmt = self._UINT_FORMATS[bytelen]
        except KeyError:
            raise ValueError('Bytelen {0} is not supported'.format(bytelen))
        return struct.unpack(fmt, self.f.read(bytelen))[0]

    def read_varuint(self):
        """Read a variable-length unsigned integer (7 bits per byte, low bits first).

        Returns whatever has been accumulated so far if the file ends early.
        """
        res = 0
        shift = 0
        more = True
        while more:
            b = self.f.read(1)
            if not b:
                return res
            try:
                bc = ord(b)
            except TypeError:
                bc = b
            res |= (bc & 0x7F) << shift
            shift += 7
            more = bc >= 0x80
        return res

    @staticmethod
    def zigzag_decode(uint):
        """Decode a zigzag-encoded unsigned integer into a signed one.

        0, 1, 2, 3, 4 ... map to 0, -1, 1, -2, 2 ... Odd inputs must give
        ``-(n >> 1) - 1``; plain negation would be off by one.
        """
        return (uint >> 1) ^ -(uint & 1)

    def read_varint(self):
        """Read a zigzag-encoded variable-length signed integer."""
        return self.zigzag_decode(self.read_varuint())

    class GeomType:
        # Values of the geometry bits (mask 0x60) in a feature header byte.
        POINT = 0
        LINE = 1 << 5
        AREA = 1 << 6
        POINT_EX = 3 << 5

    class OsmIdCode:
        # The two top bits of a 64-bit encoded OSM id carry the object type.
        NODE = 0x4000000000000000
        WAY = 0x8000000000000000
        RELATION = 0xC000000000000000
        RESET = ~(NODE | WAY | RELATION)

    @staticmethod
    def unpack_osmid(num):
        """Split an encoded OSM id into a ``(type_letter, id)`` tuple.

        The type letter is 'n', 'w' or 'r'. Returns None when neither type
        bit is set.
        """
        # RELATION has both type bits set, so it doubles as the mask. Comparing
        # the masked value avoids mistaking a relation for a node or a way.
        code = num & MWMFile.OsmIdCode.RELATION
        if code == MWMFile.OsmIdCode.RELATION:
            typ = 'r'
        elif code == MWMFile.OsmIdCode.WAY:
            typ = 'w'
        elif code == MWMFile.OsmIdCode.NODE:
            typ = 'n'
        else:
            return None
        return typ, num & MWMFile.OsmIdCode.RESET

    def read_osmid(self, as_tuple=True):
        """Read an 8-byte OSM id; unpack it into a tuple unless ``as_tuple`` is false."""
        osmid = self.read_uint(8)
        return self.unpack_osmid(osmid) if as_tuple else osmid

    def mwm_unshuffle(self, x):
        """De-interleave the bits of a 32-bit value.

        Even bits end up in the low half, odd bits in the high half.
        """
        x = ((x & 0x22222222) << 1) | ((x >> 1) & 0x22222222) | (x & 0x99999999)
        x = ((x & 0x0C0C0C0C) << 2) | ((x >> 2) & 0x0C0C0C0C) | (x & 0xC3C3C3C3)
        x = ((x & 0x00F000F0) << 4) | ((x >> 4) & 0x00F000F0) | (x & 0xF00FF00F)
        x = ((x & 0x0000FF00) << 8) | ((x >> 8) & 0x0000FF00) | (x & 0xFF0000FF)
        return x

    def mwm_bitwise_split(self, v):
        """Split a 64-bit bit-interleaved value into an ``(x, y)`` pair."""
        hi = self.mwm_unshuffle(v >> 32)
        lo = self.mwm_unshuffle(v & 0xFFFFFFFF)
        x = ((hi & 0xFFFF) << 16) | (lo & 0xFFFF)
        y = (hi & 0xFFFF0000) | (lo >> 16)
        return (x, y)

    def mwm_decode_delta(self, v, ref):
        """Decode a packed point stored as zigzag deltas from ``ref``."""
        x, y = self.mwm_bitwise_split(v)
        return ref[0] + self.zigzag_decode(x), ref[1] + self.zigzag_decode(y)

    def read_point(self, ref, packed=True):
        """Reads an unsigned point, returns (x, y)."""
        if packed:
            u = self.read_varuint()
        else:
            u = self.read_uint(8)
        return self.mwm_decode_delta(u, ref)

    def to_4326(self, point):
        """Convert a point in maps.me-mercator CS to WGS-84 (EPSG:4326)."""
        if self.coord_size is None:
            raise Exception('Call read_header() first.')
        merc_bounds = (-180.0, -180.0, 180.0, 180.0)  # Xmin, Ymin, Xmax, Ymax
        x = point[0] * (merc_bounds[2] - merc_bounds[0]) / self.coord_size + merc_bounds[0]
        y = point[1] * (merc_bounds[3] - merc_bounds[1]) / self.coord_size + merc_bounds[1]
        y = 360.0 * math.atan(math.tanh(y * math.pi / 360.0)) / math.pi
        return (x, y)

    def read_coord(self, packed=True):
        """Reads a pair of coords in degrees mercator, returns (lon, lat)."""
        point = self.read_point(self.base_point, packed)
        return self.to_4326(point)

    def read_bounds(self):
        """Reads mercator bounds, returns (min_lon, min_lat, max_lon, max_lat)."""
        rmin = self.mwm_bitwise_split(self.read_varint())
        rmax = self.mwm_bitwise_split(self.read_varint())
        pmin = self.to_4326(rmin)
        pmax = self.to_4326(rmax)
        return (pmin[0], pmin[1], pmax[0], pmax[1])

    def read_string(self, plain=False, decode=True):
        """Read a length-prefixed string.

        Unless ``plain`` is true, the stored length is one less than the real
        one. Returns raw bytes when ``decode`` is false, UTF-8 text otherwise.
        """
        length = self.read_varuint() + (0 if plain else 1)
        s = self.f.read(length)
        return s.decode('utf-8') if decode else s

    def read_uint_array(self):
        """Read a varuint count followed by that many varuints."""
        length = self.read_varuint()
        return [self.read_varuint() for _ in range(length)]

    def read_numeric_string(self):
        """Read a string that may be stored inline as a number.

        If the low bit of the leading varuint is set, the remaining bits are
        the number itself. Otherwise they hold the byte length minus one of
        the UTF-8 string that follows.
        """
        sz = self.read_varuint()
        if sz & 1 != 0:
            return str(sz >> 1)
        sz = (sz >> 1) + 1
        return self.f.read(sz).decode('utf-8')

    def read_multilang(self):
        """Read a multi-language string into a ``{language: text}`` dict.

        Each entry is a marker byte (top bits ``10``, low 6 bits index the
        language table) followed by UTF-8 text. Entries whose language index
        is outside the table are skipped.
        """
        def find_multilang_next(s, i):
            """Return the index of the next marker byte after position ``i``."""
            i += 1
            while i < len(s):
                c = s[i]
                if (c & 0xC0) == 0x80:
                    # A continuation-style byte at a character boundary is the next marker.
                    break
                # Otherwise skip the trailing bytes of the UTF-8 sequence led by c.
                if (c & 0x80) == 0:
                    pass
                elif (c & 0xFE) == 0xFE:
                    i += 6
                elif (c & 0xFC) == 0xFC:
                    i += 5
                elif (c & 0xF8) == 0xF8:
                    i += 4
                elif (c & 0xF0) == 0xF0:
                    i += 3
                elif (c & 0xE0) == 0xE0:
                    i += 2
                elif (c & 0xC0) == 0xC0:
                    i += 1
                i += 1
            return i

        # A bytearray yields integers on indexing under both Python 2 and 3.
        s = bytearray(self.read_string(decode=False))
        langs = {}
        i = 0
        while i < len(s):
            n = find_multilang_next(s, i)
            lng = s[i] & 0x3F
            if lng < len(self.languages):
                langs[self.languages[lng]] = s[i+1:n].decode('utf-8')
            i = n
        return langs
|
28
setup.py
Normal file
28
setup.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
from setuptools import setup
|
||||
from os import path
|
||||
|
||||
here = path.abspath(path.dirname(__file__))

# Read the README inside a context manager so the file handle is closed
# as soon as the text is loaded, instead of being left open.
with open(path.join(here, 'README.md')) as readme:
    long_description = readme.read()

setup(
    name='mwm',
    version='0.9.0',
    author='Ilya Zverev',
    author_email='ilya@zverev.info',
    packages=['mwm'],
    url='http://pypi.python.org/pypi/mwm/',
    license='Apache License 2.0',
    description='Library to read binary MAPS.ME files.',
    long_description=long_description,
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'Topic :: Software Development :: Libraries :: Python Modules',
        'Topic :: Utilities',
        'Environment :: Console',
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
    ],
)
|
40
tools/dump_mwm.py
Executable file
40
tools/dump_mwm.py
Executable file
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/python
|
||||
import sys, os.path, random
|
||||
import json
|
||||
from mwm import MWM
|
||||
|
||||
if len(sys.argv) < 2:
    print('Dumps some MWM structures.')
    print('Usage: {0} <country.mwm>'.format(sys.argv[0]))
    sys.exit(1)

# Keep the file open only for as long as the reader needs it.
with open(sys.argv[1], 'rb') as mwm_file:
    mwm = MWM(mwm_file)
    # NOTE(review): this relative path looks like it targets the layout of the
    # repository the script was transferred from -- confirm types.txt is there.
    mwm.read_types(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', '..', '..', 'data', 'types.txt'))
    print('Tags:')
    # Sections sorted by their offset in the file.
    tvv = sorted([(k, v[0], v[1]) for k, v in mwm.tags.items()], key=lambda x: x[1])
    for tv in tvv:
        print('  {0:<8}: offs {1:9} len {2:8}'.format(tv[0], tv[1], tv[2]))
    v = mwm.read_version()
    print('Format: {0}, version: {1}'.format(v['fmt'], v['version'].strftime('%Y-%m-%d %H:%M')))
    print('Header: {0}'.format(mwm.read_header()))
    print('Region Info: {0}'.format(mwm.read_region_info()))
    print('Metadata count: {0}'.format(len(mwm.read_metadata())))

    cross = mwm.read_crossmwm()
    if cross:
        print('Outgoing points: {0}, incoming: {1}'.format(len(cross['out']), len(cross['in'])))
        print('Outgoing regions: {0}'.format(set(cross['neighbours'])))

    # Print some random features using reservoir sampling
    count = 5
    sample = []
    # Tracked separately from the loop index, so an empty 'dat' section
    # reports 0 and the total is not one short.
    feature_count = 0
    for i, feature in enumerate(mwm.iter_features()):
        feature_count = i + 1
        if i < count:
            sample.append(feature)
        else:
            # Keep the new feature with probability count / (i + 1),
            # replacing a uniformly chosen slot.
            slot = random.randint(0, i)
            if slot < count:
                sample[slot] = feature

print('Feature count: {0}'.format(feature_count))
print('Sample features:')
for feature in sample:
    print(json.dumps(feature, ensure_ascii=False))
|
41
tools/find_feature.py
Executable file
41
tools/find_feature.py
Executable file
|
@ -0,0 +1,41 @@
|
|||
#!/usr/bin/env python
|
||||
import sys, os.path, json
|
||||
from mwm import MWM
|
||||
|
||||
if len(sys.argv) < 4:
    print('Finds features in an mwm file based on a query')
    print('Usage: {0} <country.mwm> <type> <string>'.format(sys.argv[0]))
    print('')
    print('Type:')
    print('  t for inside types ("t hwtag" will find all hwtags-*)')
    print('  et for exact type ("et shop" won\'t find shop-chemist)')
    print('  n for names, case-sensitive ("n Starbucks" for all starbucks)')
    print('  m for metadata keys ("m flats" for features with flats)')
    print('  id for feature id ("id 1234" for feature #1234)')
    sys.exit(1)

typ = sys.argv[2].lower()
find = sys.argv[3]
if isinstance(find, bytes):
    # Python 2 passes arguments as byte strings; Python 3 already gives text,
    # which has no decode() method.
    find = find.decode('utf-8')

# Parse the numeric query once instead of on every feature.
wanted_id = int(find) if typ == 'id' else None

with open(sys.argv[1], 'rb') as mwm_file:
    # The MWM constructor already reads the header.
    mwm = MWM(mwm_file)
    # NOTE(review): this relative path looks like it targets the layout of the
    # repository the script was transferred from -- confirm types.txt is there.
    mwm.read_types(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', '..', '..', 'data', 'types.txt'))
    for i, feature in enumerate(mwm.iter_features(metadata=True)):
        found = False
        if typ == 'n' and 'name' in feature['header']:
            found = any(find in value for value in feature['header']['name'].values())
        elif typ in ('t', 'et'):
            for t in feature['header']['types']:
                # 'et' needs an exact match, 't' also accepts a substring.
                if t == find or (typ == 't' and find in t):
                    found = True
        elif typ == 'm' and 'metadata' in feature:
            found = find in feature['metadata']
        elif typ == 'id' and i == wanted_id:
            found = True
        if found:
            line = json.dumps(feature, ensure_ascii=False, sort_keys=True)
            if str is bytes:
                # Python 2 only: encode explicitly, as print cannot always
                # encode unicode. On Python 3 this would print a b'...' repr.
                line = line.encode('utf-8')
            print(line)
|
22
tools/ft2osm.py
Executable file
22
tools/ft2osm.py
Executable file
|
@ -0,0 +1,22 @@
|
|||
#!/usr/bin/env python
|
||||
import sys
|
||||
import mwm
|
||||
|
||||
if len(sys.argv) < 3:
    print('Finds an OSM object for a given feature id.')
    print('Usage: {} <mwm.osm2ft> <ftid>'.format(sys.argv[0]))
    sys.exit(1)

# Load the feature id -> OSM id table (second argument selects that direction).
with open(sys.argv[1], 'rb') as f:
    ft2osm = mwm.Osm2Ft(f, True)

# Exit status: 0 when every id was resolved, 2 if at least one was not.
code = 0
type_abbr = {'n': 'node', 'w': 'way', 'r': 'relation'}
for arg in sys.argv[2:]:
    ftid = int(arg)
    if ftid not in ft2osm:
        print('Could not find osm id for feature {}'.format(ftid))
        code = 2
        continue
    osm_ref = ft2osm[ftid]
    print('https://www.openstreetmap.org/{}/{}'.format(type_abbr[osm_ref[0]], osm_ref[1]))
sys.exit(code)
|
Loading…
Add table
Reference in a new issue