Compare commits

...

No commits in common. "master" and "newcode" have entirely different histories.

14 changed files with 199 additions and 1611 deletions

2
.gitignore vendored
View file

@ -1,5 +1,3 @@
build/
__pycache__/
*.pyc
dist/
*.egg*

View file

@ -1,28 +0,0 @@
# mwm.py Change Log
## master branch
## 0.10.1
_Released 2018-06-20_
* Better support for Python 2.7.
* Encoding and decoding int64 negative ids.
* Allow short form for id: `mwmtool id way/123456`.
## 0.10.0
_Released 2018-06-18_
* Extracted osm id encoding methods to `OsmIdCode` class.
* Added id decoding to mwmtool.
* Fixed printing utf-8 characters under Python 2 in mwmtool.
* Python 2.6 is not supported officially.
* Package `types.txt` to eliminate the need to check out omim repository.
* `dump -s` will skip reading all the features.
## 0.9.0
_Released 2017-06-08_
The initial release with some features.

31
README.md Normal file
View file

@ -0,0 +1,31 @@
# mwm.py
A Python library for reading the contents of MAPS.ME mwm files. Not everything
is supported, but you can at least get all the features and their attributes.
We at MAPS.ME use it for analytics and maintenance.
## Installation
pip install mwm
## Usage
Just add `import mwm` to your script, and read an mwm file with:
```python
with open('file.mwm', 'rb') as f:
    data = mwm.MWM(f)
```
## Tools
There are some useful tools in the `tools` directory, which can also serve as
examples of how to use the library:
* `dump_mwm.py` prints the header and some statistics on an mwm file.
* `find_feature.py` can find features inside an mwm by type or name.
* `ft2osm.py` converts a feature id to an OSM website link.
## License
Written by Ilya Zverev for MAPS.ME. Published under the Apache License 2.0.

View file

@ -1,39 +0,0 @@
mwm.py
======
It is a python library to read contents of MAPS.ME mwm files. Not
everything is supported, but you can get at least all the features and
their attributes. We at MAPS.ME use this script to do analytics and
maintenance.
Installation
------------
::
pip install mwm
Usage
-----
Just add ``import mwm`` to your script, and read an mwm file with:
.. code:: python
    with open('file.mwm', 'rb') as f:
        data = mwm.MWM(f)
Tools
-----
The package installs the ``mwmtool`` command-line script. It shows
statistics about an MWM file, can search for features or convert ids.
Run it with ``-h`` to see a list of options.
The script source can serve as a library usage example.
License
-------
Written by Ilya Zverev for MAPS.ME. Published under the Apache License
2.0.

View file

@ -1,5 +1,2 @@
from .mwmfile import MWMFile, OsmIdCode
from .mwm import MWM
from .osm2ft import Osm2Ft
__version__ = '0.10.1'
from .mwm import MWM, Osm2Ft
from .mwmfile import MWMFile

View file

@ -1,14 +1,15 @@
# MWM Reader Module
from .mwmfile import MWMFile
from datetime import datetime
import os
# Unprocessed sections: geomN, trgN, idx, sdx (search index),
# addr (search address), offs (feature offsets - succinct)
# Unprocessed sections: geomN, trgN, idx, sdx (search index), addr (search address), offs (feature offsets - succinct)
# Routing sections: mercedes (matrix), daewoo (edge data), infinity (edge id), skoda (shortcuts), chrysler (cross context), ftseg, node2ftseg
# (these mostly are succinct structures, except chrysler and node2ftseg, so no use trying to load them here)
# TODO:
# - Predictive reading of LineStrings
# - Find why polygon geometry is incorrect in iter_features()
# - Find feature ids in the 'dat' section, or find a way to read the 'offs' section
class MWM(MWMFile):
@ -19,23 +20,17 @@ class MWM(MWMFile):
"turn_lanes", "turn_lanes_forward", "turn_lanes_backward", "email", "postcode",
"wikipedia", "maxspeed", "flats", "height", "min_height",
"denomination", "building_levels", "test_id", "ref:sponsored", "price_rate",
"rating", "banner_url", "level"]
"rating", "fuel", "routes"]
regiondata = ["languages", "driving", "timezone", "addr_fmt", "phone_fmt",
"postcode_fmt", "holidays", "housenames"]
regiondata = ["languages", "driving", "timezone", "addr_fmt", "phone_fmt", "postcode_fmt", "holidays", "housenames"]
def __init__(self, f):
MWMFile.__init__(self, f)
self.read_tags()
self.read_header()
self.type_mapping = []
self.read_types(os.path.join(
os.getcwd(), os.path.dirname(__file__), 'types.txt'))
def read_types(self, filename):
if not os.path.exists(filename):
return
self.type_mapping = []
with open(filename, 'r') as ft:
for line in ft:
if len(line.strip()) > 0:
@ -48,11 +43,10 @@ class MWM(MWMFile):
fmt = self.read_varuint() + 1
version = self.read_varuint()
if version < 161231:
vdate = datetime(2000 + int(version / 10000), int(version / 100) % 100, version % 100)
version = datetime(2000 + int(version / 10000), int(version / 100) % 100, version % 100)
else:
vdate = datetime.fromtimestamp(version)
version = int(vdate.strftime('%y%m%d'))
return {'fmt': fmt, 'version': version, 'date': vdate}
version = datetime.fromtimestamp(version)
return {'fmt': fmt, 'version': version}
def read_header(self):
"""Reads 'header' section."""
@ -273,3 +267,39 @@ class MWM(MWMFile):
raise Exception('Feature parsing error, read too much')
yield feature
self.f.seek(next_feature)
class Osm2Ft(MWMFile):
    """Reader for ``.mwm.osm2ft`` files.

    The loaded mapping is kept in ``self.data``. Its direction is chosen by
    the ``ft2osm`` flag: feature id -> OSM id when true, OSM id -> feature id
    otherwise.
    """

    def __init__(self, f, ft2osm=False, tuples=True):
        MWMFile.__init__(self, f)
        self.read(ft2osm, tuples)

    def read(self, ft2osm=False, tuples=True):
        """Load every record of the osm2ft file into ``self.data``.

        ``tuples`` is forwarded to ``read_osmid``. Records whose OSM id comes
        back as ``None`` are skipped.
        """
        total = self.read_varuint()
        self.data = {}
        self.ft2osm = ft2osm
        mapping = self.data
        for _ in range(total):
            osm_ref = self.read_osmid(tuples)
            feature_id = self.read_uint(4)
            self.read_uint(4)  # filler
            if osm_ref is None:
                continue
            if ft2osm:
                key, value = feature_id, osm_ref
            else:
                key, value = osm_ref, feature_id
            mapping[key] = value

    def __getitem__(self, k):
        # Missing keys yield None instead of raising KeyError.
        return self.data.get(k)

    def __repr__(self):
        direction = 'ft2osm' if self.ft2osm else 'osm2ft'
        return '{} with {} items'.format(direction, len(self.data))

    def __len__(self):
        return len(self.data)

    def __contains__(self, k):
        return k in self.data

    def __iter__(self):
        return iter(self.data)

View file

@ -3,65 +3,6 @@ import struct
import math
class OsmIdCode(object):
    """Encode and decode OSM object ids that carry their type in the top bits.

    The two highest bits of a 64-bit value select the object type (NODE, WAY
    or RELATION); the remaining bits hold the numeric OSM id.
    """

    NODE = 0x4000000000000000
    WAY = 0x8000000000000000
    RELATION = 0xC000000000000000
    RESET = ~(NODE | WAY | RELATION)
    # Both type bits together. RELATION sets both of them, so a single-bit
    # test would also report relations as nodes and as ways.
    _TYPE_MASK = NODE | WAY | RELATION

    @staticmethod
    def is_node(code):
        """Return True if the type bits of ``code`` are exactly NODE."""
        return code & OsmIdCode._TYPE_MASK == OsmIdCode.NODE

    @staticmethod
    def is_way(code):
        """Return True if the type bits of ``code`` are exactly WAY."""
        return code & OsmIdCode._TYPE_MASK == OsmIdCode.WAY

    @staticmethod
    def is_relation(code):
        """Return True if the type bits of ``code`` are exactly RELATION."""
        return code & OsmIdCode._TYPE_MASK == OsmIdCode.RELATION

    @staticmethod
    def get_type(code):
        """Return 'n', 'w' or 'r' for ``code``, or None if no type bit is set."""
        if OsmIdCode.is_relation(code):
            return 'r'
        elif OsmIdCode.is_node(code):
            return 'n'
        elif OsmIdCode.is_way(code):
            return 'w'
        return None

    @staticmethod
    def get_id(code):
        """Return ``code`` with the two type bits cleared."""
        return code & OsmIdCode.RESET

    @staticmethod
    def unpack(num):
        """Split an encoded id into a ``(type_letter, osm_id)`` tuple.

        Negative values are treated as the int64 view of a uint64 code and
        are converted back first. Returns None when no type bit is set.
        """
        if num < 0:
            # Reinterpret the signed 64-bit value as unsigned.
            num = (-1 - num) ^ (2**64 - 1)
        typ = OsmIdCode.get_type(num)
        if typ is None:
            return None
        return typ, OsmIdCode.get_id(num)

    @staticmethod
    def pack(osm_type, osm_id, int64=False):
        """Encode an OSM type name and id into a single integer.

        Only the first letter of ``osm_type`` is inspected, case-insensitively
        ('n', 'w' or 'r'). Returns None for an empty or unknown type. With
        ``int64=True``, results of 2**63 and above are returned as the
        corresponding negative signed 64-bit value.
        """
        if not osm_type:
            return None
        typ = osm_type[0].lower()
        if typ == 'r':
            result = osm_id | OsmIdCode.RELATION
        elif typ == 'w':
            result = osm_id | OsmIdCode.WAY
        elif typ == 'n':
            result = osm_id | OsmIdCode.NODE
        else:
            return None
        if int64 and result >= 2**63:
            result = -1 - (result ^ (2**64 - 1))
        return result
class MWMFile(object):
# coding/multilang_utf8_string.cpp
languages = ["default",
@ -145,12 +86,23 @@ class MWMFile(object):
AREA = 1 << 6
POINT_EX = 3 << 5
class OsmIdCode:
NODE = 0x4000000000000000
WAY = 0x8000000000000000
RELATION = 0xC000000000000000
RESET = ~(NODE | WAY | RELATION)
@staticmethod
def unpack_osmid(num):
typ = OsmIdCode.get_type(num)
if typ is None:
if num & MWMFile.OsmIdCode.NODE == MWMFile.OsmIdCode.NODE:
typ = 'n'
elif num & MWMFile.OsmIdCode.WAY == MWMFile.OsmIdCode.WAY:
typ = 'w'
elif num & MWMFile.OsmIdCode.RELATION == MWMFile.OsmIdCode.RELATION:
typ = 'r'
else:
return None
return typ, OsmIdCode.get_id(num)
return typ, num & MWMFile.OsmIdCode.RESET
def read_osmid(self, as_tuple=True):
osmid = self.read_uint(8)

View file

@ -1,189 +0,0 @@
from __future__ import print_function
import sys
import random
import json
import argparse
import re
from . import MWM, Osm2Ft, OsmIdCode
def print_json(data):
    """Print ``data`` as one line of JSON with sorted keys.

    Non-ASCII characters are written unescaped. Under Python 2 the text is
    encoded to UTF-8 before printing.
    """
    text = json.dumps(data, ensure_ascii=False, sort_keys=True)
    running_py2 = sys.version_info[0] < 3
    print(text.encode('utf-8') if running_py2 else text)
def dump_mwm(args):
    """Print the tag table, version, header, region info and sample features.

    ``args`` is the argparse namespace built in ``main()``: ``args.mwm`` is
    the opened mwm file, ``args.types`` is an optional path to types.txt, and
    ``args.short`` stops the dump right after the region info.
    """
    mwm = MWM(args.mwm)
    if args.types:
        mwm.read_types(args.types)
    print('Tags:')
    # Each tag value is an (offset, length) pair; list the tags by offset.
    tvv = sorted([(k, v[0], v[1]) for k, v in mwm.tags.items()], key=lambda x: x[1])
    for tv in tvv:
        print('  {0:<8}: offs {1:9} len {2:8}'.format(tv[0], tv[1], tv[2]))
    v = mwm.read_version()
    print('Format: {0}, version: {1}'.format(v['fmt'], v['date'].strftime('%Y-%m-%d %H:%M')))
    print('Header: {0}'.format(mwm.read_header()))
    print('Region Info: {0}'.format(mwm.read_region_info()))
    if args.short:
        return

    print('Metadata count: {0}'.format(len(mwm.read_metadata())))

    cross = mwm.read_crossmwm()
    if cross:
        print('Outgoing points: {0}, incoming: {1}'.format(len(cross['out']), len(cross['in'])))
        print('Outgoing regions: {0}'.format(set(cross['neighbours'])))

    # Print some random features using reservoir sampling
    count = 5
    sample = []
    # Tracked separately from the loop index so the total is right (index + 1)
    # and is still defined when the file has no features at all.
    feature_count = 0
    for i, feature in enumerate(mwm.iter_features()):
        feature_count = i + 1
        if i < count:
            sample.append(feature)
        else:
            # randint is inclusive on both ends: keep the new feature with
            # probability count / (i + 1), replacing a uniformly chosen slot.
            slot = random.randint(0, i)
            if slot < count:
                sample[slot] = feature

    print('Feature count: {0}'.format(feature_count))
    print('Sample features:')
    for feature in sample:
        print_json(feature)
def find_feature(args):
    """Print, as JSON, every feature of an mwm file that passes all filters.

    Filters come from the argparse namespace built in ``main()`` and are
    combined with AND:

    * ``args.fid`` -- position of the feature in the file (0 is valid);
    * ``args.name`` / ``args.iname`` -- substring of any name, the latter
      case-insensitive;
    * ``args.type`` / ``args.exact_type`` -- substring of, or exact match
      with, any of the feature types;
    * ``args.meta`` -- a key that must be present in the feature metadata.
    """
    mwm = MWM(args.mwm)
    mwm.read_header()
    if args.types:
        mwm.read_types(args.types)
    if args.iname:
        args.iname = args.iname.lower()
    for i, feature in enumerate(mwm.iter_features(metadata=True)):
        # Compare against None: feature id 0 is falsy but is a real id.
        if args.fid is not None and i != args.fid:
            continue
        if args.name or args.iname:
            if 'name' not in feature['header']:
                continue
            if not any(
                (args.name and args.name in value) or
                (args.iname and args.iname in value.lower())
                for value in feature['header']['name'].values()
            ):
                continue
        if args.type or args.exact_type:
            if not any(
                t == args.type or t == args.exact_type or
                (args.type and args.type in t)
                for t in feature['header']['types']
            ):
                continue
        if args.meta and ('metadata' not in feature or args.meta not in feature['metadata']):
            continue
        print_json(feature)
def ft2osm(args):
    """Print an OpenStreetMap URL for every feature id in ``args.ftid``.

    The ids are looked up in the osm2ft file given as ``args.osm2ft``.
    Returns 0 when all ids were found and 2 when at least one was missing.
    """
    lookup = Osm2Ft(args.osm2ft, True)
    type_names = {'n': 'node', 'w': 'way', 'r': 'relation'}
    exit_code = 0
    for feature_id in args.ftid:
        if feature_id not in lookup:
            print('Could not find osm id for feature {}'.format(feature_id))
            exit_code = 2
            continue
        osm_ref = lookup[feature_id]
        print('https://www.openstreetmap.org/{}/{}'.format(
            type_names[osm_ref[0]],
            osm_ref[1]))
    return exit_code
def decode_id(args):
    """Convert between an encoded OSM id and an OpenStreetMap object link.

    If ``args.id`` is a plain (optionally negative) integer, it is decoded
    and the matching openstreetmap.org URL is printed. Otherwise it must
    contain ``node/N``, ``way/N`` or ``relation/N``, and the encoded id is
    printed (as a signed int64 when ``args.int64`` is set).

    Returns 2 on invalid input and None on success.
    """
    # Match the whole string instead of using isdigit()/startswith('-'),
    # which let inputs such as '-abc' through and then crashed in int().
    if re.match(r'-?[0-9]+$', args.id):
        osm_id = OsmIdCode.unpack(int(args.id))
        if osm_id is None:
            print('That is not a valid identifier')
            return 2
        type_abbr = {'n': 'node', 'w': 'way', 'r': 'relation'}
        print('https://www.openstreetmap.org/{}/{}'.format(
            type_abbr[osm_id[0]], osm_id[1]))
        return None

    m = re.search(r'(node|way|relation)/(\d+)', args.id)
    if not m:
        print('Please specify an URL to OSM object on its website')
        return 2
    print(OsmIdCode.pack(m.group(1), int(m.group(2)), args.int64))
    return None
def dat_to_gpx(args):
    """Placeholder for the gps_track.dat to GPX conversion.

    The conversion is not implemented: the function prints a notice and
    returns 2, which ``main()`` uses as the exit status.
    """
    # TODO: implement the conversion.
    # Notes kept from the original stub:
    #  - point sources: 'apple', 'windows', 'android', 'google', 'tizen',
    #    'predictor';
    #  - args.gpx is declared with argparse.FileType('w'), so it is already
    #    an open file (or None); use `args.gpx or sys.stdout` rather than
    #    calling open() on it, which raises TypeError.
    print('Not implemented yet, sorry.')
    return 2
def main():
    """Parse the command line and run the selected subcommand.

    Subcommands: ``dump``, ``find``, ``osm``, ``id`` and ``gpx``. Each one
    registers its handler through ``set_defaults(func=...)``. If the handler
    returns anything other than None, that value is passed to ``sys.exit``.
    """
    parser = argparse.ArgumentParser(description='Toolbox for MWM files.')
    parser.add_argument('-t', '--types', help='path to types.txt')
    subparsers = parser.add_subparsers(dest='cmd')
    # Make the subcommand mandatory.
    subparsers.required = True

    parser_dump = subparsers.add_parser('dump', help='Dumps some structures.')
    parser_dump.add_argument('mwm', type=argparse.FileType('rb'), help='file to browse')
    parser_dump.add_argument('-s', '--short', action='store_true',
                             help='Read header only, no features')
    parser_dump.set_defaults(func=dump_mwm)

    parser_find = subparsers.add_parser('find', help='Finds features in a file.')
    parser_find.add_argument('mwm', type=argparse.FileType('rb'), help='file to search')
    parser_find.add_argument('-t', dest='type',
                             help='look inside types ("-t hwtag" will find all hwtags-*)')
    parser_find.add_argument('-et', dest='exact_type',
                             help='look for a type ("-et shop" won\'t find shop-chemist)')
    parser_find.add_argument('-n', dest='name',
                             help='look inside names, case-sensitive ("-n Starbucks" '
                             'for all starbucks)')
    parser_find.add_argument('-in', '-ni', dest='iname',
                             help='look inside names, case-insensitive ("-in star" will '
                             'find Starbucks)')
    parser_find.add_argument('-m', dest='meta',
                             help='look for a metadata key ("-m flats" for features with flats)')
    parser_find.add_argument('-id', dest='fid', type=int,
                             help='look for a feature id ("-id 1234" for feature #1234)')
    parser_find.set_defaults(func=find_feature)

    parser_osm = subparsers.add_parser('osm',
                                       help='Displays an OpenStreetMap link for a feature id.')
    parser_osm.add_argument('osm2ft', type=argparse.FileType('rb'), help='.mwm.osm2ft file')
    parser_osm.add_argument('ftid', type=int, nargs='+', help='feature id')
    parser_osm.set_defaults(func=ft2osm)

    parser_id = subparsers.add_parser('id', help='Decode or encode OSM ID')
    parser_id.add_argument('id', help='MWM internal OSM ID, or a link to OSM website')
    parser_id.add_argument('-i', '--int64', action='store_true',
                           help='Use int64 instead of uint64')
    parser_id.set_defaults(func=decode_id)

    parser_gpx = subparsers.add_parser('gpx', help='Convert gps_track.dat to GPX')
    parser_gpx.add_argument('dat', type=argparse.FileType('rb'), help='file to convert')
    parser_gpx.add_argument('--gpx', '-o', type=argparse.FileType('w'), help='output gpx file')
    parser_gpx.set_defaults(func=dat_to_gpx)

    args = parser.parse_args()
    code = args.func(args)
    if code is not None:
        sys.exit(code)
if __name__ == '__main__':
main()

View file

@ -1,38 +0,0 @@
# OSM2FT Reader
from .mwmfile import MWMFile
class Osm2Ft(MWMFile):
    """Reader for ``.mwm.osm2ft`` files.

    The loaded mapping is kept in ``self.data``. Its direction is chosen by
    the ``ft2osm`` flag: feature id -> OSM id when true, OSM id -> feature id
    otherwise.
    """

    def __init__(self, f, ft2osm=False, tuples=True):
        MWMFile.__init__(self, f)
        self.read(ft2osm, tuples)

    def read(self, ft2osm=False, tuples=True):
        """Load every record of the osm2ft file into ``self.data``.

        ``tuples`` is forwarded to ``read_osmid``. Records whose OSM id comes
        back as ``None`` are skipped.
        """
        total = self.read_varuint()
        self.data = {}
        self.ft2osm = ft2osm
        mapping = self.data
        for _ in range(total):
            osm_ref = self.read_osmid(tuples)
            feature_id = self.read_uint(4)
            self.read_uint(4)  # filler
            if osm_ref is None:
                continue
            if ft2osm:
                key, value = feature_id, osm_ref
            else:
                key, value = osm_ref, feature_id
            mapping[key] = value

    def __getitem__(self, k):
        # Missing keys yield None instead of raising KeyError.
        return self.data.get(k)

    def __repr__(self):
        direction = 'ft2osm' if self.ft2osm else 'osm2ft'
        return '{} with {} items'.format(direction, len(self.data))

    def __len__(self):
        return len(self.data)

    def __contains__(self, k):
        return k in self.data

    def __iter__(self):
        return iter(self.data)

File diff suppressed because it is too large Load diff

View file

@ -1,20 +1,18 @@
from setuptools import setup
from os import path
from mwm import __version__
here = path.abspath(path.dirname(__file__))
setup(
name='mwm',
version=__version__,
version='0.9.0',
author='Ilya Zverev',
author_email='ilya@zverev.info',
packages=['mwm'],
package_data={'mwm': ['types.txt']},
url='https://github.com/mapsme/mwm.py',
url='http://pypi.python.org/pypi/mwm/',
license='Apache License 2.0',
description='Library to read binary MAPS.ME files.',
long_description=open(path.join(here, 'README.rst')).read(),
long_description=open(path.join(here, 'README.md')).read(),
classifiers=[
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
@ -23,10 +21,8 @@ setup(
'Environment :: Console',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
],
entry_points={
'console_scripts': ['mwmtool = mwm.mwmtool:main']
},
)

40
tools/dump_mwm.py Executable file
View file

@ -0,0 +1,40 @@
#!/usr/bin/python
import sys, os.path, random
import json
from mwm import MWM
# Dump the tag table, version, header, region info, metadata count and a few
# random features of the mwm file given as the first command-line argument.
if len(sys.argv) < 2:
    print('Dumps some MWM structures.')
    print('Usage: {0} <country.mwm>'.format(sys.argv[0]))
    sys.exit(1)

# The file stays open for the whole dump and is closed on exit from the block.
with open(sys.argv[1], 'rb') as mwm_file:
    mwm = MWM(mwm_file)
    mwm.read_types(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', '..', '..', 'data', 'types.txt'))
    print('Tags:')
    # Each tag value is an (offset, length) pair; list the tags by offset.
    tvv = sorted([(k, v[0], v[1]) for k, v in mwm.tags.items()], key=lambda x: x[1])
    for tv in tvv:
        print('  {0:<8}: offs {1:9} len {2:8}'.format(tv[0], tv[1], tv[2]))
    v = mwm.read_version()
    print('Format: {0}, version: {1}'.format(v['fmt'], v['version'].strftime('%Y-%m-%d %H:%M')))
    print('Header: {0}'.format(mwm.read_header()))
    print('Region Info: {0}'.format(mwm.read_region_info()))
    print('Metadata count: {0}'.format(len(mwm.read_metadata())))

    cross = mwm.read_crossmwm()
    if cross:
        print('Outgoing points: {0}, incoming: {1}'.format(len(cross['out']), len(cross['in'])))
        print('Outgoing regions: {0}'.format(set(cross['neighbours'])))

    # Print some random features using reservoir sampling
    count = 5
    sample = []
    # Tracked separately from the loop index so the total is right (index + 1)
    # and is still defined when the file has no features at all.
    feature_count = 0
    for i, feature in enumerate(mwm.iter_features()):
        feature_count = i + 1
        if i < count:
            sample.append(feature)
        else:
            # randint is inclusive on both ends: keep the new feature with
            # probability count / (i + 1), replacing a uniformly chosen slot.
            slot = random.randint(0, i)
            if slot < count:
                sample[slot] = feature

    print('Feature count: {0}'.format(feature_count))
    print('Sample features:')
    for feature in sample:
        print(json.dumps(feature, ensure_ascii=False))

41
tools/find_feature.py Executable file
View file

@ -0,0 +1,41 @@
#!/usr/bin/env python
import sys, os.path, json
from mwm import MWM
# Print, as JSON, every feature of an mwm file that matches a single query.
# Command line: <country.mwm> <type> <string>; see the usage text below.
if len(sys.argv) < 4:
    print('Finds features in an mwm file based on a query')
    print('Usage: {0} <country.mwm> <type> <string>'.format(sys.argv[0]))
    print('')
    print('Type:')
    print('  t for inside types ("t hwtag" will find all hwtags-*)')
    print('  et for exact type ("et shop" won\'t find shop-chemist)')
    print('  n for names, case-sensitive ("n Starbucks" for all starbucks)')
    print('  m for metadata keys ("m flats" for features with flats)')
    print('  id for feature id ("id 1234" for feature #1234)')
    sys.exit(1)

typ = sys.argv[2].lower()
find = sys.argv[3]
# Python 2 passes arguments as byte strings, Python 3 as text. Decode only
# in the former case; calling .decode() on a Python 3 str raises
# AttributeError.
if isinstance(find, bytes):
    find = find.decode('utf-8')
# Parse the wanted feature id once instead of once per feature.
wanted_id = int(find) if typ == 'id' else None

# The file stays open for the whole search and is closed on exit from the block.
with open(sys.argv[1], 'rb') as mwm_file:
    mwm = MWM(mwm_file)
    mwm.read_header()
    mwm.read_types(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', '..', '..', 'data', 'types.txt'))
    for i, feature in enumerate(mwm.iter_features(metadata=True)):
        found = False
        if typ == 'n' and 'name' in feature['header']:
            found = any(find in value for value in feature['header']['name'].values())
        elif typ in ('t', 'et'):
            # 'et' needs an exact match; 't' also accepts a substring match.
            found = any(t == find or (typ == 't' and find in t)
                        for t in feature['header']['types'])
        elif typ == 'm' and 'metadata' in feature:
            found = find in feature['metadata']
        elif typ == 'id' and i == wanted_id:
            found = True
        if found:
            line = json.dumps(feature, ensure_ascii=False, sort_keys=True)
            if sys.version_info[0] < 3:
                # Python 2 only: encode before printing. On Python 3 printing
                # the encoded bytes would emit a b'...' repr instead of JSON.
                line = line.encode('utf-8')
            print(line)

22
tools/ft2osm.py Executable file
View file

@ -0,0 +1,22 @@
#!/usr/bin/env python
import sys
import mwm
# Print an OpenStreetMap URL for each feature id given on the command line,
# using the osm2ft file named by the first argument. The exit status is 2 if
# any id could not be found, 0 otherwise.
if len(sys.argv) < 3:
    print('Finds an OSM object for a given feature id.')
    print('Usage: {} <mwm.osm2ft> <ftid>'.format(sys.argv[0]))
    sys.exit(1)

with open(sys.argv[1], 'rb') as f:
    ft2osm = mwm.Osm2Ft(f, True)

type_abbr = {'n': 'node', 'w': 'way', 'r': 'relation'}
code = 0
for raw_id in sys.argv[2:]:
    ftid = int(raw_id)
    if ftid not in ft2osm:
        print('Could not find osm id for feature {}'.format(ftid))
        code = 2
        continue
    osm_ref = ft2osm[ftid]
    print('https://www.openstreetmap.org/{}/{}'.format(type_abbr[osm_ref[0]], osm_ref[1]))
sys.exit(code)