diff --git a/.gitignore b/.gitignore index 255c255..7a99b67 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ build/ __pycache__/ *.pyc +dist/ +*.egg* diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d396a94 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,9 @@ +# mwm.py Change Log + +## master branch + +## 0.9.0 + +_Released 2017-06-08_ + +The initial release with some features. diff --git a/README.md b/README.md deleted file mode 100644 index a2243ba..0000000 --- a/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# mwm.py - -It is a python library to read contents of MAPS.ME mwm files. Not everything -is supported, but you can get at least all the features and their attributes. -We at MAPS.ME use this script to do analytics and maintenance. - -## Installation - - pip install mwm - -## Usage - -Just add `import mwm` to your script, and read an mwm file with: - -```python -with open('file.mwm', 'rb') as f: - data = mwm.MWM(f) -``` - -## Tools - -There are some useful tools in the relevant directory, which can serve as -the library usage examples: - -* `dump_mwm.py` prints the header and some statistics on an mwm file. -* `find_feature.py` can find features inside an mwm by type or name. -* `ft2osm.py` converts a feature id to an OSM website link. - -## License - -Written by Ilya Zverev for MAPS.ME. Published under the Apache License 2.0. diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..76734e0 --- /dev/null +++ b/README.rst @@ -0,0 +1,39 @@ +mwm.py +====== + +It is a python library to read contents of MAPS.ME mwm files. Not +everything is supported, but you can get at least all the features and +their attributes. We at MAPS.ME use this script to do analytics and +maintenance. + +Installation +------------ + +:: + + pip install mwm + +Usage +----- + +Just add ``import mwm`` to your script, and read an mwm file with: + +.. code:: python + + with open('file.mwm', 'rb') as f: + data = mwm.MWM(f) + +Tools +----- + +The package installs the ``mwmtool`` command-line script. It shows +statistics about an MWM file, can search for features or convert ids. +Run it with ``-h`` to see a list of options. + +The script source can serve as a library usage example. + +License +------- + +Written by Ilya Zverev for MAPS.ME. Published under the Apache License +2.0. \ No newline at end of file diff --git a/mwm/__init__.py b/mwm/__init__.py index c429c2f..fc587ff 100644 --- a/mwm/__init__.py +++ b/mwm/__init__.py @@ -1,2 +1,2 @@ -from .mwm import MWM, Osm2Ft +from .mwm import MWM, Osm2Ft, __version__ from .mwmfile import MWMFile diff --git a/mwm/mwm.py b/mwm/mwm.py index d8827ed..cd918bf 100644 --- a/mwm/mwm.py +++ b/mwm/mwm.py @@ -2,14 +2,14 @@ from .mwmfile import MWMFile from datetime import datetime -# Unprocessed sections: geomN, trgN, idx, sdx (search index), addr (search address), offs (feature offsets - succinct) -# Routing sections: mercedes (matrix), daewoo (edge data), infinity (edge id), skoda (shortcuts), chrysler (cross context), ftseg, node2ftseg -# (these mostly are succinct structures, except chrysler and node2ftseg, so no use trying to load them here) +__version__ = '0.9.0' + +# Unprocessed sections: geomN, trgN, idx, sdx (search index), +# addr (search address), offs (feature offsets - succinct) # TODO: # - Predictive reading of LineStrings # - Find why polygon geometry is incorrect in iter_features() -# - Find feature ids in the 'dat' section, or find a way to read the 'offs' section class MWM(MWMFile): @@ -43,10 +43,11 @@ class MWM(MWMFile): fmt = self.read_varuint() + 1 version = self.read_varuint() if version < 161231: - version = datetime(2000 + int(version / 10000), int(version / 100) % 100, version % 100) + vdate = datetime(2000 + int(version / 10000), int(version / 100) % 100, version % 100) else: - version = datetime.fromtimestamp(version) - return {'fmt': fmt, 'version': version} + vdate = datetime.fromtimestamp(version) + version = int(vdate.strftime('%y%m%d')) + return {'fmt': fmt, 'version': version, 'date': vdate} def read_header(self): """Reads 'header' section.""" diff --git a/mwm/mwmtool.py b/mwm/mwmtool.py new file mode 100755 index 0000000..5e265e1 --- /dev/null +++ b/mwm/mwmtool.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +import sys +import os.path +import random +import json +import argparse +from mwm import MWM, Osm2Ft + + +def dump_mwm(args): + mwm = MWM(args.mwm) + if os.path.exists(args.types): + mwm.read_types(args.types) + + print('Tags:') + tvv = sorted([(k, v[0], v[1]) for k, v in mwm.tags.items()], key=lambda x: x[1]) + for tv in tvv: + print(' {0:<8}: offs {1:9} len {2:8}'.format(tv[0], tv[1], tv[2])) + v = mwm.read_version() + print('Format: {0}, version: {1}'.format(v['fmt'], v['date'].strftime('%Y-%m-%d %H:%M'))) + print('Header: {0}'.format(mwm.read_header())) + print('Region Info: {0}'.format(mwm.read_region_info())) + print('Metadata count: {0}'.format(len(mwm.read_metadata()))) + + cross = mwm.read_crossmwm() + if cross: + print('Outgoing points: {0}, incoming: {1}'.format(len(cross['out']), len(cross['in']))) + print('Outgoing regions: {0}'.format(set(cross['neighbours']))) + + # Print some random features using reservoir sampling + count = 5 + sample = [] + for i, feature in enumerate(mwm.iter_features()): + if i < count: + sample.append(feature) + elif random.randint(0, i) < count: + sample[random.randint(0, count-1)] = feature + + print('Feature count: {0}'.format(i)) + print('Sample features:') + for feature in sample: + print(json.dumps(feature, ensure_ascii=False)) + + +def find_feature(args): + mwm = MWM(args.mwm) + mwm.read_header() + if os.path.exists(args.types): + mwm.read_types(args.types) + if args.iname: + args.iname = args.iname.lower() + + for i, feature in enumerate(mwm.iter_features(metadata=True)): + if args.fid and i != args.fid: + continue + if args.name or args.iname: + if 'name' not in feature['header']: + continue + found = False + for value in feature['header']['name'].values(): + if args.name and args.name in value: + found = True + elif args.iname and args.iname in value.lower(): + found = True + if not found: + continue + if args.type or args.exact_type: + found = False + for t in feature['header']['types']: + if t == args.type or t == args.exact_type: + found = True + elif args.type and args.type in t: + found = True + if not found: + continue + if args.meta and ('metadata' not in feature or args.meta not in feature['metadata']): + continue + print(json.dumps(feature, ensure_ascii=False, sort_keys=True)) + + +def ft2osm(args): + ft2osm = Osm2Ft(args.osm2ft, True) + code = 0 + type_abbr = {'n': 'node', 'w': 'way', 'r': 'relation'} + for ftid in args.ftid: + if ftid in ft2osm: + print('https://www.openstreetmap.org/{}/{}'.format(type_abbr[ft2osm[ftid][0]], ft2osm[ftid][1])) + else: + print('Could not find osm id for feature {}'.format(ftid)) + code = 2 + return code + + +def main(): + parser = argparse.ArgumentParser(description='Toolbox for MWM files.') + parser.add_argument('--types', default=os.path.join(os.path.dirname(sys.argv[0]), '..', '..', '..', '..', 'data', 'types.txt'), help='path to types.txt') + subparsers = parser.add_subparsers(dest='cmd') + subparsers.required = True + parser_dump = subparsers.add_parser('dump', help='Dumps some structures.') + parser_dump.add_argument('mwm', type=argparse.FileType('rb'), help='file to browse') + parser_dump.set_defaults(func=dump_mwm) + parser_find = subparsers.add_parser('find', help='Finds features in a file.') + parser_find.add_argument('mwm', type=argparse.FileType('rb'), help='file to search') + parser_find.add_argument('-t', dest='type', help='look inside types ("-t hwtag" will find all hwtags-*)') + parser_find.add_argument('-et', dest='exact_type', help='look for a type ("-et shop won\'t find shop-chemist)') + parser_find.add_argument('-n', dest='name', help='look inside names, case-sensitive ("-n Starbucks" for all starbucks)') + parser_find.add_argument('-in', dest='iname', help='look inside names, case-insensitive ("-in star" will find Starbucks)') + parser_find.add_argument('-m', dest='meta', help='look for a metadata key ("m flats" for features with flats)') + parser_find.add_argument('-id', dest='fid', type=int, help='look for a feature id ("-id 1234 for feature #1234)') + parser_find.set_defaults(func=find_feature) + parser_osm = subparsers.add_parser('osm', help='Displays an OpenStreetMap link for a feature id.') + parser_osm.add_argument('osm2ft', type=argparse.FileType('rb'), help='.mwm.osm2ft file') + parser_osm.add_argument('ftid', type=int, nargs='+', help='feature id') + parser_osm.set_defaults(func=ft2osm) + + args = parser.parse_args() + code = args.func(args) + if code is not None: + sys.exit(code) + + +if __name__ == '__main__': + main() diff --git a/setup.py b/setup.py index 928aaaf..07f6cd8 100644 --- a/setup.py +++ b/setup.py @@ -1,18 +1,19 @@ from setuptools import setup from os import path +from mwm import __version__ here = path.abspath(path.dirname(__file__)) setup( name='mwm', - version='0.9.0', + version=__version__, author='Ilya Zverev', author_email='ilya@zverev.info', packages=['mwm'], - url='http://pypi.python.org/pypi/mwm/', + url='https://github.com/mapsme/mwm.py', license='Apache License 2.0', description='Library to read binary MAPS.ME files.', - long_description=open(path.join(here, 'README.md')).read(), + long_description=open(path.join(here, 'README.rst')).read(), classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', @@ -25,4 +26,7 @@ setup( 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', ], + entry_points={ + 'console_scripts': ['mwmtool = mwm.mwmtool:main'] + }, ) diff --git a/tools/dump_mwm.py b/tools/dump_mwm.py deleted file mode 100755 index 564ec1d..0000000 --- a/tools/dump_mwm.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/python -import sys, os.path, random -import json -from mwm import MWM - -if len(sys.argv) < 2: - print('Dumps some MWM structures.') - print('Usage: {0} '.format(sys.argv[0])) - sys.exit(1) - -mwm = MWM(open(sys.argv[1], 'rb')) -mwm.read_types(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', '..', '..', 'data', 'types.txt')) -print('Tags:') -tvv = sorted([(k, v[0], v[1]) for k, v in mwm.tags.items()], key=lambda x: x[1]) -for tv in tvv: - print(' {0:<8}: offs {1:9} len {2:8}'.format(tv[0], tv[1], tv[2])) -v = mwm.read_version() -print('Format: {0}, version: {1}'.format(v['fmt'], v['version'].strftime('%Y-%m-%d %H:%M'))) -print('Header: {0}'.format(mwm.read_header())) -print('Region Info: {0}'.format(mwm.read_region_info())) -print('Metadata count: {0}'.format(len(mwm.read_metadata()))) - -cross = mwm.read_crossmwm() -if cross: - print('Outgoing points: {0}, incoming: {1}'.format(len(cross['out']), len(cross['in']))) - print('Outgoing regions: {0}'.format(set(cross['neighbours']))) - -# Print some random features using reservoir sampling -count = 5 -sample = [] -for i, feature in enumerate(mwm.iter_features()): - if i < count: - sample.append(feature) - elif random.randint(0, i) < count: - sample[random.randint(0, count-1)] = feature - -print('Feature count: {0}'.format(i)) -print('Sample features:') -for feature in sample: - print(json.dumps(feature, ensure_ascii=False)) diff --git a/tools/find_feature.py b/tools/find_feature.py deleted file mode 100755 index 858aa7d..0000000 --- a/tools/find_feature.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python -import sys, os.path, json -from mwm import MWM - -if len(sys.argv) < 4: - print('Finds features in an mwm file based on a query') - print('Usage: {0} '.format(sys.argv[0])) - print('') - print('Type:') - print(' t for inside types ("t hwtag" will find all hwtags-*)') - print(' et for exact type ("et shop" won\'t find shop-chemist)') - print(' n for names, case-sensitive ("n Starbucks" for all starbucks)') - print(' m for metadata keys ("m flats" for features with flats)') - print(' id for feature id ("id 1234" for feature #1234)') - sys.exit(1) - -typ = sys.argv[2].lower() -find = sys.argv[3].decode('utf-8') - -mwm = MWM(open(sys.argv[1], 'rb')) -mwm.read_header() -mwm.read_types(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', '..', '..', 'data', 'types.txt')) -for i, feature in enumerate(mwm.iter_features(metadata=True)): - found = False - if typ == 'n' and 'name' in feature['header']: - for value in feature['header']['name'].values(): - if find in value: - found = True - elif typ in ('t', 'et'): - for t in feature['header']['types']: - if t == find: - found = True - elif typ == 't' and find in t: - found = True - elif typ == 'm' and 'metadata' in feature: - if find in feature['metadata']: - found = True - elif typ == 'id' and i == int(find): - found = True - if found: - print(json.dumps(feature, ensure_ascii=False, sort_keys=True).encode('utf-8')) diff --git a/tools/ft2osm.py b/tools/ft2osm.py deleted file mode 100755 index d3c7c0d..0000000 --- a/tools/ft2osm.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python -import sys -import mwm - -if len(sys.argv) < 3: - print('Finds an OSM object for a given feature id.') - print('Usage: {} '.format(sys.argv[0])) - sys.exit(1) - -with open(sys.argv[1], 'rb') as f: - ft2osm = mwm.Osm2Ft(f, True) - -code = 0 -type_abbr = {'n': 'node', 'w': 'way', 'r': 'relation'} -for ftid in sys.argv[2:]: - ftid = int(ftid) - if ftid in ft2osm: - print('https://www.openstreetmap.org/{}/{}'.format(type_abbr[ft2osm[ftid][0]], ft2osm[ftid][1])) - else: - print('Could not find osm id for feature {}'.format(ftid)) - code = 2 -sys.exit(code)