From af5f48fbf8169ad2ea6616d62d9d208546ae607c Mon Sep 17 00:00:00 2001 From: Ilya Zverev Date: Wed, 15 Feb 2017 20:21:05 +0300 Subject: [PATCH] Initial commit --- .gitignore | 3 + LICENSE | 175 +++++++++++++ README.md | 28 ++ conflate.py | 480 +++++++++++++++++++++++++++++++++++ profiles/moscow_parkomats.py | 66 +++++ requirements.txt | 2 + 6 files changed, 754 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100755 conflate.py create mode 100644 profiles/moscow_parkomats.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cb6ba26 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.swp +*.osc +*.zip diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..67db858 --- /dev/null +++ b/LICENSE @@ -0,0 +1,175 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. diff --git a/README.md b/README.md new file mode 100644 index 0000000..df0a61e --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +# OSM Conflator + +This is a script for merging points from some third-party source with OpenStreetMap data. +Please make sure the license allows that. After merging and uploading, the data can be updated. + +## Profiles + +Each source should have a profile. It is a python script with variables configuring +names, tags and processing. See examples in the `profiles` directory. 
+ +## Usage + +For a simplest case, run: + + ./conflate.py + +You might want to add `-v` to get status messages, and other arguments to pass a dataset file +or write the resulting osmChange somewhere. Run `./conflate.py --help` to see a list of arguments. + +## Uploading to OpenStreetMap + +It is recommended to open the resulting file in the JOSM editor and manually check the changes. +Alternatively, you can use [bulk_upload.py](https://wiki.openstreetmap.org/wiki/Bulk_upload.py) +to upload a change file from the command line. + +## License + +Written by Ilya Zverev for MAPS.ME. Published under the Apache 2.0 license. diff --git a/conflate.py b/conflate.py new file mode 100755 index 0000000..5056007 --- /dev/null +++ b/conflate.py @@ -0,0 +1,480 @@ +#!/usr/bin/env python3 +import argparse +import logging +import requests +import sys +import kdtree +from io import BytesIO +import json # for profiles +import re # for profiles +try: + from lxml import etree +except ImportError: + import xml.etree.ElementTree as etree + +OVERPASS_SERVER = 'http://overpass-api.de/api/' +BBOX_PADDING = 0.1 # in degrees +MAX_DISTANCE = 0.001 # how far can object be to be considered a match. 0.001 dg is ~110 m + + +class SourcePoint: + """A common class for points. Has an id, latitude and longitude, + and a dict of tags.""" + def __init__(self, pid, lat, lon, tags=None): + self.id = str(pid) + self.lat = lat + self.lon = lon + self.tags = {} if tags is None else tags + + def __len__(self): + return 2 + + def __getitem__(self, i): + if i == 0: + return self.lat + elif i == 1: + return self.lon + else: + raise ValueError('A SourcePoint has only lat and lon in a list') + + def __eq__(self, other): + return self.id == other.id + + def __hash__(self): + return hash(self.id) + + +class OSMPoint(SourcePoint): + """An OSM points is a SourcePoint with a few extra fields. + Namely, version, members (for ways and relations), and an action. 
+ The id is compound and created from object type and object id.""" + def __init__(self, ptype, pid, version, lat, lon, tags=None): + super().__init__('{}{}'.format(ptype[0], pid), lat, lon, tags) + self.osm_type = ptype + self.osm_id = pid + self.version = version + self.members = None + self.action = None + + def to_xml(self): + """Produces an XML out of the point data. Disregards the "action" field.""" + el = etree.Element(self.osm_type, id=str(self.osm_id), version=str(self.version)) + for tag, value in self.tags.items(): + etree.SubElement(el, 'tag', k=tag, v=value) + + if self.osm_type == 'node': + el.set('lat', str(self.lat)) + el.set('lon', str(self.lon)) + elif self.osm_type == 'way': + for node_id in self.members: + etree.SubElement(el, 'nd', ref=str(node_id)) + elif self.osm_type == 'relation': + for member in self.members: + m = etree.SubElement(el, 'member') + for i, n in enumerate(('type', 'ref', 'role')): + m.set(n, str(member[i])) + return el + + +class ProfileException(Exception): + """An exception class for the Profile instance.""" + def __init__(self, attr, desc): + super().__init__('Field missing in profile: {} ({})'.format(attr, desc)) + + +class Profile: + """A wrapper for a profile. + + A profile is a python script that sets a few local variables. + These variables become properties of the profile, accessible with + a "get" method. If something is a function, it will be called, + optional parameters might be passed to it. + + You can compile a list of all supported variables by grepping through + this code, or by looking at a few example profiles. If something + is required, you will be notified of that. 
+ """ + def __init__(self, fileobj): + s = fileobj.read().replace('\r', '') + self.profile = {} + exec(s, globals(), self.profile) + + def has(self, attr): + return attr in self.profile + + def get(self, attr, default=None, required=None, args=None): + if attr in self.profile: + value = self.profile[attr] + if callable(value): + if args is None: + return value() + else: + return value(*args) + else: + return value + if required is not None: + raise ProfileException(attr, required) + return default + + +class OsmConflator: + """The main class for the conflator. + + It receives a dataset, after which one must call either + "download_osm" or "parse_osm" methods. Then it is ready to match: + call the "match" method and get results with "to_osc". + """ + def __init__(self, profile, dataset): + self.dataset = {p.id: p for p in dataset} + self.osmdata = {} + self.matched = [] + self.profile = profile + if self.profile.get('no_dataset_id', False): + self.ref = None + else: + self.ref = 'ref:' + self.profile.get('dataset_id', required='A fairly unique id of the dataset to query OSM') + + def construct_overpass_query(self, bbox=None): + """Constructs an Overpass API query from the "query" list in the profile. + (k, v) turns into [k=v], (k,) into [k], (k, None) into [!k], (k, "~v") into [k~v].""" + tags = self.profile.get('query', required="a list of tuples. E.g. 
[('amenity', 'cafe'), ('name', '~Mc.*lds')]") + tag_str = '' + for t in tags: + if len(t) == 1: + q = '"{}"'.format(t[0]) + elif t[1] is None or len(t[1]) == 0: + q = '"!{}"'.format(t[0]) + elif t[1][0] == '~': + q = '"{}"~"{}"'.format(t[0], t[1][1:]) + else: + q = '"{}"="{}"'.format(t[0], t[1]) + tag_str += '[' + q + ']' + query = '[out:json][timeout:300];(' + bbox_str = '' if bbox is None else '(' + ','.join([str(x) for x in bbox]) + ')' + for t in ('node', 'way', 'relation'): + query += t + tag_str + bbox_str + ';' + if self.ref is not None: + query += t + '["' + self.ref + '"];' + query += '); out meta center;' + return query + + def get_dataset_bbox(self): + """Plain iterates over the dataset and returns the bounding box + that encloses it.""" + bbox = [90.0, 180.0, -90.0, -180.0] + for p in self.dataset.values(): + bbox[0] = min(bbox[0], p.lat - BBOX_PADDING) + bbox[1] = min(bbox[1], p.lon - BBOX_PADDING) + bbox[2] = max(bbox[2], p.lat + BBOX_PADDING) + bbox[3] = max(bbox[3], p.lon + BBOX_PADDING) + return bbox + + def split_into_bboxes(self): + """ + Splits the dataset into multiple bboxes to lower load on the overpass api. + + Returns a list of tuples (minlat, minlon, maxlat, maxlon). + + Not implemented for now, returns the single big bbox. Not sure if needed. 
+ """ + # TODO + return [self.get_dataset_bbox()] + + def check_against_profile_tags(self, tags): + qualifies = self.profile.get('qualifies', args=tags) + if qualifies is not None: + return qualifies + + query = self.profile.get('query', None) + if query is not None: + for tag in query: + if len(tag) >= 1: + if tag[0] not in tags: + return False + if len(tag) >= 2 and tag[1][0] != '~': + if tag[1] != tags[tag[0]]: + return False + return True + + def download_osm(self): + """Constructs an Overpass API query and requests objects + to match from a server.""" + profile_bbox = self.profile.get('bbox', True) + if not profile_bbox: + bboxes = [None] + elif hasattr(profile_bbox, '__len__') and len(profile_bbox) == 4: + bboxes = [profile_bbox] + else: + bboxes = self.split_into_bboxes() + + for b in bboxes: + query = self.construct_overpass_query(b) + logging.debug('Overpass query: %s', query) + r = requests.get(OVERPASS_SERVER + 'interpreter', {'data': query}) + if r.status_code != 200: + raise IOError('Failed to download data from Overpass API: {} {}\nQuery: {}'.format(r.status_code, r.text, query)) + for el in r.json()['elements']: + if 'tags' not in el: + continue + if 'center' in el: + for ll in ('lat', 'lon'): + el[ll] = el['center'][ll] + if self.check_against_profile_tags(el['tags']): + pt = OSMPoint(el['type'], el['id'], el['version'], el['lat'], el['lon'], el['tags']) + if 'nodes' in el: + pt.members = el['nodes'] + elif 'members' in el: + pt.members = [(x['type'], x['ref'], x['role']) for x in el['members']] + self.osmdata[pt.id] = pt + + def parse_osm(self, fileobj): + """Parses an OSM XML file into the "osmdata" field. For ways and relations, + finds the center. 
Drops objects that do not match the overpass query tags + (see "check_against_profile_tags" method).""" + xml = etree.parse(fileobj).getroot() + nodes = {} + for nd in xml.findall('node'): + nodes[nd.get('id')] = (float(nd.get('lat')), float(nd.get('lon'))) + ways = {} + for way in xml.findall('way'): + coord = [0, 0] + count = 0 + for nd in way.findall('nd'): + if nd.get('id') in nodes: + count += 1 + for i in range(len(coord)): + coord[i] += nodes[nd.get('ref')][i] + ways[way.get('id')] = [coord[0] / count, coord[1] / count] + + for el in xml: + tags = {} + for tag in el.findall('tag'): + tags[tag.get('k')] = tag.get('v') + if not self.check_against_profile_tags(tags): + continue + + if el.tag == 'node': + coord = nodes[el.get('id')] + members = None + elif el.tag == 'way': + coord = ways[el.get('id')] + members = [nd.get('ref') for nd in el.findall('nd')] + elif el.tag == 'relation': + coord = [0, 0] + count = 0 + for m in el.findall('member'): + if m.get('type') == 'node' and m.get('ref') in nodes: + count += 1 + for i in range(len(coord)): + coord[i] += nodes[m.get('ref')][i] + elif m.get('type') == 'way' and m.get('ref') in ways: + count += 1 + for i in range(len(coord)): + coord[i] += ways[m.get('ref')][i] + coord = [coord[0] / count, coord[1] / count] + members = [(m.get('type'), m.get('ref'), m.get('role')) for m in el.findall('member')] + pt = OSMPoint(el.tag, el.get('id'), el.get('version'), coord[0], coord[1], tags) + pt.members = members + self.osmdata[pt.id] = pt + + def register_match(self, dataset_key, osmdata_key, retag=None): + if osmdata_key is not None: + p = self.osmdata[osmdata_key] + del self.osmdata[osmdata_key] + else: + p = None + + if dataset_key is not None: + sp = self.dataset[dataset_key] + del self.dataset[dataset_key] + if p is None: + p = OSMPoint('node', -1-len(self.matched), 1, sp.lat, sp.lon, sp.tags) + p.action = 'create' + else: + master_tags = self.profile.get('master_tags', required='a set of authoritative tags that replace 
OSM values') + changed = False + for k, v in sp.tags.items(): + if k not in p.tags or (k in master_tags and p.tags[k] != v): + p.tags[k] = v + changed = True + if changed: + p.action = 'modify' + # If not, action is None and we're not including this object into the osmChange + source = self.profile.get('source', required='value of "source" tag for uploaded OSM objects') + p.tags['source'] = source + if self.ref is not None: + p.tags[self.ref] = sp.id + elif retag: + for k, v in retag.items(): + if v is not None: + p.tags[k] = v + elif k in p.tags: + del p.tags[k] + p.action = 'modify' + else: + p.action = 'delete' + if p is not None and p.action is not None: + self.matched.append(p) + + def match_dataset_points_smart(self): + """Smart matching for dataset <-> OSM points. + + We find a shortest link between a dataset and an OSM point. + Then we match these and remove both from dicts. + Then find another link and so on, until the length of a link + becomes larger than "max_distance". + + Currently the worst case complexity is around O(n^2*log^2 n). + But given the small number of objects to match, and that + the average case complexity is ~O(n*log^2 n), this is fine. 
+ """ + if not self.osmdata: + return + # KDTree distance is squared, so we square the max_distance + max_distance = pow(self.profile.get('max_distance', MAX_DISTANCE), 2) + osm_kd = kdtree.create(list(self.osmdata.values())) + count_matched = 0 + dist = [] + for sp, v in self.dataset.items(): + osm_point, distance = osm_kd.search_nn(v) + if osm_point is not None and distance <= max_distance: + dist.append((distance, sp, osm_point.data)) + needs_sorting = True + while dist: + if needs_sorting: + dist.sort(key=lambda x: x[0]) + needs_sorting = False + count_matched += 1 + osm_point = dist[0][2] + self.register_match(dist[0][1], osm_point.id) + osm_kd = osm_kd.remove(osm_point) + del dist[0] + for i in range(len(dist)-1, -1, -1): + if dist[i][2] == osm_point: + nearest = osm_kd.search_nn(self.dataset[dist[i][1]]) + if nearest and nearest[1] <= max_distance: + new_point, distance = nearest + dist[i] = (distance, dist[i][1], new_point.data) + needs_sorting = i == 0 or distance < dist[0][0] + else: + del dist[i] + needs_sorting = i == 0 + logging.info('Matched %s points', count_matched) + + def match(self): + """Matches each osm object with a SourcePoint, or marks it as obsolete. 
+ The resulting list of OSM Points are written to the "matched" field.""" + if self.ref is not None: + # First match all objects with ref:whatever tag set + count_ref = 0 + for k, p in list(self.osmdata.items()): + if self.ref in p.tags: + if p.tags[self.ref] in self.dataset: + count_ref += 1 + self.register_match(p.tags[self.ref], k) + logging.info('Updated %s OSM objects with %s tag', count_ref, self.ref) + + # Then find matches for unmatched dataset points + self.match_dataset_points_smart() + + # Add unmatched dataset points + logging.info('Adding %s unmatched dataset points', len(self.dataset)) + for k in list(self.dataset.keys()): + self.register_match(k, None) + + # And finally delete some or all of the remaining osm objects + if len(self.osmdata) > 0: + count_deleted = 0 + count_retagged = 0 + delete_unmatched = self.profile.get('delete_unmatched', False) + retag = self.profile.get('tag_unmatched') + for k, p in list(self.osmdata.items()): + if self.ref is not None and self.ref in p.tags: + # When ref:whatever is present, we can delete that object safely + count_deleted += 1 + self.register_match(None, k) + elif delete_unmatched or retag: + if retag: + count_retagged += 1 + else: + count_deleted += 1 + self.register_match(None, k, retag=retag) + logging.info('Deleted %s and retagged %s unmatched objects from OSM', count_deleted, count_retagged) + + def to_osc(self): + """Returns a string with osmChange.""" + osc = etree.Element('osmChange', version='0.6', generator='OSM Conflator') + for osmel in self.matched: + if osmel.action is not None: + el = osmel.to_xml() + etree.SubElement(osc, osmel.action).append(el) + return "\n" + etree.tostring(osc, encoding='utf-8').decode('utf-8') + + +def read_dataset(profile, fileobj): + """A helper function to call a "dataset" function in the profile. 
+ If the fileobj is not specified, tries to download a dataset from + an URL specified in "download_url" profile variable.""" + if not fileobj: + url = profile.get('download_url') + if url is None: + logging.error('No download_url specified in the profile, please provide a dataset file with --source') + return None + r = requests.get(url) + if r.status_code != 200: + logging.error('Could not download source data: %s %s', r.status_code, r.text) + return None + if len(r.content) == 0: + logging.error('Empty response from %s', url) + return None + fileobj = BytesIO(r.content) + if not profile.has('dataset'): + # The default option is to parse the source as a JSON + try: + data = [] + for item in json.load(fileobj): + data.append(SourcePoint(item['id'], item['lat'], item['lon'], item['tags'])) + return data + except Exception: + logging.error('Failed to parse the source as a JSON') + return profile.get('dataset', args=(fileobj,), required='returns a list of SourcePoints with the dataset') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=''' + OSM Conflator. + Reads a profile with source data and conflates it with OpenStreetMap data. 
+ Produces an osmChange file ready to be uploaded.''') + parser.add_argument('profile', type=argparse.FileType('r'), help='Name of a profile to use') + parser.add_argument('-o', '--osc', type=argparse.FileType('w'), default=sys.stdout, help='Output osmChange file name') + parser.add_argument('-i', '--source', type=argparse.FileType('rb'), help='Source file to pass to the profile dataset() function') + parser.add_argument('--osm', type=argparse.FileType('r'), help='Instead of querying Overpass API, use this unpacked osm file') + parser.add_argument('--verbose', '-v', action='count', help='Display info messages, use -vv for debugging') + options = parser.parse_args() + + if options.verbose == 2: + log_level = logging.DEBUG + elif options.verbose == 1: + log_level = logging.INFO + else: + log_level = logging.WARNING + logging.basicConfig(level=log_level, format='%(asctime)s %(message)s', datefmt='%H:%M:%S') + logging.getLogger("requests").setLevel(logging.WARNING) + + logging.debug('Loading profile %s', options.profile) + profile = Profile(options.profile) + + dataset = read_dataset(profile, options.source) + if not dataset: + logging.error('Empty source dataset') + sys.exit(2) + logging.info('Read %s items from the dataset', len(dataset)) + conflator = OsmConflator(profile, dataset) + if options.osm: + conflator.parse_osm(options.osm) + else: + conflator.download_osm() + logging.info('Downloaded %s objects from OSM', len(conflator.osmdata)) + conflator.match() + diff = conflator.to_osc() + options.osc.write(diff) diff --git a/profiles/moscow_parkomats.py b/profiles/moscow_parkomats.py new file mode 100644 index 0000000..ee56721 --- /dev/null +++ b/profiles/moscow_parkomats.py @@ -0,0 +1,66 @@ +# Available modules: logging, requests, json, re, etree. 
# Available modules: logging, requests, json, re, etree. But importing these helps catch other errors
import json
import re
import logging

# Verify this at http://data.mos.ru/opendata/1421/passport ("Download .json")
# Disabled since the link returns a zip file and not a plain json
# download_url = 'http://op.mos.ru/EHDWSREST/catalog/export/get?id=89786'

# What will be put into "source" tags. Lower case please
source = 'dit.mos.ru'
# A fairly unique id of the dataset to query OSM, used for "ref:mos_parking" tags
# If you omit it, set explicitly "no_dataset_id = True"
dataset_id = 'mos_parking'
# Tags for querying with overpass api
query = [('amenity', 'vending_machine'), ('vending', 'parking_tickets')]
# Use bbox from dataset points (default). False = query whole world, [minlat, minlon, maxlat, maxlon] to override
bbox = True
# How close OSM point should be to register a match. Default is 0.001 (~110 m)
max_distance = 0.0003  # ~30 m
# Delete objects that match query tags but not dataset? False is the default
delete_unmatched = False
# If set, modify tags on unmatched objects instead
tag_unmatched = {
    'fixme': 'Проверить на местности: в данных ДИТ отсутствует. Вероятно, демонтирован',
    'amenity': None,
    'was:amenity': 'vending_machine'
}
# A set of authoritative tags to replace on matched objects
master_tags = set(('zone:parking', 'ref', 'contact:phone', 'contact:website', 'operator'))


# A list of SourcePoint objects. Initialize with (id, lat, lon, {tags}).
def dataset(fileobj):
    """Parses the DIT Moscow parking-meter export into SourcePoints."""
    records = json.loads(fileobj.read().decode('cp1251'))
    ref_pattern = re.compile(r'\d{4,6}')
    points = []
    for rec in records:
        try:
            gid = rec['global_id']
            zone = rec['ParkingZoneNumber']
            lon = rec['Longitude_WGS84']
            lat = rec['Latitude_WGS84']
            pnum = rec['NumberOfParkingMeter']
            tags = {
                'amenity': 'vending_machine',
                'vending': 'parking_tickets',
                'zone:parking': zone,
                'contact:phone': '+7 495 539-54-54',
                'contact:website': 'http://parking.mos.ru/',
                'opening_hours': '24/7',
                'operator': 'ГКУ «Администратор Московского парковочного пространства»',
                'payment:cash': 'no',
                'payment:credit_cards': 'yes',
                'payment:debit_cards': 'yes'
            }
            try:
                # Coordinates arrive as strings; the meter ref is buried
                # in a longer inventory number.
                lat = float(lat)
                lon = float(lon)
                tags['ref'] = ref_pattern.search(pnum).group(0)
                points.append(SourcePoint(gid, lat, lon, tags))
            except Exception as e:
                logging.warning('PROFILE: Failed to parse lat/lon/ref for parking meter %s: %s',
                                gid, str(e))
        except Exception as e:
            logging.warning('PROFILE: Failed to get attributes for parking meter: %s', str(e))
    return points