126 lines
4.3 KiB
Python
126 lines
4.3 KiB
Python
# Static profile settings for the theatre dataset.
source = 'opendata.mkrf.ru'
dataset_id = 'mkrf_theaters'

# OSM tag filter for candidate objects: amenity=theatre.
query = [('amenity', 'theatre')]

# NOTE(review): the unit of max_distance is not stated in this file
# (presumably metres) -- confirm against the tool that loads the profile.
max_distance = 300

# NOTE(review): presumably the tag keys for which the dataset value is
# authoritative -- confirm against the tool that loads the profile.
master_tags = (
    'official_name',
    'phone',
    'opening_hours',
    'website',
)
|
|
|
|
|
|
# Read the dataset passport to determine the URL of the latest dataset version.
|
|
def download_url():
    """Return the download URL of the newest version of the selected dataset.

    Fetches the dataset passport (``meta.json``) from opendata.mkrf.ru and
    takes the last entry of its ``data`` list, which this code treats as
    the latest version.

    The dataset is chosen by the name ``param``, which is not defined in
    this function (presumably injected by the tool that loads the profile
    -- confirm); an empty value selects ``museums``.

    Returns:
        The ``source`` value of the last version entry, or ``None`` when
        the passport cannot be fetched or lists no versions.
    """
    # Imports are kept function-local, as in the rest of this file.
    import logging
    import requests

    dataset_id = '7705851331-' + (param or 'museums')
    passport_url = 'http://opendata.mkrf.ru/opendata/{}/meta.json'.format(dataset_id)
    try:
        # Without a timeout a stalled server would block the run forever.
        r = requests.get(passport_url, timeout=30)
    except requests.RequestException as e:
        logging.error('Could not get URL for dataset: %s', e)
        logging.error('Please check http://opendata.mkrf.ru/opendata/%s', dataset_id)
        return None

    if r.status_code != 200 or len(r.content) == 0:
        logging.error('Could not get URL for dataset: %s %s', r.status_code, r.text)
        logging.error('Please check http://opendata.mkrf.ru/opendata/%s', dataset_id)
        return None

    result = r.json()
    versions = result.get('data')
    if not versions:
        # A passport without any published version has nothing to download.
        logging.error('Dataset passport lists no versions: %s', passport_url)
        return None

    latest = versions[-1]
    logging.info('Downloading %s from %s', result.get('title', dataset_id), latest['created'])
    return latest['source']
|
|
|
|
# Profile settings. ``param`` selects which opendata.mkrf.ru dataset is
# processed; an empty value falls back to museums.
source = 'opendata.mkrf.ru'
dataset_id = 'mkrf_' + (param or 'museums')

# OSM tag filter matching the selected dataset. Philharmonic halls use the
# same amenity=theatre filter as theatres.
if (param or 'museums') == 'museums':
    query = [('tourism', 'museum')]
elif param in ('theaters', 'philharmonic'):
    query = [('amenity', 'theatre')]
elif param == 'circuses':
    query = [('amenity', 'circus')]
else:
    raise ValueError('Unknown param value: {}'.format(param))

# NOTE(review): the unit of max_distance is not stated in this file
# (presumably metres) -- confirm against the tool that loads the profile.
max_distance = 300

# NOTE(review): presumably the tag keys for which the dataset value is
# authoritative -- confirm against the tool that loads the profile.
master_tags = (
    'official_name',
    'phone',
    'opening_hours',
    'website',
)
|
|
|
|
|
|
def dataset(fileobj):
    """Parse the downloaded JSON dump into a list of SourcePoint objects.

    Args:
        fileobj: Binary file-like object holding UTF-8 encoded JSON: a list
            of records, each carrying its payload under
            ``['data']['general']``.

    Returns:
        A list with one ``SourcePoint(id, lat, lon, tags)`` per record.
        ``SourcePoint`` is not defined in this file (presumably provided by
        the tool that loads the profile -- confirm).
    """
    # Imports and helpers are kept local to the function, as in the rest of
    # this file.
    import json
    import codecs

    def make_wd_ranges(r):
        """Converts e.g. [0,1,4] into 'Mo-Tu, Fr'.

        `r` must be a non-empty, ascending sequence of weekday indices
        (0 = Monday .. 6 = Sunday).
        """
        wd = ['Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa', 'Su']
        res = wd[r[0]]
        in_range = False
        # Iterate one step past the end so a trailing run gets closed.
        for i in range(1, len(r) + 1):
            if i < len(r) and r[i] == r[i - 1] + 1:
                in_range = True
            else:
                if in_range:
                    res += '-' + wd[r[i - 1]]
                    in_range = False
                if i < len(r):
                    res += ', ' + wd[r[i]]
        return res

    def parse_hours(h):
        """Receives a dict {'0': {'from': '10:00:00', 'to': '18:00:00'}, ...}
        and returns a proper opening_hours value.

        Days without a 'from' time are skipped. Returns an empty string
        when no day has opening hours. The input dict is not modified.
        """
        days = {}
        for wd, d in h.items():
            if not d:
                continue
            # Keep only HH:MM; work on copies so the caller's data is left
            # intact, and tolerate missing/None values.
            start = (d.get('from') or '')[:5]
            if not start:
                continue
            end = (d.get('to') or '')[:5]
            if end == '00:00':
                # Closing at midnight is written as 24:00.
                end = '24:00'
            elif not end:
                # No closing time given: fall back to an open-ended 19:00.
                end = '19:00+'
            days.setdefault('{}-{}'.format(start, end), set()).add(int(wd))

        # Group weekdays sharing the same hours, ordered by first weekday.
        groups = [(tuple(sorted(wds)), hours) for hours, wds in days.items()]
        groups.sort(key=lambda g: g[0][0])
        return '; '.join(
            ' '.join([make_wd_ranges(wds), hours]) for wds, hours in groups)

    def wrap(coord, absmax):
        """Shift a coordinate by one full period (2 * absmax) when it lies
        outside [-absmax, absmax]."""
        if coord < -absmax:
            return coord + absmax * 2
        if coord > absmax:
            return coord - absmax * 2
        return coord

    def format_phone(ph):
        """Format an 11-digit number starting with 7 as '+7 XXX XXX-XX-XX';
        any other value is returned unchanged."""
        if ph and len(ph) == 11 and ph[0] == '7':
            return '+7 {} {}-{}-{}'.format(ph[1:4], ph[4:7], ph[7:9], ph[9:])
        return ph

    records = json.load(codecs.getreader('utf-8')(fileobj))
    data = []
    for el in records:
        d = el['data']['general']
        gid = d['id']
        # The code reads index 1 as longitude and index 0 as latitude.
        # NOTE(review): confirm the coordinate order of the source data.
        coords = d['address']['mapPosition']['coordinates']
        lon = wrap(coords[1], 180)
        lat = coords[0]
        tags = {
            # NOTE(review): hard-coded to theatre regardless of which
            # dataset was selected -- confirm this is intended.
            'amenity': 'theatre',
            'name': d['name'],
            # 'official_name': d['name'],
            # 'image': d['image']['url'],
            'operator': d['organization']['name'],
            'addr:full': '{}, {}'.format(d['locale']['name'], d['address']['street']),
        }
        # An operator identical to the name carries no extra information.
        if tags['operator'] == tags['name']:
            del tags['operator']
        if d.get('workingSchedule'):
            hours = parse_hours(d['workingSchedule'])
            # Do not emit an empty opening_hours tag.
            if hours:
                tags['opening_hours'] = hours
        if 'email' in d['contacts']:
            tags['email'] = d['contacts']['email']
        if 'website' in d['contacts']:
            tags['website'] = d['contacts']['website']
            # Bare '.ru' addresses get a trailing slash appended.
            if tags['website'].endswith('.ru'):
                tags['website'] += '/'
        if 'phones' in d['contacts'] and d['contacts']['phones']:
            # Only the first listed phone number is used.
            tags['phone'] = format_phone(d['contacts']['phones'][0]['value'])
        data.append(SourcePoint(gid, lat, lon, tags))
    return data
|