osm_conflate/profiles/minkult.py

source = 'opendata.mkrf.ru'
dataset_id = 'mkrf_theaters'
query = [('amenity', 'theatre')]
max_distance = 300
master_tags = ('official_name', 'phone', 'opening_hours', 'website')


# Reading the dataset passport to determine an URL of the latest dataset version
def download_url():
    import logging
    import requests

    dataset_id = '7705851331-' + (param or 'museums')
    r = requests.get('http://opendata.mkrf.ru/opendata/{}/meta.json'.format(dataset_id))
    if r.status_code != 200 or len(r.content) == 0:
        logging.error('Could not get URL for dataset: %s %s', r.status_code, r.text)
        logging.error('Please check http://opendata.mkrf.ru/opendata/{}'.format(dataset_id))
        return None
    result = r.json()
    latest = result['data'][-1]
    logging.info('Downloading %s from %s', result['title'], latest['created'])
    return latest['source']

source = 'opendata.mkrf.ru'
dataset_id = 'mkrf_'+(param or 'museums')
if not param or param == 'museums':
    query = [('tourism', 'museum')]
elif param == 'theaters':
    query = [('amenity', 'theatre')]
elif param == 'circuses':
    query = [('amenity', 'circus')]
elif param == 'philharmonic':
    query = [('amenity', 'theatre')]
else:
    raise ValueError('Unknown param value: {}'.format(param))

max_distance = 300
master_tags = ('official_name', 'phone', 'opening_hours', 'website')


def dataset(fileobj):
    import json
    import codecs

    def make_wd_ranges(r):
        """Converts e.g. [0,1,4] into 'Mo-Tu, Fr'."""
        wd = ['Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa', 'Su']
        res = wd[r[0]]
        in_range = False
        for i in range(1, len(r)+1):
            if i < len(r) and r[i] == r[i-1] + 1:
                in_range = True
            else:
                if in_range:
                    res += '-' + wd[r[i-1]]
                    in_range = False
                if i < len(r):
                    res += ', ' + wd[r[i]]
        return res

    def parse_hours(h):
        """Receives a dict {'0': {'from': '10:00:00', 'to': '18:00:00'}, ...}
        and returns a proper opening_hours value."""
        days = {}
        for wd, d in h.items():
            if not d['from']:
                continue
            for i in ('from', 'to'):
                d[i] = d[i][:5]
            if d['to'] == '00:00':
                d['to'] = '24:00'
            elif not d['to']:
                d['to'] = '19:00+'
            k = '{}-{}'.format(d['from'], d['to'])
            if k not in days:
                days[k] = set()
            days[k].add(int(wd))
        days2 = {}
        for op, d in days.items():
            days2[tuple(sorted(d))] = op
        res = []
        for d in sorted(days2.keys(), key=lambda x: min(x)):
            res.append(' '.join([make_wd_ranges(d), days2[d]]))
        return '; '.join(res)

    def wrap(coord, absmax):
        if coord < -absmax:
            return coord + absmax * 2
        if coord > absmax:
            return coord - absmax * 2
        return coord

    def format_phone(ph):
        if ph and len(ph) == 11 and ph[0] == '7':
            return '+7 {} {}-{}-{}'.format(ph[1:4], ph[4:7], ph[7:9], ph[9:])
        return ph

    source = json.load(codecs.getreader('utf-8')(fileobj))
    data = []
    for el in source:
        d = el['data']['general']
        gid = d['id']
        lon = wrap(d['address']['mapPosition']['coordinates'][1], 180)
        lat = d['address']['mapPosition']['coordinates'][0]
        tags = {
            'amenity': 'theatre',
            'name': d['name'],
            # 'official_name': d['name'],
            # 'image': d['image']['url'],
            'operator': d['organization']['name'],
            'addr:full': '{}, {}'.format(d['locale']['name'], d['address']['street']),
        }
        if tags['operator'] == tags['name']:
            del tags['operator']
        if d.get('workingSchedule'):
            tags['opening_hours'] = parse_hours(d['workingSchedule'])
        if 'email' in d['contacts']:
            tags['email'] = d['contacts']['email']
        if 'website' in d['contacts']:
            tags['website'] = d['contacts']['website']
            if tags['website'].endswith('.ru'):
                tags['website'] += '/'
        if 'phones' in d['contacts'] and d['contacts']['phones']:
            tags['phone'] = format_phone(d['contacts']['phones'][0]['value'])
        data.append(SourcePoint(gid, lat, lon, tags))
    return data