Add five more profile examples

This commit is contained in:
Ilya Zverev 2017-12-20 19:27:42 +03:00
parent 9b0f14ef4b
commit 9ad48a0d44
5 changed files with 390 additions and 0 deletions

96
profiles/burgerking.py Normal file
View file

@@ -0,0 +1,96 @@
import json
import codecs
import re

# URL of the JSON feed behind the website's store locator.
download_url = 'https://burgerking.ru/restaurant-locations-json-reply-new'
source = 'Burger King'
dataset_id = 'burger_king'
# Framework flag: do not write a ref:<dataset_id> tag on matched objects
# — presumably; semantics live in the conflation framework.
no_dataset_id = True
# Overpass QL filter for candidate OSM objects to match against.
query = '[amenity~"cafe|restaurant|fast_food"][name~"burger.*king|бургер.*кинг",i]'
# Maximum match distance — presumably metres; TODO confirm with the framework.
max_distance = 1000
overpass_timeout = 1200
max_request_boxes = 4
# Tags whose dataset values override OSM values on matched objects.
master_tags = ('name', 'amenity', 'name:ru', 'name:en', 'contact:phone', 'opening_hours')
# Tags applied to OSM objects with no counterpart in the dataset: demote
# from amenity and leave a fixme note (user-facing text, kept in Russian).
tag_unmatched = {
    'fixme': 'Проверить на местности: в данных сайта отсутствует.',
    'amenity': None,
    'was:amenity': 'fast_food'
}
def dataset(fileobj):
    """Read the Burger King locations JSON and return a list of SourcePoint.

    fileobj is a binary file object with the UTF-8 JSON reply from
    download_url (a list of restaurant dicts).
    """
    def parse_hours(s):
        """Convert a Russian free-form hours string to an OSM opening_hours
        value, or return None when the format is not understood."""
        s = re.sub('^зал:? *', '', s.lower())
        # Normalize separators: HTML/newline breaks -> ';', drop spaces,
        # commas -> ';', unicode dashes -> ASCII hyphen.
        # (Bugfix: the previous code called s.replace('', '-'), which inserts
        # a hyphen between EVERY character, so no string ever matched the
        # interval regex below; the intent was almost certainly to normalize
        # en/em dashes used in the source data.)
        s = s.replace('<br />', ';').replace('<br>', ';').replace('\n', ';')
        s = s.replace(' ', '').replace(',', ';')
        s = s.replace('–', '-').replace('—', '-')
        # '-00:xx' at the end of an interval means past midnight -> '24:xx'.
        s = s.replace('-00:', '-24:')
        # Russian weekday abbreviations -> OSM two-letter codes.
        weekdays = {k: v for k, v in map(lambda x: x.split(), 'пн Mo,вт Tu,ср We,чт Th,пт Fr,сб Sa,вс Su'.split(','))}
        if s == 'круглосуточно':  # "around the clock"
            return '24/7'
        parts = s.split(';')
        WEEKDAY_PATH = '(?:пн|вт|ср|чт|пт|сб|вск?)'
        result = []
        found_allweek = False
        for p in parts:
            if not p:
                continue
            # Optional weekday range/list prefix, optional "с" ("from"),
            # then an HH:MM-HH:MM interval ('.' also accepted as separator).
            m = re.match(r'^(' + WEEKDAY_PATH + r'(?:[-,]' + WEEKDAY_PATH +
                         r')*)?с?(\d?\d[:.]\d\d-\d?\d[:.]\d\d)$', p)
            if not m:
                # Disregarding other parts
                return None
            # Unify '.' -> ':' and zero-pad single-digit hours.
            times = re.sub(r'(^|-)(\d:)', r'\g<1>0\g<2>', m[2].replace('.', ':'))
            if m[1]:
                wd = m[1].replace('вск', 'вс')
                for k, v in weekdays.items():
                    wd = wd.replace(k, v)
            else:
                # No weekday prefix: the interval covers the whole week.
                found_allweek = True
                wd = 'Mo-Su'
            result.append(wd + ' ' + times)
        # Reject empty results, and a whole-week interval mixed with
        # day-specific ones.
        if not result or (found_allweek and len(result) > 1):
            return None
        return '; '.join(result)

    def parse_phone(s):
        """Strip braces/dashes; the Russian extension marker "доб." becomes
        the '-' extension separator."""
        s = s.replace('(', '').replace(')', '').replace('-', '')
        s = s.replace(' доб. ', '-')
        return s

    # Hand-written relocation notes for specific restaurants, keyed by
    # origID; they end up in a fixme tag.
    notes = {
        172: 'Подвинуть на второй терминал',
        25: 'Подвинуть в ЮниМолл',
        133: 'Передвинуть в Парк №1: https://prnt.sc/gtlwjs',
        471: 'Передвинуть в ТЦ Балканский 6, самый северный, где кино',
        234: 'Передвинуть на север, в дом 7',
        111: 'Сдвинуть в здание',
        59: 'Сдвинуть в торговый центр севернее',
        346: 'Передвинуть к кафе',
    }
    source = json.load(codecs.getreader('utf-8')(fileobj))
    data = []
    for el in source:
        gid = int(el['origID'])
        tags = {
            'amenity': 'fast_food',
            'name': 'Бургер Кинг',
            'name:ru': 'Бургер Кинг',
            'name:en': 'Burger King',
            'ref': gid,
            'cuisine': 'burger',
            'takeaway': 'yes',
            'wikipedia:brand': 'ru:Burger King',
            'wikidata:brand': 'Q177054',
            'contact:website': 'https://burgerking.ru/',
            'contact:email': el['email'],
            'contact:phone': parse_phone(el['tel']),
            'opening_hours': parse_hours(el['opened'])
        }
        if gid in notes:
            tags['fixme'] = notes[gid]
        if el['is_wifi']:
            tags['internet_access'] = 'wlan'
            tags['internet_access:fee'] = 'no'
        else:
            tags['internet_access'] = 'no'
        data.append(SourcePoint(gid, float(el['lat']), float(el['lng']), tags))
    return data

97
profiles/minkult.py Normal file
View file

@@ -0,0 +1,97 @@
import json
import logging
import requests
import codecs
def download_url(dataset_id='7705851331-museums'):
    """Resolve the download URL of the newest dump of an opendata.mkrf.ru
    dataset, or return None when its meta.json cannot be fetched."""
    meta_url = 'http://opendata.mkrf.ru/opendata/{}/meta.json'.format(dataset_id)
    resp = requests.get(meta_url)
    if resp.status_code != 200 or len(resp.content) == 0:
        logging.error('Could not get URL for dataset: %s %s', resp.status_code, resp.text)
        logging.error('Please check http://opendata.mkrf.ru/opendata/{}'.format(dataset_id))
        return None
    meta = resp.json()
    # The last entry of 'data' is the most recent dataset version.
    newest = meta['data'][-1]
    logging.info('Downloading %s from %s', meta['title'], newest['created'])
    return newest['source']
source = 'opendata.mkrf.ru'
dataset_id = 'mkrf_museums'
# OSM objects to match against: tourism=museum (tag/value pairs).
query = [('tourism', 'museum')]
# Maximum match distance — presumably metres; TODO confirm with the framework.
max_distance = 300
# Tags whose dataset values override OSM values on matched objects.
master_tags = ('official_name', 'phone', 'opening_hours', 'website')
def dataset(fileobj):
    """Read the mkrf.ru museums JSON and return a list of SourcePoint."""
    def make_wd_ranges(r):
        # Collapse a sorted sequence of weekday indices (0 = Monday) into
        # an OSM weekday selector, e.g. (0, 1, 2, 4) -> 'Mo-We, Fr'.
        wd = ['Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa', 'Su']
        res = wd[r[0]]
        in_range = False
        # Run one step past the end so a range ending on the last day
        # is still flushed.
        for i in range(1, len(r)+1):
            if i < len(r) and r[i] == r[i-1] + 1:
                in_range = True
            else:
                if in_range:
                    res += '-' + wd[r[i-1]]
                    in_range = False
                if i < len(r):
                    res += ', ' + wd[r[i]]
        return res
    def parse_hours(h):
        # h maps weekday numbers (as strings) to {'from': ..., 'to': ...}
        # time dicts; returns an OSM opening_hours string.
        days = {}
        for wd, d in h.items():
            if not d['from']:
                continue
            # Keep only the HH:MM part of the time strings.
            for i in ('from', 'to'):
                d[i] = d[i][:5]
            if d['to'] == '00:00':
                d['to'] = '24:00'
            elif not d['to']:
                # Unknown closing time: assume open at least until 19:00.
                d['to'] = '19:00+'
            # Group weekdays that share an identical opening interval.
            k = '{}-{}'.format(d['from'], d['to'])
            if k not in days:
                days[k] = set()
            days[k].add(int(wd))
        # Invert the grouping: sorted weekday tuple -> interval string.
        days2 = {}
        for op, d in days.items():
            days2[tuple(sorted(d))] = op
        res = []
        # Emit groups ordered by their earliest weekday.
        for d in sorted(days2.keys(), key=lambda x: min(x)):
            res.append(' '.join([make_wd_ranges(d), days2[d]]))
        return '; '.join(res)
    def wrap(coord, absmax):
        # Wrap a coordinate back into [-absmax, absmax]; some source
        # longitudes are shifted by a full revolution.
        if coord < -absmax:
            return coord + absmax * 2
        if coord > absmax:
            return coord - absmax * 2
        return coord
    source = json.load(codecs.getreader('utf-8')(fileobj))
    data = []
    for el in source:
        d = el['data']['general']
        gid = d['id']
        # 'coordinates' is [lat, lon]; only the longitude needs wrapping.
        lon = wrap(d['address']['mapPosition']['coordinates'][1], 180)
        lat = d['address']['mapPosition']['coordinates'][0]
        tags = {
            'tourism': 'museum',
            'name': d['name'],
            'official_name': d['name'],
            'image': d['image']['url'],
            'operator': d['organization']['name'],
            'addr:full': '{}, {}'.format(d['locale']['name'], d['address']['street']),
        }
        if d.get('workingSchedule'):
            tags['opening_hours'] = parse_hours(d['workingSchedule'])
        if 'email' in d['contacts']:
            tags['email'] = d['contacts']['email']
        if 'website' in d['contacts']:
            tags['website'] = d['contacts']['website']
            # NOTE(review): a trailing slash is appended to bare '.ru'
            # domains — presumably to match URL formatting in OSM; confirm.
            if tags['website'].endswith('.ru'):
                tags['website'] += '/'
        if 'phones' in d['contacts'] and d['contacts']['phones']:
            tags['phone'] = '+' + d['contacts']['phones'][0]['value']
        data.append(SourcePoint(gid, lat, lon, tags))
    return data

44
profiles/navads_shell.py Normal file
View file

@@ -0,0 +1,44 @@
source = 'Navads'
dataset_id = 'navads_shell'
# OSM objects to match against: amenity=fuel (tag/value pairs).
query = [('amenity', 'fuel')]
# Tags whose dataset values override OSM values on matched objects.
master_tags = ('brand', 'addr:postcode', 'phone', 'opening_hours')
# Maximum match distance — presumably metres; TODO confirm with the framework.
max_distance = 50
def format_phone(ph):
    """Space-group a 13-character UK phone number ('+44...').

    Anything that is empty, not 13 characters long, or not a +44 number
    is returned unchanged.
    """
    if not ph or len(ph) != 13 or ph[:3] != '+44':
        return ph
    # Geographic 01xxx codes (excluding 011x / 01x1) and Isle of Man
    # mobiles (7624) use a 4-digit area code.
    four_digit_area = (ph[3] == '1' and ph[4] != '1' and ph[5] != '1') or ph[3:7] == '7624'
    if four_digit_area:
        groups = [ph[:3], ph[3:7], ph[7:]]
    elif ph[3] in ('1', '3', '8', '9'):
        # 3-3-4 grouping for the remaining 01/03/08/09 ranges.
        groups = [ph[:3], ph[3:6], ph[6:9], ph[9:]]
    else:
        # 2-4-4 grouping (e.g. London 020 numbers).
        groups = [ph[:3], ph[3:5], ph[5:9], ph[9:]]
    return ' '.join(groups)
# Declarative tag transform applied by the framework to each source record:
#   constant value -> set the tag to that value ('amenity': 'fuel')
#   callable       -> pass the source value through the function
#   '>name'        -> presumably rename the source key to 'name' — TODO confirm
#   '-'            -> presumably drop the tag — TODO confirm
transform = {
    'amenity': 'fuel',
    'postal_code': '>addr:postcode',
    'phone': format_phone,
    'name': '-'
}
# Example JSON line:
#
# {
# "id": "NVDS298-10018804",
# "lat": 51.142491,
# "lon": -0.074893,
# "tags": {
# "name": "Shell",
# "brand": "Shell",
# "addr:street": "Snow Hill",
# "postal_code": "RH10 3EQ",
# "addr:city": "Crawley",
# "phone": "+441342718750",
# "website": "http://www.shell.co.uk",
# "operator": "Shell",
# "opening_hours": "24/7",
# "amenity": "fuel"
# }
# }

View file

@@ -0,0 +1,99 @@
import json
import codecs
import re
from collections import defaultdict

source = 'Navads'
dataset_id = 'navads_shell'
# OSM objects to match against: amenity=fuel (tag/value pairs).
query = [('amenity', 'fuel')]
# Tags whose dataset values override OSM values on matched objects.
master_tags = ('brand', 'phone', 'opening_hours')
# Maximum match distance — presumably metres; TODO confirm with the framework.
max_distance = 50
max_request_boxes = 3
def dataset(fileobj):
    """Read the Navads Shell locations JSON and return a list of SourcePoint."""
    def format_phone(ph):
        # Space-group a 13-character UK number ('+44...'); anything else
        # is returned untouched.
        if ph and len(ph) == 13 and ph[:3] == '+44':
            # 4-digit area codes: 01xxx (excluding 011x/01x1) and 7624.
            if (ph[3] == '1' and ph[4] != '1' and ph[5] != '1') or ph[3:7] == '7624':
                return ' '.join([ph[:3], ph[3:7], ph[7:]])
            elif ph[3] in ('1', '3', '8', '9'):
                return ' '.join([ph[:3], ph[3:6], ph[6:9], ph[9:]])
            else:
                return ' '.join([ph[:3], ph[3:5], ph[5:9], ph[9:]])
        return ph
    def make_wd_ranges(r):
        # Collapse a list of weekday indices (0 = Monday) into an OSM
        # selector, e.g. [0, 1, 2, 4] -> 'Mo-We,Fr'.
        wd = ['Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa', 'Su']
        res = wd[r[0]]
        in_range = False
        # Run one step past the end so a trailing range is flushed.
        for i in range(1, len(r)+1):
            if i < len(r) and r[i] == r[i-1] + 1:
                in_range = True
            else:
                if in_range:
                    res += '-' + wd[r[i-1]]
                    in_range = False
                if i < len(r):
                    res += ',' + wd[r[i]]
        return res
    def parse_hours(h):
        # Parse 'MONDAY=00:00-23:59;TUESDAY=...' into an OSM opening_hours
        # string; returns None for an empty value.
        if not h:
            return None
        WD = {x: i for i, x in enumerate([
            'MONDAY', 'TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY', 'SUNDAY'
        ])}
        days = defaultdict(list)
        for d in h.split(';'):
            parts = re.findall(r'([A-Z]+)=([0-9:-]+)', d)
            # Each ';'-separated chunk must mention exactly one weekday.
            if len(set([p[0] for p in parts])) != 1:
                raise Exception('Parts format fail: {}'.format(d))
            # Group weekdays by their (comma-joined) list of intervals.
            days[','.join([p[1] for p in parts])].append(WD[parts[0][0]])
        res = []
        # Emit groups ordered by their earliest weekday.
        for time, wd in sorted(days.items(), key=lambda x: min(x[1])):
            res.append(' '.join([make_wd_ranges(wd), time]))
        if res[0] == 'Mo-Su 00:00-23:59':
            return '24/7'
        # The feed writes 23:59 where OSM expects 24:00.
        return '; '.join(res).replace('23:59', '24:00')
    # utf-8-sig: the feed may start with a byte-order mark.
    source = json.load(codecs.getreader('utf-8-sig')(fileobj))
    data = []
    for el in source['Locations']:
        if not el['location']:
            continue
        # 'location' is a 'lat,lon' string.
        coords = [float(x) for x in el['location'].split(',')]
        tags = {
            'amenity': 'fuel',
            'brand': el['name'],
            'addr:postcode': el['address_zip'] or None,
            'phone': format_phone('+'+str(el['phone'])),
            'opening_hours': parse_hours(el['daily_hours']),
        }
        # Skip address tags that look like road references (A1, M25,
        # Junction ...) rather than real street addresses.
        if (el['address_street'] and el['address_number'] and
                not re.search(r'^([ABCDM]\d+|Junction)', el['address_street']) and
                'Ln' not in el['address_street'] and 'A' not in el['address_number']):
            tags['addr:street'] = el['address_street']
            tags['addr:housenumber'] = el['address_number']
        data.append(SourcePoint(el['place_id'], coords[0], coords[1], tags))
    return data
# Example line of the source JSON:
#
# {
# "place_id": "NVDS353-10019224",
# "name": "Shell",
# "category": "GAS_STATION",
# "location": "54.978366,-1.57441",
# "description": "",
# "phone": 441912767084,
# "address_street": "Shields Road",
# "address_number": "308",
# "address_city": "Newcastle-Upon-Tyne",
# "address_zip": "NE6 2UU",
# "address_country": "GB",
# "website": "http://www.shell.co.uk/motorist/station-locator.html?id=10019224&modeselected=true",
# "daily_hours": "MONDAY=00:00-23:59;TUESDAY=00:00-23:59;WEDNESDAY=00:00-23:59;THURSDAY=00:00-23:59;FRIDAY=00:00-23:59;SATURDAY=00:00-23:59;SUNDAY=00:00-23:59",
# "brand": "Shell",
# "is_deleted": false
# },

54
profiles/velobike.py Normal file
View file

@@ -0,0 +1,54 @@
import codecs
import json
import logging

# Public JSON endpoint listing all Velobike rental stations.
download_url = 'http://www.velobike.ru/proxy/parkings/'
source = 'velobike.ru'
dataset_id = 'velobike'
# Framework flag: do not write a ref:<dataset_id> tag on matched objects
# — presumably; semantics live in the conflation framework.
no_dataset_id = True
# OSM objects to match against (tag/value pairs).
query = [('amenity', 'bicycle_rental'), ('network', 'Велобайк')]
# Maximum match distance — presumably metres; TODO confirm with the framework.
max_distance = 100
delete_unmatched = True
# Tags applied to OSM objects with no counterpart in the dataset: demote
# from amenity and leave a fixme note (user-facing text, kept in Russian).
tag_unmatched = {
    'fixme': 'Проверить на местности: в данных велобайка отсутствует. Вероятно, демонтирована',
    'amenity': None,
    'was:amenity': 'bicycle_rental'
}
# Tags whose dataset values override OSM values on matched objects.
master_tags = ('ref', 'capacity', 'capacity:electric', 'contact:email',
               'contact:phone', 'contact:website', 'operator')
def dataset(fileobj):
    """Read the Velobike parkings JSON and return a list of SourcePoint."""
    parkings = json.load(codecs.getreader('utf-8')(fileobj))
    points = []
    for item in parkings['Items']:
        try:
            gid = int(item['Id'])
            # Keep raw coordinate values here; the float() conversion is
            # deferred to the inner try so a bad number gets its own warning.
            raw_lon = item['Position']['Lon']
            raw_lat = item['Position']['Lat']
            card_payment = 'yes' if item['HasTerminal'] else 'no'
            tags = {
                'amenity': 'bicycle_rental',
                'network': 'Велобайк',
                'ref': gid,
                'capacity': item['TotalOrdinaryPlaces'],
                'capacity:electric': item['TotalElectricPlaces'],
                'contact:email': 'info@velobike.ru',
                'contact:phone': '+7 495 966-46-69',
                'contact:website': 'https://velobike.ru/',
                'opening_hours': '24/7',
                'operator': 'ЗАО «СитиБайк»',
                'payment:cash': 'no',
                'payment:troika': 'no',
                'payment:mastercard': card_payment,
                'payment:visa': card_payment,
            }
            try:
                points.append(SourcePoint(gid, float(raw_lat), float(raw_lon), tags))
            except Exception as e:
                logging.warning('PROFILE: Failed to parse lat/lon for rental stand %s: %s', gid, str(e))
        except Exception as e:
            # Best-effort: a malformed record is logged and skipped.
            logging.warning('PROFILE: Failed to get attributes for rental stand: %s', str(e))
    return points