Add a couple more profile examples

This commit is contained in:
Ilya Zverev 2018-06-26 15:06:46 +03:00
parent 1b97e96785
commit 7d8b7e8ccd
2 changed files with 182 additions and 0 deletions

78
profiles/rosinter.py Normal file
View file

@ -0,0 +1,78 @@
# Restaurant feed URL; the query string asks for every city, brand and
# cuisine, with lang=ru.
download_url = (
    'http://www.rosinter.ru/locator/RestaurantsFeed.aspx'
    '?city=all&location=&lang=ru&brand=all&cuisine=all'
    '&metro=&hasDelivery=&isCorporate='
)
source = 'Rosinter'

# Settings read by whatever loads this profile.
# NOTE(review): their exact semantics (units, meaning of -1, etc.) are defined
# by the loader, not in this file -- confirm against its documentation.
no_dataset_id = True
max_distance = 500
query = [
    ('amenity', 'restaurant', 'cafe', 'bar', 'pub', 'fast_food'),
]
overpass_timeout = 1000
duplicate_distance = -1
nearest_points = 30
master_tags = ('name', 'phone', 'amenity')

# Brand table shared by matches() and dataset().
#   key      -- substring looked for in the feed's brand name
#   value[0] -- substring, or tuple of alternative substrings, looked for in
#               the lower-cased OSM `name`
#   value[1] -- value written to the `amenity` tag
#   value[2] -- value written to the `cuisine` tag (may be None)
types = {
    'Costa':   ['costa', 'cafe', 'coffee_shop'],
    'IL':      [('patio', 'патио'), 'restaurant', 'italian'],
    'TGI':     [('tgi', 'friday'), 'restaurant', 'american'],
    'Бар и':   ['гриль', 'restaurant', 'american'],
    'Макд':    ['мак', 'fast_food', None],
    'Раша':    ['мама', 'fast_food', 'russian'],
    'Планета': ['планета', 'restaurant', 'japanese'],
    'Шика':    ['шика', 'restaurant', 'asian'],
    'Свои':    ['сво', 'restaurant', None],
}
def matches(osmtags, ritags):
    """Tell whether an OSM object looks like the given feed restaurant.

    The first key of the module-level ``types`` table that occurs in the
    feed name selects the brand; the OSM object matches when its lower-cased
    ``name`` contains the brand's expected substring (or any one of them,
    when the table lists several alternatives).

    Args:
        osmtags: tags of the OSM object; a missing ``name`` is treated as ''.
        ritags: tags of the feed point; must contain ``name``.

    Returns:
        True on a match, False otherwise. False is also returned, after
        logging an error, when the feed name matches no known brand.

    Raises:
        KeyError: if ``ritags`` has no ``name``.
    """
    # Imported here because nothing else in this file brings `logging` into
    # scope; without it the "unknown brand" path below raises NameError.
    import logging

    rname = ritags['name']
    name = osmtags.get('name', '').lower()
    for brand_key, spec in types.items():
        if brand_key not in rname:
            continue
        candidates = spec[0]
        if isinstance(candidates, str):
            candidates = (candidates,)
        # Only the first matching brand is consulted, hit or miss.
        return any(fragment in name for fragment in candidates)
    logging.error('Unknown rname value: %s', rname)
    return False
def dataset(f):
    """Yield one SourcePoint per restaurant found in the Rosinter XML feed.

    Args:
        f: file (or anything ``lxml.etree.parse`` accepts) holding the feed.

    Yields:
        SourcePoint built from the feed id, coordinates and a tag dict with
        ``amenity``, ``name``, ``phone`` and ``website``, plus ``cuisine``
        when the brand is listed in the module-level ``types`` table. The
        feed address goes into ``remarks``.
    """
    from lxml import etree

    # Prague, Budapest, Warsaw, Baku, Riga -- restaurants there are skipped.
    # NOTE(review): presumably because they are outside the area this
    # profile is meant for -- confirm.
    skipped_cities = ('Прага', 'Будапешт', 'Варшава', 'Баку', 'Рига')
    brackets = str.maketrans('', '', '()')

    feed_root = etree.parse(f).getroot()
    for node in feed_root.find('Restaurants'):
        rid = node.find('id').text
        if node.find('city').text in skipped_cities:
            continue

        # Collapse brand spelling variants into one canonical name.
        brand = node.find('brand').text
        if 'TGI' in brand:
            brand = 'TGI Fridays'
        elif 'СВОИ' in brand:
            brand = 'Свои'

        phone = node.find('telephone').text
        if phone:
            phone = phone.translate(brackets)

        # Some brands get a fixed website instead of the one in the feed.
        website = node.find('siteurl').text
        if website and 'il-patio' in website:
            website = 'http://ilpatio.ru'
        if 'Свои' in brand:
            website = 'http://restoransvoi.by'

        lat = float(node.find('latitude').text)
        lon = float(node.find('longitude').text)
        tags = dict(amenity='restaurant', name=brand, phone=phone, website=website)
        address = node.find('address').text

        # Every matching table entry is applied in turn, so the last one wins.
        for brand_key, spec in types.items():
            if brand_key in brand:
                tags['amenity'] = spec[1]
                tags['cuisine'] = spec[2]

        note = 'Обязательно подвиньте точку!\nАдрес: ' + str(address)
        yield SourcePoint(rid, lat, lon, tags, remarks=note)

104
profiles/schocoladnitsa.py Normal file
View file

@ -0,0 +1,104 @@
# Page that lists every cafe; parsed by dataset() below.
download_url = 'http://new.shoko.ru/addresses/'
source = 'Шоколадница'

# Settings read by whatever loads this profile.
# NOTE(review): their exact semantics are defined by the loader, not in this
# file -- confirm against its documentation.
no_dataset_id = True
overpass_timeout = 600
max_distance = 250
max_request_boxes = 6
query = [
    ('amenity',),
    ('name', '~Шоколадница'),
]
master_tags = [
    'amenity',
    'name',
    'name:ru',
    'name:en',
    'website',
    'phone',
    'opening_hours',
]
def dataset(fileobj):
    """Yield one SourcePoint per cafe listed on the addresses page.

    The page is parsed as HTML; every child of the element with id
    ``markers`` that carries both ``data-lat`` and ``data-lng`` becomes a
    point. Opening hours (``data-time``) are converted to an
    ``opening_hours``-style string and the phone (``data-phone``) is
    reformatted in international notation.

    Args:
        fileobj: binary file object with the UTF-8 encoded HTML page.

    Yields:
        SourcePoint with a running 1-based id, the coordinates, the tag dict
        and ``data-title`` as remarks.

    Raises:
        Exception: whatever ``phonenumbers.parse`` raises for an unparsable
            phone; the offending value is logged before re-raising.
    """
    # Imports and helpers are kept local to the function, as the original
    # code did.
    from lxml import html
    import re
    import logging
    import phonenumbers

    # Free-form day descriptions (spaces removed, lower-cased) seen in the
    # data, including misspellings, mapped to opening_hours day selectors.
    trans = {
        'будни': 'Mo-Fr',
        'суббота': 'Sa',
        'воскресенье': 'Su',
        'ежедневно': 'Mo-Su',
        'выходные': 'Sa-Su',
        'восерсенье': 'Su',
        'ежеденевно': 'Mo-Su',
        'пн-чтивс': 'Mo-Th,Su',
        'пн-чт,вс': 'Mo-Th,Su',
        'пт.-сб': 'Fr-Sa',
        'вск.-чт': 'Su-Th',
        'смаяпооктябрь': 'May-Oct',
        'ч.смаяпооктябрь': 'May-Oct',
        'сентября': 'May-Sep',
    }
    weekdays = {'пн': 'Mo', 'вт': 'Tu', 'ср': 'We', 'чт': 'Th', 'пт': 'Fr', 'сб': 'Sa', 'вс': 'Su'}
    # Optional day description, then a start time and an end time.
    hours_re = re.compile(
        r'([а-яА-Я ,.:\(\)-]+?)?(?:\sс)?\s*(\d?\d[:.]\d\d)(?: до |[^\w\d]+)(\d\d[:.]\d\d)')
    # Two two-letter weekday abbreviations joined by ',', 'и' or '-'.
    # NOTE(review): all three separators are emitted as a range ('-'), which
    # is only equivalent to a list for adjacent days -- confirm intent.
    day_pair_re = re.compile(r'^([б-ч]{2})\s?[,и-]\s?([б-ч]{2})$')

    def parse_oh(s):
        """Convert a Russian opening-hours text to 'Days HH:MM-HH:MM; ...'.

        Returns None for empty input or when no time range is recognised.
        """
        if not s:
            return None
        # 'круглосуточно' = 'round the clock'
        if s.strip().lower() == 'круглосуточно':
            return '24/7'
        if s == 'с 10 до 22' or s == 'с 10.00-22.00':
            s = '10:00 - 22:00'
        s = s.replace('круглосуточно', '00:00-24:00')
        s = s.replace('23,', '23:00')
        parts = []
        for m in hours_re.finditer(s):
            days = (m[1] or '').strip(' -.,:()').lower().replace(' ', '')
            pair = day_pair_re.match(days)
            if not days:
                days = 'Mo-Su'
            elif days in weekdays:
                days = weekdays[days]
            elif pair and pair[1] in weekdays and pair[2] in weekdays:
                days = weekdays[pair[1]] + '-' + weekdays[pair[2]]
            elif days in trans:
                days = trans[days]
            else:
                # logging.warn() was a deprecated alias, removed in Python 3.13.
                logging.warning('Unknown days: %s', days)
                continue
            # {:0>5} left-pads a start time like '9:00' to '09:00'.
            parts.append('{} {:0>5}-{}'.format(
                days, m[2].replace('.', ':'), m[3].replace('.', ':')))
        return '; '.join(parts) if parts else None

    h = html.fromstring(fileobj.read().decode('utf-8'))
    markers = h.get_element_by_id('markers')
    i = 0
    for marker in markers:
        lat = marker.get('data-lat')
        lon = marker.get('data-lng')
        if not lat or not lon:
            continue
        oh = parse_oh(marker.get('data-time'))

        # A missing attribute yields None; treat it like an empty phone
        # instead of failing on the string operations below.
        phone = marker.get('data-phone') or ''
        # NOTE(review): presumably local numbers written without the country
        # code -- confirm.
        if phone.startswith('812'):
            phone = '+7' + phone
        # Cut everything from an appended ' 891...' / ' 8-91...' number on.
        for extra in (' 891', ' 8-91'):
            if extra in phone:
                phone = phone[:phone.index(extra)]

        # 'отключен' = 'disconnected'
        if not phone or phone == 'отключен':
            fphone = None
        else:
            try:
                # Only the first of several ','/';'-separated numbers is used.
                parsed_phone = phonenumbers.parse(
                    phone.replace(';', ',').split(',')[0], "RU")
            except Exception:
                logging.info(phone)
                raise
            fphone = phonenumbers.format_number(
                parsed_phone, phonenumbers.PhoneNumberFormat.INTERNATIONAL)

        tags = {
            'amenity': 'cafe',
            'name': 'Шоколадница',
            'name:ru': 'Шоколадница',
            'name:en': 'Shokoladnitsa',
            'website': 'http://shoko.ru',
            'cuisine': 'coffee_shop',
            'phone': fphone,
            'opening_hours': oh,
        }
        i += 1
        yield SourcePoint(i, float(lat), float(lon), tags, remarks=marker.get('data-title'))