diff --git a/profiles/rosinter.py b/profiles/rosinter.py
new file mode 100644
index 0000000..877705c
--- /dev/null
+++ b/profiles/rosinter.py
@@ -0,0 +1,88 @@
+download_url = 'http://www.rosinter.ru/locator/RestaurantsFeed.aspx?city=all&location=&lang=ru&brand=all&cuisine=all&metro=&hasDelivery=&isCorporate='
+source = 'Rosinter'
+no_dataset_id = True
+max_distance = 500
+query = [('amenity', 'restaurant', 'cafe', 'bar', 'pub', 'fast_food')]
+overpass_timeout = 1000
+duplicate_distance = -1
+nearest_points = 30
+master_tags = ('name', 'phone', 'amenity')
+
+# Brand lookup table, keyed by a substring of the feed's brand name.
+# Each value is [osm_name_substr(s), amenity, cuisine]; the first item is
+# either one lowercase substring or a tuple of alternative substrings.
+types = {
+    'Costa': ['costa', 'cafe', 'coffee_shop'],
+    'IL': [('patio', 'патио'), 'restaurant', 'italian'],
+    'TGI': [('tgi', 'friday'), 'restaurant', 'american'],
+    'Бар и': ['гриль', 'restaurant', 'american'],
+    'Макд': ['мак', 'fast_food', None],
+    'Раша': ['мама', 'fast_food', 'russian'],
+    'Планета': ['планета', 'restaurant', 'japanese'],
+    'Шика': ['шика', 'restaurant', 'asian'],
+    'Свои': ['сво', 'restaurant', None],
+}
+
+
+def matches(osmtags, ritags):
+    """Tell whether an OSM object's name fits the brand of a feed point.
+
+    The first key of ``types`` found in ``ritags['name']`` selects the
+    substrings that are searched for in the lowercased OSM ``name`` tag.
+    Returns False (after logging an error) when no key fits the brand.
+    """
+    # Imported here because this file has no module-level imports.
+    import logging
+
+    rname = ritags['name']
+    name = osmtags.get('name', '').lower()
+    for k, v in types.items():
+        if k in rname:
+            needles = (v[0],) if isinstance(v[0], str) else v[0]
+            return any(n in name for n in needles)
+    logging.error('Unknown rname value: %s', rname)
+    return False
+
+
+def dataset(f):
+    """Yield a SourcePoint for every restaurant in the Rosinter XML feed.
+
+    ``f`` is passed straight to ``lxml.etree.parse``.  Restaurants whose
+    city is in the hard-coded skip list are ignored.
+    """
+    from lxml import etree
+    root = etree.parse(f).getroot()
+    for el in root.find('Restaurants'):
+        rid = el.find('id').text
+        city = el.find('city').text
+        if city in ('Прага', 'Будапешт', 'Варшава', 'Баку', 'Рига'):
+            continue
+        brand = el.find('brand').text
+        if 'TGI' in brand:
+            brand = 'TGI Fridays'
+        elif 'СВОИ' in brand:
+            brand = 'Свои'
+        phone = el.find('telephone').text
+        if phone:
+            phone = phone.replace('(', '').replace(')', '')
+        website = el.find('siteurl').text
+        if website and 'il-patio' in website:
+            website = 'http://ilpatio.ru'
+        if 'Свои' in brand:
+            website = 'http://restoransvoi.by'
+        lat = float(el.find('latitude').text)
+        lon = float(el.find('longitude').text)
+        tags = {
+            'amenity': 'restaurant',
+            'name': brand,
+            'phone': phone,
+            'website': website,
+        }
+        address = el.find('address').text
+        for k, v in types.items():
+            if k in brand:
+                tags['amenity'] = v[1]
+                tags['cuisine'] = v[2]
+        yield SourcePoint(
+            rid, lat, lon, tags,
+            remarks='Обязательно подвиньте точку!\nАдрес: ' + str(address))
diff --git a/profiles/schocoladnitsa.py b/profiles/schocoladnitsa.py
new file mode 100644
index 0000000..508168b
--- /dev/null
+++ b/profiles/schocoladnitsa.py
@@ -0,0 +1,113 @@
+download_url = 'http://new.shoko.ru/addresses/'
+source = 'Шоколадница'
+no_dataset_id = True
+overpass_timeout = 600
+max_distance = 250
+max_request_boxes = 6
+query = [('amenity',), ('name', '~Шоколадница')]
+master_tags = ['amenity', 'name', 'name:ru', 'name:en', 'website', 'phone', 'opening_hours']
+
+
+def dataset(fileobj):
+    """Yield a SourcePoint for every cafe marker on the addresses page.
+
+    ``fileobj.read()`` must return UTF-8 encoded HTML bytes.  Markers
+    without coordinates are skipped; ids are assigned sequentially,
+    starting at 1.
+    """
+    from lxml import html
+    import re
+    import logging
+    import phonenumbers
+
+    def parse_oh(s):
+        """Convert a Russian opening hours string to OSM syntax.
+
+        Returns None for empty input or when no time range is found.
+        """
+        if not s:
+            return None
+        if s.strip().lower() == 'круглосуточно':
+            return '24/7'
+        # Known day descriptions (spaces removed), including misspellings.
+        trans = {
+            'будни': 'Mo-Fr',
+            'суббота': 'Sa',
+            'воскресенье': 'Su',
+            'ежедневно': 'Mo-Su',
+            'выходные': 'Sa-Su',
+            'восерсенье': 'Su',
+            'ежеденевно': 'Mo-Su',
+            'пн-чтивс': 'Mo-Th,Su',
+            'пн-чт,вс': 'Mo-Th,Su',
+            'пт.-сб': 'Fr-Sa',
+            'вск.-чт': 'Su-Th',
+            'смаяпооктябрь': 'May-Oct',
+            'ч.смаяпооктябрь': 'May-Oct',
+            'сентября': 'May-Sep',
+        }
+        weekdays = {'пн': 'Mo', 'вт': 'Tu', 'ср': 'We', 'чт': 'Th', 'пт': 'Fr', 'сб': 'Sa', 'вс': 'Su'}
+        if s == 'с 10 до 22' or s == 'с 10.00-22.00':
+            s = '10:00 - 22:00'
+        s = s.replace('круглосуточно', '00:00-24:00')
+        s = s.replace('23,', '23:00')
+        parts = []
+        for m in re.finditer(r'([а-яА-Я ,.:\(\)-]+?)?(?:\sс)?\s*(\d?\d[:.]\d\d)(?: до |[^\w\d]+)(\d\d[:.]\d\d)', s):
+            days = (m[1] or '').strip(' -.,:()').lower().replace(' ', '')
+            m2 = re.match(r'^([б-ч]{2})\s?[,и-]\s?([б-ч]{2})$', days)
+            if not days:
+                days = 'Mo-Su'
+            elif days in weekdays:
+                days = weekdays[days]
+            elif m2 and m2[1] in weekdays and m2[2] in weekdays:
+                days = weekdays[m2[1]] + '-' + weekdays[m2[2]]
+            else:
+                if days not in trans:
+                    logging.warning('Unknown days: %s', days)
+                    continue
+                days = trans[days]
+            parts.append('{} {:0>5}-{}'.format(days, m[2].replace('.', ':'), m[3].replace('.', ':')))
+        if parts:
+            return '; '.join(parts)
+        return None
+
+    h = html.fromstring(fileobj.read().decode('utf-8'))
+    markers = h.get_element_by_id('markers')
+    i = 0
+    for m in markers:
+        lat = m.get('data-lat')
+        lon = m.get('data-lng')
+        if not lat or not lon:
+            continue
+        oh = parse_oh(m.get('data-time'))
+        # A missing data-phone attribute is treated as an empty string.
+        phone = m.get('data-phone') or ''
+        if phone[:3] == '812':
+            phone = '+7' + phone
+        # Keep only the text before ' 891' / ' 8-91'.
+        for extra in (' 891', ' 8-91'):
+            if extra in phone:
+                phone = phone[:phone.index(extra)]
+        fphone = None
+        # 'отключен' ("disconnected") and empty values mean there is no phone.
+        if phone and phone != 'отключен':
+            first = phone.replace(';', ',').split(',')[0]
+            try:
+                parsed_phone = phonenumbers.parse(first, 'RU')
+            except Exception:
+                logging.error('Cannot parse phone: %s', phone)
+                raise
+            fphone = phonenumbers.format_number(
+                parsed_phone, phonenumbers.PhoneNumberFormat.INTERNATIONAL)
+        tags = {
+            'amenity': 'cafe',
+            'name': 'Шоколадница',
+            'name:ru': 'Шоколадница',
+            'name:en': 'Shokoladnitsa',
+            'website': 'http://shoko.ru',
+            'cuisine': 'coffee_shop',
+            'phone': fphone,
+            'opening_hours': oh
+        }
+        i += 1
+        yield SourcePoint(i, float(lat), float(lon), tags, remarks=m.get('data-title'))