Update gitignore and parse opening hours for Auchan
This commit is contained in:
parent
7ffa9222a0
commit
392067cc9e
3 changed files with 42 additions and 7 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -1,3 +1,7 @@
|
|||
*.swp
|
||||
*.osc
|
||||
*.zip
|
||||
*.json
|
||||
*.gz
|
||||
*.csv
|
||||
private/
|
||||
|
|
|
@ -225,11 +225,11 @@ class OsmConflator:
|
|||
r = requests.get(OVERPASS_SERVER + 'interpreter', {'data': query})
|
||||
if r.status_code != 200:
|
||||
logging.error('Failed to download data from Overpass API: %s', r.status_code)
|
||||
logging.error('Query: %s', query)
|
||||
logging.error('Error message: %s', r.text)
|
||||
if 'rate_limited' in r.text:
|
||||
r = requests.get(OVERPASS_SERVER + 'status')
|
||||
logging.warning('Seems like you are rate limited. API status:\n%s', r.text)
|
||||
else:
|
||||
logging.error('Error message: %s', r.text)
|
||||
raise IOError()
|
||||
for el in r.json()['elements']:
|
||||
if 'tags' not in el:
|
||||
|
|
|
@ -8,9 +8,8 @@ source = 'auchan.ru'
|
|||
# Not adding a ref:auchan tag, since we don't have good identifiers
|
||||
no_dataset_id = True
|
||||
# Using a name query with regular expressions
|
||||
query = [('shop', 'supermarket'), ('name', '~Ашан|АШАН')]
|
||||
# We don't parse opening hours yet, but it'd be cool if we did
|
||||
master_tags = ('name', 'opening_hours', 'phone')
|
||||
query = [('shop', '~supermarket|mall'), ('name', '~Ашан|АШАН')]
|
||||
master_tags = ('name', 'opening_hours', 'phone', 'website')
|
||||
# Empty dict so we don't add a fixme tag to unmatched objects
|
||||
tag_unmatched = {}
|
||||
# Coordinates are VERY approximate, so increasing max distance to ~1 km
|
||||
|
@ -20,6 +19,29 @@ max_distance = 0.01
|
|||
# And defining them as "global" moves these from locals() to globals()
|
||||
download_url_copy = download_url
|
||||
def dataset(fileobj):
|
||||
def parse_weekdays(s):
    """Translate a Russian weekday spec into an OSM opening_hours prefix.

    Accepts a single day ("вт"), a range ("пн-пт"), a comma list
    ("сб,вс"), or an every-day marker. Returns the OSM abbreviation
    string ("Mo-Fr"), '' when no day prefix is needed (all week),
    or None when the spec cannot be parsed.
    """
    # Russian weekday abbreviation -> OSM two-letter code.
    ru_to_osm = dict(pair.split() for pair in
                     'пн Mo,вт Tu,ср We,чт Th,пт Fr,сб Sa,вс Su'.split(','))
    # Normalize: drop spaces, lowercase, and map the Latin "c" onto the
    # identical-looking Cyrillic "с" so mixed-alphabet input still matches.
    s = s.replace(' ', '').lower().replace('c', 'с')
    # A whole-week spec needs no weekday prefix in opening_hours.
    if s in ('ежедневно', 'пн-вс'):
        return ''
    converted = []
    for token in s.split(','):
        translated = None
        if token in ru_to_osm:
            translated = ru_to_osm[token]
        elif '-' in token:
            # Try a "day-day" range, e.g. "пн-пт" -> "Mo-Fr".
            rng = re.match(r'(\w\w)-(\w\w)', token)
            if rng:
                first = ru_to_osm.get(rng.group(1), None)
                last = ru_to_osm.get(rng.group(2), None)
                if first and last:
                    translated = '-'.join((first, last))
        if not translated:
            # One unparseable token invalidates the whole spec.
            logging.warning('Could not parse opening hours: %s', s)
            return None
        converted.append(translated)
    return ','.join(converted)
|
||||
|
||||
# We are parsing HTML, and for that we need an lxml package
|
||||
from lxml import html
|
||||
global download_url_copy
|
||||
|
@ -27,9 +49,9 @@ def dataset(fileobj):
|
|||
shops = h.find_class('shops-in-the-city-holder')[0]
|
||||
shops.make_links_absolute(download_url_copy)
|
||||
blocks = shops.xpath("//div[@class='mark-box'] | //ul[@class='shops-list']")
|
||||
logging.debug('Found %s blocks', len(blocks))
|
||||
name = None
|
||||
RE_GMAPS = re.compile(r'q=(-?[0-9.]+)\+(-?[0-9.]+)$')
|
||||
RE_OH = re.compile(r'(Ежедневно|(?:(?:Пн|Вт|Ср|Чт|Пт|Сб|В[сc])[, -]*)+)[ сc:]+(\d\d?[:.]\d\d)[- до]+(\d\d[.:]\d\d)', re.I)
|
||||
data = []
|
||||
for block in blocks:
|
||||
if block.get('class') == 'mark-box':
|
||||
|
@ -51,7 +73,15 @@ def dataset(fileobj):
|
|||
if m:
|
||||
lat = float(m.group(1))
|
||||
lon = float(m.group(2))
|
||||
logging.debug('Found title: %s, website: %s, address: %s, coords: %s, %s', title, website, addr, lat, lon)
|
||||
opening_hours = []
|
||||
# Extract opening hours
|
||||
oh = ' '.join(li.xpath("p/text()"))
|
||||
for m in RE_OH.finditer(oh):
|
||||
weekdays = parse_weekdays(m.group(1))
|
||||
if weekdays is not None:
|
||||
opening_hours.append('{}{:0>5s}-{:0>5s}'.format(
|
||||
weekdays + ' ' if weekdays else '', m.group(2).replace('.', ':'), m.group(3).replace('.', ':')))
|
||||
logging.debug('Found title: %s, website: %s, opens: %s, coords: %s, %s', title, website, '; '.join(opening_hours) or None, lat, lon)
|
||||
if lat is not None and name is not None:
|
||||
tags = {
|
||||
'name': name,
|
||||
|
@ -59,6 +89,7 @@ def dataset(fileobj):
|
|||
'shop': 'supermarket',
|
||||
'phone': '8-800-700-5-800',
|
||||
'operator': 'ООО «АШАН»',
|
||||
'opening_hours': '; '.join(opening_hours),
|
||||
'addr:full': addr,
|
||||
'website': website
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue