Example for using param in minkult.py, and remove imports for profiles

parent 7d0a631874
commit 79c7ab80ce
6 changed files with 32 additions and 14 deletions

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import argparse
import codecs
import json
import kdtree
import logging
import math

@@ -8,10 +9,6 @@ import requests
import os
import sys
from io import BytesIO
import json # for profiles
import re # for profiles
import zipfile # for profiles
from collections import defaultdict # for profiles
try:
    from .version import __version__
except ImportError:
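
The four deleted imports were only there so that profile code, which appears to run inside the main script's namespace, could use json, re, zipfile and defaultdict without importing them itself; after this commit each profile imports what it needs. A rough sketch of that loading model, assuming an exec-based loader (load_profile and profile_globals are illustrative names, not the actual osm_conflate API):

# Rough, hypothetical sketch of executing a profile file in a prepared
# namespace; names and structure are assumptions, not the real loader.
import codecs
import json
import logging

import requests


def load_profile(path):
    # Pre-inject only the modules a profile may rely on ("codecs, logging,
    # requests, json, etree" per the profile header later in this diff);
    # anything else, such as re or zipfile, the profile now imports itself.
    profile_globals = {'codecs': codecs, 'json': json,
                       'logging': logging, 'requests': requests}
    with open(path, encoding='utf-8') as f:
        exec(compile(f.read(), path, 'exec'), profile_globals)
    return profile_globals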

@@ -44,7 +44,7 @@ def dataset(fileobj):
    # We are parsing HTML, and for that we need an lxml package
    from lxml import html
    global download_url_copy
    global download_url_copy, re
    h = html.fromstring(fileobj.read().decode('utf-8'))
    shops = h.find_class('shops-in-the-city-holder')[0]
    shops.make_links_absolute(download_url_copy)
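
The lxml calls in this profile follow the library's usual pattern: parse the page, pick the element by class, then rewrite relative links against the download URL. A small self-contained illustration with made-up markup and a made-up base URL (only the class name matches the profile):

from lxml import html

# Made-up snippet standing in for the downloaded shop-list page.
page = '<div class="shops-in-the-city-holder"><a href="/shops/1">Shop one</a></div>'

h = html.fromstring(page)
shops = h.find_class('shops-in-the-city-holder')[0]
# Rewrites relative hrefs against the page URL, like the profile does
# with download_url_copy.
shops.make_links_absolute('https://example.com/city/')
print(shops.xpath('.//a/@href'))  # ['https://example.com/shops/1']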

@@ -1,3 +1,7 @@
# Note: the json file at the burgerking website was restructured
# and does not contain any useful data now.
# So this profile is here solely for demonstration purposes.

import json
import codecs
import re

@@ -20,6 +24,7 @@ tag_unmatched = {
def dataset(fileobj):
    def parse_hours(s):
        global re
        s = re.sub('^зал:? *', '', s.lower())
        s = s.replace('<br />', ';').replace('<br>', ';').replace('\n', ';').replace(' ', '').replace(',', ';').replace('–', '-')
        s = s.replace('-00:', '-24:')
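
parse_hours flattens free-form Russian opening-hours text into a semicolon-separated form before further processing. A quick trace of the three statements above on an invented sample string:

import re

# Invented sample of a raw hours string from the source data.
s = 'Зал: 10:00 - 00:00<br />сб,вс: 11:00 – 23:00'
s = re.sub('^зал:? *', '', s.lower())
s = (s.replace('<br />', ';').replace('<br>', ';').replace('\n', ';')
      .replace(' ', '').replace(',', ';').replace('–', '-'))
s = s.replace('-00:', '-24:')
print(s)  # 10:00-24:00;сб;вс:11:00-23:00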

@@ -66,7 +71,11 @@ def dataset(fileobj):
        346: 'Передвинуть к кафе',
    }
    source = json.load(codecs.getreader('utf-8')(fileobj))
    json_src = codecs.getreader('utf-8')(fileobj).read()
    p = json_src.find('<div')
    if p > 0:
        json_src = json_src[:p]
    source = json.loads(json_src)
    data = []
    for el in source:
        gid = int(el['origID'])
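
The replacement code guards against the server appending page markup after the JSON payload: everything from the first '<div' onward is cut off before parsing. A self-contained illustration with a fabricated payload:

import json

# Fabricated example of a JSON payload followed by stray page markup.
json_src = '[{"origID": "17", "name": "Burger King"}]<div class="footer"></div>'
p = json_src.find('<div')
if p > 0:
    json_src = json_src[:p]
source = json.loads(json_src)
print(source[0]['origID'])  # 17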

@@ -5,7 +5,8 @@ import codecs
# Reading the dataset passport to determine an URL of the latest dataset version
def download_url(dataset_id='7705851331-museums'):
def download_url():
    dataset_id = '7705851331-' + (param or 'museums')
    r = requests.get('http://opendata.mkrf.ru/opendata/{}/meta.json'.format(dataset_id))
    if r.status_code != 200 or len(r.content) == 0:
        logging.error('Could not get URL for dataset: %s %s', r.status_code, r.text)
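
download_url() no longer hard-codes the museums dataset: param, presumably supplied by conflate.py (for example from a command-line option), selects which opendata.mkrf.ru dataset passport to fetch. A standalone sketch of the same URL construction; the helper name is hypothetical and the sketch does not verify that every resulting passport actually exists:

def mkrf_meta_url(param=None):
    # Mirrors download_url() above: '7705851331-' plus the dataset name,
    # falling back to 'museums' when param is not set.
    dataset_id = '7705851331-' + (param or 'museums')
    return 'http://opendata.mkrf.ru/opendata/{}/meta.json'.format(dataset_id)


print(mkrf_meta_url())            # .../opendata/7705851331-museums/meta.json
print(mkrf_meta_url('theaters'))  # .../opendata/7705851331-theaters/meta.json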

@@ -17,8 +18,18 @@ def download_url(dataset_id='7705851331-museums'):
    return latest['source']

source = 'opendata.mkrf.ru'
dataset_id = 'mkrf_museums'
query = [('tourism', 'museum')]
dataset_id = 'mkrf_'+(param or 'museums')
if not param or param == 'museums':
    query = [('tourism', 'museum')]
elif param == 'theaters':
    query = [('amenity', 'theatre')]
elif param == 'circuses':
    query = [('amenity', 'circus')]
elif param == 'philharmonic':
    query = [('amenity', 'theatre')]
else:
    raise ValueError('Unknown param value: {}'.format(param))

max_distance = 300
master_tags = ('official_name', 'phone', 'opening_hours', 'website')
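
The if/elif chain maps each accepted param value to a dataset id and an OSM query, with 'philharmonic' reusing the theatre query. For comparison, an equivalent table-driven sketch (purely illustrative, not part of the commit):

# Hypothetical dict-based variant of the if/elif chain above.
QUERIES = {
    'museums': [('tourism', 'museum')],
    'theaters': [('amenity', 'theatre')],
    'circuses': [('amenity', 'circus')],
    'philharmonic': [('amenity', 'theatre')],
}


def select_query(param=None):
    key = param or 'museums'
    if key not in QUERIES:
        raise ValueError('Unknown param value: {}'.format(param))
    return 'mkrf_' + key, QUERIES[key]


print(select_query('circuses'))  # ('mkrf_circuses', [('amenity', 'circus')])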

@@ -1,12 +1,10 @@
# Available modules: codecs, logging, requests, json, re, etree. But importing these helps catch other errors
# Available modules: codecs, logging, requests, json, etree. But importing these helps catch other errors
import json
import re
import logging
import requests
import zipfile


def download_url(mos_dataset_id=1421):
    import requests
    r = requests.get('https://data.mos.ru/api/datasets/expformats/?datasetId={}'.format(mos_dataset_id))
    if r.status_code != 200 or len(r.content) == 0:
        logging.error('Could not get URL for dataset: %s %s', r.status_code, r.text)

@@ -15,7 +13,7 @@ def download_url(mos_dataset_id=1421):
    url = [x for x in r.json() if x['Format'] == 'json'][0]
    version = '?'
    title = 'dataset'
    r = requests.get('https://data.mos.ru/apiproxy/opendata/1421/meta.json'.format(mos_dataset_id))
    r = requests.get('https://data.mos.ru/apiproxy/opendata/{}/meta.json'.format(mos_dataset_id))
    if r.status_code == 200:
        title = r.json()['Title']
        version = r.json()['VersionNumber']
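
The replaced request always fetched dataset 1421's passport: the id was hard-coded in the URL, so .format(mos_dataset_id) had no placeholder to fill. The new line restores the {} placeholder; a minimal demonstration of the difference:

mos_dataset_id = 2624  # made-up id, only to show the substitution

# Old line: .format() has nothing to substitute, so 1421 is always requested.
print('https://data.mos.ru/apiproxy/opendata/1421/meta.json'.format(mos_dataset_id))
# https://data.mos.ru/apiproxy/opendata/1421/meta.json

# New line: the placeholder picks up whatever dataset id was passed in.
print('https://data.mos.ru/apiproxy/opendata/{}/meta.json'.format(mos_dataset_id))
# https://data.mos.ru/apiproxy/opendata/2624/meta.json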

@@ -50,6 +48,8 @@ master_tags = ('zone:parking', 'ref', 'contact:phone', 'contact:website', 'opera

# A list of SourcePoint objects. Initialize with (id, lat, lon, {tags}).
def dataset(fileobj):
    import zipfile
    import re
    zf = zipfile.ZipFile(fileobj)
    source = json.loads(zf.read(zf.namelist()[0]).decode('cp1251'))
    RE_NUM4 = re.compile(r'\d{4,6}')
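
dataset() now imports zipfile and re itself and reads the first member of the downloaded ZIP archive as cp1251-encoded JSON. A self-contained sketch that builds a small archive in memory and reads it back the same way:

import io
import json
import zipfile

# Build a tiny in-memory ZIP with one cp1251-encoded JSON member.
buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w') as zw:
    payload = json.dumps([{'Name': 'Стоянка'}], ensure_ascii=False)
    zw.writestr('data.json', payload.encode('cp1251'))
buf.seek(0)

# Same pattern as dataset(): open the archive, take the first member, decode cp1251.
zf = zipfile.ZipFile(buf)
source = json.loads(zf.read(zf.namelist()[0]).decode('cp1251'))
print(source[0]['Name'])  # Стоянка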

@@ -56,6 +56,7 @@ def dataset(fileobj):
        return '24/7'
        return '; '.join(res).replace('23:59', '24:00')

    global re, defaultdict
    source = json.load(codecs.getreader('utf-8-sig')(fileobj))
    data = []
    for el in source['Locations']:
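
The 'utf-8-sig' reader lets json.load cope with a byte-order mark at the start of the download, which a plain 'utf-8' decode would leave in front of the first brace and make the parse fail. A small check with an in-memory file object:

import codecs
import io
import json

# Simulated download that starts with a UTF-8 byte-order mark.
fileobj = io.BytesIO(codecs.BOM_UTF8 + b'{"Locations": []}')

source = json.load(codecs.getreader('utf-8-sig')(fileobj))
print(source['Locations'])  # []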