Example for using param in minkult.py, and remove imports for profiles

This commit is contained in:
Ilya Zverev 2018-01-16 17:04:47 +03:00
parent 7d0a631874
commit 79c7ab80ce
6 changed files with 32 additions and 14 deletions

View file

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import argparse
import codecs
import json
import kdtree
import logging
import math
@@ -8,10 +9,6 @@ import requests
import os
import sys
from io import BytesIO
import json # for profiles
import re # for profiles
import zipfile # for profiles
from collections import defaultdict # for profiles
try:
from .version import __version__
except ImportError:

View file

@@ -44,7 +44,7 @@ def dataset(fileobj):
# We are parsing HTML, and for that we need an lxml package
from lxml import html
global download_url_copy
global download_url_copy, re
h = html.fromstring(fileobj.read().decode('utf-8'))
shops = h.find_class('shops-in-the-city-holder')[0]
shops.make_links_absolute(download_url_copy)

View file

@@ -1,3 +1,7 @@
# Note: the json file at the burgerking website was restructured
# and does not contain any useful data now.
# So this profile is here solely for demonstration purposes.
import json
import codecs
import re
@@ -20,6 +24,7 @@ tag_unmatched = {
def dataset(fileobj):
def parse_hours(s):
global re
s = re.sub('^зал:? *', '', s.lower())
s = s.replace('<br />', ';').replace('<br>', ';').replace('\n', ';').replace(' ', '').replace(',', ';').replace('–', '-')
s = s.replace('-00:', '-24:')
@@ -66,7 +71,11 @@ def dataset(fileobj):
346: 'Передвинуть к кафе',
}
source = json.load(codecs.getreader('utf-8')(fileobj))
json_src = codecs.getreader('utf-8')(fileobj).read()
p = json_src.find('<div')
if p > 0:
json_src = json_src[:p]
source = json.loads(json_src)
data = []
for el in source:
gid = int(el['origID'])

View file

@@ -5,7 +5,8 @@ import codecs
# Reading the dataset passport to determine an URL of the latest dataset version
def download_url(dataset_id='7705851331-museums'):
def download_url():
dataset_id = '7705851331-' + (param or 'museums')
r = requests.get('http://opendata.mkrf.ru/opendata/{}/meta.json'.format(dataset_id))
if r.status_code != 200 or len(r.content) == 0:
logging.error('Could not get URL for dataset: %s %s', r.status_code, r.text)
@@ -17,8 +18,18 @@ def download_url(dataset_id='7705851331-museums'):
return latest['source']
source = 'opendata.mkrf.ru'
dataset_id = 'mkrf_museums'
query = [('tourism', 'museum')]
dataset_id = 'mkrf_'+(param or 'museums')
if not param or param == 'museums':
query = [('tourism', 'museum')]
elif param == 'theaters':
query = [('amenity', 'theatre')]
elif param == 'circuses':
query = [('amenity', 'circus')]
elif param == 'philharmonic':
query = [('amenity', 'theatre')]
else:
raise ValueError('Unknown param value: {}'.format(param))
max_distance = 300
master_tags = ('official_name', 'phone', 'opening_hours', 'website')

View file

@@ -1,12 +1,10 @@
# Available modules: codecs, logging, requests, json, re, etree. But importing these helps catch other errors
# Available modules: codecs, logging, requests, json, etree. But importing these helps catch other errors
import json
import re
import logging
import requests
import zipfile
def download_url(mos_dataset_id=1421):
import requests
r = requests.get('https://data.mos.ru/api/datasets/expformats/?datasetId={}'.format(mos_dataset_id))
if r.status_code != 200 or len(r.content) == 0:
logging.error('Could not get URL for dataset: %s %s', r.status_code, r.text)
@@ -15,7 +13,7 @@ def download_url(mos_dataset_id=1421):
url = [x for x in r.json() if x['Format'] == 'json'][0]
version = '?'
title = 'dataset'
r = requests.get('https://data.mos.ru/apiproxy/opendata/1421/meta.json'.format(mos_dataset_id))
r = requests.get('https://data.mos.ru/apiproxy/opendata/{}/meta.json'.format(mos_dataset_id))
if r.status_code == 200:
title = r.json()['Title']
version = r.json()['VersionNumber']
@@ -50,6 +48,8 @@ master_tags = ('zone:parking', 'ref', 'contact:phone', 'contact:website', 'opera
# A list of SourcePoint objects. Initialize with (id, lat, lon, {tags}).
def dataset(fileobj):
import zipfile
import re
zf = zipfile.ZipFile(fileobj)
source = json.loads(zf.read(zf.namelist()[0]).decode('cp1251'))
RE_NUM4 = re.compile(r'\d{4,6}')

View file

@@ -56,6 +56,7 @@ def dataset(fileobj):
return '24/7'
return '; '.join(res).replace('23:59', '24:00')
global re, defaultdict
source = json.load(codecs.getreader('utf-8-sig')(fileobj))
data = []
for el in source['Locations']: