[booking] Fix for hotel types

This commit is contained in:
Ilya Zverev 2016-06-08 12:46:12 +03:00 committed by Vladimir Byko-Ianko
parent dfcd87e35a
commit acb5b6c40b
2 changed files with 18 additions and 23 deletions

View file

@ -162,6 +162,7 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
e.lat = hotel.lat;
e.lon = hotel.lon;
e.AddTag("sponsored", "booking");
e.AddTag("name", hotel.name);
e.AddTag("ref:sponsored", strings::to_string(hotel.id));
e.AddTag("website", hotel.descUrl);

View file

@ -2,7 +2,7 @@
# coding: utf8
from __future__ import print_function
from collections import namedtuple, defaultdict
from collections import defaultdict
from datetime import datetime
import argparse
import base64
@ -13,13 +13,12 @@ import pickle
import time
import urllib2
# init logging
# Initialize logging.
logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s')
# Flat record describing a single hotel. make_record() below fills every
# field with a unicode string, in exactly this order.
Hotel = namedtuple('Hotel',
['id', 'lat', 'lon', 'name', 'address',
'stars', 'priceCategory', 'ratingBooking',
'ratingUser', 'descUrl'])
# Columns written for each hotel by translate(), in output order.
# Names starting with '.' are not read directly from the hotel record;
# they are calculated in get_hotel_field() below.
HOTEL_FIELDS = ('hotel_id', '.lat', '.lon', 'name', 'address', 'class', '.rate', 'ranking', 'review_score', 'url', 'hoteltype_id')
class BookingApi:
def __init__(self, login, password):
@ -70,21 +69,6 @@ class BookingApi:
return None
def make_record(src, rate):
    '''
    Builds a Hotel tuple from a raw hotel dictionary and its price rate.

    src is indexed by the keys 'hotel_id', 'location' (which in turn holds
    'latitude' and 'longitude'), 'name', 'address', 'class', 'ranking',
    'review_score' and 'url'. Every value, including rate, is converted with
    unicode() before being stored.
    '''
    def text(*path):
        # Walk the (possibly nested) key path, then stringify the result.
        value = src
        for key in path:
            value = value[key]
        return unicode(value)

    return Hotel(
        text('hotel_id'),
        text('location', 'latitude'),
        text('location', 'longitude'),
        text('name'),
        text('address'),
        text('class'),
        unicode(rate),
        text('ranking'),
        text('review_score'),
        text('url'),
    )
def download(user, password, path):
'''
Downloads all hotels from booking.com and stores them in a bunch of .pkl files.
@ -150,6 +134,17 @@ def translate(source, output):
# Price rate ranges, relative to the median price for a city
rates = (0.7, 1.3)
def get_hotel_field(hotel, field, rate):
    '''
    Returns the value of one output field for a hotel.

    The calculated names '.lat' and '.lon' are read from hotel['location'],
    and '.rate' yields the rate argument unchanged. Any other name is looked
    up directly in hotel. Raises ValueError when the name is neither a
    calculated one nor a key of hotel.
    '''
    # Calculated coordinate fields and the key each maps to under 'location'.
    coordinate_keys = {'.lat': 'latitude', '.lon': 'longitude'}
    if field in coordinate_keys:
        return hotel['location'][coordinate_keys[field]]
    if field == '.rate':
        return rate
    if field not in hotel:
        raise ValueError('Unknown hotel field: {0}'.format(field))
    return hotel[field]
with open(output, 'w') as fd:
for hotel in data:
rate = 0
@ -160,8 +155,7 @@ def translate(source, output):
# Find a range that contains the price
while rate <= len(rates) and price > avg * rates[rate - 1]:
rate += 1
cur = make_record(hotel, rate)
l = [e.encode('utf8') for e in cur]
l = [unicode(get_hotel_field(hotel, e, rate)).encode('utf8').replace('\t', ' ') for e in HOTEL_FIELDS]
print('\t'.join(l), file=fd)