diff --git a/generator/booking_dataset.cpp b/generator/booking_dataset.cpp index caeaff88d1..9444e7c3be 100644 --- a/generator/booking_dataset.cpp +++ b/generator/booking_dataset.cpp @@ -162,6 +162,7 @@ void BookingDataset::BuildFeatures(function const & fn) cons e.lat = hotel.lat; e.lon = hotel.lon; + e.AddTag("sponsored", "booking"); e.AddTag("name", hotel.name); e.AddTag("ref:sponsored", strings::to_string(hotel.id)); e.AddTag("website", hotel.descUrl); diff --git a/tools/python/booking_hotels.py b/tools/python/booking_hotels.py index a775fa0726..ca63585e6e 100755 --- a/tools/python/booking_hotels.py +++ b/tools/python/booking_hotels.py @@ -2,7 +2,7 @@ # coding: utf8 from __future__ import print_function -from collections import namedtuple, defaultdict +from collections import defaultdict from datetime import datetime import argparse import base64 @@ -13,13 +13,12 @@ import pickle import time import urllib2 -# init logging +# Initialize logging. logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s') -Hotel = namedtuple('Hotel', - ['id', 'lat', 'lon', 'name', 'address', - 'stars', 'priceCategory', 'ratingBooking', - 'ratingUser', 'descUrl']) +# Names starting with '.' are calculated in get_hotel_field() below. +HOTEL_FIELDS = ('hotel_id', '.lat', '.lon', 'name', 'address', 'class', '.rate', 'ranking', 'review_score', 'url', 'hoteltype_id') + class BookingApi: def __init__(self, login, password): @@ -70,21 +69,6 @@ class BookingApi: return None -def make_record(src, rate): - return Hotel( - unicode(src['hotel_id']), - unicode(src['location']['latitude']), - unicode(src['location']['longitude']), - unicode(src['name']), - unicode(src['address']), - unicode(src['class']), - unicode(rate), - unicode(src['ranking']), - unicode(src['review_score']), - unicode(src['url']) - ) - - def download(user, password, path): ''' Downloads all hotels from booking.com and stores them in a bunch of .pkl files. @@ -150,6 +134,17 @@ def translate(source, output): # Price rate ranges, relative to the median price for a city rates = (0.7, 1.3) + def get_hotel_field(hotel, field, rate): + if field == '.lat': + return hotel['location']['latitude'] + elif field == '.lon': + return hotel['location']['longitude'] + elif field == '.rate': + return rate + elif field in hotel: + return hotel[field] + raise ValueError('Unknown hotel field: {0}'.format(field)) + with open(output, 'w') as fd: for hotel in data: rate = 0 @@ -160,8 +155,7 @@ def translate(source, output): # Find a range that contains the price while rate <= len(rates) and price > avg * rates[rate - 1]: rate += 1 - cur = make_record(hotel, rate) - l = [e.encode('utf8') for e in cur] + l = [unicode(get_hotel_field(hotel, e, rate)).encode('utf8').replace('\t', ' ') for e in HOTEL_FIELDS] print('\t'.join(l), file=fd)