Review fixes

This commit is contained in:
Sergey Yershov 2016-05-25 15:23:47 +03:00
parent 706e4467f3
commit d2bcd9e16f
6 changed files with 111 additions and 98 deletions

View file

@ -55,6 +55,20 @@ bool to_int(char const * s, int & i, int base /*= 10*/)
return false;
}
/// Parses the whole of @p s as an unsigned integer in the given @p base.
///
/// @param s    NUL-terminated string handed to strtoul.
/// @param i    Receives the parsed value; left untouched when false is returned.
/// @param base Numeric base forwarded to strtoul.
/// @return true if strtoul consumed the string up to its terminator and the
///         result fits into unsigned int, false otherwise.
bool to_uint(char const * s, unsigned int & i, int base /*= 10*/)
{
  char * stop;
  // strtoul yields unsigned long; keep that type so that large values are not
  // reinterpreted as negative numbers before the range check below.
  unsigned long const x = strtoul(s, &stop, base);
  if (*stop != 0)
    return false;

  // Where unsigned long is wider than unsigned int, refuse values that do not
  // survive the narrowing instead of silently truncating them.
  unsigned int const narrowed = static_cast<unsigned int>(x);
  if (static_cast<unsigned long>(narrowed) != x)
    return false;

  i = narrowed;
  return true;
}
bool to_uint64(char const * s, uint64_t & i)
{
char * stop;

View file

@ -209,6 +209,7 @@ template <class T, size_t N, class TT> bool IsInArray(T (&arr) [N], TT const & t
/// @name From string to numeric.
//@{
// C-string parsers: each writes the parsed value through its reference parameter.
// NOTE(review): the bool result presumably reports whether parsing succeeded; only
// to_uint's definition is visible in this change, so confirm for the other functions.
bool to_int(char const * s, int & i, int base = 10);
// Parses s with strtoul in the given base; returns false if unparsed characters remain.
bool to_uint(char const * s, unsigned int & i, int base = 10);
bool to_uint64(char const * s, uint64_t & i);
bool to_int64(char const * s, int64_t & i);
bool to_double(char const * s, double & d);
@ -216,6 +217,7 @@ bool to_double(char const * s, double & d);
/// Returns the result of to_int64 on @p s; the parsed value itself is discarded.
inline bool is_number(string const & s)
{
  int64_t ignored;
  return to_int64(s.c_str(), ignored);
}

/// string overloads: each one hands s.c_str() to the char const * function of
/// the same name and returns that function's result unchanged.
inline bool to_int(string const & s, int & i, int base = 10)
{
  char const * const raw = s.c_str();
  return to_int(raw, i, base);
}

inline bool to_uint(string const & s, unsigned int & i, int base = 10)
{
  char const * const raw = s.c_str();
  return to_uint(raw, i, base);
}

inline bool to_uint64(string const & s, uint64_t & i)
{
  char const * const raw = s.c_str();
  return to_uint64(raw, i);
}

inline bool to_int64(string const & s, int64_t & i)
{
  char const * const raw = s.c_str();
  return to_int64(raw, i);
}

inline bool to_double(string const & s, double & d)
{
  char const * const raw = s.c_str();
  return to_double(raw, d);
}

View file

@ -1,81 +1,69 @@
#include "generator/booking_dataset.hpp"
#include "base/string_utils.hpp"
#include "geometry/distance_on_sphere.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
#include "geometry/distance_on_sphere.hpp"
#include "base/string_utils.hpp"
#include "std/fstream.hpp"
#include "std/iostream.hpp"
#include "std/sstream.hpp"
BookingDataset::BookingHotel::BookingHotel(string const & src)
namespace generator
{
BookingDataset::Hotel::Hotel(string const & src)
{
stringstream ss(src);
string elem;
vector<string> rec(FieldsCount());
for (size_t i = 0; getline(ss, elem, '\t') && i < rec.size(); ++i)
rec[i] = elem;
strings::SimpleTokenizer token(src, "\t");
for (size_t i = 0; token && i < rec.size(); ++i, ++token)
rec[i] = *token;
id = static_cast<uint32_t>(strtoul(rec[Index(Fields::Id)].c_str(), nullptr, 10));
lat = strtod(rec[Index(Fields::Latitude)].c_str(), nullptr);
lon = strtod(rec[Index(Fields::Longtitude)].c_str(), nullptr);
strings::to_uint(rec[Index(Fields::Id)], id);
strings::to_double(rec[Index(Fields::Latitude)], lat);
strings::to_double(rec[Index(Fields::Longtitude)], lon);
name = rec[Index(Fields::Name)];
address = rec[Index(Fields::Address)];
stars = rec[Index(Fields::Stars)].empty()
? 0
: static_cast<uint32_t>(strtoul(rec[Index(Fields::Stars)].c_str(), nullptr, 10));
priceCategory =
rec[Index(Fields::PriceCategory)].empty()
? 0
: static_cast<uint32_t>(strtoul(rec[Index(Fields::PriceCategory)].c_str(), nullptr, 10));
ratingBooking = rec[Index(Fields::RatingBooking)].empty()
? 0
: strtod(rec[Index(Fields::RatingBooking)].c_str(), nullptr);
ratingUser = rec[Index(Fields::RatingUsers)].empty()
? 0
: strtod(rec[Index(Fields::RatingUsers)].c_str(), nullptr);
strings::to_uint(rec[Index(Fields::Stars)], stars);
strings::to_uint(rec[Index(Fields::PriceCategory)], priceCategory);
strings::to_double(rec[Index(Fields::RatingBooking)], ratingBooking);
strings::to_double(rec[Index(Fields::RatingUsers)], ratingUser);
descUrl = rec[Index(Fields::DescUrl)];
type = rec[Index(Fields::Type)].empty()
? 0
: static_cast<uint32_t>(strtoul(rec[Index(Fields::Type)].c_str(), nullptr, 10));
strings::to_uint(rec[Index(Fields::Type)], type);
}
ostream & operator<<(ostream & s, BookingDataset::BookingHotel const & h)
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h)
{
return s << "Name: " << h.name << " lon: " << h.lon << " lat: " << h.lat;
return s << "Name: " << h.name << " lat: " << h.lat << " lon: " << h.lon;
}
void BookingDataset::LoadBookingHotels(string const & path)
void BookingDataset::LoadHotels(string const & path)
{
m_hotels.clear();
if(path.empty())
if (path.empty())
return;
ifstream src(path);
for (string elem; getline(src, elem);)
m_hotels.emplace_back(elem);
for (string line; getline(src, line);)
m_hotels.emplace_back(line);
}
BookingDataset::BookingDataset(string const & dataPath)
{
LoadBookingHotels(dataPath);
LoadHotels(dataPath);
size_t counter = 0;
for (auto const & hotel : m_hotels)
{
TBox b(TPoint(hotel.lon, hotel.lat), TPoint(hotel.lon, hotel.lat));
m_rtree.insert(std::make_pair(b, counter++));
TBox b(TPoint(hotel.lat, hotel.lon), TPoint(hotel.lat, hotel.lon));
m_rtree.insert(std::make_pair(b, counter));
++counter;
}
}
@ -108,11 +96,11 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const
// Find 3 nearest values to a point.
vector<TValue> result;
for_each(boost::geometry::index::qbegin(m_rtree,
boost::geometry::index::nearest(TPoint(e.lon, e.lat), 3)),
boost::geometry::index::nearest(TPoint(e.lat, e.lon), 3)),
boost::geometry::index::qend(m_rtree), [&](TValue const & v)
{
auto const & hotel = m_hotels[v.second];
double dist = ms::DistanceOnEarth(e.lon, e.lat, hotel.lon, hotel.lat);
double dist = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon);
if (dist > 150 /* max distance in meters */)
return;
@ -243,10 +231,12 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
case 201: e.AddTag("tourism", "apartment"); break;
case 214: e.AddTag("tourism", "camp_site"); break;
default: e.AddTag("tourism", "hotel"); break;
}
fn(&e);
}
}
} // namespace generator

View file

@ -10,12 +10,14 @@
#include "std/function.hpp"
#include "std/string.hpp"
namespace generator
{
class BookingDataset
{
public:
struct BookingHotel
struct Hotel
{
enum class Fields : size_t
enum class Fields
{
Id = 0,
Latitude = 1,
@ -28,10 +30,10 @@ public:
RatingUsers = 8,
DescUrl = 9,
Type = 10,
Counter
};
uint32_t id = 0;
double lat = 0.0;
double lon = 0.0;
@ -43,20 +45,19 @@ public:
double ratingUser = 0.0;
string descUrl;
uint32_t type = 0;
constexpr size_t Index(Fields field) const { return static_cast<size_t>(field); }
constexpr size_t FieldsCount() const { return static_cast<size_t>(Fields::Counter); }
BookingHotel(string const &src);
explicit Hotel(string const & src);
};
BookingDataset(string const & dataPath);
explicit BookingDataset(string const & dataPath);
bool Filter(OsmElement const & e) const;
void BuildFeatures(function<void(OsmElement *)> const & fn) const;
protected:
vector<BookingHotel> m_hotels;
vector<Hotel> m_hotels;
// create the rtree using default constructor
using TPoint = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
@ -64,7 +65,9 @@ protected:
using TValue = pair<TBox, size_t>;
boost::geometry::index::rtree<TValue, boost::geometry::index::quadratic<16>> m_rtree;
void LoadBookingHotels(string const & path);
void LoadHotels(string const & path);
bool MatchWithBooking(OsmElement const & e) const;
};
} // namespace generator

View file

@ -514,7 +514,7 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info)
TagReplacer tagReplacer(GetPlatform().ResourcesDir() + REPLACED_TAGS_FILE);
// If info.m_bookingDatafileName is empty then no data will be loaded.
BookingDataset bookingDataset(info.m_bookingDatafileName);
generator::BookingDataset bookingDataset(info.m_bookingDatafileName);
// Here we can add new tags to element!!!
auto const fn = [&](OsmElement * e)

View file

@ -2,16 +2,16 @@
# coding: utf8
from __future__ import print_function
import json
import urllib2
import base64
from datetime import datetime
import time
import logging
import pickle
import os
import argparse
from collections import namedtuple, defaultdict
from datetime import datetime
import argparse
import base64
import json
import logging
import os
import pickle
import time
import urllib2
# init logging
logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s')
@ -60,6 +60,7 @@ class BookingApi:
request = urllib2.Request(url, None, self.baseConfig["headers"])
stream = urllib2.urlopen(request)
payload = stream.read()
print(payload)
return json.loads(payload)
except Exception as e:
@ -69,41 +70,40 @@ class BookingApi:
def make_record(src, rate):
return Hotel(
int(src['hotel_id']),
float(src['location']['latitude']),
float(src['location']['longitude']),
src['name'],
src['address'],
int(src['class']),
rate,
src['ranking'],
src['review_score'],
src['url']
unicode(src['hotel_id']),
unicode(src['location']['latitude']),
unicode(src['location']['longitude']),
unicode(src['name']),
unicode(src['address']),
unicode(src['class']),
unicode(rate),
unicode(src['ranking']),
unicode(src['review_score']),
unicode(src['url'])
)
def download(user, password, path):
'''
Download all hotels from booking.com and store them in a set of .pkl files.
'''
api = BookingApi(user, password)
maxrows = 1000
countries = api.call("getCountries", dict(languagecodes='en'))
for country in countries:
countrycode = country['countrycode']
logging.info(u'{0} {1}'.format(countrycode, country['name']))
logging.info(u'Download[{0}]: {1}'.format(countrycode, country['name']))
counter = 0
allhotels = []
while True:
hotels = api.call('getHotels',
dict(new_hotel_type=1, offset=counter, rows=maxrows, countrycodes=countrycode))
dict(new_hotel_type=1, offset=len(allhotels), rows=maxrows, countrycodes=countrycode))
if isinstance(hotels, dict) and 'ruid' in hotels:
logging.error('{0} Code: {1}'.format(hotels['message'], hotels['code']))
logging.error('Api call failed with error: {0} Code: {1}'.format(hotels['message'], hotels['code']))
exit(1)
for hotel in hotels:
allhotels.append(hotel)
counter += len(hotels)
allhotels.append(hotels)
if len(hotels) < maxrows:
break
@ -116,13 +116,12 @@ def download(user, password, path):
def translate(source, output):
files = []
'''
Read *.pkl files and produce a single list of hotels as tab separated values.
'''
files = [filename for filename in os.listdir(source) if filename.endswith('.pkl')]
data = []
for filename in os.listdir(source):
if filename.endswith(".pkl"):
files.append(filename)
for filename in files:
logging.info('Processing {0}'.format(filename))
with open(filename, 'rb') as fd:
@ -131,12 +130,15 @@ def translate(source, output):
# Dict of dicts city_id -> { currency -> [prices] }
cities = defaultdict(lambda: defaultdict(list))
def valid(hotel):
return 'city_id' in hotel and 'currencycode' in hotel and 'minrate' in hotel and hotel['minrate'] is not None
# Collect prices
for hotel in data:
if 'city_id' in hotel and 'currencycode' in hotel and 'minrate' in hotel and hotel['minrate'] is not None:
if valid(hotel):
cities[hotel['city_id']][hotel['currencycode']].append(float(hotel['minrate']))
# Find median prices
# Replaces list of prices by a median price.
for city in cities:
for cur in cities[city]:
cities[city][cur] = sorted(cities[city][cur])[len(cities[city][cur]) / 2]
@ -147,14 +149,15 @@ def translate(source, output):
with open(output, 'w') as fd:
for hotel in data:
rate = 0
if 'city_id' in hotel and 'currencycode' in hotel and 'minrate' in hotel and hotel['minrate'] is not None:
if valid(hotel):
avg = cities[hotel['city_id']][hotel['currencycode']]
price = float(hotel['minrate'])
rate = 1
# Find a range that contains the price
while rate <= len(rates) and price > avg * rates[rate - 1]:
rate += 1
cur = make_record(hotel, rate)
l = [(str(e) if e else '') if not isinstance(e, unicode) else e.encode('utf8') for e in cur]
l = [e.encode('utf8') for e in cur]
print('\t'.join(l), file=fd)
@ -166,7 +169,7 @@ def process_options():
parser.add_argument("--password", dest="password", help="Booking.com account password")
parser.add_argument("--user", dest="user", help="Booking.com account user name")
parser.add_argument("--path", dest="path", help="path to data files")
parser.add_argument("--path", dest="path", help="Path to data files")
parser.add_argument("--output", dest="output", help="Name and destination for output file")
parser.add_argument("--download", action="store_true", dest="download", default=False)
@ -179,6 +182,7 @@ def process_options():
if options.translate and not options.output:
print("--output isn't set")
parser.print_help()
exit()
return options