forked from organicmaps/organicmaps
Review fixes
This commit is contained in:
parent
706e4467f3
commit
d2bcd9e16f
6 changed files with 111 additions and 98 deletions
|
@ -55,6 +55,20 @@ bool to_int(char const * s, int & i, int base /*= 10*/)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool to_uint(char const * s, unsigned int & i, int base /*= 10*/)
|
||||
{
|
||||
char * stop;
|
||||
long const x = strtoul(s, &stop, base);
|
||||
if (*stop == 0)
|
||||
{
|
||||
i = static_cast<unsigned int>(x);
|
||||
ASSERT_EQUAL(static_cast<unsigned long>(i), x, ());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool to_uint64(char const * s, uint64_t & i)
|
||||
{
|
||||
char * stop;
|
||||
|
|
|
@ -209,6 +209,7 @@ template <class T, size_t N, class TT> bool IsInArray(T (&arr) [N], TT const & t
|
|||
/// @name From string to numeric.
|
||||
//@{
|
||||
bool to_int(char const * s, int & i, int base = 10);
|
||||
bool to_uint(char const * s, unsigned int & i, int base = 10);
|
||||
bool to_uint64(char const * s, uint64_t & i);
|
||||
bool to_int64(char const * s, int64_t & i);
|
||||
bool to_double(char const * s, double & d);
|
||||
|
@ -216,6 +217,7 @@ bool to_double(char const * s, double & d);
|
|||
inline bool is_number(string const & s) { int64_t dummy; return to_int64(s.c_str(), dummy); }
|
||||
|
||||
inline bool to_int(string const & s, int & i, int base = 10) { return to_int(s.c_str(), i, base); }
|
||||
inline bool to_uint(string const & s, unsigned int & i, int base = 10) { return to_uint(s.c_str(), i, base); }
|
||||
inline bool to_uint64(string const & s, uint64_t & i) { return to_uint64(s.c_str(), i); }
|
||||
inline bool to_int64(string const & s, int64_t & i) { return to_int64(s.c_str(), i); }
|
||||
inline bool to_double(string const & s, double & d) { return to_double(s.c_str(), d); }
|
||||
|
|
|
@ -1,81 +1,69 @@
|
|||
#include "generator/booking_dataset.hpp"
|
||||
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/fstream.hpp"
|
||||
#include "std/iostream.hpp"
|
||||
#include "std/sstream.hpp"
|
||||
|
||||
BookingDataset::BookingHotel::BookingHotel(string const & src)
|
||||
namespace generator
|
||||
{
|
||||
BookingDataset::Hotel::Hotel(string const & src)
|
||||
{
|
||||
stringstream ss(src);
|
||||
string elem;
|
||||
vector<string> rec(FieldsCount());
|
||||
for (size_t i = 0; getline(ss, elem, '\t') && i < rec.size(); ++i)
|
||||
rec[i] = elem;
|
||||
strings::SimpleTokenizer token(src, "\t");
|
||||
for (size_t i = 0; token && i < rec.size(); ++i, ++token)
|
||||
rec[i] = *token;
|
||||
|
||||
id = static_cast<uint32_t>(strtoul(rec[Index(Fields::Id)].c_str(), nullptr, 10));
|
||||
|
||||
lat = strtod(rec[Index(Fields::Latitude)].c_str(), nullptr);
|
||||
lon = strtod(rec[Index(Fields::Longtitude)].c_str(), nullptr);
|
||||
strings::to_uint(rec[Index(Fields::Id)], id);
|
||||
strings::to_double(rec[Index(Fields::Latitude)], lat);
|
||||
strings::to_double(rec[Index(Fields::Longtitude)], lon);
|
||||
|
||||
name = rec[Index(Fields::Name)];
|
||||
address = rec[Index(Fields::Address)];
|
||||
|
||||
stars = rec[Index(Fields::Stars)].empty()
|
||||
? 0
|
||||
: static_cast<uint32_t>(strtoul(rec[Index(Fields::Stars)].c_str(), nullptr, 10));
|
||||
|
||||
priceCategory =
|
||||
rec[Index(Fields::PriceCategory)].empty()
|
||||
? 0
|
||||
: static_cast<uint32_t>(strtoul(rec[Index(Fields::PriceCategory)].c_str(), nullptr, 10));
|
||||
|
||||
ratingBooking = rec[Index(Fields::RatingBooking)].empty()
|
||||
? 0
|
||||
: strtod(rec[Index(Fields::RatingBooking)].c_str(), nullptr);
|
||||
|
||||
ratingUser = rec[Index(Fields::RatingUsers)].empty()
|
||||
? 0
|
||||
: strtod(rec[Index(Fields::RatingUsers)].c_str(), nullptr);
|
||||
strings::to_uint(rec[Index(Fields::Stars)], stars);
|
||||
strings::to_uint(rec[Index(Fields::PriceCategory)], priceCategory);
|
||||
strings::to_double(rec[Index(Fields::RatingBooking)], ratingBooking);
|
||||
strings::to_double(rec[Index(Fields::RatingUsers)], ratingUser);
|
||||
|
||||
descUrl = rec[Index(Fields::DescUrl)];
|
||||
|
||||
type = rec[Index(Fields::Type)].empty()
|
||||
? 0
|
||||
: static_cast<uint32_t>(strtoul(rec[Index(Fields::Type)].c_str(), nullptr, 10));
|
||||
strings::to_uint(rec[Index(Fields::Type)], type);
|
||||
}
|
||||
|
||||
ostream & operator<<(ostream & s, BookingDataset::BookingHotel const & h)
|
||||
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h)
|
||||
{
|
||||
return s << "Name: " << h.name << " lon: " << h.lon << " lat: " << h.lat;
|
||||
return s << "Name: " << h.name << " lat: " << h.lat << " lon: " << h.lon;
|
||||
}
|
||||
|
||||
void BookingDataset::LoadBookingHotels(string const & path)
|
||||
void BookingDataset::LoadHotels(string const & path)
|
||||
{
|
||||
m_hotels.clear();
|
||||
|
||||
if(path.empty())
|
||||
|
||||
if (path.empty())
|
||||
return;
|
||||
|
||||
|
||||
ifstream src(path);
|
||||
for (string elem; getline(src, elem);)
|
||||
m_hotels.emplace_back(elem);
|
||||
for (string line; getline(src, line);)
|
||||
m_hotels.emplace_back(line);
|
||||
}
|
||||
|
||||
BookingDataset::BookingDataset(string const & dataPath)
|
||||
{
|
||||
LoadBookingHotels(dataPath);
|
||||
LoadHotels(dataPath);
|
||||
|
||||
size_t counter = 0;
|
||||
for (auto const & hotel : m_hotels)
|
||||
{
|
||||
TBox b(TPoint(hotel.lon, hotel.lat), TPoint(hotel.lon, hotel.lat));
|
||||
m_rtree.insert(std::make_pair(b, counter++));
|
||||
TBox b(TPoint(hotel.lat, hotel.lon), TPoint(hotel.lat, hotel.lon));
|
||||
m_rtree.insert(std::make_pair(b, counter));
|
||||
++counter;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -108,11 +96,11 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const
|
|||
// Find 3 nearest values to a point.
|
||||
vector<TValue> result;
|
||||
for_each(boost::geometry::index::qbegin(m_rtree,
|
||||
boost::geometry::index::nearest(TPoint(e.lon, e.lat), 3)),
|
||||
boost::geometry::index::nearest(TPoint(e.lat, e.lon), 3)),
|
||||
boost::geometry::index::qend(m_rtree), [&](TValue const & v)
|
||||
{
|
||||
auto const & hotel = m_hotels[v.second];
|
||||
double dist = ms::DistanceOnEarth(e.lon, e.lat, hotel.lon, hotel.lat);
|
||||
double dist = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon);
|
||||
if (dist > 150 /* max distance in meters */)
|
||||
return;
|
||||
|
||||
|
@ -243,10 +231,12 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
|
|||
case 201: e.AddTag("tourism", "apartment"); break;
|
||||
|
||||
case 214: e.AddTag("tourism", "camp_site"); break;
|
||||
|
||||
|
||||
default: e.AddTag("tourism", "hotel"); break;
|
||||
}
|
||||
|
||||
fn(&e);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace generator
|
||||
|
|
|
@ -10,12 +10,14 @@
|
|||
#include "std/function.hpp"
|
||||
#include "std/string.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
class BookingDataset
|
||||
{
|
||||
public:
|
||||
struct BookingHotel
|
||||
struct Hotel
|
||||
{
|
||||
enum class Fields : size_t
|
||||
enum class Fields
|
||||
{
|
||||
Id = 0,
|
||||
Latitude = 1,
|
||||
|
@ -28,10 +30,10 @@ public:
|
|||
RatingUsers = 8,
|
||||
DescUrl = 9,
|
||||
Type = 10,
|
||||
|
||||
|
||||
Counter
|
||||
};
|
||||
|
||||
|
||||
uint32_t id = 0;
|
||||
double lat = 0.0;
|
||||
double lon = 0.0;
|
||||
|
@ -43,20 +45,19 @@ public:
|
|||
double ratingUser = 0.0;
|
||||
string descUrl;
|
||||
uint32_t type = 0;
|
||||
|
||||
|
||||
constexpr size_t Index(Fields field) const { return static_cast<size_t>(field); }
|
||||
constexpr size_t FieldsCount() const { return static_cast<size_t>(Fields::Counter); }
|
||||
|
||||
BookingHotel(string const &src);
|
||||
explicit Hotel(string const & src);
|
||||
};
|
||||
|
||||
BookingDataset(string const & dataPath);
|
||||
|
||||
explicit BookingDataset(string const & dataPath);
|
||||
|
||||
bool Filter(OsmElement const & e) const;
|
||||
void BuildFeatures(function<void(OsmElement *)> const & fn) const;
|
||||
|
||||
|
||||
protected:
|
||||
vector<BookingHotel> m_hotels;
|
||||
vector<Hotel> m_hotels;
|
||||
|
||||
// create the rtree using default constructor
|
||||
using TPoint = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
|
||||
|
@ -64,7 +65,9 @@ protected:
|
|||
using TValue = pair<TBox, size_t>;
|
||||
|
||||
boost::geometry::index::rtree<TValue, boost::geometry::index::quadratic<16>> m_rtree;
|
||||
|
||||
void LoadBookingHotels(string const & path);
|
||||
|
||||
void LoadHotels(string const & path);
|
||||
bool MatchWithBooking(OsmElement const & e) const;
|
||||
};
|
||||
|
||||
} // namespace generator
|
||||
|
|
|
@ -514,7 +514,7 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info)
|
|||
TagReplacer tagReplacer(GetPlatform().ResourcesDir() + REPLACED_TAGS_FILE);
|
||||
|
||||
// If info.m_bookingDatafileName is empty then no data will be loaded.
|
||||
BookingDataset bookingDataset(info.m_bookingDatafileName);
|
||||
generator::BookingDataset bookingDataset(info.m_bookingDatafileName);
|
||||
|
||||
// Here we can add new tags to element!!!
|
||||
auto const fn = [&](OsmElement * e)
|
||||
|
|
|
@ -2,16 +2,16 @@
|
|||
# coding: utf8
|
||||
from __future__ import print_function
|
||||
|
||||
import json
|
||||
import urllib2
|
||||
import base64
|
||||
from datetime import datetime
|
||||
import time
|
||||
import logging
|
||||
import pickle
|
||||
import os
|
||||
import argparse
|
||||
from collections import namedtuple, defaultdict
|
||||
from datetime import datetime
|
||||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import time
|
||||
import urllib2
|
||||
|
||||
# init logging
|
||||
logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s')
|
||||
|
@ -60,6 +60,7 @@ class BookingApi:
|
|||
request = urllib2.Request(url, None, self.baseConfig["headers"])
|
||||
stream = urllib2.urlopen(request)
|
||||
payload = stream.read()
|
||||
print(payload)
|
||||
return json.loads(payload)
|
||||
|
||||
except Exception as e:
|
||||
|
@ -69,41 +70,40 @@ class BookingApi:
|
|||
|
||||
def make_record(src, rate):
|
||||
return Hotel(
|
||||
int(src['hotel_id']),
|
||||
float(src['location']['latitude']),
|
||||
float(src['location']['longitude']),
|
||||
src['name'],
|
||||
src['address'],
|
||||
int(src['class']),
|
||||
rate,
|
||||
src['ranking'],
|
||||
src['review_score'],
|
||||
src['url']
|
||||
unicode(src['hotel_id']),
|
||||
unicode(src['location']['latitude']),
|
||||
unicode(src['location']['longitude']),
|
||||
unicode(src['name']),
|
||||
unicode(src['address']),
|
||||
unicode(src['class']),
|
||||
unicode(rate),
|
||||
unicode(src['ranking']),
|
||||
unicode(src['review_score']),
|
||||
unicode(src['url'])
|
||||
)
|
||||
|
||||
|
||||
def download(user, password, path):
|
||||
'''
|
||||
Download all hotels from booking.com and store them in a set of .pkl files.
|
||||
'''
|
||||
api = BookingApi(user, password)
|
||||
|
||||
maxrows = 1000
|
||||
countries = api.call("getCountries", dict(languagecodes='en'))
|
||||
for country in countries:
|
||||
countrycode = country['countrycode']
|
||||
logging.info(u'{0} {1}'.format(countrycode, country['name']))
|
||||
logging.info(u'Download[{0}]: {1}'.format(countrycode, country['name']))
|
||||
|
||||
counter = 0
|
||||
allhotels = []
|
||||
while True:
|
||||
hotels = api.call('getHotels',
|
||||
dict(new_hotel_type=1, offset=counter, rows=maxrows, countrycodes=countrycode))
|
||||
dict(new_hotel_type=1, offset=len(allhotels), rows=maxrows, countrycodes=countrycode))
|
||||
if isinstance(hotels, dict) and 'ruid' in hotels:
|
||||
logging.error('{0} Code: {1}'.format(hotels['message'], hotels['code']))
|
||||
logging.error('Api call failed with error: {0} Code: {1}'.format(hotels['message'], hotels['code']))
|
||||
exit(1)
|
||||
|
||||
for hotel in hotels:
|
||||
allhotels.append(hotel)
|
||||
|
||||
counter += len(hotels)
|
||||
allhotels.append(hotels)
|
||||
|
||||
if len(hotels) < maxrows:
|
||||
break
|
||||
|
@ -116,13 +116,12 @@ def download(user, password, path):
|
|||
|
||||
|
||||
def translate(source, output):
|
||||
files = []
|
||||
'''
|
||||
Read *.pkl files and produce a single list of hotels as tab separated values.
|
||||
'''
|
||||
files = [filename for filename in os.listdir(source) if filename.endswith('.pkl')]
|
||||
|
||||
data = []
|
||||
|
||||
for filename in os.listdir(source):
|
||||
if filename.endswith(".pkl"):
|
||||
files.append(filename)
|
||||
|
||||
for filename in files:
|
||||
logging.info('Processing {0}'.format(filename))
|
||||
with open(filename, 'rb') as fd:
|
||||
|
@ -131,12 +130,15 @@ def translate(source, output):
|
|||
# Dict of dicts city_id -> { currency -> [prices] }
|
||||
cities = defaultdict(lambda: defaultdict(list))
|
||||
|
||||
def valid(hotel):
|
||||
return 'city_id' in hotel and 'currencycode' in hotel and 'minrate' in hotel and hotel['minrate'] is not None
|
||||
|
||||
# Collect prices
|
||||
for hotel in data:
|
||||
if 'city_id' in hotel and 'currencycode' in hotel and 'minrate' in hotel and hotel['minrate'] is not None:
|
||||
if valid(hotel):
|
||||
cities[hotel['city_id']][hotel['currencycode']].append(float(hotel['minrate']))
|
||||
|
||||
# Find median prices
|
||||
# Replaces list of prices by a median price.
|
||||
for city in cities:
|
||||
for cur in cities[city]:
|
||||
cities[city][cur] = sorted(cities[city][cur])[len(cities[city][cur]) / 2]
|
||||
|
@ -147,14 +149,15 @@ def translate(source, output):
|
|||
with open(output, 'w') as fd:
|
||||
for hotel in data:
|
||||
rate = 0
|
||||
if 'city_id' in hotel and 'currencycode' in hotel and 'minrate' in hotel and hotel['minrate'] is not None:
|
||||
if valid(hotel):
|
||||
avg = cities[hotel['city_id']][hotel['currencycode']]
|
||||
price = float(hotel['minrate'])
|
||||
rate = 1
|
||||
# Find a range that contains the price
|
||||
while rate <= len(rates) and price > avg * rates[rate - 1]:
|
||||
rate += 1
|
||||
cur = make_record(hotel, rate)
|
||||
l = [(str(e) if e else '') if not isinstance(e, unicode) else e.encode('utf8') for e in cur]
|
||||
l = [e.encode('utf8') for e in cur]
|
||||
print('\t'.join(l), file=fd)
|
||||
|
||||
|
||||
|
@ -166,7 +169,7 @@ def process_options():
|
|||
parser.add_argument("--password", dest="password", help="Booking.com account password")
|
||||
parser.add_argument("--user", dest="user", help="Booking.com account user name")
|
||||
|
||||
parser.add_argument("--path", dest="path", help="path to data files")
|
||||
parser.add_argument("--path", dest="path", help="Path to data files")
|
||||
parser.add_argument("--output", dest="output", help="Name and destination for output file")
|
||||
|
||||
parser.add_argument("--download", action="store_true", dest="download", default=False)
|
||||
|
@ -179,6 +182,7 @@ def process_options():
|
|||
|
||||
if options.translate and not options.output:
|
||||
print("--output isn't set")
|
||||
parser.print_help()
|
||||
exit()
|
||||
|
||||
return options
|
||||
|
|
Loading…
Add table
Reference in a new issue