Merge pull request from mgsergio/booking-workflow

[booking] Booking workflow
This commit is contained in:
Sergey Yershov 2016-10-10 16:30:04 +03:00 committed by GitHub
commit 43dce88ec6
30 changed files with 1184 additions and 539 deletions

View file

@ -807,6 +807,7 @@ world +
{}
sponsored +
booking -
opentable -
{}
sport +
american_football -

Binary file not shown.

View file

@ -76689,6 +76689,33 @@ cont {
}
}
}
cont {
name: "sponsored-opentable"
element {
scale: 16
symbol {
name: "restaurant"
apply_for_type: 1
priority: 16000
}
}
element {
scale: 17
symbol {
name: "restaurant"
apply_for_type: 1
priority: 16000
}
}
element {
scale: 18
symbol {
name: "restaurant"
apply_for_type: 1
priority: 16000
}
}
}
cont {
name: "sport-american_football"
element {

Binary file not shown.

View file

@ -65014,6 +65014,39 @@ cont {
}
}
}
cont {
name: "sponsored-opentable"
element {
scale: 16
symbol {
name: "restaurant"
apply_for_type: 1
priority: 16000
}
}
element {
scale: 17
symbol {
name: "restaurant"
apply_for_type: 1
priority: 16000
}
}
element {
scale: 18
symbol {
name: "hotel"
priority: 16000
}
}
element {
scale: 19
symbol {
name: "hotel"
priority: 16000
}
}
}
cont {
name: "sport-american_football"
element {

View file

@ -1136,3 +1136,4 @@ olympics|stadium_main;1135;
olympics|stadium;1136;
olympics|water_sport;1137;
olympics|bike_sport;1138;
sponsored|opentable;1139;

1 building;[building];;addr:housenumber;name;1;
1136 olympics|stadium;1136;
1137 olympics|water_sport;1137;
1138 olympics|bike_sport;1138;
1139 sponsored|opentable;1139;

View file

@ -602,7 +602,8 @@ line|z16[man_made=pipeline][location=overground]
linecap: butt;
}
area|z16-[amenity=restaurant],
node|z16-[amenity=restaurant]
node|z16-[amenity=restaurant],
node|z16-[sponsored=opentable]
{
icon-image: restaurant.svg;
}

View file

@ -1136,3 +1136,4 @@ olympics|stadium_main
olympics|stadium
olympics|water_sport
olympics|bike_sport
sponsored|opentable

View file

@ -807,6 +807,7 @@ world 00000000000000000000 +
{}
sponsored 00000000000000000011 +
booking 00000000000000000011 -
opentable 00000000000000001111 -
{}
sport 00000000000000000000 +
american_football 00000000000000000111 -

View file

@ -143,7 +143,9 @@ Can be empty. Example: `$(ls ../../data/borders/{UK*,Ireland}.poly)`.
* `SRTM_PATH`: a path to `*.zip` files with SRTM data.
* `OSC`: a path to an osmChange file to apply after updating the planet.
* `BOOKING_FILE`: a path to hotels.csv with booking data.
* `BOOKING_USER` and `BOOKING_PATH`: user name and password for booking.com API
* `BOOKING_USER` and `BOOKING_PASS`: user name and password for booking.com API
to download hotels data.
* `OPENTABLE_FILE`: a path to restaurants.csv with opentable data.
* `OPENTABLE_USER` and `OPENTABLE_PASS`: user name and password for opentable.com API
to download restaurants data.
### Testing

View file

@ -1,133 +1,56 @@
#include "generator/booking_dataset.hpp"
#include "generator/booking_scoring.hpp"
#include "generator/feature_builder.hpp"
#include "platform/local_country_file_utils.hpp"
#include "platform/platform.hpp"
#include "generator/sponsored_scoring.hpp"
#include "indexer/classificator.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "geometry/distance_on_sphere.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
#include "std/fstream.hpp"
#include "std/iostream.hpp"
#include "std/sstream.hpp"
#include "boost/algorithm/string/replace.hpp"
namespace generator
{
namespace
{
// Returns a copy of |str| in which every tab character is replaced with the
// two-character sequence backslash + 't'. All other characters are copied
// unchanged.
string EscapeTabs(string const & str)
{
  string escaped;
  escaped.reserve(str.size());
  for (char const symbol : str)
  {
    if (symbol != '\t')
      escaped.push_back(symbol);
    else
      escaped += "\\t";
  }
  return escaped;
}
} // namespace
BookingDataset::Hotel::Hotel(string const & src)
// BookingHotel ------------------------------------------------------------------------------------
BookingHotel::BookingHotel(string const & src)
{
vector<string> rec;
strings::ParseCSVRow(src, '\t', rec);
CHECK(rec.size() == FieldsCount(), ("Error parsing hotels.tsv line:", EscapeTabs(src)));
CHECK_EQUAL(rec.size(), FieldsCount(), ("Error parsing hotels.tsv line:",
boost::replace_all_copy(src, "\t", "\\t")));
strings::to_uint(rec[Index(Fields::Id)], id);
strings::to_double(rec[Index(Fields::Latitude)], lat);
strings::to_double(rec[Index(Fields::Longtitude)], lon);
strings::to_uint(rec[FieldIndex(Fields::Id)], m_id.Get());
// TODO(mgsergio): Use ms::LatLon.
strings::to_double(rec[FieldIndex(Fields::Latitude)], m_latLon.lat);
strings::to_double(rec[FieldIndex(Fields::Longtitude)], m_latLon.lon);
name = rec[Index(Fields::Name)];
address = rec[Index(Fields::Address)];
m_name = rec[FieldIndex(Fields::Name)];
m_address = rec[FieldIndex(Fields::Address)];
strings::to_uint(rec[Index(Fields::Stars)], stars);
strings::to_uint(rec[Index(Fields::PriceCategory)], priceCategory);
strings::to_double(rec[Index(Fields::RatingBooking)], ratingBooking);
strings::to_double(rec[Index(Fields::RatingUsers)], ratingUser);
strings::to_uint(rec[FieldIndex(Fields::Stars)], m_stars);
strings::to_uint(rec[FieldIndex(Fields::PriceCategory)], m_priceCategory);
strings::to_double(rec[FieldIndex(Fields::RatingBooking)], m_ratingBooking);
strings::to_double(rec[FieldIndex(Fields::RatingUsers)], m_ratingUser);
descUrl = rec[Index(Fields::DescUrl)];
m_descUrl = rec[FieldIndex(Fields::DescUrl)];
strings::to_uint(rec[Index(Fields::Type)], type);
strings::to_uint(rec[FieldIndex(Fields::Type)], m_type);
translations = rec[Index(Fields::Translations)];
m_translations = rec[FieldIndex(Fields::Translations)];
}
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h)
ostream & operator<<(ostream & s, BookingHotel const & h)
{
s << fixed << setprecision(7);
return s << "Id: " << h.id << "\t Name: " << h.name << "\t Address: " << h.address
<< "\t lat: " << h.lat << " lon: " << h.lon;
s << "Id: " << h.m_id << "\t Name: " << h.m_name << "\t Address: " << h.m_address
<< "\t lat: " << h.m_latLon.lat << " lon: " << h.m_latLon.lon;
return s;
}
// Registers every local map found in the platform's writable directory in
// |m_index| and then creates the reverse geocoder on top of that index.
// A map that throws RootException on registration trips CHECK(false).
BookingDataset::AddressMatcher::AddressMatcher()
{
  Platform & pl = GetPlatform();

  vector<platform::LocalCountryFile> mwms;
  platform::FindAllLocalMapsInDirectoryAndCleanup(pl.WritableDir(), 0 /* version */,
                                                  -1 /* latestVersion */, mwms);

  for (auto const & mwm : mwms)
  {
    LOG(LINFO, ("Found mwm:", mwm));
    try
    {
      m_index.RegisterMap(mwm);
    }
    catch (RootException const &)
    {
      CHECK(false, ("Bad mwm file:", mwm));
    }
  }

  m_coder = make_unique<search::ReverseGeocoder>(m_index);
}
// Looks up the address nearest to the hotel's coordinates with the reverse
// geocoder and stores its street name and house number in |hotel|.
void BookingDataset::AddressMatcher::operator()(Hotel & hotel)
{
  auto const hotelPoint = MercatorBounds::FromLatLon(hotel.lat, hotel.lon);

  search::ReverseGeocoder::Address nearby;
  m_coder->GetNearbyAddress(hotelPoint, nearby);

  hotel.street = nearby.GetStreetName();
  hotel.houseNumber = nearby.GetHouseNumber();
}
// Loads hotels from the file at |dataPath|.
// An empty |dataPath| leaves the dataset empty without reporting anything;
// a file that cannot be opened is logged as an error and the dataset stays empty.
BookingDataset::BookingDataset(string const & dataPath, string const & addressReferencePath)
{
  if (dataPath.empty())
    return;

  ifstream input(dataPath);
  if (input.is_open())
  {
    LoadHotels(input, addressReferencePath);
  }
  else
  {
    LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
  }
}
// Loads hotels directly from the already opened stream |dataSource|.
// |addressReferencePath| is forwarded unchanged to LoadHotels().
BookingDataset::BookingDataset(istream & dataSource, string const & addressReferencePath)
{
LoadHotels(dataSource, addressReferencePath);
}
// Returns the result of MatchWithBooking() for |fb| when CanBeBooking(fb)
// holds, and kInvalidHotelIndex otherwise.
size_t BookingDataset::GetMatchingHotelIndex(FeatureBuilder1 const & fb) const
{
  if (!CanBeBooking(fb))
    return kInvalidHotelIndex;

  return MatchWithBooking(fb);
}
bool BookingDataset::CanBeBooking(FeatureBuilder1 const & fb) const
// BookingDataset ----------------------------------------------------------------------------------
template <>
bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const
{
if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
return false;
@ -135,78 +58,66 @@ bool BookingDataset::CanBeBooking(FeatureBuilder1 const & fb) const
return ftypes::IsHotelChecker::Instance()(fb.GetTypes());
}
BookingDataset::Hotel const & BookingDataset::GetHotelById(uint32_t const id) const
template <>
void BookingDataset::PreprocessMatchedOsmObject(ObjectId, FeatureBuilder1 & fb,
function<void(FeatureBuilder1 &)> const fn) const
{
auto const it = m_hotels.find(id);
CHECK(it != end(m_hotels), ("Got wrong hotel id:", id));
return it->second;
// Turn a hotel into a simple building.
if (fb.GetGeomType() == feature::GEOM_AREA)
{
// Remove all information about the hotel.
auto params = fb.GetParams();
params.ClearName();
auto & meta = params.GetMetadata();
meta.Drop(feature::Metadata::EType::FMD_STARS);
meta.Drop(feature::Metadata::EType::FMD_WEBSITE);
meta.Drop(feature::Metadata::EType::FMD_PHONE_NUMBER);
auto const tourism = classif().GetTypeByPath({"tourism"});
my::EraseIf(params.m_Types, [tourism](uint32_t type)
{
ftype::TruncValue(type, 1);
return type == tourism;
});
fb.SetParams(params);
}
fn(fb);
}
// Returns a mutable reference to the hotel stored under |id|.
// An unknown |id| trips the CHECK below.
BookingDataset::Hotel & BookingDataset::GetHotelById(uint32_t const id)
{
  auto const found = m_hotels.find(id);
  CHECK(found != m_hotels.end(), ("Got wrong hotel id:", id));
  return found->second;
}
// Queries the r-tree for at most |limit| entries nearest to |latLon| and
// returns their hotel ids in the order the query yields them.
// When |maxDistance| is non-zero, hotels whose DistanceOnEarth from |latLon|
// exceeds it are dropped; |maxDistance| == 0.0 disables that filter.
vector<uint32_t> BookingDataset::GetNearestHotels(ms::LatLon const & latLon, size_t const limit,
                                                  double const maxDistance /* = 0.0 */) const
{
  namespace bgi = boost::geometry::index;

  vector<uint32_t> hotelIds;

  auto it = bgi::qbegin(m_rtree, bgi::nearest(TPoint(latLon.lat, latLon.lon), limit));
  auto const last = bgi::qend(m_rtree);
  for (; it != last; ++it)
  {
    TValue const & entry = *it;
    auto const & hotel = GetHotelById(entry.second);
    double const dist = ms::DistanceOnEarth(latLon.lat, latLon.lon, hotel.lat, hotel.lon);

    // |maxDistance| is a limit in meters; zero means "no limit".
    bool const tooFar = maxDistance != 0.0 && dist > maxDistance;
    if (!tooFar)
      hotelIds.emplace_back(entry.second);
  }

  return hotelIds;
}
// Calls BuildHotel() with |fn| for every hotel stored in |m_hotels|,
// in the map's iteration order.
void BookingDataset::BuildHotels(function<void(FeatureBuilder1 &)> const & fn) const
{
  for (auto it = m_hotels.begin(); it != m_hotels.end(); ++it)
  {
    Hotel const & hotel = it->second;
    BuildHotel(hotel, fn);
  }
}
void BookingDataset::BuildHotel(Hotel const & hotel,
function<void(FeatureBuilder1 &)> const & fn) const
template <>
void BookingDataset::BuildObject(Object const & hotel,
function<void(FeatureBuilder1 &)> const & fn) const
{
FeatureBuilder1 fb;
FeatureParams params;
fb.SetCenter(MercatorBounds::FromLatLon(hotel.lat, hotel.lon));
fb.SetCenter(MercatorBounds::FromLatLon(hotel.m_latLon.lat, hotel.m_latLon.lon));
auto & metadata = params.GetMetadata();
metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(hotel.id));
metadata.Set(feature::Metadata::FMD_WEBSITE, hotel.descUrl);
metadata.Set(feature::Metadata::FMD_RATING, strings::to_string(hotel.ratingUser));
metadata.Set(feature::Metadata::FMD_STARS, strings::to_string(hotel.stars));
metadata.Set(feature::Metadata::FMD_PRICE_RATE, strings::to_string(hotel.priceCategory));
metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(hotel.m_id.Get()));
metadata.Set(feature::Metadata::FMD_WEBSITE, hotel.m_descUrl);
metadata.Set(feature::Metadata::FMD_RATING, strings::to_string(hotel.m_ratingUser));
metadata.Set(feature::Metadata::FMD_STARS, strings::to_string(hotel.m_stars));
metadata.Set(feature::Metadata::FMD_PRICE_RATE, strings::to_string(hotel.m_priceCategory));
// params.AddAddress(hotel.address);
// TODO(mgsergio): addr:full ???
if (!hotel.street.empty())
fb.AddStreet(hotel.street);
if (!hotel.m_street.empty())
fb.AddStreet(hotel.m_street);
if (!hotel.houseNumber.empty())
fb.AddHouseNumber(hotel.houseNumber);
if (!hotel.m_houseNumber.empty())
fb.AddHouseNumber(hotel.m_houseNumber);
params.AddName(StringUtf8Multilang::GetLangByCode(StringUtf8Multilang::kDefaultCode),
hotel.name);
if (!hotel.translations.empty())
hotel.m_name);
if (!hotel.m_translations.empty())
{
// TODO(mgsergio): Move parsing to the hotel costruction stage.
vector<string> parts;
strings::ParseCSVRow(hotel.translations, '|', parts);
CHECK_EQUAL(parts.size() % 3, 0, ("Invalid translation string:", hotel.translations));
strings::ParseCSVRow(hotel.m_translations, '|', parts);
CHECK_EQUAL(parts.size() % 3, 0, ("Invalid translation string:", hotel.m_translations));
for (auto i = 0; i < parts.size(); i += 3)
{
auto const langCode = StringUtf8Multilang::GetLangIndex(parts[i]);
@ -219,7 +130,7 @@ void BookingDataset::BuildHotel(Hotel const & hotel,
params.AddType(clf.GetTypeByPath({"sponsored", "booking"}));
// Matching booking.com hotel types to OpenStreetMap values.
// Booking types are listed in the closed API docs.
switch (hotel.type)
switch (hotel.m_type)
{
case 19:
case 205: params.AddType(clf.GetTypeByPath({"tourism", "motel"})); break;
@ -273,68 +184,24 @@ void BookingDataset::BuildHotel(Hotel const & hotel,
fn(fb);
}
// Replaces the dataset contents with hotels parsed from |src|, one per line.
// When |addressReferencePath| is not empty, the platform's writable directory
// is temporarily switched to it so that AddressMatcher can fill street and
// house number for every hotel; the previous directory is restored afterwards.
// Finally every hotel is inserted into the r-tree as a degenerate box at its
// (lat, lon) point, keyed by the hotel id.
void BookingDataset::LoadHotels(istream & src, string const & addressReferencePath)
{
  m_hotels.clear();
  m_rtree.clear();

  string row;
  while (getline(src, row))
  {
    Hotel parsed(row);
    m_hotels.emplace(parsed.id, parsed);
  }

  if (!addressReferencePath.empty())
  {
    LOG(LINFO, ("Reference addresses for booking objects", addressReferencePath));

    Platform & pl = GetPlatform();
    string const savedWritableDir = pl.WritableDir();
    pl.SetWritableDirForTests(addressReferencePath);

    AddressMatcher matcher;

    size_t withAddressParts = 0;
    size_t withoutAddress = 0;
    for (auto & entry : m_hotels)
    {
      Hotel & current = entry.second;
      matcher(current);

      if (current.address.empty())
        ++withoutAddress;
      if (current.IsAddressPartsFilled())
        ++withAddressParts;
    }

    LOG(LINFO, ("Num of hotels:", m_hotels.size(), "matched:", withAddressParts,
                "empty addresses:", withoutAddress));

    pl.SetWritableDirForTests(savedWritableDir);
  }

  for (auto const & entry : m_hotels)
  {
    Hotel const & current = entry.second;
    TBox box(TPoint(current.lat, current.lon), TPoint(current.lat, current.lon));
    m_rtree.insert(make_pair(box, current.id));
  }
}
size_t BookingDataset::MatchWithBooking(FeatureBuilder1 const & fb) const
template <>
BookingDataset::ObjectId BookingDataset::FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const
{
auto const name = fb.GetName(StringUtf8Multilang::kDefaultCode);
if (name.empty())
return false;
return Object::InvalidObjectId();
// Find |kMaxSelectedElements| nearest values to a point.
auto const bookingIndexes = GetNearestHotels(MercatorBounds::ToLatLon(fb.GetKeyPoint()),
kMaxSelectedElements, kDistanceLimitInMeters);
auto const bookingIndexes = GetNearestObjects(MercatorBounds::ToLatLon(fb.GetKeyPoint()),
kMaxSelectedElements, kDistanceLimitInMeters);
for (uint32_t const j : bookingIndexes)
for (auto const j : bookingIndexes)
{
if (booking_scoring::Match(GetHotelById(j), fb).IsMatched())
if (sponsored_scoring::Match(GetObjectById(j), fb).IsMatched())
return j;
}
return kInvalidHotelIndex;
return Object::InvalidObjectId();
}
} // namespace generator

View file

@ -1,116 +1,68 @@
#pragma once
#include "indexer/index.hpp"
#include "generator/sponsored_dataset.hpp"
#include "search/reverse_geocoder.hpp"
#include "geometry/latlon.hpp"
#include "boost/geometry.hpp"
#include "boost/geometry/geometries/point.hpp"
#include "boost/geometry/geometries/box.hpp"
#include "boost/geometry/index/rtree.hpp"
#include "base/newtype.hpp"
#include "std/function.hpp"
#include "std/limits.hpp"
#include "std/map.hpp"
#include "std/string.hpp"
class FeatureBuilder1;
namespace generator
{
class BookingDataset
// TODO(mgsergio): Try to get rid of code duplication. (See OpenTableRestaurant)
struct BookingHotel
{
public:
static double constexpr kDistanceLimitInMeters = 150;
static size_t constexpr kMaxSelectedElements = 3;
static auto constexpr kInvalidHotelIndex = numeric_limits<uint32_t>::max();
NEWTYPE(uint32_t, ObjectId);
struct Hotel
enum class Fields
{
enum class Fields
{
Id = 0,
Latitude = 1,
Longtitude = 2,
Name = 3,
Address = 4,
Stars = 5,
PriceCategory = 6,
RatingBooking = 7,
RatingUsers = 8,
DescUrl = 9,
Type = 10,
Translations = 11,
Counter
};
// TODO(mgsergio): Make a separate type for this or an alias.
uint32_t id = 0;
double lat = 0.0;
double lon = 0.0;
string name;
string address;
string street;
string houseNumber;
uint32_t stars = 0;
uint32_t priceCategory = 0;
double ratingBooking = 0.0;
double ratingUser = 0.0;
string descUrl;
uint32_t type = 0;
string translations;
static constexpr size_t Index(Fields field) { return static_cast<size_t>(field); }
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
explicit Hotel(string const & src);
inline bool IsAddressPartsFilled() const { return !street.empty() || !houseNumber.empty(); }
Id = 0,
Latitude = 1,
Longtitude = 2,
Name = 3,
Address = 4,
Stars = 5,
PriceCategory = 6,
RatingBooking = 7,
RatingUsers = 8,
DescUrl = 9,
Type = 10,
Translations = 11,
Counter
};
class AddressMatcher
static constexpr ObjectId InvalidObjectId()
{
Index m_index;
unique_ptr<search::ReverseGeocoder> m_coder;
return ObjectId(numeric_limits<typename ObjectId::RepType>::max());
}
public:
AddressMatcher();
void operator()(Hotel & hotel);
};
explicit BookingHotel(string const & src);
explicit BookingDataset(string const & dataPath, string const & addressReferencePath = string());
explicit BookingDataset(istream & dataSource, string const & addressReferencePath = string());
static constexpr size_t FieldIndex(Fields field) { return static_cast<size_t>(field); }
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
/// @return an index of a matched hotel or kInvalidHotelIndex on failure.
size_t GetMatchingHotelIndex(FeatureBuilder1 const & fb) const;
/// @return true if |fb| is a hotel with a name.
bool CanBeBooking(FeatureBuilder1 const & fb) const;
bool HasAddresParts() const { return !m_street.empty() || !m_houseNumber.empty(); }
inline size_t Size() const { return m_hotels.size(); }
Hotel const & GetHotelById(uint32_t id) const;
Hotel & GetHotelById(uint32_t id);
vector<uint32_t> GetNearestHotels(ms::LatLon const & latLon, size_t limit,
double maxDistance = 0.0) const;
bool MatchByName(string const & osmName, vector<size_t> const & bookingIndexes) const;
ObjectId m_id{InvalidObjectId()};
ms::LatLon m_latLon = ms::LatLon::Zero();
string m_name;
string m_street;
string m_houseNumber;
void BuildHotels(function<void(FeatureBuilder1 &)> const & fn) const;
protected:
map<uint32_t, Hotel> m_hotels;
using TPoint = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
using TBox = boost::geometry::model::box<TPoint>;
using TValue = pair<TBox, uint32_t>;
// Create the rtree using default constructor.
boost::geometry::index::rtree<TValue, boost::geometry::index::quadratic<16>> m_rtree;
void BuildHotel(Hotel const & hotel, function<void(FeatureBuilder1 &)> const & fn) const;
void LoadHotels(istream & path, string const & addressReferencePath);
/// @return an index of a matched hotel or numeric_limits<size_t>::max() on failure.
size_t MatchWithBooking(FeatureBuilder1 const & e) const;
string m_address;
uint32_t m_stars = 0;
uint32_t m_priceCategory = 0;
double m_ratingBooking = 0.0;
double m_ratingUser = 0.0;
string m_descUrl;
uint32_t m_type = 0;
string m_translations;
};
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h);
ostream & operator<<(ostream & s, BookingHotel const & h);
NEWTYPE_SIMPLE_OUTPUT(BookingHotel::ObjectId);
using BookingDataset = SponsoredDataset<BookingHotel>;
} // namespace generator

View file

@ -87,7 +87,7 @@ int main(int argc, char * argv[])
if (hotel.address.empty())
++emptyAddr;
if (hotel.IsAddressPartsFilled())
if (hotel.HasAddresParts())
{
++matchedNum;
cout << "[" << i << "/" << bookingDataset.Size() << "] Hotel: " << hotel.address

View file

@ -1,7 +1,8 @@
#include "generator/booking_dataset.hpp"
#include "generator/booking_scoring.hpp"
#include "generator/feature_builder.hpp"
#include "generator/opentable_dataset.hpp"
#include "generator/osm_source.hpp"
#include "generator/sponsored_scoring.hpp"
#include "indexer/classificator_loader.hpp"
@ -21,12 +22,19 @@
#include "3party/gflags/src/gflags/gflags.h"
#include "boost/range/adaptor/map.hpp"
#include "boost/range/algorithm/copy.hpp"
DEFINE_string(osm, "", "Input .o5m file");
DEFINE_string(booking, "", "Path to booking data in .tsv format");
DEFINE_string(opentable, "", "Path to opentable data in .tsv format");
DEFINE_string(factors, "", "Factors output path");
DEFINE_string(sample, "", "Path so sample file");
DEFINE_uint64(selection_size, 1000, "Selection size");
DEFINE_uint64(seed, minstd_rand::default_seed, "Seed for random shuffle");
DEFINE_uint64(selection_size, 1000, "Selection size");
DEFINE_bool(generate, false, "Generate unmarked sample");
using namespace generator;
@ -89,11 +97,12 @@ osm::Id ReadDebuggedPrintedOsmId(string const & str)
MYTHROW(ParseError, ("Can't make osmId from string", str));
}
template <typename Dataset>
class Emitter : public EmitterBase
{
public:
Emitter(BookingDataset const & booking, map<osm::Id, FeatureBuilder1> & features)
: m_bookingDataset(booking)
Emitter(Dataset const & dataset, map<osm::Id, FeatureBuilder1> & features)
: m_dataset(dataset)
, m_features(features)
{
LOG_SHORT(LINFO, ("OSM data:", FLAGS_osm));
@ -101,7 +110,7 @@ public:
void operator()(FeatureBuilder1 & fb) override
{
if (m_bookingDataset.CanBeBooking(fb))
if (m_dataset.NecessaryMatchingConditionHolds(fb))
m_features.emplace(fb.GetMostGenericOsmId(), fb);
}
@ -117,7 +126,7 @@ public:
}
private:
BookingDataset const & m_bookingDataset;
Dataset const & m_dataset;
map<osm::Id, FeatureBuilder1> & m_features;
};
@ -125,6 +134,7 @@ feature::GenerateInfo GetGenerateInfo()
{
feature::GenerateInfo info;
info.m_bookingDatafileName = FLAGS_booking;
info.m_opentableDatafileName = FLAGS_opentable;
info.m_osmFileName = FLAGS_osm;
info.SetNodeStorageType("map");
info.SetOsmFileType("o5m");
@ -136,111 +146,242 @@ feature::GenerateInfo GetGenerateInfo()
return info;
}
template <typename Object>
struct SampleItem
{
enum MatchStatus {Uninitialized, Yes, No};
using ObjectId = typename Object::ObjectId;
SampleItem() = default;
SampleItem(osm::Id const & osmId, uint32_t const bookingId, MatchStatus const match = Uninitialized)
SampleItem(osm::Id const & osmId, ObjectId const sponsoredId, MatchStatus const match = Uninitialized)
: m_osmId(osmId)
, m_bookingId(bookingId)
, m_sponsoredId(sponsoredId)
, m_match(match)
{
}
osm::Id m_osmId;
uint32_t m_bookingId = BookingDataset::kInvalidHotelIndex;
ObjectId m_sponsoredId = Object::InvalidObjectId();
MatchStatus m_match = Uninitialized;
};
SampleItem::MatchStatus ReadMatchStatus(string const & str)
template <typename Object>
typename SampleItem<Object>::MatchStatus ReadMatchStatus(string const & str)
{
if (str == "Yes")
return SampleItem::Yes;
return SampleItem<Object>::Yes;
if (str == "No")
return SampleItem::No;
return SampleItem<Object>::No;
if (str == "Uninitialized")
return SampleItem::Uninitialized;
return SampleItem<Object>::Uninitialized;
MYTHROW(ParseError, ("Can't make SampleItem::MatchStatus from string:", str));
}
SampleItem ReadSampleItem(string const & str)
template <typename Object>
SampleItem<Object> ReadSampleItem(string const & str)
{
SampleItem item;
SampleItem<Object> item;
auto const parts = strings::Tokenize(str, "\t");
CHECK_EQUAL(parts.size(), 3, ("Cant't make SampleItem from string:", str,
"due to wrong number of fields."));
item.m_osmId = ReadDebuggedPrintedOsmId(parts[0]);
if (!strings::to_uint(parts[1], item.m_bookingId))
if (!strings::to_uint(parts[1], item.m_sponsoredId.Get()))
MYTHROW(ParseError, ("Can't make uint32 from string:", parts[1]));
item.m_match = ReadMatchStatus(parts[2]);
item.m_match = ReadMatchStatus<Object>(parts[2]);
return item;
}
vector<SampleItem> ReadSample(istream & ist)
template <typename Object>
vector<SampleItem<Object>> ReadSample(istream & ist)
{
vector<SampleItem> result;
vector<SampleItem<Object>> result;
size_t lineNumber = 1;
try
{
for (string line; getline(ist, line); ++lineNumber)
{
result.emplace_back(ReadSampleItem(line));
result.emplace_back(ReadSampleItem<Object>(line));
}
}
catch (ParseError const & e)
{
LOG(LERROR, ("Wrong format: line", lineNumber, e.Msg()));
LOG_SHORT(LERROR, ("Wrong format: line", lineNumber, e.Msg()));
exit(1);
}
return result;
}
vector<SampleItem> ReadSampleFromFile(string const & name)
template <typename Object>
vector<SampleItem<Object>> ReadSampleFromFile(string const & name)
{
ifstream ist(name);
CHECK(ist.is_open(), ("Can't open file:", name, strerror(errno)));
return ReadSample(ist);
return ReadSample<Object>(ist);
}
void GenerateFactors(BookingDataset const & booking, map<osm::Id, FeatureBuilder1> const & features,
vector<SampleItem> const & sampleItems, ostream & ost)
template <typename Dataset, typename Object = typename Dataset::Object>
void GenerateFactors(Dataset const & dataset,
map<osm::Id, FeatureBuilder1> const & features,
vector<SampleItem<Object>> const & sampleItems, ostream & ost)
{
for (auto const & item : sampleItems)
{
auto const & hotel = booking.GetHotelById(item.m_bookingId);
auto const & object = dataset.GetObjectById(item.m_sponsoredId);
auto const & feature = features.at(item.m_osmId);
auto const score = booking_scoring::Match(hotel, feature);
auto const score = generator::sponsored_scoring::Match(object, feature);
auto const center = MercatorBounds::ToLatLon(feature.GetKeyPoint());
double const distanceMeters = ms::DistanceOnEarth(center.lat, center.lon,
hotel.lat, hotel.lon);
double const distanceMeters = ms::DistanceOnEarth(center, object.m_latLon);
auto const matched = score.IsMatched();
ost << "# ------------------------------------------" << fixed << setprecision(6)
<< endl;
ost << (matched ? 'y' : 'n') << " \t" << DebugPrint(feature.GetMostGenericOsmId())
<< "\t " << hotel.id
<< "\t " << object.m_id
<< "\tdistance: " << distanceMeters
<< "\tdistance score: " << score.m_linearNormDistanceScore
<< "\tname score: " << score.m_nameSimilarityScore
<< "\tresult score: " << score.GetMatchingScore()
<< endl;
ost << "# " << PrintBuilder(feature) << endl;
ost << "# " << hotel << endl;
ost << "# URL: https://www.openstreetmap.org/?mlat=" << hotel.lat
<< "&mlon=" << hotel.lon << "#map=18/" << hotel.lat << "/" << hotel.lon << endl;
ost << "# " << object << endl;
ost << "# URL: https://www.openstreetmap.org/?mlat="
<< object.m_latLon.lat << "&mlon=" << object.m_latLon.lon << "#map=18/"
<< object.m_latLon.lat << "/" << object.m_latLon.lon << endl;
}
}
// Identifies which sponsored dataset a run operates on.
enum class DatasetType
{
Booking,
Opentable
};
// Produces an unmarked sample for manual labelling.
//
// Picks at most FLAGS_selection_size features from |features| (shuffled with
// FLAGS_seed), and for each of them prints every nearby dataset object
// (as returned by Dataset::GetNearestObjects) together with the matching
// scores computed by sponsored_scoring::Match.
//
// The text is written to |ost|. The caller is expected to pass the stream it
// opened for the sample file, so the file is no longer opened a second time
// here. For backward compatibility the output still goes to stdout when
// FLAGS_sample is empty.
template <typename Dataset, typename Object = typename Dataset::Object>
void GenerateSample(Dataset const & dataset,
                    map<osm::Id, FeatureBuilder1> const & features,
                    ostream & ost)
{
  LOG_SHORT(LINFO, ("Num of elements:", features.size()));
  vector<osm::Id> elementIndexes(features.size());
  boost::copy(features | boost::adaptors::map_keys, begin(elementIndexes));

  // TODO(mgsergio): Try RandomSample (from search:: at the moment of writing).
  shuffle(elementIndexes.begin(), elementIndexes.end(), minstd_rand(FLAGS_seed));
  if (FLAGS_selection_size < elementIndexes.size())
    elementIndexes.resize(FLAGS_selection_size);

  // The whole sample is accumulated in memory and emitted in one go at the end.
  stringstream outStream;

  for (auto osmId : elementIndexes)
  {
    auto const & fb = features.at(osmId);
    // The feature's key point does not depend on the candidate object,
    // so it is converted once per feature.
    auto const center = MercatorBounds::ToLatLon(fb.GetKeyPoint());

    auto const sponsoredIndexes = dataset.GetNearestObjects(
        center,
        Dataset::kMaxSelectedElements,
        Dataset::kDistanceLimitInMeters);

    for (auto const sponsoredId : sponsoredIndexes)
    {
      auto const & object = dataset.GetObjectById(sponsoredId);
      auto const score = sponsored_scoring::Match(object, fb);

      double const distanceMeters = ms::DistanceOnEarth(center, object.m_latLon);
      auto const matched = score.IsMatched();

      outStream << "# ------------------------------------------" << fixed << setprecision(6)
                << endl;
      outStream << (matched ? 'y' : 'n') << " \t" << DebugPrint(osmId) << "\t " << sponsoredId
                << "\tdistance: " << distanceMeters
                << "\tdistance score: " << score.m_linearNormDistanceScore
                << "\tname score: " << score.m_nameSimilarityScore
                << "\tresult score: " << score.GetMatchingScore()
                << endl;
      outStream << "# " << PrintBuilder(fb) << endl;
      outStream << "# " << object << endl;
      outStream << "# URL: https://www.openstreetmap.org/?mlat="
                << object.m_latLon.lat << "&mlon=" << object.m_latLon.lon
                << "#map=18/" << object.m_latLon.lat << "/" << object.m_latLon.lon << endl;
    }

    // Separate groups of candidates that belong to different features.
    if (!sponsoredIndexes.empty())
      outStream << endl << endl;
  }

  if (FLAGS_sample.empty())
  {
    cout << outStream.str();
    return;
  }

  // |ost| is in a failed state when the caller could not open the sample file.
  if (!ost)
  {
    LOG_SHORT(LERROR, ("Can't output into", FLAGS_sample));
    return;
  }

  ost << outStream.str();
}
// Returns the path of the data file for |Dataset| taken from |info|.
// The primary template is only declared; the explicit specializations below
// provide the definitions.
template <typename Dataset>
string GetDatasetFilePath(feature::GenerateInfo const & info);
// Booking: returns |info.m_bookingDatafileName|.
template <>
string GetDatasetFilePath<BookingDataset>(feature::GenerateInfo const & info)
{
return info.m_bookingDatafileName;
}
// Opentable: returns |info.m_opentableDatafileName|.
template <>
string GetDatasetFilePath<OpentableDataset>(feature::GenerateInfo const & info)
{
return info.m_opentableDatafileName;
}
// Loads |Dataset| from the path stored in |info|, collects the OSM features
// accepted by Emitter<Dataset>, and then either generates an unmarked sample
// (FLAGS_generate) or reads the sample from FLAGS_sample and writes the
// factors to FLAGS_factors.
template <typename Dataset, typename Object = typename Dataset::Object>
void RunImpl(feature::GenerateInfo & info)
{
  // TODO(mgsergio): Log correctly LOG_SHORT(LINFO, ("Booking data:", FLAGS_booking));
  Dataset dataset(GetDatasetFilePath<Dataset>(info));
  LOG_SHORT(LINFO, (dataset.Size(), "objects are loaded from a Dataset."));

  map<osm::Id, FeatureBuilder1> features;
  GenerateFeatures(info, [&dataset, &features](feature::GenerateInfo const & /* info */)
  {
    return make_unique<Emitter<Dataset>>(dataset, features);
  });

  if (FLAGS_generate)
  {
    ofstream sampleStream(FLAGS_sample);
    GenerateSample(dataset, features, sampleStream);
    return;
  }

  auto const sample = ReadSampleFromFile<Object>(FLAGS_sample);
  LOG_SHORT(LINFO, ("Sample size is", sample.size()));

  ofstream factorsStream(FLAGS_factors);
  CHECK(factorsStream.is_open(), ("Can't open file", FLAGS_factors, strerror(errno)));
  GenerateFactors<Dataset>(dataset, features, sample, factorsStream);
}
// Dispatches to the RunImpl instantiation that corresponds to |datasetType|.
void Run(DatasetType const datasetType, feature::GenerateInfo & info)
{
switch (datasetType)
{
case DatasetType::Booking: RunImpl<BookingDataset>(info); break;
case DatasetType::Opentable: RunImpl<OpentableDataset>(info); break;
}
}
} // namespace
@ -259,31 +400,19 @@ int main(int argc, char * argv[])
CHECK(!FLAGS_sample.empty(), ("Please specify sample path."));
CHECK(!FLAGS_osm.empty(), ("Please specify osm path."));
CHECK(!FLAGS_booking.empty(), ("Please specify booking path."));
CHECK(!FLAGS_factors.empty(), ("Please specify factors path."));
CHECK(!FLAGS_booking.empty() ^ !FLAGS_opentable.empty(),
("Please specify either booking or opentable path."));
CHECK(!FLAGS_factors.empty() ^ FLAGS_generate, ("Please either specify factors path"
"or use -generate."));
auto const datasetType = FLAGS_booking.empty() ? DatasetType::Opentable : DatasetType::Booking;
classificator::Load();
auto info = GetGenerateInfo();
GenerateIntermediateData(info);
LOG_SHORT(LINFO, ("Booking data:", FLAGS_booking));
BookingDataset booking(info.m_bookingDatafileName);
LOG_SHORT(LINFO, (booking.Size(), "hotels are loaded from Booking."));
map<osm::Id, FeatureBuilder1> features;
GenerateFeatures(info, [&booking, &features](feature::GenerateInfo const & /* info */)
{
return make_unique<Emitter>(booking, features);
});
auto const sample = ReadSampleFromFile(FLAGS_sample);
LOG(LINFO, ("Sample size is", sample.size()));
{
ofstream ost(FLAGS_factors);
CHECK(ost.is_open(), ("Can't open file", FLAGS_factors, strerror(errno)));
GenerateFactors(booking, features, sample, ost);
}
Run(datasetType, info);
return 0;
}

View file

@ -1,144 +1,47 @@
#include "generator/booking_scoring.hpp"
#include "generator/sponsored_scoring.hpp"
#include "generator/booking_dataset.hpp"
#include "generator/feature_builder.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
#include "geometry/distance_on_sphere.hpp"
#include "base/collection_cast.hpp"
#include "base/stl_iterator.hpp"
#include "std/algorithm.hpp"
#include "std/vector.hpp"
namespace generator
{
namespace booking_scoring
{
namespace
{
// Calculated with tools/python/booking_hotels_quality.py.
double constexpr kOptimalThreshold = 0.304875;
// Type trait that derives from the result type of std::is_same applied to
// std::decay<T>::type and |U|, i.e. it tells whether |T| decays to |U|.
template <typename T, typename U>
struct decay_equiv :
std::is_same<typename std::decay<T>::type, U>::type
{};
using WeightedBagOfWords = vector<pair<strings::UniString, double>>;
// Normalizes and tokenizes |str| with the default search delimiters and
// returns the tokens in sorted order. Despite the name, repeated tokens are
// not removed here.
vector<strings::UniString> StringToSetOfWords(string const & str)
{
  vector<strings::UniString> tokens;
  search::NormalizeAndTokenizeString(str, tokens, search::Delimiters{});
  sort(tokens.begin(), tokens.end());
  return tokens;
}
// Collapses runs of equal adjacent words into (word, weight) pairs.
// Each occurrence currently contributes the same placeholder weight, so the
// weight of a word equals the length of its run. For every distinct word to
// appear once in the result, equal words have to be adjacent in |words|
// (e.g. sorted input).
WeightedBagOfWords MakeWeightedBagOfWords(vector<strings::UniString> const & words)
{
  // TODO(mgsergio): Calculate tf-idf score for every word.
  auto constexpr kTfIdfScorePlaceholder = 1;

  WeightedBagOfWords result;
  // size_t index: avoids comparing a signed int with the unsigned words.size().
  for (size_t i = 0; i < words.size(); ++i)
  {
    result.emplace_back(words[i], kTfIdfScorePlaceholder);
    // Absorb the following duplicates of words[i] into the entry just added.
    while (i + 1 < words.size() && words[i] == words[i + 1])
    {
      result.back().second += kTfIdfScorePlaceholder;  // TODO(mgsergio): tf-idf score for result.back().first;
      ++i;
    }
  }
  return result;
}
double WeightedBagsDotProduct(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
double result{};
auto lhsIt = begin(lhs);
auto rhsIt = begin(rhs);
while (lhsIt != end(lhs) && rhsIt != end(rhs))
{
if (lhsIt->first == rhsIt->first)
{
result += lhsIt->second * rhsIt->second;
++lhsIt;
++rhsIt;
}
else if (lhsIt->first < rhsIt->first)
{
++lhsIt;
}
else
{
++rhsIt;
}
}
return result;
}
double WeightedBagOfWordsCos(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
auto const product = WeightedBagsDotProduct(lhs, rhs);
auto const lhsLength = sqrt(WeightedBagsDotProduct(lhs, lhs));
auto const rhsLength = sqrt(WeightedBagsDotProduct(rhs, rhs));
if (product == 0.0)
return 0.0;
return product / (lhsLength * rhsLength);
}
double GetLinearNormDistanceScore(double distance)
{
distance = my::clamp(distance, 0, BookingDataset::kDistanceLimitInMeters);
return 1.0 - distance / BookingDataset::kDistanceLimitInMeters;
}
double GetNameSimilarityScore(string const & booking_name, string const & osm_name)
{
auto const aws = MakeWeightedBagOfWords(StringToSetOfWords(booking_name));
auto const bws = MakeWeightedBagOfWords(StringToSetOfWords(osm_name));
if (aws.empty() && bws.empty())
return 1.0;
if (aws.empty() || bws.empty())
return 0.0;
return WeightedBagOfWordsCos(aws, bws);
}
} // namespace
double BookingMatchScore::GetMatchingScore() const
namespace generator
{
namespace sponsored_scoring
{
template <>
double MatchStats<BookingHotel>::GetMatchingScore() const
{
// TODO(mgsergio): Use tuner to get optimal function.
return m_linearNormDistanceScore * m_nameSimilarityScore;
}
bool BookingMatchScore::IsMatched() const
template <>
bool MatchStats<BookingHotel>::IsMatched() const
{
return GetMatchingScore() > kOptimalThreshold;
}
BookingMatchScore Match(BookingDataset::Hotel const & h, FeatureBuilder1 const & fb)
// TODO(mgsergio): Do I need to specialize this method?
template <>
MatchStats<BookingHotel> Match(BookingHotel const & h, FeatureBuilder1 const & fb)
{
BookingMatchScore score;
MatchStats<BookingHotel> score;
auto const fbCenter = MercatorBounds::ToLatLon(fb.GetKeyPoint());
auto const distance = ms::DistanceOnEarth(fbCenter.lat, fbCenter.lon, h.lat, h.lon);
score.m_linearNormDistanceScore = GetLinearNormDistanceScore(distance);
auto const distance = ms::DistanceOnEarth(fbCenter, h.m_latLon);
score.m_linearNormDistanceScore =
impl::GetLinearNormDistanceScore(distance, BookingDataset::kDistanceLimitInMeters);
// TODO(mgsergio): Check all translations and use the best one.
score.m_nameSimilarityScore =
GetNameSimilarityScore(h.name, fb.GetName(StringUtf8Multilang::kDefaultCode));
impl::GetNameSimilarityScore(h.m_name, fb.GetName(StringUtf8Multilang::kDefaultCode));
return score;
}
} // namespace booking_scoring
} // namespace sponsored_scoring
} // namespace generator

View file

@ -1,22 +0,0 @@
#pragma once
#include "generator/booking_dataset.hpp"
class FeatureBuilder1;
namespace generator
{
namespace booking_scoring
{
struct BookingMatchScore
{
double GetMatchingScore() const;
bool IsMatched() const;
double m_linearNormDistanceScore{};
double m_nameSimilarityScore{};
};
BookingMatchScore Match(BookingDataset::Hotel const & h, FeatureBuilder1 const & fb);
} // namespace booking_scoring
} // namespace generator

View file

@ -42,6 +42,8 @@ struct GenerateInfo
string m_bookingDatafileName;
string m_bookingReferenceDir;
string m_opentableDatafileName;
string m_opentableReferenceDir;
uint32_t m_versionDate = 0;

View file

@ -28,6 +28,8 @@ SOURCES += \
feature_generator.cpp \
feature_merger.cpp \
feature_sorter.cpp \
opentable_dataset.cpp \
opentable_scoring.cpp \
osm2meta.cpp \
osm2type.cpp \
osm_element.cpp \
@ -36,6 +38,7 @@ SOURCES += \
region_meta.cpp \
routing_generator.cpp \
search_index_builder.cpp \
sponsored_scoring.cpp \
srtm_parser.cpp \
statistics.cpp \
tesselator.cpp \
@ -45,7 +48,6 @@ SOURCES += \
HEADERS += \
altitude_generator.hpp \
booking_dataset.hpp \
booking_scoring.hpp \
borders_generator.hpp \
borders_loader.hpp \
centers_table_builder.hpp \
@ -61,6 +63,7 @@ HEADERS += \
generate_info.hpp \
intermediate_data.hpp\
intermediate_elements.hpp\
opentable_dataset.hpp \
osm2meta.hpp \
osm2type.hpp \
osm_element.hpp \
@ -72,6 +75,9 @@ HEADERS += \
region_meta.hpp \
routing_generator.hpp \
search_index_builder.hpp \
sponsored_dataset.hpp \
sponsored_dataset_inl.hpp \
sponsored_scoring.hpp \
srtm_parser.hpp \
statistics.hpp \
tag_admixer.hpp \

View file

@ -74,7 +74,9 @@ DEFINE_string(osm_file_name, "", "Input osm area file.");
DEFINE_string(osm_file_type, "xml", "Input osm area file type [xml, o5m].");
DEFINE_string(user_resource_path, "", "User defined resource path for classificator.txt and etc.");
DEFINE_string(booking_data, "", "Path to booking data in .tsv format.");
DEFINE_string(booking_reference_path, "", "Path to mwm dataset for match booking addresses.");
DEFINE_string(booking_reference_path, "", "Path to mwm dataset for booking addresses matching.");
DEFINE_string(opentable_data, "", "Path to opentable data in .tsv format.");
DEFINE_string(opentable_reference_path, "", "Path to mwm dataset for opentable addresses matching.");
DEFINE_uint64(planet_version, my::SecondsSinceEpoch(),
"Version as seconds since epoch, by default - now.");
DEFINE_string(srtm_path, "",
@ -114,6 +116,8 @@ int main(int argc, char ** argv)
genInfo.m_preloadCache = FLAGS_preload_cache;
genInfo.m_bookingDatafileName = FLAGS_booking_data;
genInfo.m_bookingReferenceDir = FLAGS_booking_reference_path;
genInfo.m_opentableDatafileName = FLAGS_opentable_data;
genInfo.m_opentableReferenceDir = FLAGS_opentable_reference_path;
genInfo.m_versionDate = static_cast<uint32_t>(FLAGS_planet_version);

View file

@ -0,0 +1,94 @@
#include "generator/opentable_dataset.hpp"
#include "generator/feature_builder.hpp"
#include "generator/sponsored_scoring.hpp"
#include "indexer/classificator.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "base/string_utils.hpp"
#include "boost/algorithm/string/replace.hpp"
namespace generator
{
// OpentableRestaurant ------------------------------------------------------------------------------
/// Builds a restaurant record from a single tab-separated line of the opentable dump.
/// The line must contain exactly FieldsCount() columns, otherwise CHECK_EQUAL fires.
OpentableRestaurant::OpentableRestaurant(string const & src)
{
  vector<string> columns;
  strings::ParseCSVRow(src, '\t', columns);
  CHECK_EQUAL(columns.size(), FieldsCount(), ("Error parsing restaurants.tsv line:",
                                              boost::replace_all_copy(src, "\t", "\\t")));

  // Address every column through its symbolic name instead of a raw index.
  auto const column = [&columns](Fields field) -> string const &
  {
    return columns[FieldIndex(field)];
  };

  // The conversion results are not inspected: on failure the member keeps the value
  // the conversion routine leaves in it.
  strings::to_uint(column(Fields::Id), m_id.Get());
  strings::to_double(column(Fields::Latitude), m_latLon.lat);
  strings::to_double(column(Fields::Longtitude), m_latLon.lon);

  m_name = column(Fields::Name);
  m_address = column(Fields::Address);
  m_descUrl = column(Fields::DescUrl);
}
/// Writes a one-line, tab-separated description of |r| to |s|; coordinates are printed
/// in fixed notation with 7 digits after the decimal point.
/// The formatting state of |s| (flags and precision) is restored before returning, so
/// printing a restaurant does not silently change how the caller's later output is
/// formatted.
/// @return |s|, to allow chaining.
ostream & operator<<(ostream & s, OpentableRestaurant const & r)
{
  auto const savedFlags = s.flags();
  auto const savedPrecision = s.precision();

  s << fixed << setprecision(7);
  s << "Id: " << r.m_id << "\t Name: " << r.m_name << "\t Address: " << r.m_address
    << "\t lat: " << r.m_latLon.lat << " lon: " << r.m_latLon.lon;

  // |fixed| and |setprecision| are sticky manipulators: undo them for the caller.
  s.flags(savedFlags);
  s.precision(savedPrecision);
  return s;
}
// OpentableDataset ---------------------------------------------------------------------------------
/// A feature is a candidate for opentable matching only if it has a name in the default
/// language and the food checker accepts its types.
template <>
bool OpentableDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const
{
  bool const hasDefaultName = !fb.GetName(StringUtf8Multilang::kDefaultCode).empty();
  // Short-circuit keeps the type check from running for unnamed features.
  return hasDefaultName && ftypes::IsFoodChecker::Instance()(fb.GetTypes());
}
/// Enriches the matched OSM feature |fb| with data of the restaurant |matchedObjId|
/// (sponsored id, reservation url as website, name, and the sponsored-opentable type)
/// and then passes the updated feature to |fn|.
template <>
void OpentableDataset::PreprocessMatchedOsmObject(ObjectId const matchedObjId, FeatureBuilder1 & fb,
                                                  function<void(FeatureBuilder1 &)> const fn) const
{
  FeatureParams params = fb.GetParams();

  // Bind by reference: the record is only read here, so copying it (several strings)
  // for every matched feature is pure overhead.
  auto const & restaurant = GetObjectById(matchedObjId);
  auto & metadata = params.GetMetadata();
  metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(restaurant.m_id.Get()));
  metadata.Set(feature::Metadata::FMD_WEBSITE, restaurant.m_descUrl);

  // params.AddAddress(restaurant.address);
  // TODO(mgsergio): addr:full ???

  params.AddName(StringUtf8Multilang::GetLangByCode(StringUtf8Multilang::kDefaultCode),
                 restaurant.m_name);

  auto const & clf = classif();
  params.AddType(clf.GetTypeByPath({"sponsored", "opentable"}));

  fb.SetParams(params);

  fn(fb);
}
/// Looks for a restaurant matching |fb| among the nearest dataset objects.
/// @return id of the first nearby object whose match statistics report a match,
/// or Object::InvalidObjectId() if |fb| is unnamed or nothing matches.
template <>
OpentableDataset::ObjectId OpentableDataset::FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const
{
  if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
    return Object::InvalidObjectId();

  // Take at most |kMaxSelectedElements| objects no farther than |kDistanceLimitInMeters|.
  auto const center = MercatorBounds::ToLatLon(fb.GetKeyPoint());
  auto const candidates = GetNearestObjects(center, kMaxSelectedElements, kDistanceLimitInMeters);

  for (auto const candidateId : candidates)
  {
    auto const stats = sponsored_scoring::Match(GetObjectById(candidateId), fb);
    if (stats.IsMatched())
      return candidateId;
  }

  return Object::InvalidObjectId();
}
} // namespace generator

View file

@ -0,0 +1,60 @@
#pragma once
#include "generator/sponsored_dataset.hpp"
#include "geometry/latlon.hpp"
#include "base/newtype.hpp"
#include "std/limits.hpp"
#include "std/string.hpp"
namespace generator
{
// TODO(mgsergio): Try to get rid of code duplication. (See BookingHotel)
/// A single restaurant record of the opentable dataset, parsed from one tab-separated line.
struct OpentableRestaurant
{
// Strongly typed restaurant identifier.
NEWTYPE(uint32_t, ObjectId);
/// Column indices of the tab-separated input line; |Counter| is the number of columns.
/// Note: |Phone| is counted as a column but has no corresponding member in this struct.
enum class Fields
{
Id = 0,
Latitude,
Longtitude,
Name,
Address,
DescUrl,
Phone,
// Opentable doesn't have translations.
// Translations,
Counter
};
/// @return the reserved "no object" id: the maximum value of the underlying id type.
static constexpr ObjectId InvalidObjectId()
{
return ObjectId(numeric_limits<typename ObjectId::RepType>::max());
}
/// Parses |src|, one tab-separated line with FieldsCount() columns.
explicit OpentableRestaurant(string const & src);
/// @return position of |field| within a parsed line.
static constexpr size_t FieldIndex(Fields field) { return static_cast<size_t>(field); }
/// @return number of columns expected in every line.
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
/// @return true if a street or a house number has been set for this object.
bool HasAddresParts() const { return !m_street.empty() || !m_houseNumber.empty(); }
ObjectId m_id{InvalidObjectId()};
ms::LatLon m_latLon = ms::LatLon::Zero();
string m_name;
// |m_street| and |m_houseNumber| are not read from the input line; they stay empty
// unless they are assigned later (e.g. by address matching).
string m_street;
string m_houseNumber;
// Address string as it is given in the input line.
string m_address;
// Url taken from the DescUrl column.
string m_descUrl;
// string m_translations;
};
ostream & operator<<(ostream & s, OpentableRestaurant const & r);
NEWTYPE_SIMPLE_OUTPUT(OpentableRestaurant::ObjectId);
using OpentableDataset = SponsoredDataset<OpentableRestaurant>;
} // namespace generator

View file

@ -0,0 +1,45 @@
#include "generator/sponsored_scoring.hpp"
#include "generator/opentable_dataset.hpp"
#include "generator/feature_builder.hpp"
namespace
{
// Matching score threshold: an opentable restaurant and an OSM feature are considered
// matched only when their matching score is strictly greater than this value.
// Calculated with tools/python/booking_hotels_quality.py.
double constexpr kOptimalThreshold = 0.312887;
} // namespace
namespace generator
{
namespace sponsored_scoring
{
/// Combined score of a restaurant/feature pair: the product of the distance score and
/// the name similarity score.
template <>
double MatchStats<OpentableRestaurant>::GetMatchingScore() const
{
  // TODO(mgsergio): Use tuner to get optimal function.
  double const score = m_linearNormDistanceScore * m_nameSimilarityScore;
  return score;
}
/// @return true if the matching score is strictly above |kOptimalThreshold|.
template <>
bool MatchStats<OpentableRestaurant>::IsMatched() const
{
  double const score = GetMatchingScore();
  return score > kOptimalThreshold;
}
/// Computes match statistics of restaurant |r| against OSM feature |fb|: a distance
/// score normalized by OpentableDataset::kDistanceLimitInMeters and a similarity score
/// between the restaurant name and the feature's default-language name.
template <>
MatchStats<OpentableRestaurant> Match(OpentableRestaurant const & r, FeatureBuilder1 const & fb)
{
  auto const featureCenter = MercatorBounds::ToLatLon(fb.GetKeyPoint());
  auto const metersApart = ms::DistanceOnEarth(featureCenter, r.m_latLon);

  MatchStats<OpentableRestaurant> stats;
  stats.m_linearNormDistanceScore =
      impl::GetLinearNormDistanceScore(metersApart, OpentableDataset::kDistanceLimitInMeters);
  stats.m_nameSimilarityScore =
      impl::GetNameSimilarityScore(r.m_name, fb.GetName(StringUtf8Multilang::kDefaultCode));
  return stats;
}
} // namespace sponsored_scoring
} // namespace generator

View file

@ -1,4 +1,3 @@
#include "generator/booking_dataset.hpp"
#include "generator/coastlines_generator.hpp"
#include "generator/feature_generator.hpp"
#include "generator/intermediate_data.hpp"
@ -13,6 +12,9 @@
#include "generator/towns_dumper.hpp"
#include "generator/world_map_generator.hpp"
#include "generator/booking_dataset.hpp"
#include "generator/opentable_dataset.hpp"
#include "indexer/classificator.hpp"
#include "platform/platform.hpp"
@ -278,6 +280,7 @@ class MainFeaturesEmitter : public EmitterBase
bool m_failOnCoasts;
generator::BookingDataset m_bookingDataset;
generator::OpentableDataset m_opentableDataset;
/// Used to prepare a list of cities to serve as a list of nodes
/// for building a highway graph with OSRM for low zooms.
@ -301,6 +304,8 @@ public:
: m_skippedElementsPath(info.GetIntermediateFileName("skipped_elements", ".lst"))
, m_failOnCoasts(info.m_failOnCoasts)
, m_bookingDataset(info.m_bookingDatafileName, info.m_bookingReferenceDir)
, m_opentableDataset(info.m_opentableDatafileName, info.m_opentableReferenceDir)
{
Classificator const & c = classif();
@ -342,47 +347,43 @@ public:
static uint32_t const placeType = classif().GetTypeByPath({"place"});
uint32_t const type = fb.GetParams().FindType(placeType, 1);
auto hotelIndex = generator::BookingDataset::kInvalidHotelIndex;
// TODO(mgsergio): Would it be better to have objects that store a callback
// and can be piped: action-if-cond1 | action-if-cond-2 | ... ?
// The first object which performs an action terminates the chain.
if (type != ftype::GetEmptyValue() && !fb.GetName().empty())
{
m_places.ReplaceEqualInRect(
Place(fb, type),
[](Place const & p1, Place const & p2) { return p1.IsEqual(p2); },
[](Place const & p1, Place const & p2) { return p1.IsBetterThan(p2); });
return;
}
else if ((hotelIndex = m_bookingDataset.GetMatchingHotelIndex(fb)) !=
generator::BookingDataset::kInvalidHotelIndex)
{
m_skippedElements << DebugPrint(fb.GetMostGenericOsmId()) << endl;
// Turn a hotel into a simple building.
if (fb.GetGeomType() == feature::GEOM_AREA)
auto const bookingObjId = m_bookingDataset.FindMatchingObjectId(fb);
if (bookingObjId != generator::BookingHotel::InvalidObjectId())
{
m_bookingDataset.PreprocessMatchedOsmObject(bookingObjId, fb, [this, bookingObjId](FeatureBuilder1 & fb)
{
// Remove all information about a hotel.
auto params = fb.GetParams();
params.ClearName();
auto & meta = params.GetMetadata();
meta.Drop(feature::Metadata::EType::FMD_STARS);
meta.Drop(feature::Metadata::EType::FMD_WEBSITE);
meta.Drop(feature::Metadata::EType::FMD_PHONE_NUMBER);
auto const & c = classif();
auto const tourism = c.GetTypeByPath({"tourism"});
my::EraseIf(params.m_Types, [&c, tourism](uint32_t type)
{
ftype::TruncValue(type, 1);
return type == tourism;
});
fb.SetParams(params);
m_skippedElements << "BOOKING\t" << DebugPrint(fb.GetMostGenericOsmId())
<< '\t' << bookingObjId.Get() << endl;
Emit(fb);
}
});
return;
}
else
auto const opentableObjId = m_opentableDataset.FindMatchingObjectId(fb);
if (opentableObjId != generator::OpentableRestaurant::InvalidObjectId())
{
Emit(fb);
m_opentableDataset.PreprocessMatchedOsmObject(opentableObjId, fb, [this, opentableObjId](FeatureBuilder1 & fb)
{
m_skippedElements << "OPENTABLE\t" << DebugPrint(fb.GetMostGenericOsmId())
<< '\t' << opentableObjId.Get() << endl;
Emit(fb);
});
return;
}
Emit(fb);
}
/// @return false if coasts are not merged and FLAG_fail_on_coasts is set
@ -390,8 +391,10 @@ public:
{
DumpSkippedElements();
// Emit all booking objecs to the map.
m_bookingDataset.BuildHotels([this](FeatureBuilder1 & fb) { Emit(fb); });
// Emit all required booking objects to the map.
m_bookingDataset.BuildOsmObjects([this](FeatureBuilder1 & fb) { Emit(fb); });
// No opentable objects should be emitted. Opentable data enriches some data
// with a link to a restaurant's reservation page.
m_places.ForEach([this](Place const & p)
{

View file

@ -0,0 +1,89 @@
#pragma once
#include "indexer/index.hpp"
#include "search/reverse_geocoder.hpp"
#include "platform/local_country_file.hpp"
#include "platform/local_country_file_utils.hpp"
#include "platform/platform.hpp"
#include "base/newtype.hpp"
#include "std/function.hpp"
#include "std/map.hpp"
#include "std/string.hpp"
#include "boost/geometry.hpp"
#include "boost/geometry/geometries/point.hpp"
#include "boost/geometry/geometries/box.hpp"
#include "boost/geometry/index/rtree.hpp"
class FeatureBuilder1;
namespace generator
{
/// A collection of sponsored objects of type |SponsoredObject| keyed by object id,
/// together with a spatial index over their (lat, lon) positions.
/// Several member functions are declared here but defined per object type.
template <typename SponsoredObject>
class SponsoredDataset
{
public:
using Object = SponsoredObject;
using ObjectId = typename Object::ObjectId;
// Limits used when nearby candidates are selected for matching.
static double constexpr kDistanceLimitInMeters = 150;
static size_t constexpr kMaxSelectedElements = 3;
/// Loads objects from the file |dataPath|; an empty path leaves the dataset empty.
/// If |addressReferencePath| is not empty, it is used to look up object addresses.
explicit SponsoredDataset(string const & dataPath, string const & addressReferencePath = string());
/// Same as above, but reads the objects from |dataSource|.
explicit SponsoredDataset(istream & dataSource, string const & addressReferencePath = string());
/// @return number of loaded objects.
size_t Size() const { return m_objects.size(); }
/// @return the object with the given |id|; an unknown id fails a CHECK.
Object const & GetObjectById(ObjectId id) const;
Object & GetObjectById(ObjectId id);
/// @return ids of up to |limit| objects nearest to |latLon|. If |maxDistance| is not
/// 0.0, objects farther than |maxDistance| meters are dropped from the result.
vector<ObjectId> GetNearestObjects(ms::LatLon const & latLon, size_t limit,
double maxDistance = 0.0) const;
/// @return true if |fb| satisfies some necessary conditions to match one or several
/// objects from dataset.
bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const;
/// @return an id of an object matched to |e|, or Object::InvalidObjectId() if the
/// necessary condition does not hold or no object matches.
ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const;
/// Updates |fb| using the data of the matched object |matchedObjId| and calls |fn|.
void PreprocessMatchedOsmObject(ObjectId matchedObjId, FeatureBuilder1 & fb,
function<void(FeatureBuilder1 &)> const fn) const;
/// Calls BuildObject() with |fn| for every object of the dataset.
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const;
protected:
/// Fills street and house number of an object using reverse geocoding over the
/// mwm files found in the platform's writable dir.
class AddressMatcher
{
public:
AddressMatcher();
void operator()(Object & object);
private:
Index m_index;
unique_ptr<search::ReverseGeocoder> m_coder;
};
// TODO(mgsergio): Get rid of Box since boost::rtree supports point as value type.
// TODO(mgsergio): Use mercator instead of latlon or boost::geometry::cs::spherical_equatorial
// instead of boost::geometry::cs::cartesian.
using Point = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
using Box = boost::geometry::model::box<Point>;
using Value = pair<Box, ObjectId>;
// Create the rtree using default constructor.
boost::geometry::index::rtree<Value, boost::geometry::index::quadratic<16>> m_rtree;
void BuildObject(Object const & object,
function<void(FeatureBuilder1 &)> const & fn) const;
/// Replaces the current content with objects read line by line from |src| and
/// rebuilds the spatial index.
void LoadData(istream & src, string const & addressReferencePath);
/// @return an id of a matched object or Object::InvalidObjectId() on failure.
ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const;
// All loaded objects keyed by their ids.
map<ObjectId, Object> m_objects;
};
} // namespace generator
#include "generator/sponsored_dataset_inl.hpp" // SponsoredDataset implementation.

View file

@ -0,0 +1,179 @@
#include "generator/sponsored_dataset.hpp"
#include "geometry/distance_on_sphere.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
#include "std/fstream.hpp"
#include "std/iostream.hpp"
namespace generator
{
// AddressMatcher ----------------------------------------------------------------------------------
/// Registers every mwm found in the platform's writable dir in the index and creates
/// a reverse geocoder on top of it. A map that cannot be registered fails a CHECK.
template <typename SponsoredObject>
SponsoredDataset<SponsoredObject>::AddressMatcher::AddressMatcher()
{
  vector<platform::LocalCountryFile> mwms;
  platform::FindAllLocalMapsInDirectoryAndCleanup(GetPlatform().WritableDir(), 0 /* version */,
                                                  -1 /* latestVersion */, mwms);

  for (auto const & mwm : mwms)
  {
    LOG(LINFO, ("Found mwm:", mwm));
    try
    {
      m_index.RegisterMap(mwm);
    }
    catch (RootException const & e)
    {
      CHECK(false, (e.Msg(), "Bad mwm file:", mwm));
    }
  }

  m_coder = make_unique<search::ReverseGeocoder>(m_index);
}
/// Asks the reverse geocoder for an address near |object| and stores the resulting
/// street name and house number in it.
template <typename SponsoredObject>
void SponsoredDataset<SponsoredObject>::AddressMatcher::operator()(Object & object)
{
  auto const position = MercatorBounds::FromLatLon(object.m_latLon);

  search::ReverseGeocoder::Address nearby;
  m_coder->GetNearbyAddress(position, nearby);

  object.m_street = nearby.GetStreetName();
  object.m_houseNumber = nearby.GetHouseNumber();
}
// SponsoredDataset --------------------------------------------------------------------------------
/// Loads the dataset from the file |dataPath|. An empty path leaves the dataset empty;
/// a file that cannot be opened is logged as an error and the dataset stays empty.
template <typename SponsoredObject>
SponsoredDataset<SponsoredObject>::SponsoredDataset(string const & dataPath, string const & addressReferencePath)
{
  if (dataPath.empty())
    return;

  ifstream input(dataPath);
  if (input.is_open())
  {
    LoadData(input, addressReferencePath);
    return;
  }

  LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
}

/// Loads the dataset from an already opened stream |dataSource|.
template <typename SponsoredObject>
SponsoredDataset<SponsoredObject>::SponsoredDataset(istream & dataSource, string const & addressReferencePath)
{
  LoadData(dataSource, addressReferencePath);
}
/// @return the object stored under |id| (read-only access); an unknown id fails a CHECK.
template <typename SponsoredObject>
typename SponsoredDataset<SponsoredObject>::Object const &
SponsoredDataset<SponsoredObject>::GetObjectById(ObjectId id) const
{
  auto const found = m_objects.find(id);
  CHECK(found != m_objects.end(), ("Got wrong object id:", id));
  return found->second;
}

/// @return the object stored under |id| (mutable access); an unknown id fails a CHECK.
template <typename SponsoredObject>
typename SponsoredDataset<SponsoredObject>::Object &
SponsoredDataset<SponsoredObject>::GetObjectById(ObjectId id)
{
  auto const found = m_objects.find(id);
  CHECK(found != m_objects.end(), ("Got wrong object id:", id));
  return found->second;
}
/// Invokes BuildObject() with |fn| for every object of the dataset, in id order.
template <typename SponsoredObject>
void SponsoredDataset<SponsoredObject>::BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const
{
  for (auto it = m_objects.begin(); it != m_objects.end(); ++it)
    BuildObject(it->second, fn);
}
/// @return id of the dataset object matched to |fb|, or Object::InvalidObjectId() when
/// |fb| fails the necessary matching condition or no object matches.
template <typename SponsoredObject>
typename SponsoredDataset<SponsoredObject>::ObjectId
SponsoredDataset<SponsoredObject>::FindMatchingObjectId(FeatureBuilder1 const & fb) const
{
  // Cheap precondition first, the actual search only for plausible candidates.
  if (!NecessaryMatchingConditionHolds(fb))
    return Object::InvalidObjectId();

  return FindMatchingObjectIdImpl(fb);
}
/// Queries the rtree for up to |limit| objects nearest to |latLon|.
/// @param maxDistanceMeters if not 0.0, objects whose distance on Earth from |latLon|
/// exceeds this value are skipped.
/// @return ids of the selected objects in the order the rtree query yields them.
template <typename SponsoredObject>
vector<typename SponsoredDataset<SponsoredObject>::ObjectId>
SponsoredDataset<SponsoredObject>::GetNearestObjects(ms::LatLon const & latLon, size_t const limit,
                                                     double const maxDistanceMeters /* = 0.0 */) const
{
  namespace bgi = boost::geometry::index;

  bool const hasDistanceLimit = maxDistanceMeters != 0.0;

  vector<ObjectId> selected;
  auto const nearestQuery = bgi::nearest(Point(latLon.lat, latLon.lon), limit);
  for (auto it = bgi::qbegin(m_rtree, nearestQuery); it != bgi::qend(m_rtree); ++it)
  {
    ObjectId const id = it->second;
    double const dist = ms::DistanceOnEarth(latLon, GetObjectById(id).m_latLon);
    if (hasDistanceLimit && dist > maxDistanceMeters /* max distance in meters */)
      continue;
    selected.emplace_back(id);
  }
  return selected;
}
/// Replaces the dataset content with objects parsed from |src| (one object per line),
/// optionally fills their street/house number using mwms from |addressReferencePath|,
/// and rebuilds the rtree over object positions.
template <typename SponsoredObject>
void SponsoredDataset<SponsoredObject>::LoadData(istream & src, string const & addressReferencePath)
{
m_objects.clear();
m_rtree.clear();
// Every line, including an empty one, is handed to the Object constructor.
for (string line; getline(src, line);)
{
Object hotel(line);
// emplace() keeps the first object if the same id occurs more than once.
m_objects.emplace(hotel.m_id, hotel);
}
// Try to get object address from existing MWMs.
if (!addressReferencePath.empty())
{
LOG(LINFO, ("Reference addresses for sponsored objects", addressReferencePath));
Platform & platform = GetPlatform();
string const backupPath = platform.WritableDir();
// MWMs can be loaded only from a writable dir or from a resource dir,
// changing the resource dir can lead to problems with the classificator, so
// we change the writable dir.
// The swap must happen before AddressMatcher is constructed, because its
// constructor scans the writable dir for mwm files.
platform.SetWritableDirForTests(addressReferencePath);
AddressMatcher addressMatcher;
size_t matchedCount = 0;
size_t emptyCount = 0;
for (auto & item : m_objects)
{
auto & object = item.second;
addressMatcher(object);
// |emptyCount| counts objects without an address string from the input data,
// |matchedCount| counts objects that got a street or a house number.
if (object.m_address.empty())
++emptyCount;
if (object.HasAddresParts())
++matchedCount;
}
LOG(LINFO,
("Num of hotels:", m_objects.size(), "matched:", matchedCount, "empty addresses:", emptyCount));
// Restore the original writable dir.
platform.SetWritableDirForTests(backupPath);
}
// Index every object as a degenerate box (min corner == max corner) at its (lat, lon).
for (auto const & item : m_objects)
{
auto const & object = item.second;
Box b(Point(object.m_latLon.lat, object.m_latLon.lon),
Point(object.m_latLon.lat, object.m_latLon.lon));
m_rtree.insert(make_pair(b, object.m_id));
}
}
} // namespace generator

View file

@ -0,0 +1,108 @@
#include "generator/sponsored_scoring.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
#include "geometry/distance_on_sphere.hpp"
#include "std/algorithm.hpp"
#include "std/vector.hpp"
namespace
{
using WeightedBagOfWords = vector<pair<strings::UniString, double>>;
/// Splits |str| into normalized tokens and returns them sorted, so that equal words
/// end up next to each other.
vector<strings::UniString> StringToWords(string const & str)
{
  vector<strings::UniString> tokens;
  search::NormalizeAndTokenizeString(str, tokens, search::Delimiters{});
  sort(tokens.begin(), tokens.end());
  return tokens;
}
/// Collapses runs of equal adjacent words of |words| into (word, weight) pairs, where
/// the weight is the placeholder score multiplied by the length of the run.
/// For a sorted input this yields one entry per distinct word.
WeightedBagOfWords MakeWeightedBagOfWords(vector<strings::UniString> const & words)
{
  // TODO(mgsergio): Calculate tf-idf score for every word.
  auto constexpr kTfIdfScorePlaceholder = 1;

  WeightedBagOfWords bag;
  for (auto const & word : words)
  {
    if (!bag.empty() && bag.back().first == word)
    {
      // Another occurrence of the word that opened the current run.
      bag.back().second += kTfIdfScorePlaceholder;  // TODO(mgsergio): tf-idf score for bag.back().first;
    }
    else
    {
      bag.emplace_back(word, kTfIdfScorePlaceholder);
    }
  }
  return bag;
}
/// Dot product of two weighted bags: the sum of weight products over words present in
/// both bags. Walks both bags in parallel, advancing the side with the smaller word,
/// so both bags are expected to be ordered by word.
double WeightedBagsDotProduct(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
  double dot = 0.0;

  auto l = lhs.begin();
  auto r = rhs.begin();
  while (l != lhs.end() && r != rhs.end())
  {
    if (l->first == r->first)
    {
      dot += l->second * r->second;
      ++l;
      ++r;
      continue;
    }

    // Words differ: skip the smaller one.
    if (l->first < r->first)
      ++l;
    else
      ++r;
  }

  return dot;
}
/// Cosine of the angle between two weighted bags of words.
/// @return 0.0 when the bags have a zero dot product, otherwise the dot product
/// divided by the product of the bags' lengths.
double WeightedBagOfWordsCos(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
  auto const dot = WeightedBagsDotProduct(lhs, rhs);

  // WeightedBagsDotProduct returns 0.0 if lhs.empty() || rhs.empty() or
  // if every element of either lhs or rhs is 0.0.
  if (dot == 0.0)
    return 0.0;

  auto const lhsNorm = sqrt(WeightedBagsDotProduct(lhs, lhs));
  auto const rhsNorm = sqrt(WeightedBagsDotProduct(rhs, rhs));
  return dot / (lhsNorm * rhsNorm);
}
} // namespace
namespace generator
{
namespace impl
{
/// Maps |distance| linearly onto a score: 1.0 at distance 0 down to 0.0 at
/// |maxDistance| and beyond. |maxDistance| must not be 0.
double GetLinearNormDistanceScore(double distance, double const maxDistance)
{
  CHECK_NOT_EQUAL(maxDistance, 0.0, ("maxDistance cannot be 0."));

  double const clamped = my::clamp(distance, 0, maxDistance);
  return 1.0 - clamped / maxDistance;
}
/// Similarity of two names as the cosine between their weighted bags of words.
/// @return 1.0 if both names produce no words, 0.0 if exactly one of them produces no
/// words, otherwise the cosine of the two bags.
double GetNameSimilarityScore(string const & booking_name, string const & osm_name)
{
  auto const sponsoredBag = MakeWeightedBagOfWords(StringToWords(booking_name));
  auto const osmBag = MakeWeightedBagOfWords(StringToWords(osm_name));

  if (sponsoredBag.empty() || osmBag.empty())
    return (sponsoredBag.empty() && osmBag.empty()) ? 1.0 : 0.0;

  return WeightedBagOfWordsCos(sponsoredBag, osmBag);
}
} // namespace impl
} // namespace generator

View file

@ -0,0 +1,34 @@
#pragma once
#include "std/string.hpp"
class FeatureBuilder1;
namespace generator
{
namespace impl
{
/// Scoring helper shared by the per-object-type Match() implementations.
/// Takes a distance and the maximal distance to normalize it by.
double GetLinearNormDistanceScore(double distance, double maxDistance);
/// Scoring helper shared by the per-object-type Match() implementations.
/// Compares the name of a sponsored object with the name of an OSM object.
double GetNameSimilarityScore(string const & booking_name, string const & osm_name);
} // namespace impl
namespace sponsored_scoring
{
/// Represents match scoring statistics of a sponsored object against an OSM object.
/// The member functions are declared here and defined per sponsored object type.
template <typename SponsoredObject>
struct MatchStats
{
/// Returns some score based on the given fields and classifier tuning.
double GetMatchingScore() const;
/// Returns true if GetMatchingScore is greater than some threshold.
bool IsMatched() const;
double m_linearNormDistanceScore{};
double m_nameSimilarityScore{};
};
/// Matches a given sponsored object against a given OSM object.
template <typename SponsoredObject>
MatchStats<SponsoredObject> Match(SponsoredObject const & o, FeatureBuilder1 const & fb);
} // namespace sponsored_scoring
} // namespace generator

View file

@ -21,26 +21,22 @@ logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(
def load_binary_list(path):
"""
Loads reference binary classifier output.
"""
"""Loads reference binary classifier output. """
bits = []
with open(path, 'r') as fd:
for line in fd:
if (not line.strip()) or line[0] == '#':
if (not line.strip()) or line.startswith('#'):
continue
bits.append(1 if line[0] == 'y' else 0)
bits.append(1 if line.startswith('y') else 0)
return bits
def load_score_list(path):
"""
Loads list of matching scores.
"""
"""Loads list of matching scores. """
scores = []
with open(path, 'r') as fd:
for line in fd:
if (not line.strip()) or line[0] == '#':
if (not line.strip()) or line.startswith('#'):
continue
scores.append(float(re.search(r'result score: (\d*\.\d+)', line).group(1)))
return scores

View file

@ -0,0 +1,115 @@
#! /usr/bin/env python2.7
# coding: utf-8
from __future__ import print_function
import argparse
import base64
import copy
import json
import logging
import os
import sys
import urllib2
from datetime import datetime
# Initialize logging.
logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s')
# Raised by OpentableDownloader when an access token cannot be obtained.
class OpentableDownloaderError(Exception):
pass
# Downloads restaurant listings from the opentable platform API.
# Usage: construct with client credentials (a token is requested right away), then
# call download() to dump the listings into |opentable_filename|.
# NOTE(review): indentation is not visible in this view; the nesting described in the
# comments below is the most plausible reading and should be confirmed against the file.
class OpentableDownloader(object):
# login/password are the client credentials used for the initial Basic auth request.
# NOTE(review): tsv_filename is stored but not read by any method visible here.
def __init__(self, login, password, opentable_filename, tsv_filename=None):
self.login = login
self.password = password
self.token = None
self.opentable_filename = opentable_filename
self.tsv_filename = tsv_filename
# TODO(mgsergio): Check if token is actual in functions.
self._get_token()
# Fetches listings page by page and writes every item as one JSON document per line
# into self.opentable_filename.
def download(self):
headers = self._add_auth_header({'Content-Type': 'application/json'})
url = 'https://platform.opentable.com/sync/listings'
with open(self.opentable_filename, 'w') as f:
offset = 0
while True:
request = urllib2.Request(url + '?offset={}'.format(offset), headers=headers)
logging.debug('Fetching data with headers %s from %s',
str(headers), request.get_full_url())
resp = urllib2.urlopen(request)
# TODO(mgsergio): Handle exceptions
data = json.loads(resp.read())
for rest in data['items']:
print(json.dumps(rest), file=f)
# Paging: stop once the reported offset plus this page's item count reaches the
# reported total, otherwise request the next page starting right after this one.
total_items = int(data['total_items'])
offset = int(data['offset'])
items_count = len(data['items'])
if total_items <= offset + items_count:
break
offset += items_count
# Requests an access token and stores the parsed JSON response in self.token.
# Raises OpentableDownloaderError if the response code is not 200.
def _get_token(self):
url = 'https://oauth.opentable.com/api/v2/oauth/token?grant_type=client_credentials'
headers = self._add_auth_header({})
request = urllib2.Request(url, headers=headers)
logging.debug('Fetching token with headers %s', str(headers))
resp = urllib2.urlopen(request)
# TODO(mgsergio): Handle exceptions
if resp.getcode() != 200:
raise OpentableDownloaderError("Cant't get token. Response: {}".format(resp.read()))
self.token = json.loads(resp.read())
logging.debug('Token is %s', self.token)
# Adds an Authorization entry to |headers| (modified in place) and returns it.
# Without a token, Basic auth built from login:password is used; with a token,
# its 'token_type' and 'access_token' fields are used.
def _add_auth_header(self, headers):
if self.token is None:
key = base64.b64encode('{}:{}'.format(self.login, self.password))
headers['Authorization'] = 'Basic {}'.format(key)
else:
headers['Authorization'] = '{} {}'.format(self.token['token_type'],
self.token['access_token'])
return headers
# Converts |data_file| (one JSON document per line) into tab-separated rows written to
# |output_file|. Columns: rid, latitude, longitude, name, address, reservation_url,
# phone_number.
# NOTE(review): values are joined as is; a tab or a newline inside any field would
# produce a malformed row -- confirm the upstream data cannot contain them.
def make_tsv(data_file, output_file):
for rest in data_file:
rest = json.loads(rest)
# The address column is "address city country"; if joining raises TypeError
# (presumably when one of the parts is not a string, e.g. null -- confirm),
# the column is left empty.
try:
address = ' '.join([rest['address'], rest['city'], rest['country']])
except TypeError:
address = ''
row = '\t'.join(map(unicode, [rest['rid'], rest['latitude'], rest['longitude'],
rest['name'], address, rest['reservation_url'],
rest['phone_number']]))
print(row.encode('utf-8'), file=output_file)
# Command line entry point: optionally downloads the listings (--download) and/or
# converts a previously downloaded file into tsv (--tsv).
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Downloads opentable data.')
parser.add_argument('-d', '--download', action='store_true', help='Download data')
# --tsv without a value yields '' (write to stdout); when the option is absent the
# value is None and no tsv is produced.
parser.add_argument('--tsv', type=str, nargs='?', const='',
help='A file to put data into, stdout if value is empty '
'If ommited, no tsv data is generated')
parser.add_argument('--opentable_data', type=str, help='Path to opentable data file')
# TODO(mgsergio): Allow config instead.
parser.add_argument('--client', required=True, help='Opentable client id')
parser.add_argument('--secret', required=True, help="Opentable client's secret")
args = parser.parse_args(sys.argv[1:])
if args.download:
print('Downloading')
loader = OpentableDownloader(args.client, args.secret, args.opentable_data)
loader.download()
if args.tsv is not None:
# NOTE(review): neither file is closed explicitly; they are left to interpreter
# shutdown -- consider `with` blocks.
data = open(args.opentable_data)
tsv = open(args.tsv, 'w') if args.tsv else sys.stdout
make_tsv(data, tsv)

View file

@ -170,6 +170,8 @@ ROADS_SCRIPT="$PYTHON_SCRIPTS_PATH/road_runner.py"
HIERARCHY_SCRIPT="$PYTHON_SCRIPTS_PATH/hierarchy_to_countries.py"
BOOKING_SCRIPT="$PYTHON_SCRIPTS_PATH/booking_hotels.py"
BOOKING_FILE="${BOOKING_FILE:-$INTDIR/hotels.csv}"
OPENTABLE_SCRIPT="$PYTHON_SCRIPTS_PATH/opentable_restaurants.py"
OPENTABLE_FILE="${OPENTABLE_FILE:-$INTDIR/restaurants.csv}"
TESTING_SCRIPT="$SCRIPTS_PATH/test_planet.sh"
PYTHON="$(which python2.7)"
MWM_VERSION_FORMAT="%s"
@ -250,9 +252,20 @@ if [ "$MODE" == "coast" ]; then
# Download booking.com hotels. This takes around 3 hours, just like coastline processing.
if [ ! -f "$BOOKING_FILE" -a -n "${BOOKING_USER-}" -a -n "${BOOKING_PASS-}" ]; then
log "STATUS" "Step B: Starting background hotels downloading"
$PYTHON $BOOKING_SCRIPT --user $BOOKING_USER --password $BOOKING_PASS --path "$INTDIR" --download --translate --output "$BOOKING_FILE" 2>"$LOG_PATH"/booking.log &
echo "Hotels have been downloaded. Please ensure this line is before Step 4." >> "$PLANET_LOG"
log "STATUS" "Step S1: Starting background hotels downloading"
(
  # Run the download synchronously inside this subshell so that the
  # completion message below is appended only after the script has exited.
  # The subshell as a whole is what runs in the background; a trailing '&'
  # on the command itself would make the message appear immediately.
  # Credentials are quoted so whitespace or glob characters are passed intact.
  $PYTHON $BOOKING_SCRIPT --user "$BOOKING_USER" --password "$BOOKING_PASS" --path "$INTDIR" --download --translate --output "$BOOKING_FILE" 2>"$LOG_PATH"/booking.log
  echo "Hotels have been downloaded. Please ensure this line is before Step 4." >> "$PLANET_LOG"
) &
fi
# Download opentable.com restaurants. This takes around 30 minutes.
# Runs only when the restaurants file does not exist yet and both
# OPENTABLE_USER and OPENTABLE_PASS are set to non-empty values.
if [ ! -f "$OPENTABLE_FILE" -a -n "${OPENTABLE_USER-}" -a -n "${OPENTABLE_PASS-}" ]; then
  log "STATUS" "Step S2: Starting background restaurants downloading"
  (
    # The script's option is --secret (it is declared as required there).
    # The download runs synchronously inside this subshell so that the
    # completion message is appended only after the script has exited;
    # the subshell as a whole runs in the background.
    $PYTHON $OPENTABLE_SCRIPT --client "$OPENTABLE_USER" --secret "$OPENTABLE_PASS" --opentable_data "$INTDIR"/opentable.json --download --tsv "$OPENTABLE_FILE" 2>"$LOG_PATH"/opentable.log
    echo "Restaurants have been downloaded. Please ensure this line is before Step 4." >> "$PLANET_LOG"
  ) &
fi
[ ! -x "$OSMCTOOLS/osmupdate" ] && cc -x c "$OMIM_PATH/tools/osmctools/osmupdate.c" -o "$OSMCTOOLS/osmupdate"
@ -400,6 +413,7 @@ if [ "$MODE" == "features" ]; then
[ -n "$OPT_WORLD" ] && PARAMS_SPLIT="$PARAMS_SPLIT -generate_world"
[ -n "$OPT_WORLD" -a "$NODE_STORAGE" == "map" ] && log "WARNING: generating world files with NODE_STORAGE=map may lead to an out of memory error. Try NODE_STORAGE=mem if it fails."
[ -f "$BOOKING_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --booking_data=$BOOKING_FILE"
[ -f "$OPENTABLE_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --opentable_data=$OPENTABLE_FILE"
"$GENERATOR_TOOL" --intermediate_data_path="$INTDIR/" --node_storage=$NODE_STORAGE --osm_file_type=o5m --osm_file_name="$PLANET" \
--data_path="$TARGET" --user_resource_path="$DATA_PATH/" $PARAMS_SPLIT 2>> "$PLANET_LOG"
MODE=mwm