forked from organicmaps/organicmaps
Merge pull request #4152 from mgsergio/booking-workflow
[booking] Booking workflow
This commit is contained in:
commit
43dce88ec6
30 changed files with 1184 additions and 539 deletions
data
classificator.txtdrules_proto.bindrules_proto.txtdrules_proto_legacy.bindrules_proto_legacy.txtmapcss-mapping.csv
styles/legacy/include
types.txtvisibility.txtdocs
generator
booking_dataset.cppbooking_dataset.hpp
booking_quality_check
booking_scoring.cppbooking_scoring.hppgenerate_info.hppgenerator.progenerator_tool
opentable_dataset.cppopentable_dataset.hppopentable_scoring.cpposm_source.cppsponsored_dataset.hppsponsored_dataset_inl.hppsponsored_scoring.cppsponsored_scoring.hpptools
|
@ -807,6 +807,7 @@ world +
|
|||
{}
|
||||
sponsored +
|
||||
booking -
|
||||
opentable -
|
||||
{}
|
||||
sport +
|
||||
american_football -
|
||||
|
|
Binary file not shown.
|
@ -76689,6 +76689,33 @@ cont {
|
|||
}
|
||||
}
|
||||
}
|
||||
cont {
|
||||
name: "sponsored-opentable"
|
||||
element {
|
||||
scale: 16
|
||||
symbol {
|
||||
name: "restaurant"
|
||||
apply_for_type: 1
|
||||
priority: 16000
|
||||
}
|
||||
}
|
||||
element {
|
||||
scale: 17
|
||||
symbol {
|
||||
name: "restaurant"
|
||||
apply_for_type: 1
|
||||
priority: 16000
|
||||
}
|
||||
}
|
||||
element {
|
||||
scale: 18
|
||||
symbol {
|
||||
name: "restaurant"
|
||||
apply_for_type: 1
|
||||
priority: 16000
|
||||
}
|
||||
}
|
||||
}
|
||||
cont {
|
||||
name: "sport-american_football"
|
||||
element {
|
||||
|
|
Binary file not shown.
|
@ -65014,6 +65014,39 @@ cont {
|
|||
}
|
||||
}
|
||||
}
|
||||
cont {
|
||||
name: "sponsored-opentable"
|
||||
element {
|
||||
scale: 16
|
||||
symbol {
|
||||
name: "restaurant"
|
||||
apply_for_type: 1
|
||||
priority: 16000
|
||||
}
|
||||
}
|
||||
element {
|
||||
scale: 17
|
||||
symbol {
|
||||
name: "restaurant"
|
||||
apply_for_type: 1
|
||||
priority: 16000
|
||||
}
|
||||
}
|
||||
element {
|
||||
scale: 18
|
||||
symbol {
|
||||
name: "hotel"
|
||||
priority: 16000
|
||||
}
|
||||
}
|
||||
element {
|
||||
scale: 19
|
||||
symbol {
|
||||
name: "hotel"
|
||||
priority: 16000
|
||||
}
|
||||
}
|
||||
}
|
||||
cont {
|
||||
name: "sport-american_football"
|
||||
element {
|
||||
|
|
|
@ -1136,3 +1136,4 @@ olympics|stadium_main;1135;
|
|||
olympics|stadium;1136;
|
||||
olympics|water_sport;1137;
|
||||
olympics|bike_sport;1138;
|
||||
sponsored|opentable;1139;
|
||||
|
|
|
|
@ -602,7 +602,8 @@ line|z16[man_made=pipeline][location=overground]
|
|||
linecap: butt;
|
||||
}
|
||||
area|z16-[amenity=restaurant],
|
||||
node|z16-[amenity=restaurant]
|
||||
node|z16-[amenity=restaurant],
|
||||
node|z16-[sponsored=opentable]
|
||||
{
|
||||
icon-image: restaurant.svg;
|
||||
}
|
||||
|
|
|
@ -1136,3 +1136,4 @@ olympics|stadium_main
|
|||
olympics|stadium
|
||||
olympics|water_sport
|
||||
olympics|bike_sport
|
||||
sponsored|opentable
|
||||
|
|
|
@ -807,6 +807,7 @@ world 00000000000000000000 +
|
|||
{}
|
||||
sponsored 00000000000000000011 +
|
||||
booking 00000000000000000011 -
|
||||
opentable 00000000000000001111 -
|
||||
{}
|
||||
sport 00000000000000000000 +
|
||||
american_football 00000000000000000111 -
|
||||
|
|
|
@ -143,7 +143,9 @@ Can be empty. Example: `$(ls ../../data/borders/{UK*,Ireland}.poly)`.
|
|||
* `SRTM_PATH`: a path to `*.zip` files with SRTM data.
|
||||
* `OSC`: a path to an osmChange file to apply after updating the planet.
|
||||
* `BOOKING_FILE`: a path to hotels.csv with booking data.
|
||||
* `BOOKING_USER` and `BOOKING_PATH`: user name and password for booking.com API
|
||||
* `BOOKING_USER` and `BOOKING_PASS`: user name and password for booking.com API
|
||||
* `OPENTABLE_FILE`: a path to restaurants.csv with opentable data.
|
||||
* `OPENTABLE_USER` and `OPENTABLE_PASS`: user name and password for opentable.com API
|
||||
to download restaurants data.
|
||||
|
||||
### Testing
|
||||
|
|
|
@ -1,133 +1,56 @@
|
|||
#include "generator/booking_dataset.hpp"
|
||||
|
||||
#include "generator/booking_scoring.hpp"
|
||||
#include "generator/feature_builder.hpp"
|
||||
|
||||
#include "platform/local_country_file_utils.hpp"
|
||||
#include "platform/platform.hpp"
|
||||
#include "generator/sponsored_scoring.hpp"
|
||||
|
||||
#include "indexer/classificator.hpp"
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/fstream.hpp"
|
||||
#include "std/iostream.hpp"
|
||||
#include "std/sstream.hpp"
|
||||
#include "boost/algorithm/string/replace.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace
|
||||
{
|
||||
string EscapeTabs(string const & str)
|
||||
{
|
||||
stringstream ss;
|
||||
for (char c : str)
|
||||
{
|
||||
if (c == '\t')
|
||||
ss << "\\t";
|
||||
else
|
||||
ss << c;
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
BookingDataset::Hotel::Hotel(string const & src)
|
||||
// BookingHotel ------------------------------------------------------------------------------------
|
||||
BookingHotel::BookingHotel(string const & src)
|
||||
{
|
||||
vector<string> rec;
|
||||
strings::ParseCSVRow(src, '\t', rec);
|
||||
CHECK(rec.size() == FieldsCount(), ("Error parsing hotels.tsv line:", EscapeTabs(src)));
|
||||
CHECK_EQUAL(rec.size(), FieldsCount(), ("Error parsing hotels.tsv line:",
|
||||
boost::replace_all_copy(src, "\t", "\\t")));
|
||||
|
||||
strings::to_uint(rec[Index(Fields::Id)], id);
|
||||
strings::to_double(rec[Index(Fields::Latitude)], lat);
|
||||
strings::to_double(rec[Index(Fields::Longtitude)], lon);
|
||||
strings::to_uint(rec[FieldIndex(Fields::Id)], m_id.Get());
|
||||
// TODO(mgsergio): Use ms::LatLon.
|
||||
strings::to_double(rec[FieldIndex(Fields::Latitude)], m_latLon.lat);
|
||||
strings::to_double(rec[FieldIndex(Fields::Longtitude)], m_latLon.lon);
|
||||
|
||||
name = rec[Index(Fields::Name)];
|
||||
address = rec[Index(Fields::Address)];
|
||||
m_name = rec[FieldIndex(Fields::Name)];
|
||||
m_address = rec[FieldIndex(Fields::Address)];
|
||||
|
||||
strings::to_uint(rec[Index(Fields::Stars)], stars);
|
||||
strings::to_uint(rec[Index(Fields::PriceCategory)], priceCategory);
|
||||
strings::to_double(rec[Index(Fields::RatingBooking)], ratingBooking);
|
||||
strings::to_double(rec[Index(Fields::RatingUsers)], ratingUser);
|
||||
strings::to_uint(rec[FieldIndex(Fields::Stars)], m_stars);
|
||||
strings::to_uint(rec[FieldIndex(Fields::PriceCategory)], m_priceCategory);
|
||||
strings::to_double(rec[FieldIndex(Fields::RatingBooking)], m_ratingBooking);
|
||||
strings::to_double(rec[FieldIndex(Fields::RatingUsers)], m_ratingUser);
|
||||
|
||||
descUrl = rec[Index(Fields::DescUrl)];
|
||||
m_descUrl = rec[FieldIndex(Fields::DescUrl)];
|
||||
|
||||
strings::to_uint(rec[Index(Fields::Type)], type);
|
||||
strings::to_uint(rec[FieldIndex(Fields::Type)], m_type);
|
||||
|
||||
translations = rec[Index(Fields::Translations)];
|
||||
m_translations = rec[FieldIndex(Fields::Translations)];
|
||||
}
|
||||
|
||||
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h)
|
||||
ostream & operator<<(ostream & s, BookingHotel const & h)
|
||||
{
|
||||
s << fixed << setprecision(7);
|
||||
return s << "Id: " << h.id << "\t Name: " << h.name << "\t Address: " << h.address
|
||||
<< "\t lat: " << h.lat << " lon: " << h.lon;
|
||||
s << "Id: " << h.m_id << "\t Name: " << h.m_name << "\t Address: " << h.m_address
|
||||
<< "\t lat: " << h.m_latLon.lat << " lon: " << h.m_latLon.lon;
|
||||
return s;
|
||||
}
|
||||
|
||||
BookingDataset::AddressMatcher::AddressMatcher()
|
||||
{
|
||||
vector<platform::LocalCountryFile> localFiles;
|
||||
|
||||
Platform & platform = GetPlatform();
|
||||
platform::FindAllLocalMapsInDirectoryAndCleanup(platform.WritableDir(), 0 /* version */,
|
||||
-1 /* latestVersion */, localFiles);
|
||||
|
||||
for (platform::LocalCountryFile const & localFile : localFiles)
|
||||
{
|
||||
LOG(LINFO, ("Found mwm:", localFile));
|
||||
try
|
||||
{
|
||||
m_index.RegisterMap(localFile);
|
||||
}
|
||||
catch (RootException const & ex)
|
||||
{
|
||||
CHECK(false, ("Bad mwm file:", localFile));
|
||||
}
|
||||
}
|
||||
|
||||
m_coder = make_unique<search::ReverseGeocoder>(m_index);
|
||||
}
|
||||
|
||||
void BookingDataset::AddressMatcher::operator()(Hotel & hotel)
|
||||
{
|
||||
search::ReverseGeocoder::Address addr;
|
||||
m_coder->GetNearbyAddress(MercatorBounds::FromLatLon(hotel.lat, hotel.lon), addr);
|
||||
hotel.street = addr.GetStreetName();
|
||||
hotel.houseNumber = addr.GetHouseNumber();
|
||||
}
|
||||
|
||||
BookingDataset::BookingDataset(string const & dataPath, string const & addressReferencePath)
|
||||
{
|
||||
if (dataPath.empty())
|
||||
return;
|
||||
|
||||
ifstream dataSource(dataPath);
|
||||
if (!dataSource.is_open())
|
||||
{
|
||||
LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
|
||||
return;
|
||||
}
|
||||
|
||||
LoadHotels(dataSource, addressReferencePath);
|
||||
}
|
||||
|
||||
BookingDataset::BookingDataset(istream & dataSource, string const & addressReferencePath)
|
||||
{
|
||||
LoadHotels(dataSource, addressReferencePath);
|
||||
}
|
||||
|
||||
size_t BookingDataset::GetMatchingHotelIndex(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
if (CanBeBooking(fb))
|
||||
return MatchWithBooking(fb);
|
||||
return kInvalidHotelIndex;
|
||||
}
|
||||
|
||||
bool BookingDataset::CanBeBooking(FeatureBuilder1 const & fb) const
|
||||
// BookingDataset ----------------------------------------------------------------------------------
|
||||
template <>
|
||||
bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
|
||||
return false;
|
||||
|
@ -135,78 +58,66 @@ bool BookingDataset::CanBeBooking(FeatureBuilder1 const & fb) const
|
|||
return ftypes::IsHotelChecker::Instance()(fb.GetTypes());
|
||||
}
|
||||
|
||||
BookingDataset::Hotel const & BookingDataset::GetHotelById(uint32_t const id) const
|
||||
template <>
|
||||
void BookingDataset::PreprocessMatchedOsmObject(ObjectId, FeatureBuilder1 & fb,
|
||||
function<void(FeatureBuilder1 &)> const fn) const
|
||||
{
|
||||
auto const it = m_hotels.find(id);
|
||||
CHECK(it != end(m_hotels), ("Got wrong hotel id:", id));
|
||||
return it->second;
|
||||
// Turn a hotel into a simple building.
|
||||
if (fb.GetGeomType() == feature::GEOM_AREA)
|
||||
{
|
||||
// Remove all information about the hotel.
|
||||
auto params = fb.GetParams();
|
||||
params.ClearName();
|
||||
auto & meta = params.GetMetadata();
|
||||
meta.Drop(feature::Metadata::EType::FMD_STARS);
|
||||
meta.Drop(feature::Metadata::EType::FMD_WEBSITE);
|
||||
meta.Drop(feature::Metadata::EType::FMD_PHONE_NUMBER);
|
||||
|
||||
auto const tourism = classif().GetTypeByPath({"tourism"});
|
||||
my::EraseIf(params.m_Types, [tourism](uint32_t type)
|
||||
{
|
||||
ftype::TruncValue(type, 1);
|
||||
return type == tourism;
|
||||
});
|
||||
fb.SetParams(params);
|
||||
}
|
||||
|
||||
fn(fb);
|
||||
}
|
||||
|
||||
BookingDataset::Hotel & BookingDataset::GetHotelById(uint32_t const id)
|
||||
{
|
||||
auto it = m_hotels.find(id);
|
||||
CHECK(it != end(m_hotels), ("Got wrong hotel id:", id));
|
||||
return it->second;
|
||||
}
|
||||
|
||||
vector<uint32_t> BookingDataset::GetNearestHotels(ms::LatLon const & latLon, size_t const limit,
|
||||
double const maxDistance /* = 0.0 */) const
|
||||
{
|
||||
namespace bgi = boost::geometry::index;
|
||||
|
||||
vector<uint32_t> indexes;
|
||||
for_each(bgi::qbegin(m_rtree, bgi::nearest(TPoint(latLon.lat, latLon.lon), limit)),
|
||||
bgi::qend(m_rtree), [this, &latLon, &indexes, maxDistance](TValue const & v)
|
||||
{
|
||||
auto const & hotel = GetHotelById(v.second);
|
||||
double const dist = ms::DistanceOnEarth(latLon.lat, latLon.lon, hotel.lat, hotel.lon);
|
||||
if (maxDistance != 0.0 && dist > maxDistance /* max distance in meters */)
|
||||
return;
|
||||
|
||||
indexes.emplace_back(v.second);
|
||||
});
|
||||
|
||||
return indexes;
|
||||
}
|
||||
|
||||
void BookingDataset::BuildHotels(function<void(FeatureBuilder1 &)> const & fn) const
|
||||
{
|
||||
for (auto const & item : m_hotels)
|
||||
BuildHotel(item.second, fn);
|
||||
}
|
||||
|
||||
void BookingDataset::BuildHotel(Hotel const & hotel,
|
||||
function<void(FeatureBuilder1 &)> const & fn) const
|
||||
template <>
|
||||
void BookingDataset::BuildObject(Object const & hotel,
|
||||
function<void(FeatureBuilder1 &)> const & fn) const
|
||||
{
|
||||
FeatureBuilder1 fb;
|
||||
FeatureParams params;
|
||||
|
||||
fb.SetCenter(MercatorBounds::FromLatLon(hotel.lat, hotel.lon));
|
||||
fb.SetCenter(MercatorBounds::FromLatLon(hotel.m_latLon.lat, hotel.m_latLon.lon));
|
||||
|
||||
auto & metadata = params.GetMetadata();
|
||||
metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(hotel.id));
|
||||
metadata.Set(feature::Metadata::FMD_WEBSITE, hotel.descUrl);
|
||||
metadata.Set(feature::Metadata::FMD_RATING, strings::to_string(hotel.ratingUser));
|
||||
metadata.Set(feature::Metadata::FMD_STARS, strings::to_string(hotel.stars));
|
||||
metadata.Set(feature::Metadata::FMD_PRICE_RATE, strings::to_string(hotel.priceCategory));
|
||||
metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(hotel.m_id.Get()));
|
||||
metadata.Set(feature::Metadata::FMD_WEBSITE, hotel.m_descUrl);
|
||||
metadata.Set(feature::Metadata::FMD_RATING, strings::to_string(hotel.m_ratingUser));
|
||||
metadata.Set(feature::Metadata::FMD_STARS, strings::to_string(hotel.m_stars));
|
||||
metadata.Set(feature::Metadata::FMD_PRICE_RATE, strings::to_string(hotel.m_priceCategory));
|
||||
|
||||
// params.AddAddress(hotel.address);
|
||||
// TODO(mgsergio): addr:full ???
|
||||
|
||||
if (!hotel.street.empty())
|
||||
fb.AddStreet(hotel.street);
|
||||
if (!hotel.m_street.empty())
|
||||
fb.AddStreet(hotel.m_street);
|
||||
|
||||
if (!hotel.houseNumber.empty())
|
||||
fb.AddHouseNumber(hotel.houseNumber);
|
||||
if (!hotel.m_houseNumber.empty())
|
||||
fb.AddHouseNumber(hotel.m_houseNumber);
|
||||
|
||||
params.AddName(StringUtf8Multilang::GetLangByCode(StringUtf8Multilang::kDefaultCode),
|
||||
hotel.name);
|
||||
if (!hotel.translations.empty())
|
||||
hotel.m_name);
|
||||
if (!hotel.m_translations.empty())
|
||||
{
|
||||
// TODO(mgsergio): Move parsing to the hotel construction stage.
|
||||
vector<string> parts;
|
||||
strings::ParseCSVRow(hotel.translations, '|', parts);
|
||||
CHECK_EQUAL(parts.size() % 3, 0, ("Invalid translation string:", hotel.translations));
|
||||
strings::ParseCSVRow(hotel.m_translations, '|', parts);
|
||||
CHECK_EQUAL(parts.size() % 3, 0, ("Invalid translation string:", hotel.m_translations));
|
||||
for (auto i = 0; i < parts.size(); i += 3)
|
||||
{
|
||||
auto const langCode = StringUtf8Multilang::GetLangIndex(parts[i]);
|
||||
|
@ -219,7 +130,7 @@ void BookingDataset::BuildHotel(Hotel const & hotel,
|
|||
params.AddType(clf.GetTypeByPath({"sponsored", "booking"}));
|
||||
// Matching booking.com hotel types to OpenStreetMap values.
|
||||
// Booking types are listed in the closed API docs.
|
||||
switch (hotel.type)
|
||||
switch (hotel.m_type)
|
||||
{
|
||||
case 19:
|
||||
case 205: params.AddType(clf.GetTypeByPath({"tourism", "motel"})); break;
|
||||
|
@ -273,68 +184,24 @@ void BookingDataset::BuildHotel(Hotel const & hotel,
|
|||
fn(fb);
|
||||
}
|
||||
|
||||
void BookingDataset::LoadHotels(istream & src, string const & addressReferencePath)
|
||||
{
|
||||
m_hotels.clear();
|
||||
m_rtree.clear();
|
||||
|
||||
for (string line; getline(src, line);)
|
||||
{
|
||||
Hotel hotel(line);
|
||||
m_hotels.emplace(hotel.id, hotel);
|
||||
}
|
||||
|
||||
if (!addressReferencePath.empty())
|
||||
{
|
||||
LOG(LINFO, ("Reference addresses for booking objects", addressReferencePath));
|
||||
Platform & platform = GetPlatform();
|
||||
string const backupPath = platform.WritableDir();
|
||||
platform.SetWritableDirForTests(addressReferencePath);
|
||||
|
||||
AddressMatcher addressMatcher;
|
||||
|
||||
size_t matchedNum = 0;
|
||||
size_t emptyAddr = 0;
|
||||
for (auto & item : m_hotels)
|
||||
{
|
||||
auto & hotel = item.second;
|
||||
addressMatcher(hotel);
|
||||
|
||||
if (hotel.address.empty())
|
||||
++emptyAddr;
|
||||
if (hotel.IsAddressPartsFilled())
|
||||
++matchedNum;
|
||||
}
|
||||
LOG(LINFO,
|
||||
("Num of hotels:", m_hotels.size(), "matched:", matchedNum, "empty addresses:", emptyAddr));
|
||||
platform.SetWritableDirForTests(backupPath);
|
||||
}
|
||||
|
||||
for (auto const & item : m_hotels)
|
||||
{
|
||||
auto const & hotel = item.second;
|
||||
TBox b(TPoint(hotel.lat, hotel.lon), TPoint(hotel.lat, hotel.lon));
|
||||
m_rtree.insert(make_pair(b, hotel.id));
|
||||
}
|
||||
}
|
||||
|
||||
size_t BookingDataset::MatchWithBooking(FeatureBuilder1 const & fb) const
|
||||
template <>
|
||||
BookingDataset::ObjectId BookingDataset::FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
auto const name = fb.GetName(StringUtf8Multilang::kDefaultCode);
|
||||
|
||||
if (name.empty())
|
||||
return false;
|
||||
return Object::InvalidObjectId();
|
||||
|
||||
// Find |kMaxSelectedElements| nearest values to a point.
|
||||
auto const bookingIndexes = GetNearestHotels(MercatorBounds::ToLatLon(fb.GetKeyPoint()),
|
||||
kMaxSelectedElements, kDistanceLimitInMeters);
|
||||
auto const bookingIndexes = GetNearestObjects(MercatorBounds::ToLatLon(fb.GetKeyPoint()),
|
||||
kMaxSelectedElements, kDistanceLimitInMeters);
|
||||
|
||||
for (uint32_t const j : bookingIndexes)
|
||||
for (auto const j : bookingIndexes)
|
||||
{
|
||||
if (booking_scoring::Match(GetHotelById(j), fb).IsMatched())
|
||||
if (sponsored_scoring::Match(GetObjectById(j), fb).IsMatched())
|
||||
return j;
|
||||
}
|
||||
|
||||
return kInvalidHotelIndex;
|
||||
return Object::InvalidObjectId();
|
||||
}
|
||||
} // namespace generator
|
||||
|
|
|
@ -1,116 +1,68 @@
|
|||
#pragma once
|
||||
|
||||
#include "indexer/index.hpp"
|
||||
#include "generator/sponsored_dataset.hpp"
|
||||
|
||||
#include "search/reverse_geocoder.hpp"
|
||||
#include "geometry/latlon.hpp"
|
||||
|
||||
#include "boost/geometry.hpp"
|
||||
#include "boost/geometry/geometries/point.hpp"
|
||||
#include "boost/geometry/geometries/box.hpp"
|
||||
#include "boost/geometry/index/rtree.hpp"
|
||||
#include "base/newtype.hpp"
|
||||
|
||||
#include "std/function.hpp"
|
||||
#include "std/limits.hpp"
|
||||
#include "std/map.hpp"
|
||||
#include "std/string.hpp"
|
||||
|
||||
class FeatureBuilder1;
|
||||
|
||||
namespace generator
|
||||
{
|
||||
class BookingDataset
|
||||
// TODO(mgsergio): Try to get rid of code duplication. (See OpenTableRestaurant)
|
||||
struct BookingHotel
|
||||
{
|
||||
public:
|
||||
static double constexpr kDistanceLimitInMeters = 150;
|
||||
static size_t constexpr kMaxSelectedElements = 3;
|
||||
static auto constexpr kInvalidHotelIndex = numeric_limits<uint32_t>::max();
|
||||
NEWTYPE(uint32_t, ObjectId);
|
||||
|
||||
struct Hotel
|
||||
enum class Fields
|
||||
{
|
||||
enum class Fields
|
||||
{
|
||||
Id = 0,
|
||||
Latitude = 1,
|
||||
Longtitude = 2,
|
||||
Name = 3,
|
||||
Address = 4,
|
||||
Stars = 5,
|
||||
PriceCategory = 6,
|
||||
RatingBooking = 7,
|
||||
RatingUsers = 8,
|
||||
DescUrl = 9,
|
||||
Type = 10,
|
||||
Translations = 11,
|
||||
|
||||
Counter
|
||||
};
|
||||
|
||||
// TODO(mgsergio): Make a separate type for this or an alias.
|
||||
uint32_t id = 0;
|
||||
double lat = 0.0;
|
||||
double lon = 0.0;
|
||||
string name;
|
||||
string address;
|
||||
string street;
|
||||
string houseNumber;
|
||||
uint32_t stars = 0;
|
||||
uint32_t priceCategory = 0;
|
||||
double ratingBooking = 0.0;
|
||||
double ratingUser = 0.0;
|
||||
string descUrl;
|
||||
uint32_t type = 0;
|
||||
string translations;
|
||||
|
||||
static constexpr size_t Index(Fields field) { return static_cast<size_t>(field); }
|
||||
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
|
||||
explicit Hotel(string const & src);
|
||||
|
||||
inline bool IsAddressPartsFilled() const { return !street.empty() || !houseNumber.empty(); }
|
||||
Id = 0,
|
||||
Latitude = 1,
|
||||
Longtitude = 2,
|
||||
Name = 3,
|
||||
Address = 4,
|
||||
Stars = 5,
|
||||
PriceCategory = 6,
|
||||
RatingBooking = 7,
|
||||
RatingUsers = 8,
|
||||
DescUrl = 9,
|
||||
Type = 10,
|
||||
Translations = 11,
|
||||
Counter
|
||||
};
|
||||
|
||||
class AddressMatcher
|
||||
static constexpr ObjectId InvalidObjectId()
|
||||
{
|
||||
Index m_index;
|
||||
unique_ptr<search::ReverseGeocoder> m_coder;
|
||||
return ObjectId(numeric_limits<typename ObjectId::RepType>::max());
|
||||
}
|
||||
|
||||
public:
|
||||
AddressMatcher();
|
||||
void operator()(Hotel & hotel);
|
||||
};
|
||||
explicit BookingHotel(string const & src);
|
||||
|
||||
explicit BookingDataset(string const & dataPath, string const & addressReferencePath = string());
|
||||
explicit BookingDataset(istream & dataSource, string const & addressReferencePath = string());
|
||||
static constexpr size_t FieldIndex(Fields field) { return static_cast<size_t>(field); }
|
||||
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
|
||||
|
||||
/// @return an index of a matched hotel or kInvalidHotelIndex on failure.
|
||||
size_t GetMatchingHotelIndex(FeatureBuilder1 const & fb) const;
|
||||
/// @return true if |fb| is a hotel with a name.
|
||||
bool CanBeBooking(FeatureBuilder1 const & fb) const;
|
||||
bool HasAddresParts() const { return !m_street.empty() || !m_houseNumber.empty(); }
|
||||
|
||||
inline size_t Size() const { return m_hotels.size(); }
|
||||
Hotel const & GetHotelById(uint32_t id) const;
|
||||
Hotel & GetHotelById(uint32_t id);
|
||||
vector<uint32_t> GetNearestHotels(ms::LatLon const & latLon, size_t limit,
|
||||
double maxDistance = 0.0) const;
|
||||
bool MatchByName(string const & osmName, vector<size_t> const & bookingIndexes) const;
|
||||
ObjectId m_id{InvalidObjectId()};
|
||||
ms::LatLon m_latLon = ms::LatLon::Zero();
|
||||
string m_name;
|
||||
string m_street;
|
||||
string m_houseNumber;
|
||||
|
||||
void BuildHotels(function<void(FeatureBuilder1 &)> const & fn) const;
|
||||
|
||||
protected:
|
||||
map<uint32_t, Hotel> m_hotels;
|
||||
|
||||
using TPoint = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
|
||||
using TBox = boost::geometry::model::box<TPoint>;
|
||||
using TValue = pair<TBox, uint32_t>;
|
||||
|
||||
// Create the rtree using default constructor.
|
||||
boost::geometry::index::rtree<TValue, boost::geometry::index::quadratic<16>> m_rtree;
|
||||
|
||||
void BuildHotel(Hotel const & hotel, function<void(FeatureBuilder1 &)> const & fn) const;
|
||||
|
||||
void LoadHotels(istream & path, string const & addressReferencePath);
|
||||
/// @return an index of a matched hotel or numeric_limits<size_t>::max() on failure.
|
||||
size_t MatchWithBooking(FeatureBuilder1 const & e) const;
|
||||
string m_address;
|
||||
uint32_t m_stars = 0;
|
||||
uint32_t m_priceCategory = 0;
|
||||
double m_ratingBooking = 0.0;
|
||||
double m_ratingUser = 0.0;
|
||||
string m_descUrl;
|
||||
uint32_t m_type = 0;
|
||||
string m_translations;
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h);
|
||||
ostream & operator<<(ostream & s, BookingHotel const & h);
|
||||
|
||||
NEWTYPE_SIMPLE_OUTPUT(BookingHotel::ObjectId);
|
||||
using BookingDataset = SponsoredDataset<BookingHotel>;
|
||||
} // namespace generator
|
||||
|
|
|
@ -87,7 +87,7 @@ int main(int argc, char * argv[])
|
|||
if (hotel.address.empty())
|
||||
++emptyAddr;
|
||||
|
||||
if (hotel.IsAddressPartsFilled())
|
||||
if (hotel.HasAddresParts())
|
||||
{
|
||||
++matchedNum;
|
||||
cout << "[" << i << "/" << bookingDataset.Size() << "] Hotel: " << hotel.address
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
#include "generator/booking_dataset.hpp"
|
||||
#include "generator/booking_scoring.hpp"
|
||||
#include "generator/feature_builder.hpp"
|
||||
#include "generator/opentable_dataset.hpp"
|
||||
#include "generator/osm_source.hpp"
|
||||
#include "generator/sponsored_scoring.hpp"
|
||||
|
||||
#include "indexer/classificator_loader.hpp"
|
||||
|
||||
|
@ -21,12 +22,19 @@
|
|||
|
||||
#include "3party/gflags/src/gflags/gflags.h"
|
||||
|
||||
#include "boost/range/adaptor/map.hpp"
|
||||
#include "boost/range/algorithm/copy.hpp"
|
||||
|
||||
|
||||
DEFINE_string(osm, "", "Input .o5m file");
|
||||
DEFINE_string(booking, "", "Path to booking data in .tsv format");
|
||||
DEFINE_string(opentable, "", "Path to opentable data in .tsv format");
|
||||
DEFINE_string(factors, "", "Factors output path");
|
||||
DEFINE_string(sample, "", "Path so sample file");
|
||||
DEFINE_uint64(selection_size, 1000, "Selection size");
|
||||
|
||||
DEFINE_uint64(seed, minstd_rand::default_seed, "Seed for random shuffle");
|
||||
DEFINE_uint64(selection_size, 1000, "Selection size");
|
||||
DEFINE_bool(generate, false, "Generate unmarked sample");
|
||||
|
||||
using namespace generator;
|
||||
|
||||
|
@ -89,11 +97,12 @@ osm::Id ReadDebuggedPrintedOsmId(string const & str)
|
|||
MYTHROW(ParseError, ("Can't make osmId from string", str));
|
||||
}
|
||||
|
||||
template <typename Dataset>
|
||||
class Emitter : public EmitterBase
|
||||
{
|
||||
public:
|
||||
Emitter(BookingDataset const & booking, map<osm::Id, FeatureBuilder1> & features)
|
||||
: m_bookingDataset(booking)
|
||||
Emitter(Dataset const & dataset, map<osm::Id, FeatureBuilder1> & features)
|
||||
: m_dataset(dataset)
|
||||
, m_features(features)
|
||||
{
|
||||
LOG_SHORT(LINFO, ("OSM data:", FLAGS_osm));
|
||||
|
@ -101,7 +110,7 @@ public:
|
|||
|
||||
void operator()(FeatureBuilder1 & fb) override
|
||||
{
|
||||
if (m_bookingDataset.CanBeBooking(fb))
|
||||
if (m_dataset.NecessaryMatchingConditionHolds(fb))
|
||||
m_features.emplace(fb.GetMostGenericOsmId(), fb);
|
||||
}
|
||||
|
||||
|
@ -117,7 +126,7 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
BookingDataset const & m_bookingDataset;
|
||||
Dataset const & m_dataset;
|
||||
map<osm::Id, FeatureBuilder1> & m_features;
|
||||
};
|
||||
|
||||
|
@ -125,6 +134,7 @@ feature::GenerateInfo GetGenerateInfo()
|
|||
{
|
||||
feature::GenerateInfo info;
|
||||
info.m_bookingDatafileName = FLAGS_booking;
|
||||
info.m_opentableDatafileName = FLAGS_opentable;
|
||||
info.m_osmFileName = FLAGS_osm;
|
||||
info.SetNodeStorageType("map");
|
||||
info.SetOsmFileType("o5m");
|
||||
|
@ -136,111 +146,242 @@ feature::GenerateInfo GetGenerateInfo()
|
|||
return info;
|
||||
}
|
||||
|
||||
template <typename Object>
|
||||
struct SampleItem
|
||||
{
|
||||
enum MatchStatus {Uninitialized, Yes, No};
|
||||
using ObjectId = typename Object::ObjectId;
|
||||
|
||||
SampleItem() = default;
|
||||
|
||||
SampleItem(osm::Id const & osmId, uint32_t const bookingId, MatchStatus const match = Uninitialized)
|
||||
SampleItem(osm::Id const & osmId, ObjectId const sponsoredId, MatchStatus const match = Uninitialized)
|
||||
: m_osmId(osmId)
|
||||
, m_bookingId(bookingId)
|
||||
, m_sponsoredId(sponsoredId)
|
||||
, m_match(match)
|
||||
{
|
||||
}
|
||||
|
||||
osm::Id m_osmId;
|
||||
uint32_t m_bookingId = BookingDataset::kInvalidHotelIndex;
|
||||
ObjectId m_sponsoredId = Object::InvalidObjectId();
|
||||
|
||||
MatchStatus m_match = Uninitialized;
|
||||
};
|
||||
|
||||
SampleItem::MatchStatus ReadMatchStatus(string const & str)
|
||||
template <typename Object>
|
||||
typename SampleItem<Object>::MatchStatus ReadMatchStatus(string const & str)
|
||||
{
|
||||
if (str == "Yes")
|
||||
return SampleItem::Yes;
|
||||
return SampleItem<Object>::Yes;
|
||||
|
||||
if (str == "No")
|
||||
return SampleItem::No;
|
||||
return SampleItem<Object>::No;
|
||||
|
||||
if (str == "Uninitialized")
|
||||
return SampleItem::Uninitialized;
|
||||
return SampleItem<Object>::Uninitialized;
|
||||
|
||||
MYTHROW(ParseError, ("Can't make SampleItem::MatchStatus from string:", str));
|
||||
}
|
||||
|
||||
SampleItem ReadSampleItem(string const & str)
|
||||
template <typename Object>
|
||||
SampleItem<Object> ReadSampleItem(string const & str)
|
||||
{
|
||||
SampleItem item;
|
||||
SampleItem<Object> item;
|
||||
|
||||
auto const parts = strings::Tokenize(str, "\t");
|
||||
CHECK_EQUAL(parts.size(), 3, ("Cant't make SampleItem from string:", str,
|
||||
"due to wrong number of fields."));
|
||||
|
||||
item.m_osmId = ReadDebuggedPrintedOsmId(parts[0]);
|
||||
if (!strings::to_uint(parts[1], item.m_bookingId))
|
||||
if (!strings::to_uint(parts[1], item.m_sponsoredId.Get()))
|
||||
MYTHROW(ParseError, ("Can't make uint32 from string:", parts[1]));
|
||||
item.m_match = ReadMatchStatus(parts[2]);
|
||||
item.m_match = ReadMatchStatus<Object>(parts[2]);
|
||||
|
||||
return item;
|
||||
}
|
||||
|
||||
vector<SampleItem> ReadSample(istream & ist)
|
||||
template <typename Object>
|
||||
vector<SampleItem<Object>> ReadSample(istream & ist)
|
||||
{
|
||||
vector<SampleItem> result;
|
||||
vector<SampleItem<Object>> result;
|
||||
|
||||
size_t lineNumber = 1;
|
||||
try
|
||||
{
|
||||
for (string line; getline(ist, line); ++lineNumber)
|
||||
{
|
||||
result.emplace_back(ReadSampleItem(line));
|
||||
result.emplace_back(ReadSampleItem<Object>(line));
|
||||
}
|
||||
}
|
||||
catch (ParseError const & e)
|
||||
{
|
||||
LOG(LERROR, ("Wrong format: line", lineNumber, e.Msg()));
|
||||
LOG_SHORT(LERROR, ("Wrong format: line", lineNumber, e.Msg()));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
vector<SampleItem> ReadSampleFromFile(string const & name)
|
||||
template <typename Object>
|
||||
vector<SampleItem<Object>> ReadSampleFromFile(string const & name)
|
||||
{
|
||||
ifstream ist(name);
|
||||
CHECK(ist.is_open(), ("Can't open file:", name, strerror(errno)));
|
||||
return ReadSample(ist);
|
||||
return ReadSample<Object>(ist);
|
||||
}
|
||||
|
||||
void GenerateFactors(BookingDataset const & booking, map<osm::Id, FeatureBuilder1> const & features,
|
||||
vector<SampleItem> const & sampleItems, ostream & ost)
|
||||
template <typename Dataset, typename Object = typename Dataset::Object>
|
||||
void GenerateFactors(Dataset const & dataset,
|
||||
map<osm::Id, FeatureBuilder1> const & features,
|
||||
vector<SampleItem<Object>> const & sampleItems, ostream & ost)
|
||||
{
|
||||
for (auto const & item : sampleItems)
|
||||
{
|
||||
auto const & hotel = booking.GetHotelById(item.m_bookingId);
|
||||
auto const & object = dataset.GetObjectById(item.m_sponsoredId);
|
||||
auto const & feature = features.at(item.m_osmId);
|
||||
|
||||
auto const score = booking_scoring::Match(hotel, feature);
|
||||
auto const score = generator::sponsored_scoring::Match(object, feature);
|
||||
|
||||
auto const center = MercatorBounds::ToLatLon(feature.GetKeyPoint());
|
||||
double const distanceMeters = ms::DistanceOnEarth(center.lat, center.lon,
|
||||
hotel.lat, hotel.lon);
|
||||
double const distanceMeters = ms::DistanceOnEarth(center, object.m_latLon);
|
||||
auto const matched = score.IsMatched();
|
||||
|
||||
ost << "# ------------------------------------------" << fixed << setprecision(6)
|
||||
<< endl;
|
||||
ost << (matched ? 'y' : 'n') << " \t" << DebugPrint(feature.GetMostGenericOsmId())
|
||||
<< "\t " << hotel.id
|
||||
<< "\t " << object.m_id
|
||||
<< "\tdistance: " << distanceMeters
|
||||
<< "\tdistance score: " << score.m_linearNormDistanceScore
|
||||
<< "\tname score: " << score.m_nameSimilarityScore
|
||||
<< "\tresult score: " << score.GetMatchingScore()
|
||||
<< endl;
|
||||
ost << "# " << PrintBuilder(feature) << endl;
|
||||
ost << "# " << hotel << endl;
|
||||
ost << "# URL: https://www.openstreetmap.org/?mlat=" << hotel.lat
|
||||
<< "&mlon=" << hotel.lon << "#map=18/" << hotel.lat << "/" << hotel.lon << endl;
|
||||
ost << "# " << object << endl;
|
||||
ost << "# URL: https://www.openstreetmap.org/?mlat="
|
||||
<< object.m_latLon.lat << "&mlon=" << object.m_latLon.lon << "#map=18/"
|
||||
<< object.m_latLon.lat << "/" << object.m_latLon.lon << endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Identifies which sponsored dataset a run works with; Run() dispatches on this value.
enum class DatasetType
|
||||
{
|
||||
Booking,
|
||||
Opentable
|
||||
};
|
||||
|
||||
template <typename Dataset, typename Object = typename Dataset::Object>
|
||||
void GenerateSample(Dataset const & dataset,
|
||||
map<osm::Id, FeatureBuilder1> const & features,
|
||||
ostream & ost)
|
||||
{
|
||||
LOG_SHORT(LINFO, ("Num of elements:", features.size()));
|
||||
vector<osm::Id> elementIndexes(features.size());
|
||||
boost::copy(features | boost::adaptors::map_keys, begin(elementIndexes));
|
||||
|
||||
// TODO(mgsergio): Try RandomSample (from search:: at the moment of writing).
|
||||
shuffle(elementIndexes.begin(), elementIndexes.end(), minstd_rand(FLAGS_seed));
|
||||
if (FLAGS_selection_size < elementIndexes.size())
|
||||
elementIndexes.resize(FLAGS_selection_size);
|
||||
|
||||
stringstream outStream;
|
||||
|
||||
for (auto osmId : elementIndexes)
|
||||
{
|
||||
auto const & fb = features.at(osmId);
|
||||
auto const sponsoredIndexes = dataset.GetNearestObjects(
|
||||
MercatorBounds::ToLatLon(fb.GetKeyPoint()),
|
||||
Dataset::kMaxSelectedElements,
|
||||
Dataset::kDistanceLimitInMeters);
|
||||
|
||||
for (auto const sponsoredId : sponsoredIndexes)
|
||||
{
|
||||
auto const & object = dataset.GetObjectById(sponsoredId);
|
||||
auto const score = sponsored_scoring::Match(object, fb);
|
||||
|
||||
auto const center = MercatorBounds::ToLatLon(fb.GetKeyPoint());
|
||||
double const distanceMeters = ms::DistanceOnEarth(center, object.m_latLon);
|
||||
auto const matched = score.IsMatched();
|
||||
|
||||
outStream << "# ------------------------------------------" << fixed << setprecision(6)
|
||||
<< endl;
|
||||
outStream << (matched ? 'y' : 'n') << " \t" << DebugPrint(osmId) << "\t " << sponsoredId
|
||||
<< "\tdistance: " << distanceMeters
|
||||
<< "\tdistance score: " << score.m_linearNormDistanceScore
|
||||
<< "\tname score: " << score.m_nameSimilarityScore
|
||||
<< "\tresult score: " << score.GetMatchingScore()
|
||||
<< endl;
|
||||
outStream << "# " << PrintBuilder(fb) << endl;
|
||||
outStream << "# " << object << endl;
|
||||
outStream << "# URL: https://www.openstreetmap.org/?mlat="
|
||||
<< object.m_latLon.lat << "&mlon=" << object.m_latLon.lon
|
||||
<< "#map=18/" << object.m_latLon.lat << "/" << object.m_latLon.lon << endl;
|
||||
}
|
||||
if (!sponsoredIndexes.empty())
|
||||
outStream << endl << endl;
|
||||
}
|
||||
|
||||
if (FLAGS_sample.empty())
|
||||
{
|
||||
cout << outStream.str();
|
||||
}
|
||||
else
|
||||
{
|
||||
ofstream file(FLAGS_sample);
|
||||
if (file.is_open())
|
||||
file << outStream.str();
|
||||
else
|
||||
LOG_SHORT(LERROR, ("Can't output into", FLAGS_sample, strerror(errno)));
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the path of the data file for the given Dataset type, taken from |info|.
// Only the explicit specializations below are defined.
template <typename Dataset>
|
||||
string GetDatasetFilePath(feature::GenerateInfo const & info);
|
||||
|
||||
// Booking: the path stored in GenerateInfo::m_bookingDatafileName.
template <>
|
||||
string GetDatasetFilePath<BookingDataset>(feature::GenerateInfo const & info)
|
||||
{
|
||||
return info.m_bookingDatafileName;
|
||||
}
|
||||
|
||||
// Opentable: the path stored in GenerateInfo::m_opentableDatafileName.
template <>
|
||||
string GetDatasetFilePath<OpentableDataset>(feature::GenerateInfo const & info)
|
||||
{
|
||||
return info.m_opentableDatafileName;
|
||||
}
|
||||
|
||||
template <typename Dataset, typename Object = typename Dataset::Object>
|
||||
void RunImpl(feature::GenerateInfo & info)
|
||||
{
|
||||
// TODO(mgsergio): Log correctly LOG_SHORT(LINFO, ("Booking data:", FLAGS_booking));
|
||||
Dataset dataset(GetDatasetFilePath<Dataset>(info));
|
||||
LOG_SHORT(LINFO, (dataset.Size(), "objects are loaded from a Dataset."));
|
||||
|
||||
map<osm::Id, FeatureBuilder1> features;
|
||||
GenerateFeatures(info, [&dataset, &features](feature::GenerateInfo const & /* info */)
|
||||
{
|
||||
return make_unique<Emitter<Dataset>>(dataset, features);
|
||||
});
|
||||
|
||||
if (FLAGS_generate)
|
||||
{
|
||||
ofstream ost(FLAGS_sample);
|
||||
GenerateSample(dataset, features, ost);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto const sample = ReadSampleFromFile<Object>(FLAGS_sample);
|
||||
LOG_SHORT(LINFO, ("Sample size is", sample.size()));
|
||||
ofstream ost(FLAGS_factors);
|
||||
CHECK(ost.is_open(), ("Can't open file", FLAGS_factors, strerror(errno)));
|
||||
GenerateFactors<Dataset>(dataset, features, sample, ost);
|
||||
}
|
||||
}
|
||||
|
||||
/// Dispatches to RunImpl instantiated for the dataset selected by |datasetType|.
void Run(DatasetType const datasetType, feature::GenerateInfo & info)
{
  switch (datasetType)
  {
  case DatasetType::Booking:
    RunImpl<BookingDataset>(info);
    return;
  case DatasetType::Opentable:
    RunImpl<OpentableDataset>(info);
    return;
  }
}
|
||||
} // namespace
|
||||
|
@ -259,31 +400,19 @@ int main(int argc, char * argv[])
|
|||
|
||||
CHECK(!FLAGS_sample.empty(), ("Please specify sample path."));
|
||||
CHECK(!FLAGS_osm.empty(), ("Please specify osm path."));
|
||||
CHECK(!FLAGS_booking.empty(), ("Please specify booking path."));
|
||||
CHECK(!FLAGS_factors.empty(), ("Please specify factors path."));
|
||||
CHECK(!FLAGS_booking.empty() ^ !FLAGS_opentable.empty(),
|
||||
("Please specify either booking or opentable path."));
|
||||
// Exactly one of "factors path" / "-generate" must be given. The trailing space keeps the
// two adjacent literals from being glued together into "pathor".
CHECK(!FLAGS_factors.empty() ^ FLAGS_generate, ("Please either specify factors path "
                                                "or use -generate."));
|
||||
|
||||
auto const datasetType = FLAGS_booking.empty() ? DatasetType::Opentable : DatasetType::Booking;
|
||||
|
||||
classificator::Load();
|
||||
|
||||
auto info = GetGenerateInfo();
|
||||
GenerateIntermediateData(info);
|
||||
|
||||
LOG_SHORT(LINFO, ("Booking data:", FLAGS_booking));
|
||||
BookingDataset booking(info.m_bookingDatafileName);
|
||||
LOG_SHORT(LINFO, (booking.Size(), "hotels are loaded from Booking."));
|
||||
|
||||
map<osm::Id, FeatureBuilder1> features;
|
||||
GenerateFeatures(info, [&booking, &features](feature::GenerateInfo const & /* info */)
|
||||
{
|
||||
return make_unique<Emitter>(booking, features);
|
||||
});
|
||||
|
||||
auto const sample = ReadSampleFromFile(FLAGS_sample);
|
||||
LOG(LINFO, ("Sample size is", sample.size()));
|
||||
{
|
||||
ofstream ost(FLAGS_factors);
|
||||
CHECK(ost.is_open(), ("Can't open file", FLAGS_factors, strerror(errno)));
|
||||
GenerateFactors(booking, features, sample, ost);
|
||||
}
|
||||
Run(datasetType, info);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,144 +1,47 @@
|
|||
#include "generator/booking_scoring.hpp"
|
||||
#include "generator/sponsored_scoring.hpp"
|
||||
|
||||
#include "generator/booking_dataset.hpp"
|
||||
#include "generator/feature_builder.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
#include "base/collection_cast.hpp"
|
||||
#include "base/stl_iterator.hpp"
|
||||
|
||||
#include "std/algorithm.hpp"
|
||||
#include "std/vector.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace booking_scoring
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Calculated with tools/python/booking_hotels_quality.py.
|
||||
double constexpr kOptimalThreshold = 0.304875;
|
||||
|
||||
// Compile-time predicate: inherits from the std::is_same result of comparing
// std::decay<T>::type with U.
template <typename T, typename U>
|
||||
struct decay_equiv :
|
||||
std::is_same<typename std::decay<T>::type, U>::type
|
||||
{};
|
||||
|
||||
using WeightedBagOfWords = vector<pair<strings::UniString, double>>;
|
||||
|
||||
// Splits |str| into normalized tokens and returns them sorted.
// No de-duplication is done here.
vector<strings::UniString> StringToSetOfWords(string const & str)
{
  vector<strings::UniString> tokens;
  search::NormalizeAndTokenizeString(str, tokens, search::Delimiters{});
  sort(tokens.begin(), tokens.end());
  return tokens;
}
|
||||
|
||||
// Collapses runs of equal adjacent words into (word, weight) entries.
// Every occurrence contributes kTfIdfScorePlaceholder to the weight, so with the current
// placeholder the weight is the number of occurrences. Equal words are only merged when
// they are adjacent, i.e. |words| is expected to be sorted.
WeightedBagOfWords MakeWeightedBagOfWords(vector<strings::UniString> const & words)
{
  // TODO(mgsergio): Calculate tf-idf score for every word.
  auto constexpr kTfIdfScorePlaceholder = 1;

  WeightedBagOfWords result;
  // size_t index: avoids comparing a signed int with words.size().
  for (size_t i = 0; i < words.size(); ++i)
  {
    result.emplace_back(words[i], kTfIdfScorePlaceholder);
    while (i + 1 < words.size() && words[i] == words[i + 1])
    {
      // TODO(mgsergio): tf-idf score for result[i].first;
      result.back().second += kTfIdfScorePlaceholder;
      ++i;
    }
  }
  return result;
}
|
||||
|
||||
// Dot product of two weighted bags: the sum of weight products over words present in both.
// Walks both bags in parallel, advancing the side with the smaller word, so both bags are
// expected to be ordered by word.
double WeightedBagsDotProduct(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
  double dot = 0.0;

  auto l = lhs.begin();
  auto r = rhs.begin();
  auto const lEnd = lhs.end();
  auto const rEnd = rhs.end();

  while (l != lEnd && r != rEnd)
  {
    if (l->first == r->first)
    {
      dot += l->second * r->second;
      ++l;
      ++r;
      continue;
    }

    if (l->first < r->first)
      ++l;
    else
      ++r;
  }

  return dot;
}
|
||||
|
||||
// Cosine between two weighted bags; 0.0 when their dot product is zero.
double WeightedBagOfWordsCos(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
  auto const dot = WeightedBagsDotProduct(lhs, rhs);
  // A zero dot product is reported as zero similarity without dividing by the norms.
  if (dot == 0.0)
    return 0.0;

  auto const lhsNorm = sqrt(WeightedBagsDotProduct(lhs, lhs));
  auto const rhsNorm = sqrt(WeightedBagsDotProduct(rhs, rhs));
  return dot / (lhsNorm * rhsNorm);
}
|
||||
|
||||
double GetLinearNormDistanceScore(double distance)
|
||||
{
|
||||
distance = my::clamp(distance, 0, BookingDataset::kDistanceLimitInMeters);
|
||||
return 1.0 - distance / BookingDataset::kDistanceLimitInMeters;
|
||||
}
|
||||
|
||||
// Similarity of two names as the cosine between their weighted bags of words.
// Two names without any words are considered identical (1.0); if only one of them has
// no words the score is 0.0.
double GetNameSimilarityScore(string const & booking_name, string const & osm_name)
{
  auto const bookingBag = MakeWeightedBagOfWords(StringToSetOfWords(booking_name));
  auto const osmBag = MakeWeightedBagOfWords(StringToSetOfWords(osm_name));

  bool const bookingEmpty = bookingBag.empty();
  bool const osmEmpty = osmBag.empty();
  if (bookingEmpty || osmEmpty)
    return (bookingEmpty && osmEmpty) ? 1.0 : 0.0;

  return WeightedBagOfWordsCos(bookingBag, osmBag);
}
|
||||
} // namespace
|
||||
|
||||
double BookingMatchScore::GetMatchingScore() const
|
||||
namespace generator
|
||||
{
|
||||
namespace sponsored_scoring
|
||||
{
|
||||
template <>
|
||||
double MatchStats<BookingHotel>::GetMatchingScore() const
|
||||
{
|
||||
// TODO(mgsergio): Use tuner to get optimal function.
|
||||
return m_linearNormDistanceScore * m_nameSimilarityScore;
|
||||
}
|
||||
|
||||
bool BookingMatchScore::IsMatched() const
|
||||
template <>
|
||||
bool MatchStats<BookingHotel>::IsMatched() const
|
||||
{
|
||||
return GetMatchingScore() > kOptimalThreshold;
|
||||
}
|
||||
|
||||
BookingMatchScore Match(BookingDataset::Hotel const & h, FeatureBuilder1 const & fb)
|
||||
// TODO(mgsergio): Do I need to specialize this method?
|
||||
template <>
|
||||
MatchStats<BookingHotel> Match(BookingHotel const & h, FeatureBuilder1 const & fb)
|
||||
{
|
||||
BookingMatchScore score;
|
||||
MatchStats<BookingHotel> score;
|
||||
|
||||
auto const fbCenter = MercatorBounds::ToLatLon(fb.GetKeyPoint());
|
||||
auto const distance = ms::DistanceOnEarth(fbCenter.lat, fbCenter.lon, h.lat, h.lon);
|
||||
score.m_linearNormDistanceScore = GetLinearNormDistanceScore(distance);
|
||||
auto const distance = ms::DistanceOnEarth(fbCenter, h.m_latLon);
|
||||
score.m_linearNormDistanceScore =
|
||||
impl::GetLinearNormDistanceScore(distance, BookingDataset::kDistanceLimitInMeters);
|
||||
|
||||
// TODO(mgsergio): Check all translations and use the best one.
|
||||
score.m_nameSimilarityScore =
|
||||
GetNameSimilarityScore(h.name, fb.GetName(StringUtf8Multilang::kDefaultCode));
|
||||
impl::GetNameSimilarityScore(h.m_name, fb.GetName(StringUtf8Multilang::kDefaultCode));
|
||||
|
||||
return score;
|
||||
}
|
||||
} // namespace booking_scoring
|
||||
} // namespace sponsored_scoring
|
||||
} // namespace generator
|
||||
|
|
|
@ -1,22 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "generator/booking_dataset.hpp"
|
||||
|
||||
class FeatureBuilder1;
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace booking_scoring
|
||||
{
|
||||
// Partial scores of matching a booking hotel against an OSM feature, as filled in by Match().
struct BookingMatchScore
|
||||
{
|
||||
// Combined score computed from the two partial scores below.
double GetMatchingScore() const;
|
||||
// Verdict derived from GetMatchingScore().
bool IsMatched() const;
|
||||
|
||||
// Score derived from the distance between the hotel and the feature.
double m_linearNormDistanceScore{};
|
||||
// Score derived from comparing the hotel name with the feature name.
double m_nameSimilarityScore{};
|
||||
};
|
||||
|
||||
BookingMatchScore Match(BookingDataset::Hotel const & h, FeatureBuilder1 const & fb);
|
||||
} // namespace booking_scoring
|
||||
} // namespace generator
|
|
@ -42,6 +42,8 @@ struct GenerateInfo
|
|||
|
||||
string m_bookingDatafileName;
|
||||
string m_bookingReferenceDir;
|
||||
string m_opentableDatafileName;
|
||||
string m_opentableReferenceDir;
|
||||
|
||||
uint32_t m_versionDate = 0;
|
||||
|
||||
|
|
|
@ -28,6 +28,8 @@ SOURCES += \
|
|||
feature_generator.cpp \
|
||||
feature_merger.cpp \
|
||||
feature_sorter.cpp \
|
||||
opentable_dataset.cpp \
|
||||
opentable_scoring.cpp \
|
||||
osm2meta.cpp \
|
||||
osm2type.cpp \
|
||||
osm_element.cpp \
|
||||
|
@ -36,6 +38,7 @@ SOURCES += \
|
|||
region_meta.cpp \
|
||||
routing_generator.cpp \
|
||||
search_index_builder.cpp \
|
||||
sponsored_scoring.cpp \
|
||||
srtm_parser.cpp \
|
||||
statistics.cpp \
|
||||
tesselator.cpp \
|
||||
|
@ -45,7 +48,6 @@ SOURCES += \
|
|||
HEADERS += \
|
||||
altitude_generator.hpp \
|
||||
booking_dataset.hpp \
|
||||
booking_scoring.hpp \
|
||||
borders_generator.hpp \
|
||||
borders_loader.hpp \
|
||||
centers_table_builder.hpp \
|
||||
|
@ -61,6 +63,7 @@ HEADERS += \
|
|||
generate_info.hpp \
|
||||
intermediate_data.hpp\
|
||||
intermediate_elements.hpp\
|
||||
opentable_dataset.hpp \
|
||||
osm2meta.hpp \
|
||||
osm2type.hpp \
|
||||
osm_element.hpp \
|
||||
|
@ -72,6 +75,9 @@ HEADERS += \
|
|||
region_meta.hpp \
|
||||
routing_generator.hpp \
|
||||
search_index_builder.hpp \
|
||||
sponsored_dataset.hpp \
|
||||
sponsored_dataset_inl.hpp \
|
||||
sponsored_scoring.hpp \
|
||||
srtm_parser.hpp \
|
||||
statistics.hpp \
|
||||
tag_admixer.hpp \
|
||||
|
|
|
@ -74,7 +74,9 @@ DEFINE_string(osm_file_name, "", "Input osm area file.");
|
|||
DEFINE_string(osm_file_type, "xml", "Input osm area file type [xml, o5m].");
|
||||
DEFINE_string(user_resource_path, "", "User defined resource path for classificator.txt and etc.");
|
||||
DEFINE_string(booking_data, "", "Path to booking data in .tsv format.");
|
||||
DEFINE_string(booking_reference_path, "", "Path to mwm dataset for match booking addresses.");
|
||||
DEFINE_string(booking_reference_path, "", "Path to mwm dataset for booking addresses matching.");
|
||||
DEFINE_string(opentable_data, "", "Path to opentable data in .tsv format.");
|
||||
DEFINE_string(opentable_reference_path, "", "Path to mwm dataset for opentable addresses matching.");
|
||||
DEFINE_uint64(planet_version, my::SecondsSinceEpoch(),
|
||||
"Version as seconds since epoch, by default - now.");
|
||||
DEFINE_string(srtm_path, "",
|
||||
|
@ -114,6 +116,8 @@ int main(int argc, char ** argv)
|
|||
genInfo.m_preloadCache = FLAGS_preload_cache;
|
||||
genInfo.m_bookingDatafileName = FLAGS_booking_data;
|
||||
genInfo.m_bookingReferenceDir = FLAGS_booking_reference_path;
|
||||
genInfo.m_opentableDatafileName = FLAGS_opentable_data;
|
||||
genInfo.m_opentableReferenceDir = FLAGS_opentable_reference_path;
|
||||
|
||||
genInfo.m_versionDate = static_cast<uint32_t>(FLAGS_planet_version);
|
||||
|
||||
|
|
94
generator/opentable_dataset.cpp
Normal file
94
generator/opentable_dataset.cpp
Normal file
|
@ -0,0 +1,94 @@
|
|||
#include "generator/opentable_dataset.hpp"
|
||||
|
||||
#include "generator/feature_builder.hpp"
|
||||
#include "generator/sponsored_scoring.hpp"
|
||||
|
||||
#include "indexer/classificator.hpp"
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "boost/algorithm/string/replace.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
// OpentableRestaurant ------------------------------------------------------------------------------
|
||||
// Parses one tab-separated row of the restaurants data file.
// The row must contain exactly FieldsCount() columns (checked). Numeric columns that can
// not be parsed are reported with a warning and the corresponding member keeps the value
// it had before the failed parse attempt.
OpentableRestaurant::OpentableRestaurant(string const & src)
{
  vector<string> rec;
  strings::ParseCSVRow(src, '\t', rec);
  CHECK_EQUAL(rec.size(), FieldsCount(), ("Error parsing restaurants.tsv line:",
                                          boost::replace_all_copy(src, "\t", "\\t")));

  // Do not silently swallow conversion failures: a bad id or coordinate would otherwise
  // go unnoticed.
  if (!strings::to_uint(rec[FieldIndex(Fields::Id)], m_id.Get()))
    LOG(LWARNING, ("Can't parse restaurant id:", rec[FieldIndex(Fields::Id)]));
  if (!strings::to_double(rec[FieldIndex(Fields::Latitude)], m_latLon.lat))
    LOG(LWARNING, ("Can't parse restaurant latitude:", rec[FieldIndex(Fields::Latitude)]));
  if (!strings::to_double(rec[FieldIndex(Fields::Longtitude)], m_latLon.lon))
    LOG(LWARNING, ("Can't parse restaurant longitude:", rec[FieldIndex(Fields::Longtitude)]));

  m_name = rec[FieldIndex(Fields::Name)];
  m_address = rec[FieldIndex(Fields::Address)];
  m_descUrl = rec[FieldIndex(Fields::DescUrl)];
}
|
||||
|
||||
// Prints id, name, address and coordinates of a restaurant on one line.
// Note: switches |s| to fixed notation with precision 7 and leaves it that way.
ostream & operator<<(ostream & s, OpentableRestaurant const & h)
{
  s << fixed << setprecision(7);
  s << "Id: " << h.m_id;
  s << "\t Name: " << h.m_name;
  s << "\t Address: " << h.m_address;
  s << "\t lat: " << h.m_latLon.lat << " lon: " << h.m_latLon.lon;
  return s;
}
|
||||
|
||||
// OpentableDataset ---------------------------------------------------------------------------------
|
||||
template <>
|
||||
bool OpentableDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
|
||||
return false;
|
||||
|
||||
return ftypes::IsFoodChecker::Instance()(fb.GetTypes());
|
||||
}
|
||||
|
||||
template <>
|
||||
void OpentableDataset::PreprocessMatchedOsmObject(ObjectId const matchedObjId, FeatureBuilder1 & fb,
|
||||
function<void(FeatureBuilder1 &)> const fn) const
|
||||
{
|
||||
FeatureParams params = fb.GetParams();
|
||||
|
||||
auto restaurant = GetObjectById(matchedObjId);
|
||||
auto & metadata = params.GetMetadata();
|
||||
metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(restaurant.m_id.Get()));
|
||||
metadata.Set(feature::Metadata::FMD_WEBSITE, restaurant.m_descUrl);
|
||||
|
||||
// params.AddAddress(restaurant.address);
|
||||
// TODO(mgsergio): addr:full ???
|
||||
|
||||
params.AddName(StringUtf8Multilang::GetLangByCode(StringUtf8Multilang::kDefaultCode),
|
||||
restaurant.m_name);
|
||||
|
||||
auto const & clf = classif();
|
||||
params.AddType(clf.GetTypeByPath({"sponsored", "opentable"}));
|
||||
|
||||
fb.SetParams(params);
|
||||
|
||||
fn(fb);
|
||||
}
|
||||
|
||||
template <>
|
||||
OpentableDataset::ObjectId OpentableDataset::FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
auto const name = fb.GetName(StringUtf8Multilang::kDefaultCode);
|
||||
|
||||
if (name.empty())
|
||||
return Object::InvalidObjectId();
|
||||
|
||||
// Find |kMaxSelectedElements| nearest values to a point.
|
||||
auto const nearbyIds = GetNearestObjects(MercatorBounds::ToLatLon(fb.GetKeyPoint()),
|
||||
kMaxSelectedElements, kDistanceLimitInMeters);
|
||||
|
||||
for (auto const objId : nearbyIds)
|
||||
{
|
||||
if (sponsored_scoring::Match(GetObjectById(objId), fb).IsMatched())
|
||||
return objId;
|
||||
}
|
||||
|
||||
return Object::InvalidObjectId();
|
||||
}
|
||||
} // namespace generator
|
60
generator/opentable_dataset.hpp
Normal file
60
generator/opentable_dataset.hpp
Normal file
|
@ -0,0 +1,60 @@
|
|||
#pragma once
|
||||
|
||||
#include "generator/sponsored_dataset.hpp"
|
||||
|
||||
#include "geometry/latlon.hpp"
|
||||
|
||||
#include "base/newtype.hpp"
|
||||
|
||||
#include "std/limits.hpp"
|
||||
#include "std/string.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
// TODO(mgsergio): Try to get rid of code duplication. (See BookingHotel)
|
||||
// One restaurant record of the Opentable dataset, built from a tab-separated row.
struct OpentableRestaurant
|
||||
{
|
||||
// Strongly typed restaurant identifier.
NEWTYPE(uint32_t, ObjectId);
|
||||
|
||||
// Column order of a data row. Counter is not a column: it is the number of columns.
enum class Fields
|
||||
{
|
||||
Id = 0,
|
||||
Latitude,
|
||||
Longtitude,
|
||||
Name,
|
||||
Address,
|
||||
DescUrl,
|
||||
Phone,
|
||||
// Opentable doesn't have translations.
|
||||
// Translations,
|
||||
Counter
|
||||
};
|
||||
|
||||
// Sentinel id: the maximum value of the underlying id representation.
static constexpr ObjectId InvalidObjectId()
|
||||
{
|
||||
return ObjectId(numeric_limits<typename ObjectId::RepType>::max());
|
||||
}
|
||||
|
||||
// Parses |src|, a single row of the data file.
explicit OpentableRestaurant(string const & src);
|
||||
|
||||
// Zero-based column index of |field|.
static constexpr size_t FieldIndex(Fields field) { return static_cast<size_t>(field); }
|
||||
// Number of columns expected in a data row.
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
|
||||
|
||||
// True if at least one of street / house number is known.
bool HasAddresParts() const { return !m_street.empty() || !m_houseNumber.empty(); }
|
||||
|
||||
ObjectId m_id{InvalidObjectId()};
|
||||
ms::LatLon m_latLon = ms::LatLon::Zero();
|
||||
string m_name;
|
||||
// Not read from the data row by the constructor.
string m_street;
|
||||
string m_houseNumber;
|
||||
|
||||
// Address and description URL exactly as given in the data row.
string m_address;
|
||||
string m_descUrl;
|
||||
// string m_translations;
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream & s, OpentableRestaurant const & r);
|
||||
|
||||
NEWTYPE_SIMPLE_OUTPUT(OpentableRestaurant::ObjectId);
|
||||
using OpentableDataset = SponsoredDataset<OpentableRestaurant>;
|
||||
} // namespace generator
|
45
generator/opentable_scoring.cpp
Normal file
45
generator/opentable_scoring.cpp
Normal file
|
@ -0,0 +1,45 @@
|
|||
#include "generator/sponsored_scoring.hpp"
|
||||
|
||||
#include "generator/opentable_dataset.hpp"
|
||||
#include "generator/feature_builder.hpp"
|
||||
|
||||
namespace
|
||||
{
|
||||
// Calculated with tools/python/booking_hotels_quality.py.
|
||||
double constexpr kOptimalThreshold = 0.312887;
|
||||
} // namespace
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace sponsored_scoring
|
||||
{
|
||||
template <>
|
||||
double MatchStats<OpentableRestaurant>::GetMatchingScore() const
|
||||
{
|
||||
// TODO(mgsergio): Use tuner to get optimal function.
|
||||
return m_linearNormDistanceScore * m_nameSimilarityScore;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool MatchStats<OpentableRestaurant>::IsMatched() const
|
||||
{
|
||||
return GetMatchingScore() > kOptimalThreshold;
|
||||
}
|
||||
|
||||
template <>
|
||||
MatchStats<OpentableRestaurant> Match(OpentableRestaurant const & r, FeatureBuilder1 const & fb)
|
||||
{
|
||||
MatchStats<OpentableRestaurant> score;
|
||||
|
||||
auto const fbCenter = MercatorBounds::ToLatLon(fb.GetKeyPoint());
|
||||
auto const distance = ms::DistanceOnEarth(fbCenter, r.m_latLon);
|
||||
score.m_linearNormDistanceScore =
|
||||
impl::GetLinearNormDistanceScore(distance, OpentableDataset::kDistanceLimitInMeters);
|
||||
|
||||
score.m_nameSimilarityScore =
|
||||
impl::GetNameSimilarityScore(r.m_name, fb.GetName(StringUtf8Multilang::kDefaultCode));
|
||||
|
||||
return score;
|
||||
}
|
||||
} // namespace sponsored_scoring
|
||||
} // namespace generator
|
|
@ -1,4 +1,3 @@
|
|||
#include "generator/booking_dataset.hpp"
|
||||
#include "generator/coastlines_generator.hpp"
|
||||
#include "generator/feature_generator.hpp"
|
||||
#include "generator/intermediate_data.hpp"
|
||||
|
@ -13,6 +12,9 @@
|
|||
#include "generator/towns_dumper.hpp"
|
||||
#include "generator/world_map_generator.hpp"
|
||||
|
||||
#include "generator/booking_dataset.hpp"
|
||||
#include "generator/opentable_dataset.hpp"
|
||||
|
||||
#include "indexer/classificator.hpp"
|
||||
|
||||
#include "platform/platform.hpp"
|
||||
|
@ -278,6 +280,7 @@ class MainFeaturesEmitter : public EmitterBase
|
|||
bool m_failOnCoasts;
|
||||
|
||||
generator::BookingDataset m_bookingDataset;
|
||||
generator::OpentableDataset m_opentableDataset;
|
||||
|
||||
/// Used to prepare a list of cities to serve as a list of nodes
|
||||
/// for building a highway graph with OSRM for low zooms.
|
||||
|
@ -301,6 +304,8 @@ public:
|
|||
: m_skippedElementsPath(info.GetIntermediateFileName("skipped_elements", ".lst"))
|
||||
, m_failOnCoasts(info.m_failOnCoasts)
|
||||
, m_bookingDataset(info.m_bookingDatafileName, info.m_bookingReferenceDir)
|
||||
, m_opentableDataset(info.m_opentableDatafileName, info.m_opentableReferenceDir)
|
||||
|
||||
{
|
||||
Classificator const & c = classif();
|
||||
|
||||
|
@ -342,47 +347,43 @@ public:
|
|||
static uint32_t const placeType = classif().GetTypeByPath({"place"});
|
||||
uint32_t const type = fb.GetParams().FindType(placeType, 1);
|
||||
|
||||
auto hotelIndex = generator::BookingDataset::kInvalidHotelIndex;
|
||||
|
||||
// TODO(mgsergio): Would it be better to have objects that store callback
|
||||
// and can be piped: action-if-cond1 | action-if-cond-2 | ... ?
|
||||
// The first object which performs an action terminates the chain.
|
||||
if (type != ftype::GetEmptyValue() && !fb.GetName().empty())
|
||||
{
|
||||
m_places.ReplaceEqualInRect(
|
||||
Place(fb, type),
|
||||
[](Place const & p1, Place const & p2) { return p1.IsEqual(p2); },
|
||||
[](Place const & p1, Place const & p2) { return p1.IsBetterThan(p2); });
|
||||
return;
|
||||
}
|
||||
else if ((hotelIndex = m_bookingDataset.GetMatchingHotelIndex(fb)) !=
|
||||
generator::BookingDataset::kInvalidHotelIndex)
|
||||
{
|
||||
m_skippedElements << DebugPrint(fb.GetMostGenericOsmId()) << endl;
|
||||
|
||||
// Turn a hotel into a simple building.
|
||||
if (fb.GetGeomType() == feature::GEOM_AREA)
|
||||
auto const bookingObjId = m_bookingDataset.FindMatchingObjectId(fb);
|
||||
if (bookingObjId != generator::BookingHotel::InvalidObjectId())
|
||||
{
|
||||
m_bookingDataset.PreprocessMatchedOsmObject(bookingObjId, fb, [this, bookingObjId](FeatureBuilder1 & fb)
|
||||
{
|
||||
// Remove all information about a hotel.
|
||||
auto params = fb.GetParams();
|
||||
params.ClearName();
|
||||
auto & meta = params.GetMetadata();
|
||||
meta.Drop(feature::Metadata::EType::FMD_STARS);
|
||||
meta.Drop(feature::Metadata::EType::FMD_WEBSITE);
|
||||
meta.Drop(feature::Metadata::EType::FMD_PHONE_NUMBER);
|
||||
|
||||
auto const & c = classif();
|
||||
auto const tourism = c.GetTypeByPath({"tourism"});
|
||||
my::EraseIf(params.m_Types, [&c, tourism](uint32_t type)
|
||||
{
|
||||
ftype::TruncValue(type, 1);
|
||||
return type == tourism;
|
||||
});
|
||||
fb.SetParams(params);
|
||||
|
||||
m_skippedElements << "BOOKING\t" << DebugPrint(fb.GetMostGenericOsmId())
|
||||
<< '\t' << bookingObjId.Get() << endl;
|
||||
Emit(fb);
|
||||
}
|
||||
});
|
||||
return;
|
||||
}
|
||||
else
|
||||
|
||||
auto const opentableObjId = m_opentableDataset.FindMatchingObjectId(fb);
|
||||
if (opentableObjId != generator::OpentableRestaurant::InvalidObjectId())
|
||||
{
|
||||
Emit(fb);
|
||||
m_opentableDataset.PreprocessMatchedOsmObject(opentableObjId, fb, [this, opentableObjId](FeatureBuilder1 & fb)
|
||||
{
|
||||
m_skippedElements << "OPENTABLE\t" << DebugPrint(fb.GetMostGenericOsmId())
|
||||
<< '\t' << opentableObjId.Get() << endl;
|
||||
Emit(fb);
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
Emit(fb);
|
||||
}
|
||||
|
||||
/// @return false if coasts are not merged and FLAG_fail_on_coasts is set
|
||||
|
@ -390,8 +391,10 @@ public:
|
|||
{
|
||||
DumpSkippedElements();
|
||||
|
||||
// Emit all booking objects to the map.
|
||||
m_bookingDataset.BuildHotels([this](FeatureBuilder1 & fb) { Emit(fb); });
|
||||
// Emit all required booking objects to the map.
|
||||
m_bookingDataset.BuildOsmObjects([this](FeatureBuilder1 & fb) { Emit(fb); });
|
||||
// No opentable objects should be emitted. Opentable data enriches some data
|
||||
// with a link to a restaurant's reservation page.
|
||||
|
||||
m_places.ForEach([this](Place const & p)
|
||||
{
|
||||
|
|
89
generator/sponsored_dataset.hpp
Normal file
89
generator/sponsored_dataset.hpp
Normal file
|
@ -0,0 +1,89 @@
|
|||
#pragma once
|
||||
|
||||
#include "indexer/index.hpp"
|
||||
|
||||
#include "search/reverse_geocoder.hpp"
|
||||
|
||||
#include "platform/local_country_file.hpp"
|
||||
#include "platform/local_country_file_utils.hpp"
|
||||
#include "platform/platform.hpp"
|
||||
|
||||
#include "base/newtype.hpp"
|
||||
|
||||
#include "std/function.hpp"
|
||||
#include "std/map.hpp"
|
||||
#include "std/string.hpp"
|
||||
|
||||
#include "boost/geometry.hpp"
|
||||
#include "boost/geometry/geometries/point.hpp"
|
||||
#include "boost/geometry/geometries/box.hpp"
|
||||
#include "boost/geometry/index/rtree.hpp"
|
||||
|
||||
class FeatureBuilder1;
|
||||
|
||||
namespace generator
|
||||
{
|
||||
// A collection of sponsored objects of type |SponsoredObject| keyed by object id, with a
// spatial index for nearest-object queries and helpers for matching OSM features.
template <typename SponsoredObject>
|
||||
class SponsoredDataset
|
||||
{
|
||||
public:
|
||||
using Object = SponsoredObject;
|
||||
using ObjectId = typename Object::ObjectId;
|
||||
|
||||
// Distance limit, in meters, used by callers when searching for and scoring candidates.
static double constexpr kDistanceLimitInMeters = 150;
|
||||
// Number of nearest candidates callers request per feature.
static size_t constexpr kMaxSelectedElements = 3;
|
||||
|
||||
// Loads the dataset from the file |dataPath|; an empty path leaves the dataset empty.
explicit SponsoredDataset(string const & dataPath, string const & addressReferencePath = string());
|
||||
// Loads the dataset from an already opened stream.
explicit SponsoredDataset(istream & dataSource, string const & addressReferencePath = string());
|
||||
|
||||
size_t Size() const { return m_objects.size(); }
|
||||
|
||||
// Both overloads CHECK that |id| is present in the dataset.
Object const & GetObjectById(ObjectId id) const;
|
||||
Object & GetObjectById(ObjectId id);
|
||||
vector<ObjectId> GetNearestObjects(ms::LatLon const & latLon, size_t limit,
|
||||
double maxDistance = 0.0) const;
|
||||
|
||||
/// @return true if |fb| satisfies some necessary conditions to match one or several
|
||||
/// objects from dataset.
|
||||
bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const;
|
||||
ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const;
|
||||
|
||||
// Updates the matched OSM feature |fb| with data of the object |matchedObjId| and passes
// the result to |fn|. Defined per dataset as an explicit specialization.
void PreprocessMatchedOsmObject(ObjectId matchedObjId, FeatureBuilder1 & fb,
|
||||
function<void(FeatureBuilder1 &)> const fn) const;
|
||||
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const;
|
||||
|
||||
protected:
|
||||
// Fills street and house number of an object using a reverse geocoder over local maps.
class AddressMatcher
|
||||
{
|
||||
public:
|
||||
AddressMatcher();
|
||||
void operator()(Object & object);
|
||||
|
||||
private:
|
||||
Index m_index;
|
||||
unique_ptr<search::ReverseGeocoder> m_coder;
|
||||
};
|
||||
|
||||
// TODO(mgsergio): Get rid of Box since boost::rtree supports point as value type.
|
||||
// TODO(mgsergio): Use mercator instead of latlon or boost::geometry::cs::spherical_equatorial
|
||||
// instead of boost::geometry::cs::cartesian.
|
||||
using Point = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
|
||||
using Box = boost::geometry::model::box<Point>;
|
||||
using Value = pair<Box, ObjectId>;
|
||||
|
||||
// Create the rtree using default constructor.
|
||||
boost::geometry::index::rtree<Value, boost::geometry::index::quadratic<16>> m_rtree;
|
||||
|
||||
void BuildObject(Object const & object,
|
||||
function<void(FeatureBuilder1 &)> const & fn) const;
|
||||
|
||||
void LoadData(istream & src, string const & addressReferencePath);
|
||||
|
||||
/// @return an id of a matched object or Object::InvalidObjectId() on failure.
|
||||
ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const;
|
||||
|
||||
// All loaded objects keyed by their id.
map<ObjectId, Object> m_objects;
|
||||
};
|
||||
} // namespace generator
|
||||
|
||||
#include "generator/sponsored_dataset_inl.hpp" // SponsoredDataset implementation.
|
179
generator/sponsored_dataset_inl.hpp
Normal file
179
generator/sponsored_dataset_inl.hpp
Normal file
|
@ -0,0 +1,179 @@
|
|||
#include "generator/sponsored_dataset.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/fstream.hpp"
|
||||
#include "std/iostream.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
// AddressMatcher ----------------------------------------------------------------------------------
|
||||
template <typename SponsoredObject>
|
||||
SponsoredDataset<SponsoredObject>::AddressMatcher::AddressMatcher()
|
||||
{
|
||||
vector<platform::LocalCountryFile> localFiles;
|
||||
|
||||
Platform & platform = GetPlatform();
|
||||
platform::FindAllLocalMapsInDirectoryAndCleanup(platform.WritableDir(), 0 /* version */,
|
||||
-1 /* latestVersion */, localFiles);
|
||||
|
||||
for (platform::LocalCountryFile const & localFile : localFiles)
|
||||
{
|
||||
LOG(LINFO, ("Found mwm:", localFile));
|
||||
try
|
||||
{
|
||||
m_index.RegisterMap(localFile);
|
||||
}
|
||||
catch (RootException const & ex)
|
||||
{
|
||||
CHECK(false, (ex.Msg(), "Bad mwm file:", localFile));
|
||||
}
|
||||
}
|
||||
|
||||
m_coder = make_unique<search::ReverseGeocoder>(m_index);
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
void SponsoredDataset<SponsoredObject>::AddressMatcher::operator()(Object & object)
|
||||
{
|
||||
search::ReverseGeocoder::Address addr;
|
||||
m_coder->GetNearbyAddress(MercatorBounds::FromLatLon(object.m_latLon), addr);
|
||||
object.m_street = addr.GetStreetName();
|
||||
object.m_houseNumber = addr.GetHouseNumber();
|
||||
}
|
||||
|
||||
|
||||
// SponsoredDataset --------------------------------------------------------------------------------
|
||||
template <typename SponsoredObject>
|
||||
SponsoredDataset<SponsoredObject>::SponsoredDataset(string const & dataPath, string const & addressReferencePath)
|
||||
{
|
||||
if (dataPath.empty())
|
||||
return;
|
||||
|
||||
ifstream dataSource(dataPath);
|
||||
if (!dataSource.is_open())
|
||||
{
|
||||
LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
|
||||
return;
|
||||
}
|
||||
|
||||
LoadData(dataSource, addressReferencePath);
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
SponsoredDataset<SponsoredObject>::SponsoredDataset(istream & dataSource, string const & addressReferencePath)
|
||||
{
|
||||
LoadData(dataSource, addressReferencePath);
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
typename SponsoredDataset<SponsoredObject>::Object const &
|
||||
SponsoredDataset<SponsoredObject>::GetObjectById(ObjectId id) const
|
||||
{
|
||||
auto const it = m_objects.find(id);
|
||||
CHECK(it != end(m_objects), ("Got wrong object id:", id));
|
||||
return it->second;
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
typename SponsoredDataset<SponsoredObject>::Object &
|
||||
SponsoredDataset<SponsoredObject>::GetObjectById(ObjectId id)
|
||||
{
|
||||
auto const it = m_objects.find(id);
|
||||
CHECK(it != end(m_objects), ("Got wrong object id:", id));
|
||||
return it->second;
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
void SponsoredDataset<SponsoredObject>::BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const
|
||||
{
|
||||
for (auto const & item : m_objects)
|
||||
BuildObject(item.second, fn);
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
typename SponsoredDataset<SponsoredObject>::ObjectId
|
||||
SponsoredDataset<SponsoredObject>::FindMatchingObjectId(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
if (NecessaryMatchingConditionHolds(fb))
|
||||
return FindMatchingObjectIdImpl(fb);
|
||||
return Object::InvalidObjectId();
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
vector<typename SponsoredDataset<SponsoredObject>::ObjectId>
|
||||
SponsoredDataset<SponsoredObject>::GetNearestObjects(ms::LatLon const & latLon, size_t const limit,
|
||||
double const maxDistanceMeters /* = 0.0 */) const
|
||||
{
|
||||
namespace bgi = boost::geometry::index;
|
||||
|
||||
vector<ObjectId> indexes;
|
||||
for_each(bgi::qbegin(m_rtree, bgi::nearest(Point(latLon.lat, latLon.lon), limit)),
|
||||
bgi::qend(m_rtree), [this, &latLon, &indexes, maxDistanceMeters](Value const & v)
|
||||
{
|
||||
auto const & object = GetObjectById(v.second);
|
||||
double const dist = ms::DistanceOnEarth(latLon, object.m_latLon);
|
||||
if (maxDistanceMeters != 0.0 && dist > maxDistanceMeters /* max distance in meters */)
|
||||
return;
|
||||
|
||||
indexes.emplace_back(v.second);
|
||||
});
|
||||
|
||||
return indexes;
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
void SponsoredDataset<SponsoredObject>::LoadData(istream & src, string const & addressReferencePath)
|
||||
{
|
||||
m_objects.clear();
|
||||
m_rtree.clear();
|
||||
|
||||
for (string line; getline(src, line);)
|
||||
{
|
||||
Object hotel(line);
|
||||
m_objects.emplace(hotel.m_id, hotel);
|
||||
}
|
||||
|
||||
// Try to get object address from existing MWMs.
|
||||
if (!addressReferencePath.empty())
|
||||
{
|
||||
LOG(LINFO, ("Reference addresses for sponsored objects", addressReferencePath));
|
||||
Platform & platform = GetPlatform();
|
||||
string const backupPath = platform.WritableDir();
|
||||
|
||||
// MWMs can be loaded only from a writebledir or from a resourcedir,
|
||||
// changig resourcedir can lead to probles with classificator, so
|
||||
// we change writebledir.
|
||||
platform.SetWritableDirForTests(addressReferencePath);
|
||||
|
||||
AddressMatcher addressMatcher;
|
||||
|
||||
size_t matchedCount = 0;
|
||||
size_t emptyCount = 0;
|
||||
for (auto & item : m_objects)
|
||||
{
|
||||
auto & object = item.second;
|
||||
addressMatcher(object);
|
||||
|
||||
if (object.m_address.empty())
|
||||
++emptyCount;
|
||||
if (object.HasAddresParts())
|
||||
++matchedCount;
|
||||
}
|
||||
LOG(LINFO,
|
||||
("Num of hotels:", m_objects.size(), "matched:", matchedCount, "empty addresses:", emptyCount));
|
||||
platform.SetWritableDirForTests(backupPath);
|
||||
}
|
||||
|
||||
for (auto const & item : m_objects)
|
||||
{
|
||||
auto const & object = item.second;
|
||||
Box b(Point(object.m_latLon.lat, object.m_latLon.lon),
|
||||
Point(object.m_latLon.lat, object.m_latLon.lon));
|
||||
m_rtree.insert(make_pair(b, object.m_id));
|
||||
}
|
||||
}
|
||||
} // namespace generator
|
108
generator/sponsored_scoring.cpp
Normal file
108
generator/sponsored_scoring.cpp
Normal file
|
@ -0,0 +1,108 @@
|
|||
#include "generator/sponsored_scoring.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
#include "std/algorithm.hpp"
|
||||
#include "std/vector.hpp"
|
||||
|
||||
namespace
|
||||
{
|
||||
using WeightedBagOfWords = vector<pair<strings::UniString, double>>;
|
||||
|
||||
/// Splits str into normalized tokens with search::NormalizeAndTokenizeString
/// and returns them sorted.
vector<strings::UniString> StringToWords(string const & str)
{
  search::Delimiters const delimiters{};

  vector<strings::UniString> words;
  search::NormalizeAndTokenizeString(str, words, delimiters);

  sort(words.begin(), words.end());
  return words;
}
|
||||
|
||||
/// Collapses runs of equal adjacent words into (word, weight) pairs, where the
/// weight grows by the placeholder score for every occurrence in the run.
/// Equal words are merged only when adjacent (StringToWords returns them sorted).
WeightedBagOfWords MakeWeightedBagOfWords(vector<strings::UniString> const & words)
{
  // TODO(mgsergio): Calculate tf-idf score for every word.
  auto constexpr kTfIdfScorePlaceholder = 1;

  WeightedBagOfWords bag;
  for (auto const & word : words)
  {
    bool const continuesRun = !bag.empty() && bag.back().first == word;
    if (continuesRun)
    {
      // TODO(mgsergio): tf-idf score for bag.back().first.
      bag.back().second += kTfIdfScorePlaceholder;
    }
    else
    {
      bag.emplace_back(word, kTfIdfScorePlaceholder);
    }
  }
  return bag;
}
|
||||
|
||||
/// Dot product of two bags of words: the sum of weight products over the words
/// present in both bags. Walks both bags in parallel, advancing the side with
/// the smaller word, so both are expected to be ordered by word.
double WeightedBagsDotProduct(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
  double dot = 0.0;

  auto l = lhs.begin();
  auto r = rhs.begin();
  while (l != lhs.end() && r != rhs.end())
  {
    if (l->first < r->first)
    {
      ++l;
    }
    else if (l->first == r->first)
    {
      dot += l->second * r->second;
      ++l;
      ++r;
    }
    else
    {
      ++r;
    }
  }

  return dot;
}
|
||||
|
||||
/// Cosine of the angle between two bags of words treated as vectors.
/// @return 0.0 when the dot product of the bags is 0.0.
double WeightedBagOfWordsCos(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
  auto const dot = WeightedBagsDotProduct(lhs, rhs);

  // WeightedBagsDotProduct returns 0.0 if lhs.empty() || rhs.empty() or
  // if every element of either lhs or rhs is 0.0. Returning early also keeps
  // the division below away from zero lengths.
  if (dot == 0.0)
    return 0.0;

  auto const lhsNorm = sqrt(WeightedBagsDotProduct(lhs, lhs));
  auto const rhsNorm = sqrt(WeightedBagsDotProduct(rhs, rhs));
  return dot / (lhsNorm * rhsNorm);
}
|
||||
} // namespace
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace impl
|
||||
{
|
||||
double GetLinearNormDistanceScore(double distance, double const maxDistance)
|
||||
{
|
||||
CHECK_NOT_EQUAL(maxDistance, 0.0, ("maxDistance cannot be 0."));
|
||||
distance = my::clamp(distance, 0, maxDistance);
|
||||
return 1.0 - distance / maxDistance;
|
||||
}
|
||||
|
||||
/// Cosine similarity of the two names represented as weighted bags of words.
/// @return 1.0 when both names yield no words, 0.0 when exactly one of them does.
double GetNameSimilarityScore(string const & booking_name, string const & osm_name)
{
  auto const sponsoredBag = MakeWeightedBagOfWords(StringToWords(booking_name));
  auto const osmBag = MakeWeightedBagOfWords(StringToWords(osm_name));

  if (sponsoredBag.empty() || osmBag.empty())
  {
    bool const bothEmpty = sponsoredBag.empty() && osmBag.empty();
    return bothEmpty ? 1.0 : 0.0;
  }

  return WeightedBagOfWordsCos(sponsoredBag, osmBag);
}
|
||||
} // namespace impl
|
||||
} // namespace generator
|
34
generator/sponsored_scoring.hpp
Normal file
34
generator/sponsored_scoring.hpp
Normal file
|
@ -0,0 +1,34 @@
|
|||
#pragma once
|
||||
|
||||
#include "std/string.hpp"
|
||||
|
||||
class FeatureBuilder1;
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace impl
|
||||
{
|
||||
double GetLinearNormDistanceScore(double distance, double maxDistance);
|
||||
double GetNameSimilarityScore(string const & booking_name, string const & osm_name);
|
||||
} // namespace impl
|
||||
|
||||
namespace sponsored_scoring
|
||||
{
|
||||
/// Represents match scoring statistics of a sponsored object against an OSM object.
|
||||
template <typename SponsoredObject>
|
||||
struct MatchStats
|
||||
{
|
||||
/// Returns some score based on given fields and classificator tuning.
|
||||
double GetMatchingScore() const;
|
||||
/// Returns true if GetMatchingScore is greater than some threshold.
|
||||
bool IsMatched() const;
|
||||
|
||||
double m_linearNormDistanceScore{};
|
||||
double m_nameSimilarityScore{};
|
||||
};
|
||||
|
||||
/// Matches a given sponsored object against a given OSM object.
|
||||
template <typename SponsoredObject>
|
||||
MatchStats<SponsoredObject> Match(SponsoredObject const & o, FeatureBuilder1 const & fb);
|
||||
} // namespace sponsored_scoring
|
||||
} // namespace generator
|
|
@ -21,26 +21,22 @@ logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(
|
|||
|
||||
|
||||
def load_binary_list(path):
|
||||
"""
|
||||
Loads reference binary classifier output.
|
||||
"""
|
||||
"""Loads reference binary classifier output. """
|
||||
bits = []
|
||||
with open(path, 'r') as fd:
|
||||
for line in fd:
|
||||
if (not line.strip()) or line[0] == '#':
|
||||
if (not line.strip()) or line.startswith('#'):
|
||||
continue
|
||||
bits.append(1 if line[0] == 'y' else 0)
|
||||
bits.append(1 if line.startswith('y') else 0)
|
||||
return bits
|
||||
|
||||
|
||||
def load_score_list(path):
|
||||
"""
|
||||
Loads list of matching scores.
|
||||
"""
|
||||
"""Loads list of matching scores. """
|
||||
scores = []
|
||||
with open(path, 'r') as fd:
|
||||
for line in fd:
|
||||
if (not line.strip()) or line[0] == '#':
|
||||
if (not line.strip()) or line.startswith('#'):
|
||||
continue
|
||||
scores.append(float(re.search(r'result score: (\d*\.\d+)', line).group(1)))
|
||||
return scores
|
||||
|
|
115
tools/python/opentable_restaurants.py
Executable file
115
tools/python/opentable_restaurants.py
Executable file
|
@ -0,0 +1,115 @@
|
|||
#! /usr/bin/env python2.7
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import urllib2
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
# Initialize logging.
|
||||
logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s')
|
||||
|
||||
class OpentableDownloaderError(Exception):
    """Raised when an OpenTable access token cannot be obtained."""
|
||||
|
||||
class OpentableDownloader(object):
    """Downloads restaurant listings from the OpenTable sync API.

    An access token is requested right in the constructor using the client
    id (login) and the client secret (password). `download` then writes every
    listing as one JSON document per line into `opentable_filename`.
    """

    def __init__(self, login, password, opentable_filename, tsv_filename=None):
        self.login = login
        self.password = password
        self.token = None
        self.opentable_filename = opentable_filename
        self.tsv_filename = tsv_filename

        # TODO(mgsergio): Check if token is actual in functions.
        self._get_token()

    def download(self):
        """Fetches all listings page by page and dumps them to opentable_filename."""
        headers = self._add_auth_header({'Content-Type': 'application/json'})
        url = 'https://platform.opentable.com/sync/listings'

        with open(self.opentable_filename, 'w') as f:
            offset = 0
            while True:
                request = urllib2.Request(url + '?offset={}'.format(offset), headers=headers)
                # Headers carry credentials, so they are kept out of the log.
                logging.debug('Fetching data from %s', request.get_full_url())
                resp = urllib2.urlopen(request)
                # TODO(mgsergio): Handle exceptions
                data = json.loads(resp.read())
                for rest in data['items']:
                    print(json.dumps(rest), file=f)

                total_items = int(data['total_items'])
                offset = int(data['offset'])
                items_count = len(data['items'])

                # An empty page can never advance the offset: stop instead of
                # requesting the same page forever.
                if items_count == 0 or total_items <= offset + items_count:
                    break

                offset += items_count

    def _get_token(self):
        """Requests an OAuth token with client credentials and stores it in self.token."""
        url = 'https://oauth.opentable.com/api/v2/oauth/token?grant_type=client_credentials'
        headers = self._add_auth_header({})
        request = urllib2.Request(url, headers=headers)
        # Neither the credentials nor the received token are logged.
        logging.debug('Fetching token from %s', url)
        resp = urllib2.urlopen(request)
        # TODO(mgsergio): Handle exceptions
        # Read the body once: it is needed both for the error and for parsing.
        body = resp.read()
        if resp.getcode() != 200:
            raise OpentableDownloaderError("Cant't get token. Response: {}".format(body))
        self.token = json.loads(body)
        logging.debug('Token has been received')

    def _add_auth_header(self, headers):
        """Adds the Authorization header to `headers` in place and returns it.

        Basic auth built from login and password is used until a token is
        received; after that the token type and the access token are used.
        """
        if self.token is None:
            key = base64.b64encode('{}:{}'.format(self.login, self.password))
            headers['Authorization'] = 'Basic {}'.format(key)
        else:
            headers['Authorization'] = '{} {}'.format(self.token['token_type'],
                                                      self.token['access_token'])
        return headers
|
||||
|
||||
|
||||
def _tsv_field(value):
    """Converts value to text that is safe to put into a single TSV cell."""
    text = unicode(value)
    # A tab or a line break inside a value would shift or split the row.
    for separator in (u'\t', u'\r', u'\n'):
        text = text.replace(separator, u' ')
    return text


def make_tsv(data_file, output_file):
    """Converts downloaded listings into tab separated rows.

    data_file yields one JSON document per line; blank lines are skipped.
    Each row written to output_file has the columns: rid, latitude, longitude,
    name, address, reservation_url, phone_number. The address is built from
    the address, city and country fields and is left empty if joining them
    raises TypeError (e.g. one of them is null).
    """
    for line in data_file:
        if not line.strip():
            continue
        rest = json.loads(line)
        try:
            address = ' '.join([rest['address'], rest['city'], rest['country']])
        except TypeError:
            address = ''
        fields = [rest['rid'], rest['latitude'], rest['longitude'],
                  rest['name'], address, rest['reservation_url'],
                  rest['phone_number']]
        row = u'\t'.join(_tsv_field(field) for field in fields)
        print(row.encode('utf-8'), file=output_file)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command line entry point: optionally downloads the listings and/or
    # converts the downloaded file into tsv.
    parser = argparse.ArgumentParser(description='Downloads opentable data.')
    parser.add_argument('-d', '--download', action='store_true', help='Download data')
    parser.add_argument('--tsv', type=str, nargs='?', const='',
                        help='A file to put data into, stdout if value is empty '
                             'If ommited, no tsv data is generated')
    parser.add_argument('--opentable_data', type=str, help='Path to opentable data file')

    # TODO(mgsergio): Allow config instead.
    parser.add_argument('--client', required=True, help='Opentable client id')
    parser.add_argument('--secret', required=True, help="Opentable client's secret")

    args = parser.parse_args(sys.argv[1:])

    # Both actions read or write the data file, so fail early with a usage
    # message instead of crashing later on open(None).
    if (args.download or args.tsv is not None) and not args.opentable_data:
        parser.error('--opentable_data is required for --download and --tsv')

    if args.download:
        print('Downloading')
        loader = OpentableDownloader(args.client, args.secret, args.opentable_data)
        loader.download()
    if args.tsv is not None:
        with open(args.opentable_data) as data:
            if args.tsv:
                # Close the output file deterministically so all rows are flushed.
                with open(args.tsv, 'w') as tsv:
                    make_tsv(data, tsv)
            else:
                make_tsv(data, sys.stdout)
|
|
@ -170,6 +170,8 @@ ROADS_SCRIPT="$PYTHON_SCRIPTS_PATH/road_runner.py"
|
|||
HIERARCHY_SCRIPT="$PYTHON_SCRIPTS_PATH/hierarchy_to_countries.py"
|
||||
BOOKING_SCRIPT="$PYTHON_SCRIPTS_PATH/booking_hotels.py"
|
||||
BOOKING_FILE="${BOOKING_FILE:-$INTDIR/hotels.csv}"
|
||||
OPENTABLE_SCRIPT="$PYTHON_SCRIPTS_PATH/opentable_restaurants.py"
|
||||
OPENTABLE_FILE="${OPENTABLE_FILE:-$INTDIR/restaurants.csv}"
|
||||
TESTING_SCRIPT="$SCRIPTS_PATH/test_planet.sh"
|
||||
PYTHON="$(which python2.7)"
|
||||
MWM_VERSION_FORMAT="%s"
|
||||
|
@ -250,9 +252,20 @@ if [ "$MODE" == "coast" ]; then
|
|||
|
||||
# Download booking.com hotels. This takes around 3 hours, just like coastline processing.
|
||||
if [ ! -f "$BOOKING_FILE" -a -n "${BOOKING_USER-}" -a -n "${BOOKING_PASS-}" ]; then
|
||||
log "STATUS" "Step B: Starting background hotels downloading"
|
||||
$PYTHON $BOOKING_SCRIPT --user $BOOKING_USER --password $BOOKING_PASS --path "$INTDIR" --download --translate --output "$BOOKING_FILE" 2>"$LOG_PATH"/booking.log &
|
||||
echo "Hotels have been downloaded. Please ensure this line is before Step 4." >> "$PLANET_LOG"
|
||||
log "STATUS" "Step S1: Starting background hotels downloading"
|
||||
(
  # The download runs in the foreground of this background subshell, so the
  # marker below is written only after the script has finished.
  $PYTHON $BOOKING_SCRIPT --user $BOOKING_USER --password $BOOKING_PASS --path "$INTDIR" --download --translate --output "$BOOKING_FILE" 2>"$LOG_PATH"/booking.log
  echo "Hotels have been downloaded. Please ensure this line is before Step 4." >> "$PLANET_LOG"
) &
|
||||
fi
|
||||
|
||||
# Download opentable.com restaurants. This takes around 30 minutes.
|
||||
if [ ! -f "$OPENTABLE_FILE" -a -n "${OPENTABLE_USER-}" -a -n "${OPENTABLE_PASS-}" ]; then
  log "STATUS" "Step S2: Starting background restaurants downloading"
  (
    # The download runs in the foreground of this background subshell, so the
    # marker below is written only after the script has finished.
    # The script's option is --secret; credentials are quoted to survive
    # whitespace and glob characters.
    $PYTHON $OPENTABLE_SCRIPT --client "$OPENTABLE_USER" --secret "$OPENTABLE_PASS" --opentable_data "$INTDIR"/opentable.json --download --tsv "$OPENTABLE_FILE" 2>"$LOG_PATH"/opentable.log
    echo "Restaurants have been downloaded. Please ensure this line is before Step 4." >> "$PLANET_LOG"
  ) &
fi
|
||||
|
||||
[ ! -x "$OSMCTOOLS/osmupdate" ] && cc -x c "$OMIM_PATH/tools/osmctools/osmupdate.c" -o "$OSMCTOOLS/osmupdate"
|
||||
|
@ -400,6 +413,7 @@ if [ "$MODE" == "features" ]; then
|
|||
[ -n "$OPT_WORLD" ] && PARAMS_SPLIT="$PARAMS_SPLIT -generate_world"
|
||||
[ -n "$OPT_WORLD" -a "$NODE_STORAGE" == "map" ] && log "WARNING: generating world files with NODE_STORAGE=map may lead to an out of memory error. Try NODE_STORAGE=mem if it fails."
|
||||
[ -f "$BOOKING_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --booking_data=$BOOKING_FILE"
|
||||
[ -f "$OPENTABLE_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --opentable_data=$OPENTABLE_FILE"
|
||||
"$GENERATOR_TOOL" --intermediate_data_path="$INTDIR/" --node_storage=$NODE_STORAGE --osm_file_type=o5m --osm_file_name="$PLANET" \
|
||||
--data_path="$TARGET" --user_resource_path="$DATA_PATH/" $PARAMS_SPLIT 2>> "$PLANET_LOG"
|
||||
MODE=mwm
|
||||
|
|
Loading…
Add table
Reference in a new issue