Complete opentable integration into data generation.

This commit is contained in:
Sergey Magidovich 2016-10-06 15:11:16 +03:00
parent 51ada39cfc
commit 29cbd04e32
23 changed files with 144 additions and 124 deletions

View file

@ -807,6 +807,7 @@ world +
{}
sponsored +
booking -
opentable -
{}
sport +
american_football -

Binary file not shown.

View file

@ -76689,6 +76689,16 @@ cont {
}
}
}
cont {
name: "sponsored-opentable"
element {
scale: 18
symbol {
name: "hotel"
priority: 16000
}
}
}
cont {
name: "sport-american_football"
element {

Binary file not shown.

View file

@ -65014,6 +65014,23 @@ cont {
}
}
}
cont {
name: "sponsored-opentable"
element {
scale: 18
symbol {
name: "hotel"
priority: 16000
}
}
element {
scale: 19
symbol {
name: "hotel"
priority: 16000
}
}
}
cont {
name: "sport-american_football"
element {

View file

@ -1136,3 +1136,4 @@ olympics|stadium_main;1135;
olympics|stadium;1136;
olympics|water_sport;1137;
olympics|bike_sport;1138;
sponsored|opentable;1139;
1 building;[building];;addr:housenumber;name;1;
1136 olympics|stadium;1136;
1137 olympics|water_sport;1137;
1138 olympics|bike_sport;1138;
1139 sponsored|opentable;1139;

View file

@ -1136,3 +1136,4 @@ olympics|stadium_main
olympics|stadium
olympics|water_sport
olympics|bike_sport
sponsored|opentable

View file

@ -807,6 +807,7 @@ world 00000000000000000000 +
{}
sponsored 00000000000000000011 +
booking 00000000000000000011 -
opentable 00000000000000000011 -
{}
sport 00000000000000000000 +
american_football 00000000000000000111 -

View file

@ -143,7 +143,9 @@ Can be empty. Example: `$(ls ../../data/borders/{UK*,Ireland}.poly)`.
* `SRTM_PATH`: a path to `*.zip` files with SRTM data.
* `OSC`: a path to an osmChange file to apply after updating the planet.
* `BOOKING_FILE`: a path to hotels.csv with booking data.
* `BOOKING_USER` and `BOOKING_PATH`: user name and password for booking.com API
* `BOOKING_USER` and `BOOKING_PASS`: user name and password for booking.com API
to download hotels data.
* `OPENTABLE_FILE`: a path to restaurants.csv with opentable data.
* `OPENTABLE_USER` and `OPENTABLE_PASS`: user name and password for opentable.com API
to download restaurants data.
### Testing

View file

@ -1,19 +0,0 @@
#include "generator/aggregating_sponsored_dataset.hpp"
namespace generator
{
// Reports whether |fb| matches an object of the booking dataset, i.e. whether
// FindMatchingObjectId returns something other than BookingHotel::InvalidObjectId().
bool AggregatingSponsoredDataset::IsMatched(FeatureBuilder1 const & fb) const
{
return m_bookingDataset.FindMatchingObjectId(fb) != BookingHotel::InvalidObjectId();
}
// Forwards |fn| to the booking dataset's BuildOsmObjects.
void AggregatingSponsoredDataset::BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const
{
m_bookingDataset.BuildOsmObjects(fn);
}
// Returns the size reported by the booking dataset.
size_t AggregatingSponsoredDataset::Size() const
{
return m_bookingDataset.Size();
}
} // namespace generator

View file

@ -1,27 +0,0 @@
#pragma once
#include "generator/booking_dataset.hpp"
#include "generator/generate_info.hpp"
#include "std/unique_ptr.hpp"
#include "std/vector.hpp"
namespace generator
{
// Facade over the sponsored datasets used by the generator.
// It wraps a single BookingDataset and forwards every call to it.
class AggregatingSponsoredDataset
{
public:
// Builds the wrapped booking dataset from the booking data file name and
// the booking reference directory stored in |info|.
explicit AggregatingSponsoredDataset(feature::GenerateInfo const & info)
: m_bookingDataset(info.m_bookingDatafileName, info.m_bookingReferenceDir)
{
}
// Returns true when |e| matches an object of the wrapped dataset.
bool IsMatched(FeatureBuilder1 const & e) const;
// Forwards |fn| to the wrapped dataset's BuildOsmObjects.
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const;
// Returns the size reported by the wrapped dataset.
size_t Size() const;
private:
BookingDataset m_bookingDataset;
};
} // namespace generator

View file

@ -72,7 +72,34 @@ bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb)
return ftypes::IsHotelChecker::Instance()(fb.GetTypes());
}
// TODO(mgsergio): Try to eliminate as much code duplication as possible. (See opentable_dataset.cpp)
/// Prepares an OSM feature that has been matched with a booking hotel and passes it to |fn|.
///
/// For area features the name, the stars/website/phone metadata and every type that
/// equals the "tourism" type after ftype::TruncValue(type, 1) are removed before |fn|
/// is called. Features of any other geometry type are passed to |fn| unchanged.
/// The matched object id (first parameter) is not used by this specialization.
template <>
void BookingDataset::PreprocessMatchedOsmObject(ObjectId, FeatureBuilder1 & fb,
                                                function<void(FeatureBuilder1 &)> const fn) const
{
  // Turn a hotel into a simple building.
  if (fb.GetGeomType() == feature::GEOM_AREA)
  {
    // Remove all information about a hotel. Work on a copy of the params and
    // write it back with SetParams once all changes are made.
    auto params = fb.GetParams();
    params.ClearName();

    auto & meta = params.GetMetadata();
    meta.Drop(feature::Metadata::EType::FMD_STARS);
    meta.Drop(feature::Metadata::EType::FMD_WEBSITE);
    meta.Drop(feature::Metadata::EType::FMD_PHONE_NUMBER);

    // Only the resolved type value is needed inside the predicate, so the
    // classificator itself is not captured.
    auto const tourism = classif().GetTypeByPath({"tourism"});
    my::EraseIf(params.m_Types, [tourism](uint32_t type)
    {
      ftype::TruncValue(type, 1);
      return type == tourism;
    });

    fb.SetParams(params);
  }

  fn(fb);
}
template <>
void BookingDataset::BuildObject(Object const & hotel,
function<void(FeatureBuilder1 &)> const & fn) const

View file

@ -134,7 +134,7 @@ feature::GenerateInfo GetGenerateInfo()
{
feature::GenerateInfo info;
info.m_bookingDatafileName = FLAGS_booking;
info.m_opentableDataFile = FLAGS_opentable;
info.m_opentableDatafileName = FLAGS_opentable;
info.m_osmFileName = FLAGS_osm;
info.SetNodeStorageType("map");
info.SetOsmFileType("o5m");
@ -345,7 +345,7 @@ string GetDatasetFilePath<BookingDataset>(feature::GenerateInfo const & info)
template <>
string GetDatasetFilePath<OpentableDataset>(feature::GenerateInfo const & info)
{
return info.m_opentableDataFile;
return info.m_opentableDatafileName;
}
template <typename Dataset, typename Object = typename Dataset::Object>

View file

@ -42,9 +42,8 @@ struct GenerateInfo
string m_bookingDatafileName;
string m_bookingReferenceDir;
string m_opentableDataFile;
// TODO(mgsergio): Uncomment when I need this.
// string m_opentableReferenceDir;
string m_opentableDatafileName;
string m_opentableReferenceDir;
uint32_t m_versionDate = 0;

View file

@ -15,7 +15,6 @@ INCLUDEPATH *= $$ROOT_DIR/3party/gflags/src \
QT *= core
SOURCES += \
aggregating_sponsored_dataset.cpp \
altitude_generator.cpp \
booking_dataset.cpp \
booking_scoring.cpp \
@ -47,7 +46,6 @@ SOURCES += \
unpack_mwm.cpp \
HEADERS += \
aggregating_sponsored_dataset.hpp \
altitude_generator.hpp \
booking_dataset.hpp \
borders_generator.hpp \

View file

@ -74,7 +74,9 @@ DEFINE_string(osm_file_name, "", "Input osm area file.");
DEFINE_string(osm_file_type, "xml", "Input osm area file type [xml, o5m].");
DEFINE_string(user_resource_path, "", "User defined resource path for classificator.txt and etc.");
DEFINE_string(booking_data, "", "Path to booking data in .tsv format.");
DEFINE_string(booking_reference_path, "", "Path to mwm dataset for match booking addresses.");
DEFINE_string(booking_reference_path, "", "Path to mwm dataset for booking addresses matching.");
DEFINE_string(opentable_data, "", "Path to opentable data in .tsv format.");
DEFINE_string(opentable_reference_path, "", "Path to mwm dataset for opentable addresses matching.");
DEFINE_uint64(planet_version, my::SecondsSinceEpoch(),
"Version as seconds since epoch, by default - now.");
DEFINE_string(srtm_path, "",
@ -114,6 +116,8 @@ int main(int argc, char ** argv)
genInfo.m_preloadCache = FLAGS_preload_cache;
genInfo.m_bookingDatafileName = FLAGS_booking_data;
genInfo.m_bookingReferenceDir = FLAGS_booking_reference_path;
genInfo.m_opentableDatafileName = FLAGS_opentable_data;
genInfo.m_opentableReferenceDir = FLAGS_opentable_reference_path;
genInfo.m_versionDate = static_cast<uint32_t>(FLAGS_planet_version);

View file

@ -1,7 +1,7 @@
#include "generator/opentable_dataset.hpp"
//#include "generator/openatble_scoring.hpp" // or just sonsored scoring
#include "generator/feature_builder.hpp"
#include "generator/sponsored_scoring.hpp"
#include "indexer/classificator.hpp"
#include "indexer/ftypes_matcher.hpp"
@ -60,41 +60,28 @@ bool OpentableDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & f
if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
return false;
// TODO(mgsergio): Handle all types of restaurants:
// bar cafe (fast_food ??) pub restaurant
// return ftypes::IsRestaurantChecker::Instance()(fb.GetTypes());
return true;
return ftypes::IsFoodChecker::Instance()(fb.GetTypes());
}
// TODO(mgsergio): Try to eliminate as much code duplication as possible. (See booking_dataset.cpp)
template <>
void OpentableDataset::BuildObject(Object const & restaurant,
function<void(FeatureBuilder1 &)> const & fn) const
void OpentableDataset::PreprocessMatchedOsmObject(ObjectId const matchedObjId, FeatureBuilder1 & fb,
function<void(FeatureBuilder1 &)> const fn) const
{
FeatureBuilder1 fb;
FeatureParams params;
fb.SetCenter(MercatorBounds::FromLatLon(restaurant.m_lat, restaurant.m_lon));
FeatureParams params = fb.GetParams();
auto restaurant = GetObjectById(matchedObjId);
auto & metadata = params.GetMetadata();
// TODO(mgsergio): Rename FMD_SPONSORED_ID to FMD_BOOKING_ID.
metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(restaurant.m_id.Get()));
metadata.Set(feature::Metadata::FMD_WEBSITE, restaurant.m_descUrl);
// params.AddAddress(restaurant.address);
// TODO(mgsergio): addr:full ???
if (!restaurant.m_street.empty())
fb.AddStreet(restaurant.m_street);
if (!restaurant.m_houseNumber.empty())
fb.AddHouseNumber(restaurant.m_houseNumber);
params.AddName(StringUtf8Multilang::GetLangByCode(StringUtf8Multilang::kDefaultCode),
restaurant.m_name);
auto const & clf = classif();
params.AddType(clf.GetTypeByPath({"sponsored", "booking"}));
params.AddType(clf.GetTypeByPath({"sponsored", "opentable"}));
fb.SetParams(params);
@ -110,15 +97,14 @@ OpentableDataset::ObjectId OpentableDataset::FindMatchingObjectIdImpl(FeatureBui
return Object::InvalidObjectId();
// Find |kMaxSelectedElements| nearest values to a point.
auto const bookingIndexes = GetNearestObjects(MercatorBounds::ToLatLon(fb.GetKeyPoint()),
kMaxSelectedElements, kDistanceLimitInMeters);
auto const nearbyIds = GetNearestObjects(MercatorBounds::ToLatLon(fb.GetKeyPoint()),
kMaxSelectedElements, kDistanceLimitInMeters);
CHECK(false, ("Not implemented yet"));
// for (auto const j : bookingIndexes)
// {
// if (booking_scoring::Match(GetObjectById(j), fb).IsMatched())
// return j;
// }
for (auto const objId : nearbyIds)
{
if (sponsored_scoring::Match(GetObjectById(objId), fb).IsMatched())
return objId;
}
return Object::InvalidObjectId();
}

View file

@ -1,4 +1,3 @@
#include "generator/aggregating_sponsored_dataset.hpp"
#include "generator/coastlines_generator.hpp"
#include "generator/feature_generator.hpp"
#include "generator/intermediate_data.hpp"
@ -13,6 +12,9 @@
#include "generator/towns_dumper.hpp"
#include "generator/world_map_generator.hpp"
#include "generator/booking_dataset.hpp"
#include "generator/opentable_dataset.hpp"
#include "indexer/classificator.hpp"
#include "platform/platform.hpp"
@ -277,7 +279,8 @@ class MainFeaturesEmitter : public EmitterBase
string m_srcCoastsFile;
bool m_failOnCoasts;
generator::AggregatingSponsoredDataset m_dataset;
generator::BookingDataset m_bookingDataset;
generator::OpentableDataset m_opentableDataset;
/// Used to prepare a list of cities to serve as a list of nodes
/// for building a highway graph with OSRM for low zooms.
@ -300,7 +303,9 @@ public:
MainFeaturesEmitter(feature::GenerateInfo const & info)
: m_skippedElementsPath(info.GetIntermediateFileName("skipped_elements", ".lst"))
, m_failOnCoasts(info.m_failOnCoasts)
, m_dataset(info)
, m_bookingDataset(info.m_bookingDatafileName, info.m_bookingReferenceDir)
, m_opentableDataset(info.m_opentableDatafileName, info.m_opentableReferenceDir)
{
Classificator const & c = classif();
@ -342,44 +347,42 @@ public:
static uint32_t const placeType = classif().GetTypeByPath({"place"});
uint32_t const type = fb.GetParams().FindType(placeType, 1);
// TODO(mgsergio): Would it be better to have objects that store callbacks
// and can be piped: action-if-cond1 | action-if-cond-2 | ... ?
// The first object which performs an action terminates the chain.
if (type != ftype::GetEmptyValue() && !fb.GetName().empty())
{
m_places.ReplaceEqualInRect(
Place(fb, type),
[](Place const & p1, Place const & p2) { return p1.IsEqual(p2); },
[](Place const & p1, Place const & p2) { return p1.IsBetterThan(p2); });
return;
}
else if (m_dataset.IsMatched(fb))
{
m_skippedElements << DebugPrint(fb.GetMostGenericOsmId()) << endl;
// Turn a hotel into a simple building.
if (fb.GetGeomType() == feature::GEOM_AREA)
auto const bookingObjId = m_bookingDataset.FindMatchingObjectId(fb);
if (bookingObjId != generator::BookingHotel::InvalidObjectId())
{
m_bookingDataset.PreprocessMatchedOsmObject(bookingObjId, fb, [this](FeatureBuilder1 & fb)
{
// Remove all information about a hotel.
auto params = fb.GetParams();
params.ClearName();
auto & meta = params.GetMetadata();
meta.Drop(feature::Metadata::EType::FMD_STARS);
meta.Drop(feature::Metadata::EType::FMD_WEBSITE);
meta.Drop(feature::Metadata::EType::FMD_PHONE_NUMBER);
auto const & c = classif();
auto const tourism = c.GetTypeByPath({"tourism"});
my::EraseIf(params.m_Types, [&c, tourism](uint32_t type)
{
ftype::TruncValue(type, 1);
return type == tourism;
});
fb.SetParams(params);
m_skippedElements << "BOOKING\t" << DebugPrint(fb.GetMostGenericOsmId()) << endl;
Emit(fb);
}
});
return;
}
else
auto const opentableObjId = m_opentableDataset.FindMatchingObjectId(fb);
if (opentableObjId != generator::OpentableRestaurant::InvalidObjectId())
{
Emit(fb);
m_opentableDataset.PreprocessMatchedOsmObject(opentableObjId, fb, [this, opentableObjId](FeatureBuilder1 & fb)
{
m_skippedElements << "OPENTABLE\t" << opentableObjId.Get() << endl;
Emit(fb);
});
return;
}
LOG(LDEBUG, ("JUST EMIT"));
Emit(fb);
}
/// @return false if coasts are not merged and FLAG_fail_on_coasts is set
@ -387,8 +390,10 @@ public:
{
DumpSkippedElements();
// Emit all booking objecs to the map.
m_dataset.BuildOsmObjects([this](FeatureBuilder1 & fb) { Emit(fb); });
// Emit all required booking objects to the map.
m_bookingDataset.BuildOsmObjects([this](FeatureBuilder1 & fb) { Emit(fb); });
// No opentable objects should be emitted. Opentable data enriches OSM data
// with a link to a restaurant's reservation page.
m_places.ForEach([this](Place const & p)
{

View file

@ -62,6 +62,8 @@ public:
bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const;
ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const;
void PreprocessMatchedOsmObject(ObjectId matchedObjId, FeatureBuilder1 & fb,
function<void(FeatureBuilder1 &)> const fn) const;
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const;
protected:

View file

@ -134,12 +134,16 @@ void SponsoredDataset<SponsoredObject>::LoadData(istream & src, string const & a
m_objects.emplace(hotel.m_id, hotel);
}
// Try to get object address from existing MWMs.
if (!addressReferencePath.empty())
{
LOG(LINFO, ("Reference addresses for sponsored objects", addressReferencePath));
Platform & platform = GetPlatform();
string const backupPath = platform.WritableDir();
// TODO(mgsergio): What is this for?
// MWMs can be loaded only from a writable dir or from a resource dir;
// changing the resource dir can lead to problems with the classificator, so
// we change the writable dir.
platform.SetWritableDirForTests(addressReferencePath);
AddressMatcher addressMatcher;
@ -156,18 +160,16 @@ void SponsoredDataset<SponsoredObject>::LoadData(istream & src, string const & a
if (object.IsAddressPartsFilled())
++matchedNum;
}
// TODO(mgsergio): Fix names.
LOG(LINFO,
("Num of hotels:", m_objects.size(), "matched:", matchedNum, "empty addresses:", emptyAddr));
// TODO(mgsergio): What is this for?
platform.SetWritableDirForTests(backupPath);
}
for (auto const & item : m_objects)
{
auto const & hotel = item.second;
TBox b(TPoint(hotel.m_lat, hotel.m_lon), TPoint(hotel.m_lat, hotel.m_lon));
m_rtree.insert(make_pair(b, hotel.m_id));
auto const & object = item.second;
TBox b(TPoint(object.m_lat, object.m_lon), TPoint(object.m_lat, object.m_lon));
m_rtree.insert(make_pair(b, object.m_id));
}
}
} // namespace generator

View file

@ -1,6 +1,6 @@
#pragma once
#include "generator/booking_dataset.hpp"
#include "std/string.hpp"
class FeatureBuilder1;

View file

@ -33,7 +33,7 @@ class OpentableDownloader(object):
def download(self):
headers = self._add_auth_header({'Content-Type': 'application/json'})
url = 'https://platform.otqa.com/sync/listings'
url = 'https://platform.opentable.com/sync/listings'
with open(self.opentable_filename, 'w') as f:
offset = 0
@ -57,7 +57,7 @@ class OpentableDownloader(object):
offset += items_count
def _get_token(self):
url = 'https://oauth-pp.opentable.com/api/v2/oauth/token?grant_type=client_credentials'
url = 'https://oauth.opentable.com/api/v2/oauth/token?grant_type=client_credentials'
headers = self._add_auth_header({})
request = urllib2.Request(url, headers=headers)
logging.debug('Fetching token with headers %s', str(headers))
@ -111,7 +111,7 @@ if __name__ == '__main__':
loader.download()
if args.tsv is not None:
data = open(args.opentable_data)
tsv = open(args.tsv) if args.tsv else sys.stdout
tsv = open(args.tsv, 'w') if args.tsv else sys.stdout
try:
make_tsv(data, tsv)
finally:

View file

@ -170,6 +170,8 @@ ROADS_SCRIPT="$PYTHON_SCRIPTS_PATH/road_runner.py"
HIERARCHY_SCRIPT="$PYTHON_SCRIPTS_PATH/hierarchy_to_countries.py"
BOOKING_SCRIPT="$PYTHON_SCRIPTS_PATH/booking_hotels.py"
BOOKING_FILE="${BOOKING_FILE:-$INTDIR/hotels.csv}"
OPENTABLE_SCRIPT="$PYTHON_SCRIPTS_PATH/opentable_restaurants.py"
OPENTABLE_FILE="${OPENTABLE_FILE:-$INTDIR/restaurants.csv}"
TESTING_SCRIPT="$SCRIPTS_PATH/test_planet.sh"
PYTHON="$(which python2.7)"
MWM_VERSION_FORMAT="%s"
@ -255,6 +257,13 @@ if [ "$MODE" == "coast" ]; then
echo "Hotels have been downloaded. Please ensure this line is before Step 4." >> "$PLANET_LOG"
fi
# Download opentable.com restaurants in the background. This takes around 30 minutes.
# The step runs only when the restaurants file does not exist yet and both
# OPENTABLE_USER and OPENTABLE_PASS are set to non-empty values.
if [ ! -f "$OPENTABLE_FILE" -a -n "${OPENTABLE_USER-}" -a -n "${OPENTABLE_PASS-}" ]; then
  log "STATUS" "Step C: Starting background restaurants downloading"
  # Credentials and paths are quoted so that values containing spaces or glob
  # characters are passed as single arguments.
  # NOTE(review): the option is spelled `--secrete`; presumably this matches the downloader's CLI - confirm.
  "$PYTHON" "$OPENTABLE_SCRIPT" --client "$OPENTABLE_USER" --secrete "$OPENTABLE_PASS" --opentable_data "$INTDIR"/opentable.json --download --tsv "$OPENTABLE_FILE" 2>"$LOG_PATH"/opentable.log &
  # The download runs in the background, so this line is written to the planet
  # log right after the job is started, not when it finishes.
  echo "Restaurants have been downloaded. Please ensure this line is before Step 4." >> "$PLANET_LOG"
fi
[ ! -x "$OSMCTOOLS/osmupdate" ] && cc -x c "$OMIM_PATH/tools/osmctools/osmupdate.c" -o "$OSMCTOOLS/osmupdate"
[ ! -x "$OSMCTOOLS/osmfilter" ] && cc -x c -O3 "$OMIM_PATH/tools/osmctools/osmfilter.c" -o "$OSMCTOOLS/osmfilter"
if [ -n "$OPT_DOWNLOAD" ]; then
@ -400,6 +409,7 @@ if [ "$MODE" == "features" ]; then
[ -n "$OPT_WORLD" ] && PARAMS_SPLIT="$PARAMS_SPLIT -generate_world"
[ -n "$OPT_WORLD" -a "$NODE_STORAGE" == "map" ] && log "WARNING: generating world files with NODE_STORAGE=map may lead to an out of memory error. Try NODE_STORAGE=mem if it fails."
[ -f "$BOOKING_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --booking_data=$BOOKING_FILE"
[ -f "$OPENTABLE_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --opentable_data=$OPENTABLE_FILE"
"$GENERATOR_TOOL" --intermediate_data_path="$INTDIR/" --node_storage=$NODE_STORAGE --osm_file_type=o5m --osm_file_name="$PLANET" \
--data_path="$TARGET" --user_resource_path="$DATA_PATH/" $PARAMS_SPLIT 2>> "$PLANET_LOG"
MODE=mwm