Code review.

This commit is contained in:
Sergey Magidovich 2016-10-09 19:06:07 +03:00
parent 29cbd04e32
commit 35a73ff37c
22 changed files with 175 additions and 163 deletions

Binary file not shown.

View file

@ -76691,10 +76691,27 @@ cont {
}
cont {
name: "sponsored-opentable"
element {
scale: 16
symbol {
name: "restaurant"
apply_for_type: 1
priority: 16000
}
}
element {
scale: 17
symbol {
name: "restaurant"
apply_for_type: 1
priority: 16000
}
}
element {
scale: 18
symbol {
name: "hotel"
name: "restaurant"
apply_for_type: 1
priority: 16000
}
}

Binary file not shown.

View file

@ -65016,6 +65016,22 @@ cont {
}
cont {
name: "sponsored-opentable"
element {
scale: 16
symbol {
name: "restaurant"
apply_for_type: 1
priority: 16000
}
}
element {
scale: 17
symbol {
name: "restaurant"
apply_for_type: 1
priority: 16000
}
}
element {
scale: 18
symbol {

View file

@ -1136,4 +1136,4 @@ olympics|stadium_main;1135;
olympics|stadium;1136;
olympics|water_sport;1137;
olympics|bike_sport;1138;
sponsored|opentable;1139;
sponsored|opentable;1139;

1 building;[building];;addr:housenumber;name;1;
1136 olympics|stadium;1136;
1137 olympics|water_sport;1137;
1138 olympics|bike_sport;1138;
1139 sponsored|opentable;1139;

View file

@ -602,7 +602,8 @@ line|z16[man_made=pipeline][location=overground]
linecap: butt;
}
area|z16-[amenity=restaurant],
node|z16-[amenity=restaurant]
node|z16-[amenity=restaurant],
node|z16-[sponsored=opentable]
{
icon-image: restaurant.svg;
}

View file

@ -807,7 +807,7 @@ world 00000000000000000000 +
{}
sponsored 00000000000000000011 +
booking 00000000000000000011 -
opentable 00000000000000000011 -
opentable 00000000000000001111 -
{}
sport 00000000000000000000 +
american_football 00000000000000000111 -

View file

@ -8,61 +8,47 @@
#include "base/string_utils.hpp"
#include "boost/algorithm/string/replace.hpp"
namespace generator
{
namespace
{
string EscapeTabs(string const & str)
{
stringstream ss;
for (char c : str)
{
if (c == '\t')
ss << "\\t";
else
ss << c;
}
return ss.str();
}
} // namespace
// BookingHotel ------------------------------------------------------------------------------------
BookingHotel::BookingHotel(string const & src)
{
vector<string> rec;
strings::ParseCSVRow(src, '\t', rec);
CHECK(rec.size() == FieldsCount(), ("Error parsing hotels.tsv line:", EscapeTabs(src)));
CHECK_EQUAL(rec.size(), FieldsCount(), ("Error parsing hotels.tsv line:",
boost::replace_all_copy(src, "\t", "\\t")));
strings::to_uint(rec[Index(Fields::Id)], m_id.Get());
strings::to_uint(rec[FieldIndex(Fields::Id)], m_id.Get());
// TODO(mgsergio): Use ms::LatLon.
strings::to_double(rec[Index(Fields::Latitude)], m_lat);
strings::to_double(rec[Index(Fields::Longtitude)], m_lon);
strings::to_double(rec[FieldIndex(Fields::Latitude)], m_latLon.lat);
strings::to_double(rec[FieldIndex(Fields::Longtitude)], m_latLon.lon);
m_name = rec[Index(Fields::Name)];
m_address = rec[Index(Fields::Address)];
m_name = rec[FieldIndex(Fields::Name)];
m_address = rec[FieldIndex(Fields::Address)];
strings::to_uint(rec[Index(Fields::Stars)], m_stars);
strings::to_uint(rec[Index(Fields::PriceCategory)], m_priceCategory);
strings::to_double(rec[Index(Fields::RatingBooking)], m_ratingBooking);
strings::to_double(rec[Index(Fields::RatingUsers)], m_ratingUser);
strings::to_uint(rec[FieldIndex(Fields::Stars)], m_stars);
strings::to_uint(rec[FieldIndex(Fields::PriceCategory)], m_priceCategory);
strings::to_double(rec[FieldIndex(Fields::RatingBooking)], m_ratingBooking);
strings::to_double(rec[FieldIndex(Fields::RatingUsers)], m_ratingUser);
m_descUrl = rec[Index(Fields::DescUrl)];
m_descUrl = rec[FieldIndex(Fields::DescUrl)];
strings::to_uint(rec[Index(Fields::Type)], m_type);
strings::to_uint(rec[FieldIndex(Fields::Type)], m_type);
m_translations = rec[Index(Fields::Translations)];
m_translations = rec[FieldIndex(Fields::Translations)];
}
ostream & operator<<(ostream & s, BookingHotel const & h)
{
s << fixed << setprecision(7);
return s << "Id: " << h.m_id << "\t Name: " << h.m_name << "\t Address: " << h.m_address
<< "\t lat: " << h.m_lat << " lon: " << h.m_lon;
s << "Id: " << h.m_id << "\t Name: " << h.m_name << "\t Address: " << h.m_address
<< "\t lat: " << h.m_latLon.lat << " lon: " << h.m_latLon.lon;
return s;
}
// BookingDataset ----------------------------------------------------------------------------------
template <>
bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const
{
@ -79,7 +65,7 @@ void BookingDataset::PreprocessMatchedOsmObject(ObjectId, FeatureBuilder1 & fb,
// Turn a hotel into a simple building.
if (fb.GetGeomType() == feature::GEOM_AREA)
{
// Remove all information about a hotel.
// Remove all information about the hotel.
auto params = fb.GetParams();
params.ClearName();
auto & meta = params.GetMetadata();
@ -87,13 +73,12 @@ void BookingDataset::PreprocessMatchedOsmObject(ObjectId, FeatureBuilder1 & fb,
meta.Drop(feature::Metadata::EType::FMD_WEBSITE);
meta.Drop(feature::Metadata::EType::FMD_PHONE_NUMBER);
auto const & c = classif();
auto const tourism = c.GetTypeByPath({"tourism"});
my::EraseIf(params.m_Types, [&c, tourism](uint32_t type)
{
ftype::TruncValue(type, 1);
return type == tourism;
});
auto const tourism = classif().GetTypeByPath({"tourism"});
my::EraseIf(params.m_Types, [tourism](uint32_t type)
{
ftype::TruncValue(type, 1);
return type == tourism;
});
fb.SetParams(params);
}
@ -107,7 +92,7 @@ void BookingDataset::BuildObject(Object const & hotel,
FeatureBuilder1 fb;
FeatureParams params;
fb.SetCenter(MercatorBounds::FromLatLon(hotel.m_lat, hotel.m_lon));
fb.SetCenter(MercatorBounds::FromLatLon(hotel.m_latLon.lat, hotel.m_latLon.lon));
auto & metadata = params.GetMetadata();
metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(hotel.m_id.Get()));

View file

@ -2,6 +2,8 @@
#include "generator/sponsored_dataset.hpp"
#include "geometry/latlon.hpp"
#include "base/newtype.hpp"
#include "std/limits.hpp"
@ -9,7 +11,7 @@
namespace generator
{
// TODO(mgsergio): Try to get rid of code deuplication. (See OpenTableRestaurant)
// TODO(mgsergio): Try to get rid of code duplication. (See OpenTableRestaurant)
struct BookingHotel
{
NEWTYPE(uint32_t, ObjectId);
@ -38,13 +40,13 @@ struct BookingHotel
explicit BookingHotel(string const & src);
static constexpr size_t Index(Fields field) { return static_cast<size_t>(field); }
static constexpr size_t FieldIndex(Fields field) { return static_cast<size_t>(field); }
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
bool IsAddressPartsFilled() const { return !m_street.empty() || !m_houseNumber.empty(); }
bool HasAddresParts() const { return !m_street.empty() || !m_houseNumber.empty(); }
ObjectId m_id{InvalidObjectId()};
double m_lat = 0.0;
double m_lon = 0.0;
ms::LatLon m_latLon = ms::LatLon::Zero();
string m_name;
string m_street;
string m_houseNumber;

View file

@ -87,7 +87,7 @@ int main(int argc, char * argv[])
if (hotel.address.empty())
++emptyAddr;
if (hotel.IsAddressPartsFilled())
if (hotel.HasAddresParts())
{
++matchedNum;
cout << "[" << i << "/" << bookingDataset.Size() << "] Hotel: " << hotel.address

View file

@ -214,7 +214,7 @@ vector<SampleItem<Object>> ReadSample(istream & ist)
}
catch (ParseError const & e)
{
LOG(LERROR, ("Wrong format: line", lineNumber, e.Msg()));
LOG_SHORT(LERROR, ("Wrong format: line", lineNumber, e.Msg()));
exit(1);
}
@ -242,8 +242,7 @@ void GenerateFactors(Dataset const & dataset,
auto const score = generator::sponsored_scoring::Match(object, feature);
auto const center = MercatorBounds::ToLatLon(feature.GetKeyPoint());
double const distanceMeters = ms::DistanceOnEarth(center.lat, center.lon,
object.m_lat, object.m_lon);
double const distanceMeters = ms::DistanceOnEarth(center, object.m_latLon);
auto const matched = score.IsMatched();
ost << "# ------------------------------------------" << fixed << setprecision(6)
@ -257,8 +256,9 @@ void GenerateFactors(Dataset const & dataset,
<< endl;
ost << "# " << PrintBuilder(feature) << endl;
ost << "# " << object << endl;
ost << "# URL: https://www.openstreetmap.org/?mlat=" << object.m_lat
<< "&mlon=" << object.m_lon << "#map=18/" << object.m_lat << "/" << object.m_lon << endl;
ost << "# URL: https://www.openstreetmap.org/?mlat="
<< object.m_latLon.lat << "&mlon=" << object.m_latLon.lon << "#map=18/"
<< object.m_latLon.lat << "/" << object.m_latLon.lon << endl;
}
}
@ -277,6 +277,7 @@ void GenerateSample(Dataset const & dataset,
vector<osm::Id> elementIndexes(features.size());
boost::copy(features | boost::adaptors::map_keys, begin(elementIndexes));
// TODO(mgsergio): Try RandomSample (from search:: at the moment of writing).
shuffle(elementIndexes.begin(), elementIndexes.end(), minstd_rand(FLAGS_seed));
if (FLAGS_selection_size < elementIndexes.size())
elementIndexes.resize(FLAGS_selection_size);
@ -297,8 +298,7 @@ void GenerateSample(Dataset const & dataset,
auto const score = sponsored_scoring::Match(object, fb);
auto const center = MercatorBounds::ToLatLon(fb.GetKeyPoint());
double const distanceMeters = ms::DistanceOnEarth(center.lat, center.lon,
object.m_lat, object.m_lon);
double const distanceMeters = ms::DistanceOnEarth(center, object.m_latLon);
auto const matched = score.IsMatched();
outStream << "# ------------------------------------------" << fixed << setprecision(6)
@ -312,8 +312,8 @@ void GenerateSample(Dataset const & dataset,
outStream << "# " << PrintBuilder(fb) << endl;
outStream << "# " << object << endl;
outStream << "# URL: https://www.openstreetmap.org/?mlat="
<< object.m_lat << "&mlon=" << object.m_lon
<< "#map=18/" << object.m_lat << "/" << object.m_lon << endl;
<< object.m_latLon.lat << "&mlon=" << object.m_latLon.lon
<< "#map=18/" << object.m_latLon.lat << "/" << object.m_latLon.lon << endl;
}
if (!sponsoredIndexes.empty())
outStream << endl << endl;
@ -329,7 +329,7 @@ void GenerateSample(Dataset const & dataset,
if (file.is_open())
file << outStream.str();
else
LOG(LERROR, ("Can't output into", FLAGS_sample, strerror(errno)));
LOG_SHORT(LERROR, ("Can't output into", FLAGS_sample, strerror(errno)));
}
}
@ -369,7 +369,7 @@ void RunImpl(feature::GenerateInfo & info)
else
{
auto const sample = ReadSampleFromFile<Object>(FLAGS_sample);
LOG(LINFO, ("Sample size is", sample.size()));
LOG_SHORT(LINFO, ("Sample size is", sample.size()));
ofstream ost(FLAGS_factors);
CHECK(ost.is_open(), ("Can't open file", FLAGS_factors, strerror(errno)));
GenerateFactors<Dataset>(dataset, features, sample, ost);

View file

@ -33,7 +33,7 @@ MatchStats<BookingHotel> Match(BookingHotel const & h, FeatureBuilder1 const & f
MatchStats<BookingHotel> score;
auto const fbCenter = MercatorBounds::ToLatLon(fb.GetKeyPoint());
auto const distance = ms::DistanceOnEarth(fbCenter.lat, fbCenter.lon, h.m_lat, h.m_lon);
auto const distance = ms::DistanceOnEarth(fbCenter, h.m_latLon);
score.m_linearNormDistanceScore =
impl::GetLinearNormDistanceScore(distance, BookingDataset::kDistanceLimitInMeters);

View file

@ -63,7 +63,7 @@ HEADERS += \
generate_info.hpp \
intermediate_data.hpp\
intermediate_elements.hpp\
opentable_datatset.hpp \
opentable_dataset.hpp \
osm2meta.hpp \
osm2type.hpp \
osm_element.hpp \

View file

@ -8,52 +8,35 @@
#include "base/string_utils.hpp"
#include "boost/algorithm/string/replace.hpp"
namespace generator
{
namespace
{
// Returns a copy of |str| in which every tab character is replaced with the
// two-character sequence backslash + 't'. All other characters are copied as is.
// Used to print a raw TSV row in an error message without literal tabs.
string EscapeTabs(string const & str)
{
  string escaped;
  escaped.reserve(str.size());
  for (char const ch : str)
  {
    if (ch != '\t')
      escaped.push_back(ch);
    else
      escaped += "\\t";
  }
  return escaped;
}
} // namespace
// OpentableRestaurant ------------------------------------------------------------------------------
OpentableRestaurant::OpentableRestaurant(string const & src)
{
vector<string> rec;
strings::ParseCSVRow(src, '\t', rec);
CHECK_EQUAL(rec.size(), FieldsCount(), ("Error parsing restaurants.tsv line:", EscapeTabs(src)));
CHECK_EQUAL(rec.size(), FieldsCount(), ("Error parsing restaurants.tsv line:",
boost::replace_all_copy(src, "\t", "\\t")));
strings::to_uint(rec[Index(Fields::Id)], m_id.Get());
// TODO(mgsergio): Use ms::LatLon.
strings::to_double(rec[Index(Fields::Latitude)], m_lat);
strings::to_double(rec[Index(Fields::Longtitude)], m_lon);
strings::to_uint(rec[FieldIndex(Fields::Id)], m_id.Get());
strings::to_double(rec[FieldIndex(Fields::Latitude)], m_latLon.lat);
strings::to_double(rec[FieldIndex(Fields::Longtitude)], m_latLon.lon);
m_name = rec[Index(Fields::Name)];
m_address = rec[Index(Fields::Address)];
m_descUrl = rec[Index(Fields::DescUrl)];
m_name = rec[FieldIndex(Fields::Name)];
m_address = rec[FieldIndex(Fields::Address)];
m_descUrl = rec[FieldIndex(Fields::DescUrl)];
}
ostream & operator<<(ostream & s, OpentableRestaurant const & h)
{
s << fixed << setprecision(7);
return s << "Id: " << h.m_id << "\t Name: " << h.m_name << "\t Address: " << h.m_address
<< "\t lat: " << h.m_lat << " lon: " << h.m_lon;
<< "\t lat: " << h.m_latLon.lat << " lon: " << h.m_latLon.lon;
}
// OpentableDataset ---------------------------------------------------------------------------------
template <>
bool OpentableDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const
{

View file

@ -2,6 +2,8 @@
#include "generator/sponsored_dataset.hpp"
#include "geometry/latlon.hpp"
#include "base/newtype.hpp"
#include "std/limits.hpp"
@ -9,7 +11,7 @@
namespace generator
{
// TODO(mgsergio): Try to get rid of code deuplication. (See BookingHotel)
// TODO(mgsergio): Try to get rid of code duplication. (See BookingHotel)
struct OpentableRestaurant
{
NEWTYPE(uint32_t, ObjectId);
@ -35,13 +37,13 @@ struct OpentableRestaurant
explicit OpentableRestaurant(string const & src);
static constexpr size_t Index(Fields field) { return static_cast<size_t>(field); }
static constexpr size_t FieldIndex(Fields field) { return static_cast<size_t>(field); }
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
bool IsAddressPartsFilled() const { return !m_street.empty() || !m_houseNumber.empty(); }
bool HasAddresParts() const { return !m_street.empty() || !m_houseNumber.empty(); }
ObjectId m_id{InvalidObjectId()};
double m_lat = 0.0;
double m_lon = 0.0;
ms::LatLon m_latLon = ms::LatLon::Zero();
string m_name;
string m_street;
string m_houseNumber;

View file

@ -27,17 +27,17 @@ bool MatchStats<OpentableRestaurant>::IsMatched() const
}
template <>
MatchStats<OpentableRestaurant> Match(OpentableRestaurant const & h, FeatureBuilder1 const & fb)
MatchStats<OpentableRestaurant> Match(OpentableRestaurant const & r, FeatureBuilder1 const & fb)
{
MatchStats<OpentableRestaurant> score;
auto const fbCenter = MercatorBounds::ToLatLon(fb.GetKeyPoint());
auto const distance = ms::DistanceOnEarth(fbCenter.lat, fbCenter.lon, h.m_lat, h.m_lon);
auto const distance = ms::DistanceOnEarth(fbCenter, r.m_latLon);
score.m_linearNormDistanceScore =
impl::GetLinearNormDistanceScore(distance, OpentableDataset::kDistanceLimitInMeters);
score.m_nameSimilarityScore =
impl::GetNameSimilarityScore(h.m_name, fb.GetName(StringUtf8Multilang::kDefaultCode));
impl::GetNameSimilarityScore(r.m_name, fb.GetName(StringUtf8Multilang::kDefaultCode));
return score;
}

View file

@ -362,9 +362,10 @@ public:
auto const bookingObjId = m_bookingDataset.FindMatchingObjectId(fb);
if (bookingObjId != generator::BookingHotel::InvalidObjectId())
{
m_bookingDataset.PreprocessMatchedOsmObject(bookingObjId, fb, [this](FeatureBuilder1 & fb)
m_bookingDataset.PreprocessMatchedOsmObject(bookingObjId, fb, [this, bookingObjId](FeatureBuilder1 & fb)
{
m_skippedElements << "BOOKING\t" << DebugPrint(fb.GetMostGenericOsmId()) << endl;
m_skippedElements << "BOOKING\t" << DebugPrint(fb.GetMostGenericOsmId())
<< '\t' << bookingObjId.Get() << endl;
Emit(fb);
});
return;
@ -375,13 +376,13 @@ public:
{
m_opentableDataset.PreprocessMatchedOsmObject(opentableObjId, fb, [this, opentableObjId](FeatureBuilder1 & fb)
{
m_skippedElements << "OPENTABLE\t" << opentableObjId.Get() << endl;
m_skippedElements << "OPENTABLE\t" << DebugPrint(fb.GetMostGenericOsmId())
<< '\t' << opentableObjId.Get() << endl;
Emit(fb);
});
return;
}
LOG(LDEBUG, ("JUST EMIT"));
Emit(fb);
}
@ -390,9 +391,9 @@ public:
{
DumpSkippedElements();
// Emit all required booking objecs to the map.
// Emit all required booking objects to the map.
m_bookingDataset.BuildOsmObjects([this](FeatureBuilder1 & fb) { Emit(fb); });
// No opentable objects should be emitted. Opentable data enriches som data
// No opentable objects should be emitted. Opentable data enriches some data
// with a link to a restaurant's reservation page.
m_places.ForEach([this](Place const & p)

View file

@ -27,21 +27,7 @@ template <typename SponsoredObject>
class SponsoredDataset
{
public:
using Object = SponsoredObject;
private:
class AddressMatcher
{
public:
AddressMatcher();
void operator()(Object & object);
private:
Index m_index;
unique_ptr<search::ReverseGeocoder> m_coder;
};
public:
using Object = SponsoredObject;
using ObjectId = typename Object::ObjectId;
static double constexpr kDistanceLimitInMeters = 150;
@ -57,7 +43,7 @@ public:
vector<ObjectId> GetNearestObjects(ms::LatLon const & latLon, size_t limit,
double maxDistance = 0.0) const;
/// @return true if |fb| satisfies some necesary conditions to match one or serveral
/// @return true if |fb| satisfies some necessary conditions to match one or several
/// objects from dataset.
bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const;
ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const;
@ -67,14 +53,26 @@ public:
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const;
protected:
map<ObjectId, Object> m_objects;
class AddressMatcher
{
public:
AddressMatcher();
void operator()(Object & object);
using TPoint = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
using TBox = boost::geometry::model::box<TPoint>;
using TValue = pair<TBox, ObjectId>;
private:
Index m_index;
unique_ptr<search::ReverseGeocoder> m_coder;
};
// TODO(mgsergio): Get rid of Box since boost::rtree supports point as value type.
// TODO(mgsergio): Use mercator instead of latlon or boost::geometry::cs::spherical_equatorial
// instead of boost::geometry::cs::cartesian.
using Point = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
using Box = boost::geometry::model::box<Point>;
using Value = pair<Box, ObjectId>;
// Create the rtree using default constructor.
boost::geometry::index::rtree<TValue, boost::geometry::index::quadratic<16>> m_rtree;
boost::geometry::index::rtree<Value, boost::geometry::index::quadratic<16>> m_rtree;
void BuildObject(Object const & object,
function<void(FeatureBuilder1 &)> const & fn) const;
@ -83,6 +81,8 @@ protected:
/// @return an id of a matched object or kInvalidObjectId on failure.
ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const;
map<ObjectId, Object> m_objects;
};
} // namespace generator

View file

@ -10,6 +10,7 @@
namespace generator
{
// AddressMatcher ----------------------------------------------------------------------------------
template <typename SponsoredObject>
SponsoredDataset<SponsoredObject>::AddressMatcher::AddressMatcher()
{
@ -28,7 +29,7 @@ SponsoredDataset<SponsoredObject>::AddressMatcher::AddressMatcher()
}
catch (RootException const & ex)
{
CHECK(false, ("Bad mwm file:", localFile));
CHECK(false, (ex.Msg(), "Bad mwm file:", localFile));
}
}
@ -39,11 +40,13 @@ template <typename SponsoredObject>
void SponsoredDataset<SponsoredObject>::AddressMatcher::operator()(Object & object)
{
search::ReverseGeocoder::Address addr;
m_coder->GetNearbyAddress(MercatorBounds::FromLatLon(object.m_lat, object.m_lon), addr);
m_coder->GetNearbyAddress(MercatorBounds::FromLatLon(object.m_latLon), addr);
object.m_street = addr.GetStreetName();
object.m_houseNumber = addr.GetHouseNumber();
}
// SponsoredDataset --------------------------------------------------------------------------------
template <typename SponsoredObject>
SponsoredDataset<SponsoredObject>::SponsoredDataset(string const & dataPath, string const & addressReferencePath)
{
@ -103,17 +106,17 @@ SponsoredDataset<SponsoredObject>::FindMatchingObjectId(FeatureBuilder1 const &
template <typename SponsoredObject>
vector<typename SponsoredDataset<SponsoredObject>::ObjectId>
SponsoredDataset<SponsoredObject>::GetNearestObjects(ms::LatLon const & latLon, size_t const limit,
double const maxDistance /* = 0.0 */) const
double const maxDistanceMeters /* = 0.0 */) const
{
namespace bgi = boost::geometry::index;
vector<ObjectId> indexes;
for_each(bgi::qbegin(m_rtree, bgi::nearest(TPoint(latLon.lat, latLon.lon), limit)),
bgi::qend(m_rtree), [this, &latLon, &indexes, maxDistance](TValue const & v)
for_each(bgi::qbegin(m_rtree, bgi::nearest(Point(latLon.lat, latLon.lon), limit)),
bgi::qend(m_rtree), [this, &latLon, &indexes, maxDistanceMeters](Value const & v)
{
auto const & object = GetObjectById(v.second);
double const dist = ms::DistanceOnEarth(latLon.lat, latLon.lon, object.m_lat, object.m_lon);
if (maxDistance != 0.0 && dist > maxDistance /* max distance in meters */)
double const dist = ms::DistanceOnEarth(latLon, object.m_latLon);
if (maxDistanceMeters != 0.0 && dist > maxDistanceMeters /* max distance in meters */)
return;
indexes.emplace_back(v.second);
@ -148,27 +151,28 @@ void SponsoredDataset<SponsoredObject>::LoadData(istream & src, string const & a
AddressMatcher addressMatcher;
size_t matchedNum = 0;
size_t emptyAddr = 0;
size_t matchedCount = 0;
size_t emptyCount = 0;
for (auto & item : m_objects)
{
auto & object = item.second;
addressMatcher(object);
if (object.m_address.empty())
++emptyAddr;
if (object.IsAddressPartsFilled())
++matchedNum;
++emptyCount;
if (object.HasAddresParts())
++matchedCount;
}
LOG(LINFO,
("Num of hotels:", m_objects.size(), "matched:", matchedNum, "empty addresses:", emptyAddr));
("Num of hotels:", m_objects.size(), "matched:", matchedCount, "empty addresses:", emptyCount));
platform.SetWritableDirForTests(backupPath);
}
for (auto const & item : m_objects)
{
auto const & object = item.second;
TBox b(TPoint(object.m_lat, object.m_lon), TPoint(object.m_lat, object.m_lon));
Box b(Point(object.m_latLon.lat, object.m_latLon.lon),
Point(object.m_latLon.lat, object.m_latLon.lon));
m_rtree.insert(make_pair(b, object.m_id));
}
}

View file

@ -5,8 +5,6 @@
#include "geometry/distance_on_sphere.hpp"
// #include "base/stl_iterator.hpp"
#include "std/algorithm.hpp"
#include "std/vector.hpp"
@ -14,7 +12,7 @@ namespace
{
using WeightedBagOfWords = vector<pair<strings::UniString, double>>;
vector<strings::UniString> StringToSetOfWords(string const & str)
vector<strings::UniString> StringToWords(string const & str)
{
vector<strings::UniString> result;
search::NormalizeAndTokenizeString(str, result, search::Delimiters{});
@ -28,7 +26,7 @@ WeightedBagOfWords MakeWeightedBagOfWords(vector<strings::UniString> const & wor
auto constexpr kTfIdfScorePlaceholder = 1;
WeightedBagOfWords result;
for (auto i = 0; i < words.size(); ++i)
for (size_t i = 0; i < words.size(); ++i)
{
result.emplace_back(words[i], kTfIdfScorePlaceholder);
while (i + 1 < words.size() && words[i] == words[i + 1])
@ -74,6 +72,8 @@ double WeightedBagOfWordsCos(WeightedBagOfWords const & lhs, WeightedBagOfWords
auto const lhsLength = sqrt(WeightedBagsDotProduct(lhs, lhs));
auto const rhsLength = sqrt(WeightedBagsDotProduct(rhs, rhs));
// WeightedBagsDotProduct returns 0.0 if lhs.empty() || rhs.empty() or
// if every element of either lhs or rhs is 0.0.
if (product == 0.0)
return 0.0;
@ -87,14 +87,15 @@ namespace impl
{
/// Converts |distance| into a score that falls linearly as the distance grows.
/// |distance| is first passed through my::clamp with bounds 0 and |maxDistance|,
/// then the result is 1.0 minus the clamped distance divided by |maxDistance|.
/// |maxDistance| must not be 0 (checked), since it is used as the divisor.
double GetLinearNormDistanceScore(double distance, double const maxDistance)
{
  CHECK_NOT_EQUAL(maxDistance, 0.0, ("maxDistance cannot be 0."));

  double const clampedDistance = my::clamp(distance, 0, maxDistance);
  double const normalized = clampedDistance / maxDistance;
  return 1.0 - normalized;
}
double GetNameSimilarityScore(string const & booking_name, string const & osm_name)
{
auto const aws = MakeWeightedBagOfWords(StringToSetOfWords(booking_name));
auto const bws = MakeWeightedBagOfWords(StringToSetOfWords(osm_name));
auto const aws = MakeWeightedBagOfWords(StringToWords(booking_name));
auto const bws = MakeWeightedBagOfWords(StringToWords(osm_name));
if (aws.empty() && bws.empty())
return 1.0;

View file

@ -1,4 +1,4 @@
#! /usr/bin/env python2
#! /usr/bin/env python2.7
# coding: utf-8
from __future__ import print_function
@ -28,7 +28,7 @@ class OpentableDownloader(object):
self.opentable_filename = opentable_filename
self.tsv_filename = tsv_filename
# TODO(mgsergio): Check if toke is atual in functions.
# TODO(mgsergio): Check that the token is still valid inside the functions that use it.
self._get_token()
def download(self):
@ -101,19 +101,15 @@ if __name__ == '__main__':
# TODO(mgsergio): Allow config instead.
parser.add_argument('--client', required=True, help='Opentable client id')
parser.add_argument('--secrete', required=True, help="Opentable client's secrete")
parser.add_argument('--secret', required=True, help="Opentable client's secret")
args = parser.parse_args(sys.argv[1:])
if args.download:
print('Downloading')
loader = OpentableDownloader(args.client, args.secrete, args.opentable_data)
loader = OpentableDownloader(args.client, args.secret, args.opentable_data)
loader.download()
if args.tsv is not None:
data = open(args.opentable_data)
tsv = open(args.tsv, 'w') if args.tsv else sys.stdout
try:
make_tsv(data, tsv)
finally:
data.close()
tsv.close()
make_tsv(data, tsv)

View file

@ -252,16 +252,20 @@ if [ "$MODE" == "coast" ]; then
# Download booking.com hotels. This takes around 3 hours, just like coastline processing.
if [ ! -f "$BOOKING_FILE" -a -n "${BOOKING_USER-}" -a -n "${BOOKING_PASS-}" ]; then
log "STATUS" "Step B: Starting background hotels downloading"
$PYTHON $BOOKING_SCRIPT --user $BOOKING_USER --password $BOOKING_PASS --path "$INTDIR" --download --translate --output "$BOOKING_FILE" 2>"$LOG_PATH"/booking.log &
echo "Hotels have been downloaded. Please ensure this line is before Step 4." >> "$PLANET_LOG"
log "STATUS" "Step S1: Starting background hotels downloading"
(
  # The whole group runs as one background job so the rest of the script can continue.
  # Inside the group the download runs in the foreground: with a trailing '&' here the
  # marker line below would be appended to the planet log immediately, before the
  # download had finished.
  $PYTHON $BOOKING_SCRIPT --user "$BOOKING_USER" --password "$BOOKING_PASS" --path "$INTDIR" --download --translate --output "$BOOKING_FILE" 2>"$LOG_PATH"/booking.log
  echo "Hotels have been downloaded. Please ensure this line is before Step 4." >> "$PLANET_LOG"
) &
fi
# Download opentable.com restaurants. This takes around 30 minutes.
if [ ! -f "$OPENTABLE_FILE" -a -n "${OPENTABLE_USER-}" -a -n "${OPENTABLE_PASS-}" ]; then
log "STATUS" "Step C: Starting background restaurants downloading"
$PYTHON $OPENTABLE_SCRIPT --client $OPENTABLE_USER --secrete $OPENTABLE_PASS --opentable_data "$INTDIR"/opentable.json --download --tsv "$OPENTABLE_FILE" 2>"$LOG_PATH"/opentable.log &
echo "Hotels have been downloaded. Please ensure this line is before Step 4." >> "$PLANET_LOG"
log "STATUS" "Step S2: Starting background restaurants downloading"
(
  # The whole group runs as one background job so the rest of the script can continue.
  # Inside the group the download runs in the foreground: with a trailing '&' here the
  # marker line below would be appended to the planet log immediately, before the
  # download had finished.
  # The option is spelled --secret to match the downloader script's argument parser.
  $PYTHON $OPENTABLE_SCRIPT --client "$OPENTABLE_USER" --secret "$OPENTABLE_PASS" --opentable_data "$INTDIR"/opentable.json --download --tsv "$OPENTABLE_FILE" 2>"$LOG_PATH"/opentable.log
  echo "Restaurants have been downloaded. Please ensure this line is before Step 4." >> "$PLANET_LOG"
) &
fi
[ ! -x "$OSMCTOOLS/osmupdate" ] && cc -x c "$OMIM_PATH/tools/osmctools/osmupdate.c" -o "$OSMCTOOLS/osmupdate"