forked from organicmaps/organicmaps
Add SponsoredDataset as an abstraction.
This commit is contained in:
parent
78ea6a4038
commit
389ddf670f
10 changed files with 438 additions and 306 deletions
|
@ -3,133 +3,46 @@
|
|||
#include "generator/booking_scoring.hpp"
|
||||
#include "generator/feature_builder.hpp"
|
||||
|
||||
#include "platform/local_country_file_utils.hpp"
|
||||
#include "platform/platform.hpp"
|
||||
|
||||
#include "indexer/classificator.hpp"
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/fstream.hpp"
|
||||
#include "std/iostream.hpp"
|
||||
#include "std/sstream.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace
|
||||
{
|
||||
string EscapeTabs(string const & str)
|
||||
{
|
||||
stringstream ss;
|
||||
for (char c : str)
|
||||
{
|
||||
if (c == '\t')
|
||||
ss << "\\t";
|
||||
else
|
||||
ss << c;
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
} // namespace
|
||||
// BookingDataset::AddressMatcher::AddressMatcher()
|
||||
// {
|
||||
// vector<platform::LocalCountryFile> localFiles;
|
||||
|
||||
BookingDataset::BookingId const BookingDataset::kInvalidHotelIndex = BookingId(numeric_limits<BookingId::RepType>::max());
|
||||
// Platform & platform = GetPlatform();
|
||||
// platform::FindAllLocalMapsInDirectoryAndCleanup(platform.WritableDir(), 0 /* version */,
|
||||
// -1 /* latestVersion */, localFiles);
|
||||
|
||||
BookingDataset::Hotel::Hotel(string const & src)
|
||||
{
|
||||
vector<string> rec;
|
||||
strings::ParseCSVRow(src, '\t', rec);
|
||||
CHECK(rec.size() == FieldsCount(), ("Error parsing hotels.tsv line:", EscapeTabs(src)));
|
||||
// for (platform::LocalCountryFile const & localFile : localFiles)
|
||||
// {
|
||||
// LOG(LINFO, ("Found mwm:", localFile));
|
||||
// try
|
||||
// {
|
||||
// m_index.RegisterMap(localFile);
|
||||
// }
|
||||
// catch (RootException const & ex)
|
||||
// {
|
||||
// CHECK(false, ("Bad mwm file:", localFile));
|
||||
// }
|
||||
// }
|
||||
|
||||
strings::to_uint(rec[Index(Fields::Id)], id.Get());
|
||||
strings::to_double(rec[Index(Fields::Latitude)], lat);
|
||||
strings::to_double(rec[Index(Fields::Longtitude)], lon);
|
||||
// m_coder = make_unique<search::ReverseGeocoder>(m_index);
|
||||
// }
|
||||
|
||||
name = rec[Index(Fields::Name)];
|
||||
address = rec[Index(Fields::Address)];
|
||||
// void BookingDataset::AddressMatcher::operator()(Hotel & hotel)
|
||||
// {
|
||||
// search::ReverseGeocoder::Address addr;
|
||||
// m_coder->GetNearbyAddress(MercatorBounds::FromLatLon(hotel.lat, hotel.lon), addr);
|
||||
// hotel.street = addr.GetStreetName();
|
||||
// hotel.houseNumber = addr.GetHouseNumber();
|
||||
// }
|
||||
|
||||
strings::to_uint(rec[Index(Fields::Stars)], stars);
|
||||
strings::to_uint(rec[Index(Fields::PriceCategory)], priceCategory);
|
||||
strings::to_double(rec[Index(Fields::RatingBooking)], ratingBooking);
|
||||
strings::to_double(rec[Index(Fields::RatingUsers)], ratingUser);
|
||||
|
||||
descUrl = rec[Index(Fields::DescUrl)];
|
||||
|
||||
strings::to_uint(rec[Index(Fields::Type)], type);
|
||||
|
||||
translations = rec[Index(Fields::Translations)];
|
||||
}
|
||||
|
||||
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h)
|
||||
{
|
||||
s << fixed << setprecision(7);
|
||||
return s << "Id: " << h.id << "\t Name: " << h.name << "\t Address: " << h.address
|
||||
<< "\t lat: " << h.lat << " lon: " << h.lon;
|
||||
}
|
||||
|
||||
BookingDataset::AddressMatcher::AddressMatcher()
|
||||
{
|
||||
vector<platform::LocalCountryFile> localFiles;
|
||||
|
||||
Platform & platform = GetPlatform();
|
||||
platform::FindAllLocalMapsInDirectoryAndCleanup(platform.WritableDir(), 0 /* version */,
|
||||
-1 /* latestVersion */, localFiles);
|
||||
|
||||
for (platform::LocalCountryFile const & localFile : localFiles)
|
||||
{
|
||||
LOG(LINFO, ("Found mwm:", localFile));
|
||||
try
|
||||
{
|
||||
m_index.RegisterMap(localFile);
|
||||
}
|
||||
catch (RootException const & ex)
|
||||
{
|
||||
CHECK(false, ("Bad mwm file:", localFile));
|
||||
}
|
||||
}
|
||||
|
||||
m_coder = make_unique<search::ReverseGeocoder>(m_index);
|
||||
}
|
||||
|
||||
void BookingDataset::AddressMatcher::operator()(Hotel & hotel)
|
||||
{
|
||||
search::ReverseGeocoder::Address addr;
|
||||
m_coder->GetNearbyAddress(MercatorBounds::FromLatLon(hotel.lat, hotel.lon), addr);
|
||||
hotel.street = addr.GetStreetName();
|
||||
hotel.houseNumber = addr.GetHouseNumber();
|
||||
}
|
||||
|
||||
BookingDataset::BookingDataset(string const & dataPath, string const & addressReferencePath)
|
||||
{
|
||||
if (dataPath.empty())
|
||||
return;
|
||||
|
||||
ifstream dataSource(dataPath);
|
||||
if (!dataSource.is_open())
|
||||
{
|
||||
LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
|
||||
return;
|
||||
}
|
||||
|
||||
LoadHotels(dataSource, addressReferencePath);
|
||||
}
|
||||
|
||||
BookingDataset::BookingDataset(istream & dataSource, string const & addressReferencePath)
|
||||
{
|
||||
LoadHotels(dataSource, addressReferencePath);
|
||||
}
|
||||
|
||||
BookingDataset::BookingId BookingDataset::GetMatchingHotelId(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
if (CanBeBooking(fb))
|
||||
return MatchWithBooking(fb);
|
||||
return kInvalidHotelIndex;
|
||||
}
|
||||
|
||||
bool BookingDataset::CanBeBooking(FeatureBuilder1 const & fb) const
|
||||
bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
|
||||
return false;
|
||||
|
@ -137,49 +50,8 @@ bool BookingDataset::CanBeBooking(FeatureBuilder1 const & fb) const
|
|||
return ftypes::IsHotelChecker::Instance()(fb.GetTypes());
|
||||
}
|
||||
|
||||
BookingDataset::Hotel const & BookingDataset::GetHotelById(BookingId const id) const
|
||||
{
|
||||
auto const it = m_hotels.find(id);
|
||||
CHECK(it != end(m_hotels), ("Got wrong hotel id:", id));
|
||||
return it->second;
|
||||
}
|
||||
|
||||
BookingDataset::Hotel & BookingDataset::GetHotelById(BookingId const id)
|
||||
{
|
||||
auto it = m_hotels.find(id);
|
||||
CHECK(it != end(m_hotels), ("Got wrong hotel id:", id));
|
||||
return it->second;
|
||||
}
|
||||
|
||||
vector<BookingDataset::BookingId> BookingDataset::GetNearestHotels(
|
||||
ms::LatLon const & latLon, size_t const limit,
|
||||
double const maxDistance /* = 0.0 */) const
|
||||
{
|
||||
namespace bgi = boost::geometry::index;
|
||||
|
||||
vector<BookingId> indexes;
|
||||
for_each(bgi::qbegin(m_rtree, bgi::nearest(TPoint(latLon.lat, latLon.lon), limit)),
|
||||
bgi::qend(m_rtree), [this, &latLon, &indexes, maxDistance](TValue const & v)
|
||||
{
|
||||
auto const & hotel = GetHotelById(v.second);
|
||||
double const dist = ms::DistanceOnEarth(latLon.lat, latLon.lon, hotel.lat, hotel.lon);
|
||||
if (maxDistance != 0.0 && dist > maxDistance /* max distance in meters */)
|
||||
return;
|
||||
|
||||
indexes.emplace_back(v.second);
|
||||
});
|
||||
|
||||
return indexes;
|
||||
}
|
||||
|
||||
void BookingDataset::BuildHotels(function<void(FeatureBuilder1 &)> const & fn) const
|
||||
{
|
||||
for (auto const & item : m_hotels)
|
||||
BuildHotel(item.second, fn);
|
||||
}
|
||||
|
||||
void BookingDataset::BuildHotel(Hotel const & hotel,
|
||||
function<void(FeatureBuilder1 &)> const & fn) const
|
||||
void BookingDataset::BuildObject(SponsoredDataset::Object const & hotel,
|
||||
function<void(FeatureBuilder1 &)> const & fn) const
|
||||
{
|
||||
FeatureBuilder1 fb;
|
||||
FeatureParams params;
|
||||
|
@ -276,68 +148,23 @@ void BookingDataset::BuildHotel(Hotel const & hotel,
|
|||
fn(fb);
|
||||
}
|
||||
|
||||
void BookingDataset::LoadHotels(istream & src, string const & addressReferencePath)
|
||||
{
|
||||
m_hotels.clear();
|
||||
m_rtree.clear();
|
||||
|
||||
for (string line; getline(src, line);)
|
||||
{
|
||||
Hotel hotel(line);
|
||||
m_hotels.emplace(hotel.id, hotel);
|
||||
}
|
||||
|
||||
if (!addressReferencePath.empty())
|
||||
{
|
||||
LOG(LINFO, ("Reference addresses for booking objects", addressReferencePath));
|
||||
Platform & platform = GetPlatform();
|
||||
string const backupPath = platform.WritableDir();
|
||||
platform.SetWritableDirForTests(addressReferencePath);
|
||||
|
||||
AddressMatcher addressMatcher;
|
||||
|
||||
size_t matchedNum = 0;
|
||||
size_t emptyAddr = 0;
|
||||
for (auto & item : m_hotels)
|
||||
{
|
||||
auto & hotel = item.second;
|
||||
addressMatcher(hotel);
|
||||
|
||||
if (hotel.address.empty())
|
||||
++emptyAddr;
|
||||
if (hotel.IsAddressPartsFilled())
|
||||
++matchedNum;
|
||||
}
|
||||
LOG(LINFO,
|
||||
("Num of hotels:", m_hotels.size(), "matched:", matchedNum, "empty addresses:", emptyAddr));
|
||||
platform.SetWritableDirForTests(backupPath);
|
||||
}
|
||||
|
||||
for (auto const & item : m_hotels)
|
||||
{
|
||||
auto const & hotel = item.second;
|
||||
TBox b(TPoint(hotel.lat, hotel.lon), TPoint(hotel.lat, hotel.lon));
|
||||
m_rtree.insert(make_pair(b, hotel.id));
|
||||
}
|
||||
}
|
||||
|
||||
BookingDataset::BookingId BookingDataset::MatchWithBooking(FeatureBuilder1 const & fb) const
|
||||
BookingDataset::ObjectId BookingDataset::FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
auto const name = fb.GetName(StringUtf8Multilang::kDefaultCode);
|
||||
|
||||
if (name.empty())
|
||||
return kInvalidHotelIndex;
|
||||
return kInvalidObjectId;
|
||||
|
||||
// Find |kMaxSelectedElements| nearest values to a point.
|
||||
auto const bookingIndexes = GetNearestHotels(MercatorBounds::ToLatLon(fb.GetKeyPoint()),
|
||||
kMaxSelectedElements, kDistanceLimitInMeters);
|
||||
auto const bookingIndexes = GetNearestObjects(MercatorBounds::ToLatLon(fb.GetKeyPoint()),
|
||||
kMaxSelectedElements, kDistanceLimitInMeters);
|
||||
|
||||
for (auto const j : bookingIndexes)
|
||||
{
|
||||
if (booking_scoring::Match(GetHotelById(j), fb).IsMatched())
|
||||
if (booking_scoring::Match(GetObjectById(j), fb).IsMatched())
|
||||
return j;
|
||||
}
|
||||
|
||||
return kInvalidHotelIndex;
|
||||
return kInvalidObjectId;
|
||||
}
|
||||
} // namespace generator
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
#include "indexer/index.hpp"
|
||||
|
||||
#include "generator/sponsored_dataset.hpp"
|
||||
|
||||
#include "search/reverse_geocoder.hpp"
|
||||
|
||||
#include "base/newtype.hpp"
|
||||
|
@ -12,7 +14,6 @@
|
|||
#include "boost/geometry/index/rtree.hpp"
|
||||
|
||||
#include "std/function.hpp"
|
||||
#include "std/limits.hpp"
|
||||
#include "std/map.hpp"
|
||||
#include "std/string.hpp"
|
||||
|
||||
|
@ -20,102 +21,36 @@ class FeatureBuilder1;
|
|||
|
||||
namespace generator
|
||||
{
|
||||
class BookingDataset
|
||||
class BookingDataset : public SponsoredDatasetBase
|
||||
{
|
||||
public:
|
||||
NEWTYPE(uint32_t, BookingId);
|
||||
|
||||
static double constexpr kDistanceLimitInMeters = 150;
|
||||
static size_t constexpr kMaxSelectedElements = 3;
|
||||
static BookingId const kInvalidHotelIndex;
|
||||
// class AddressMatcher
|
||||
// {
|
||||
// Index m_index;
|
||||
// unique_ptr<search::ReverseGeocoder> m_coder;
|
||||
|
||||
struct Hotel
|
||||
// public:
|
||||
// AddressMatcher();
|
||||
// void operator()(Hotel & hotel);
|
||||
// };
|
||||
|
||||
explicit BookingDataset(string const & dataPath, string const & addressReferencePath = string())
|
||||
: SponsoredDatasetBase(dataPath, addressReferencePath)
|
||||
{
|
||||
enum class Fields
|
||||
{
|
||||
Id = 0,
|
||||
Latitude = 1,
|
||||
Longtitude = 2,
|
||||
Name = 3,
|
||||
Address = 4,
|
||||
Stars = 5,
|
||||
PriceCategory = 6,
|
||||
RatingBooking = 7,
|
||||
RatingUsers = 8,
|
||||
DescUrl = 9,
|
||||
Type = 10,
|
||||
Translations = 11,
|
||||
}
|
||||
|
||||
Counter
|
||||
};
|
||||
|
||||
BookingId id{kInvalidHotelIndex};
|
||||
double lat = 0.0;
|
||||
double lon = 0.0;
|
||||
string name;
|
||||
string address;
|
||||
string street;
|
||||
string houseNumber;
|
||||
uint32_t stars = 0;
|
||||
uint32_t priceCategory = 0;
|
||||
double ratingBooking = 0.0;
|
||||
double ratingUser = 0.0;
|
||||
string descUrl;
|
||||
uint32_t type = 0;
|
||||
string translations;
|
||||
|
||||
static constexpr size_t Index(Fields field) { return static_cast<size_t>(field); }
|
||||
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
|
||||
explicit Hotel(string const & src);
|
||||
|
||||
inline bool IsAddressPartsFilled() const { return !street.empty() || !houseNumber.empty(); }
|
||||
};
|
||||
|
||||
class AddressMatcher
|
||||
explicit BookingDataset(istream & dataSource, string const & addressReferencePath = string())
|
||||
: SponsoredDatasetBase(dataSource, addressReferencePath)
|
||||
{
|
||||
Index m_index;
|
||||
unique_ptr<search::ReverseGeocoder> m_coder;
|
||||
}
|
||||
|
||||
public:
|
||||
AddressMatcher();
|
||||
void operator()(Hotel & hotel);
|
||||
};
|
||||
|
||||
explicit BookingDataset(string const & dataPath, string const & addressReferencePath = string());
|
||||
explicit BookingDataset(istream & dataSource, string const & addressReferencePath = string());
|
||||
|
||||
/// @return an id of a matched hotel or kInvalidHotelIndex on failure.
|
||||
BookingId GetMatchingHotelId(FeatureBuilder1 const & fb) const;
|
||||
/// @return true if |fb| is a hotel with a name.
|
||||
bool CanBeBooking(FeatureBuilder1 const & fb) const;
|
||||
|
||||
inline size_t Size() const { return m_hotels.size(); }
|
||||
Hotel const & GetHotelById(BookingId id) const;
|
||||
Hotel & GetHotelById(BookingId id);
|
||||
vector<BookingId> GetNearestHotels(ms::LatLon const & latLon, size_t limit,
|
||||
double maxDistance = 0.0) const;
|
||||
bool MatchByName(string const & osmName, vector<size_t> const & bookingIndexes) const;
|
||||
|
||||
void BuildHotels(function<void(FeatureBuilder1 &)> const & fn) const;
|
||||
bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const override;
|
||||
|
||||
protected:
|
||||
map<BookingId, Hotel> m_hotels;
|
||||
void BuildObject(Object const & hotel, function<void(FeatureBuilder1 &)> const & fn) const override;
|
||||
|
||||
using TPoint = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
|
||||
using TBox = boost::geometry::model::box<TPoint>;
|
||||
using TValue = pair<TBox, BookingId>;
|
||||
|
||||
// Create the rtree using default constructor.
|
||||
boost::geometry::index::rtree<TValue, boost::geometry::index::quadratic<16>> m_rtree;
|
||||
|
||||
void BuildHotel(Hotel const & hotel, function<void(FeatureBuilder1 &)> const & fn) const;
|
||||
|
||||
void LoadHotels(istream & path, string const & addressReferencePath);
|
||||
/// @return an id of a matched hotel or kInvalidHotelIndex on failure.
|
||||
BookingId MatchWithBooking(FeatureBuilder1 const & e) const;
|
||||
ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & e) const override;
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h);
|
||||
|
||||
NEWTYPE_SIMPLE_OUTPUT(BookingDataset::BookingId);
|
||||
} // namespace generator
|
||||
|
|
|
@ -101,7 +101,7 @@ public:
|
|||
|
||||
void operator()(FeatureBuilder1 & fb) override
|
||||
{
|
||||
if (m_bookingDataset.CanBeBooking(fb))
|
||||
if (m_bookingDataset.NecessaryMatchingConditionHolds(fb))
|
||||
m_features.emplace(fb.GetMostGenericOsmId(), fb);
|
||||
}
|
||||
|
||||
|
@ -139,11 +139,11 @@ feature::GenerateInfo GetGenerateInfo()
|
|||
struct SampleItem
|
||||
{
|
||||
enum MatchStatus {Uninitialized, Yes, No};
|
||||
using BookingId = BookingDataset::BookingID;
|
||||
using ObjectId = SponsoredDataset::ObjectId;
|
||||
|
||||
SampleItem() = default;
|
||||
|
||||
SampleItem(osm::Id const & osmId, BookingId const bookingId, MatchStatus const match = Uninitialized)
|
||||
SampleItem(osm::Id const & osmId, ObjectId const bookingId, MatchStatus const match = Uninitialized)
|
||||
: m_osmId(osmId)
|
||||
, m_bookingId(bookingId)
|
||||
, m_match(match)
|
||||
|
@ -151,7 +151,7 @@ struct SampleItem
|
|||
}
|
||||
|
||||
osm::Id m_osmId;
|
||||
BookingId m_bookingId = BookingDataset::kInvalidHotelIndex;
|
||||
ObjectId m_bookingId = SponsoredDataset::kInvalidObjectId;
|
||||
|
||||
MatchStatus m_match = Uninitialized;
|
||||
};
|
||||
|
@ -179,7 +179,7 @@ SampleItem ReadSampleItem(string const & str)
|
|||
"due to wrong number of fields."));
|
||||
|
||||
item.m_osmId = ReadDebuggedPrintedOsmId(parts[0]);
|
||||
if (!strings::to_uint(parts[1], item.m_bookingId))
|
||||
if (!strings::to_uint(parts[1], item.m_bookingId.Get()))
|
||||
MYTHROW(ParseError, ("Can't make uint32 from string:", parts[1]));
|
||||
item.m_match = ReadMatchStatus(parts[2]);
|
||||
|
||||
|
@ -219,7 +219,7 @@ void GenerateFactors(BookingDataset const & booking, map<osm::Id, FeatureBuilder
|
|||
{
|
||||
for (auto const & item : sampleItems)
|
||||
{
|
||||
auto const & hotel = booking.GetHotelById(item.m_bookingId);
|
||||
auto const & hotel = booking.GetObjectById(item.m_bookingId);
|
||||
auto const & feature = features.at(item.m_osmId);
|
||||
|
||||
auto const score = booking_scoring::Match(hotel, feature);
|
||||
|
|
|
@ -126,7 +126,7 @@ bool BookingMatchScore::IsMatched() const
|
|||
return GetMatchingScore() > kOptimalThreshold;
|
||||
}
|
||||
|
||||
BookingMatchScore Match(BookingDataset::Hotel const & h, FeatureBuilder1 const & fb)
|
||||
BookingMatchScore Match(BookingDataset::Object const & h, FeatureBuilder1 const & fb)
|
||||
{
|
||||
BookingMatchScore score;
|
||||
|
||||
|
|
|
@ -17,6 +17,6 @@ struct BookingMatchScore
|
|||
double m_nameSimilarityScore{};
|
||||
};
|
||||
|
||||
BookingMatchScore Match(BookingDataset::Hotel const & h, FeatureBuilder1 const & fb);
|
||||
BookingMatchScore Match(BookingDataset::Object const & h, FeatureBuilder1 const & fb);
|
||||
} // namespace booking_scoring
|
||||
} // namespace generator
|
||||
|
|
|
@ -36,6 +36,7 @@ SOURCES += \
|
|||
region_meta.cpp \
|
||||
routing_generator.cpp \
|
||||
search_index_builder.cpp \
|
||||
sponsored_dataset.cpp \
|
||||
srtm_parser.cpp \
|
||||
statistics.cpp \
|
||||
tesselator.cpp \
|
||||
|
@ -72,6 +73,7 @@ HEADERS += \
|
|||
region_meta.hpp \
|
||||
routing_generator.hpp \
|
||||
search_index_builder.hpp \
|
||||
sponsored_dataset.hpp \
|
||||
srtm_parser.hpp \
|
||||
statistics.hpp \
|
||||
tag_admixer.hpp \
|
||||
|
|
|
@ -349,8 +349,8 @@ public:
|
|||
[](Place const & p1, Place const & p2) { return p1.IsEqual(p2); },
|
||||
[](Place const & p1, Place const & p2) { return p1.IsBetterThan(p2); });
|
||||
}
|
||||
else if (m_bookingDataset.GetMatchingHotelId(fb) !=
|
||||
generator::BookingDataset::kInvalidHotelIndex)
|
||||
else if (m_bookingDataset.FindMatchingObjectId(fb) !=
|
||||
generator::SponsoredDatasetBase::kInvalidObjectId)
|
||||
{
|
||||
m_skippedElements << DebugPrint(fb.GetMostGenericOsmId()) << endl;
|
||||
|
||||
|
@ -389,7 +389,7 @@ public:
|
|||
DumpSkippedElements();
|
||||
|
||||
// Emit all booking objects to the map.
|
||||
m_bookingDataset.BuildHotels([this](FeatureBuilder1 & fb) { Emit(fb); });
|
||||
m_bookingDataset.BuildOsmObjects([this](FeatureBuilder1 & fb) { Emit(fb); });
|
||||
|
||||
m_places.ForEach([this](Place const & p)
|
||||
{
|
||||
|
|
180
generator/sponsored_dataset.cpp
Normal file
180
generator/sponsored_dataset.cpp
Normal file
|
@ -0,0 +1,180 @@
|
|||
#include "generator/sponsored_dataset.hpp"
|
||||
|
||||
#include "platform/platform.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/limits.hpp"
|
||||
|
||||
#include "std/fstream.hpp"
|
||||
#include "std/iostream.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Returns a copy of |str| in which every tab character is replaced by the two-character
// sequence "\t" (backslash followed by 't'). All other characters are copied unchanged.
// Used to print a raw tab-separated line in a diagnostic message.
//
// The result is built directly in a string instead of a stringstream: this avoids stream
// overhead for a per-character copy and does not depend on <sstream> being included.
string EscapeTabs(string const & str)
{
  string escaped;
  // The result is at least as long as the input; reserving avoids most reallocations.
  escaped.reserve(str.size());
  for (char const ch : str)
  {
    if (ch != '\t')
      escaped.push_back(ch);
    else
      escaped.append("\\t");
  }
  return escaped;
}
|
||||
} // namespace
|
||||
|
||||
// Builds an object from |src|, one tab-separated record of the dataset.
// The record must contain exactly FieldsCount() columns, positioned as described by Fields;
// otherwise the CHECK below fires and reports the offending line with tabs escaped.
// The return values of the numeric conversions are ignored, so a column that fails to parse
// does not stop construction.
SponsoredDataset::Object::Object(string const & src)
{
  vector<string> rec;
  strings::ParseCSVRow(src, '\t', rec);
  CHECK(rec.size() == FieldsCount(), ("Error parsing hotels.tsv line:", EscapeTabs(src)));

  // Shorthand for "the text of column |field|".
  auto const column = [&rec](Fields const field) -> string const &
  {
    return rec[Index(field)];
  };

  // Identifier and position.
  strings::to_uint(column(Fields::Id), id.Get());
  strings::to_double(column(Fields::Latitude), lat);
  strings::to_double(column(Fields::Longtitude), lon);

  // Free-form text columns.
  name = column(Fields::Name);
  address = column(Fields::Address);

  // Numeric attributes.
  strings::to_uint(column(Fields::Stars), stars);
  strings::to_uint(column(Fields::PriceCategory), priceCategory);
  strings::to_double(column(Fields::RatingBooking), ratingBooking);
  strings::to_double(column(Fields::RatingUsers), ratingUser);

  descUrl = column(Fields::DescUrl);

  strings::to_uint(column(Fields::Type), type);

  translations = column(Fields::Translations);
}
|
||||
|
||||
// Writes a one-line summary of |h| (id, name, address, latitude and longitude) to |s| and
// returns |s|. Floating point values are printed in fixed notation with 7 digits after the
// decimal point.
//
// The stream's format flags and precision are saved first and restored afterwards, so the
// formatting chosen here does not leak into whatever the caller prints to |s| later.
ostream & operator<<(ostream & s, SponsoredDataset::Object const & h)
{
  auto const savedFlags = s.flags();
  auto const savedPrecision = s.precision();

  s << fixed << setprecision(7);
  s << "Id: " << h.id << "\t Name: " << h.name << "\t Address: " << h.address
    << "\t lat: " << h.lat << " lon: " << h.lon;

  s.flags(savedFlags);
  s.precision(savedPrecision);
  return s;
}
|
||||
|
||||
// Definition of the "no object" sentinel: the largest value of the id's underlying type.
SponsoredDataset::ObjectId const SponsoredDataset::kInvalidObjectId(
    numeric_limits<SponsoredDataset::ObjectId::RepType>::max());
|
||||
|
||||
// Loads the dataset from the file at |dataPath|.
// An empty |dataPath| leaves the dataset empty. If the file cannot be opened, an error is
// logged and the dataset is left empty as well. |addressReferencePath| is passed on to
// LoadData().
SponsoredDatasetBase::SponsoredDatasetBase(string const & dataPath, string const & addressReferencePath)
{
  if (!dataPath.empty())
  {
    ifstream dataSource(dataPath);
    if (dataSource.is_open())
    {
      LoadData(dataSource, addressReferencePath);
    }
    else
    {
      LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
    }
  }
}

// Loads the dataset from the already opened stream |dataSource|.
// |addressReferencePath| is passed on to LoadData().
SponsoredDatasetBase::SponsoredDatasetBase(istream & dataSource, string const & addressReferencePath)
{
  LoadData(dataSource, addressReferencePath);
}
|
||||
|
||||
// Returns the object stored under |id|.
// An id that is not present in the dataset is treated as a programming error: the CHECK fires.
SponsoredDataset::Object const & SponsoredDatasetBase::GetObjectById(SponsoredDataset::ObjectId id) const
{
  auto const it = m_hotels.find(id);
  CHECK(it != end(m_hotels), ("Got wrong object id:", id));
  return it->second;
}

// Mutable counterpart of the lookup above, with the same CHECK on unknown ids.
SponsoredDataset::Object & SponsoredDatasetBase::GetObjectById(SponsoredDataset::ObjectId id)
{
  // Reuse the const lookup. Casting constness away is safe because *this is not const here.
  SponsoredDatasetBase const & self = *this;
  return const_cast<SponsoredDataset::Object &>(self.GetObjectById(id));
}
|
||||
|
||||
// Calls BuildObject() for every stored object, visiting them in the map's key (id) order,
// and hands |fn| to each call unchanged.
void SponsoredDatasetBase::BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const
{
  for (auto const & idAndObject : m_hotels)
  {
    auto const & object = idAndObject.second;
    BuildObject(object, fn);
  }
}
|
||||
|
||||
// Returns the id of the dataset object matched to |fb|, or kInvalidObjectId when there is none.
// Features that fail NecessaryMatchingConditionHolds() are rejected without calling
// FindMatchingObjectIdImpl().
SponsoredDatasetBase::ObjectId SponsoredDatasetBase::FindMatchingObjectId(FeatureBuilder1 const & fb) const
{
  if (!NecessaryMatchingConditionHolds(fb))
    return kInvalidObjectId;

  return FindMatchingObjectIdImpl(fb);
}
|
||||
|
||||
// Queries the R-tree for up to |limit| objects nearest to |latLon| and returns their ids.
// When |maxDistance| is not 0.0, candidates whose distance to |latLon| (as computed by
// ms::DistanceOnEarth) exceeds |maxDistance| are dropped, so fewer than |limit| ids may be
// returned. A |maxDistance| of 0.0 disables the distance filter.
vector<SponsoredDataset::ObjectId> SponsoredDatasetBase::GetNearestObjects(
    ms::LatLon const & latLon, size_t const limit,
    double const maxDistance /* = 0.0 */) const
{
  namespace bgi = boost::geometry::index;

  bool const isDistanceLimited = maxDistance != 0.0;
  TPoint const center(latLon.lat, latLon.lon);

  vector<ObjectId> result;
  auto const last = bgi::qend(m_rtree);
  for (auto it = bgi::qbegin(m_rtree, bgi::nearest(center, limit)); it != last; ++it)
  {
    ObjectId const id = it->second;
    auto const & object = GetObjectById(id);
    double const dist = ms::DistanceOnEarth(latLon.lat, latLon.lon, object.lat, object.lon);

    // Max distance is in meters.
    if (isDistanceLimited && dist > maxDistance)
      continue;

    result.emplace_back(id);
  }

  return result;
}
|
||||
|
||||
// Replaces the current contents with objects parsed from |src|, one object per line, and
// rebuilds the R-tree over their coordinates.
// When |addressReferencePath| is not empty, the platform writable directory is pointed at it
// while address statistics are counted and logged, and is restored afterwards. The address
// matching itself is currently disabled (see the TODOs below).
void SponsoredDatasetBase::LoadData(istream & src, string const & addressReferencePath)
{
  m_hotels.clear();
  m_rtree.clear();

  // map::emplace does not overwrite: if several lines share an id, the first one is kept.
  string line;
  while (getline(src, line))
  {
    Object const object(line);
    m_hotels.emplace(object.id, object);
  }

  if (!addressReferencePath.empty())
  {
    LOG(LINFO, ("Reference addresses for booking objects", addressReferencePath));
    Platform & platform = GetPlatform();
    string const backupPath = platform.WritableDir();
    platform.SetWritableDirForTests(addressReferencePath);

    //TODO(mgsergio): AddressMatcher addressMatcher;

    size_t matchedNum = 0;
    size_t emptyAddr = 0;
    for (auto const & item : m_hotels)
    {
      auto const & object = item.second;
      // TODO(mgsergio): addressMatcher(hotel);

      emptyAddr += object.address.empty() ? 1 : 0;
      matchedNum += object.IsAddressPartsFilled() ? 1 : 0;
    }
    LOG(LINFO,
        ("Num of hotels:", m_hotels.size(), "matched:", matchedNum, "empty addresses:", emptyAddr));

    platform.SetWritableDirForTests(backupPath);
  }

  // Each object is indexed as a degenerate box whose two corners are its (lat, lon) point.
  for (auto const & item : m_hotels)
  {
    auto const & object = item.second;
    TPoint const point(object.lat, object.lon);
    m_rtree.insert(make_pair(TBox(point, point), object.id));
  }
}
|
||||
} // namespace generator
|
136
generator/sponsored_dataset.hpp
Normal file
136
generator/sponsored_dataset.hpp
Normal file
|
@ -0,0 +1,136 @@
|
|||
#pragma once
|
||||
|
||||
#include "indexer/index.hpp"
|
||||
|
||||
#include "search/reverse_geocoder.hpp"
|
||||
|
||||
#include "base/newtype.hpp"
|
||||
|
||||
#include "boost/geometry.hpp"
|
||||
#include "boost/geometry/geometries/point.hpp"
|
||||
#include "boost/geometry/geometries/box.hpp"
|
||||
#include "boost/geometry/index/rtree.hpp"
|
||||
|
||||
#include "std/function.hpp"
|
||||
#include "std/limits.hpp"
|
||||
#include "std/map.hpp"
|
||||
#include "std/string.hpp"
|
||||
|
||||
class FeatureBuilder1;
|
||||
|
||||
namespace generator
|
||||
{
|
||||
class SponsoredDataset
|
||||
{
|
||||
public:
|
||||
NEWTYPE(uint32_t, ObjectId);
|
||||
|
||||
static double constexpr kDistanceLimitInMeters = 150;
|
||||
static size_t constexpr kMaxSelectedElements = 3;
|
||||
static ObjectId const kInvalidObjectId;
|
||||
|
||||
struct Object
|
||||
{
|
||||
enum class Fields
|
||||
{
|
||||
Id = 0,
|
||||
Latitude = 1,
|
||||
Longtitude = 2,
|
||||
Name = 3,
|
||||
Address = 4,
|
||||
Stars = 5,
|
||||
PriceCategory = 6,
|
||||
RatingBooking = 7,
|
||||
RatingUsers = 8,
|
||||
DescUrl = 9,
|
||||
Type = 10,
|
||||
Translations = 11,
|
||||
|
||||
Counter
|
||||
};
|
||||
|
||||
ObjectId id{kInvalidObjectId};
|
||||
double lat = 0.0;
|
||||
double lon = 0.0;
|
||||
string name;
|
||||
string address;
|
||||
string street;
|
||||
string houseNumber;
|
||||
uint32_t stars = 0;
|
||||
uint32_t priceCategory = 0;
|
||||
double ratingBooking = 0.0;
|
||||
double ratingUser = 0.0;
|
||||
string descUrl;
|
||||
uint32_t type = 0;
|
||||
string translations;
|
||||
|
||||
static constexpr size_t Index(Fields field) { return static_cast<size_t>(field); }
|
||||
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
|
||||
explicit Object(string const & src);
|
||||
|
||||
inline bool IsAddressPartsFilled() const { return !street.empty() || !houseNumber.empty(); }
|
||||
};
|
||||
|
||||
// class AddressMatcher
|
||||
// {
|
||||
// Index m_index;
|
||||
// unique_ptr<search::ReverseGeocoder> m_coder;
|
||||
|
||||
// public:
|
||||
// AddressMatcher();
|
||||
// void operator()(Hotel & hotel);
|
||||
// };
|
||||
|
||||
virtual ~SponsoredDataset() = default;
|
||||
|
||||
// TODO(mgsergio): Comment /// @return an id of a matched hotel or kInvalidHotelIndex on failure.
|
||||
virtual ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const = 0;
|
||||
|
||||
virtual size_t Size() const = 0;
|
||||
|
||||
virtual void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const = 0;
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream & s, SponsoredDataset::Object const & h);
|
||||
|
||||
NEWTYPE_SIMPLE_OUTPUT(SponsoredDataset::ObjectId);
|
||||
|
||||
class SponsoredDatasetBase : public SponsoredDataset
|
||||
{
|
||||
public:
|
||||
explicit SponsoredDatasetBase(string const & dataPath, string const & addressReferencePath = string());
|
||||
explicit SponsoredDatasetBase(istream & dataSource, string const & addressReferencePath = string());
|
||||
|
||||
size_t Size() const override { return m_hotels.size(); }
|
||||
|
||||
Object const & GetObjectById(ObjectId id) const;
|
||||
Object & GetObjectById(ObjectId id);
|
||||
vector<ObjectId> GetNearestObjects(ms::LatLon const & latLon, size_t limit,
|
||||
double maxDistance = 0.0) const;
|
||||
|
||||
/// @return true if |fb| satisfies some necesary conditions to match one or serveral
|
||||
/// objects from dataset.
|
||||
virtual bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const = 0;
|
||||
/// @return an id of a matched object or kInvalidObjectId on failure.
|
||||
ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const override;
|
||||
|
||||
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const override;
|
||||
|
||||
protected:
|
||||
map<ObjectId, Object> m_hotels;
|
||||
|
||||
using TPoint = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
|
||||
using TBox = boost::geometry::model::box<TPoint>;
|
||||
using TValue = pair<TBox, ObjectId>;
|
||||
|
||||
// Create the rtree using default constructor.
|
||||
boost::geometry::index::rtree<TValue, boost::geometry::index::quadratic<16>> m_rtree;
|
||||
|
||||
virtual void BuildObject(Object const & object, function<void(FeatureBuilder1 &)> const & fn) const = 0;
|
||||
|
||||
void LoadData(istream & src, string const & addressReferencePath);
|
||||
|
||||
/// @return an id of a matched object or kInvalidObjectId on failure.
|
||||
virtual ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & e) const = 0;
|
||||
};
|
||||
} // namespace generator
|
52
generator/sponsored_object.hpp
Normal file
52
generator/sponsored_object.hpp
Normal file
|
@ -0,0 +1,52 @@
|
|||
#pragma once
|
||||
|
||||
NEWTYPE(uint32_t, ObjectId);
|
||||
NEWTYPE_SIMPLE_OUTPUT(ObjectId);
|
||||
|
||||
static double constexpr kDistanceLimitInMeters = 150;
|
||||
static size_t constexpr kMaxSelectedElements = 3;
|
||||
static ObjectId const kInvalidObjectId;
|
||||
|
||||
struct SponsoredObject
|
||||
{
|
||||
enum class Fields
|
||||
{
|
||||
Id = 0,
|
||||
Latitude = 1,
|
||||
Longtitude = 2,
|
||||
Name = 3,
|
||||
Address = 4,
|
||||
Stars = 5,
|
||||
PriceCategory = 6,
|
||||
RatingBooking = 7,
|
||||
RatingUsers = 8,
|
||||
DescUrl = 9,
|
||||
Type = 10,
|
||||
Translations = 11,
|
||||
|
||||
Counter
|
||||
};
|
||||
|
||||
ObjectId id{kInvalidObjectId};
|
||||
double lat = 0.0;
|
||||
double lon = 0.0;
|
||||
string name;
|
||||
string address;
|
||||
string street;
|
||||
string houseNumber;
|
||||
uint32_t stars = 0;
|
||||
uint32_t priceCategory = 0;
|
||||
double ratingBooking = 0.0;
|
||||
double ratingUser = 0.0;
|
||||
string descUrl;
|
||||
uint32_t type = 0;
|
||||
string translations;
|
||||
|
||||
static size_t constexpr Index(Fields field) { return static_cast<size_t>(field); }
|
||||
static size_t constexpr FieldsCount() { return static_cast<size_t>(Fields::Counter); }
|
||||
explicit SponsoredObject(string const & src);
|
||||
|
||||
inline bool IsAddressFilled() const { return !street.empty() || !houseNumber.empty(); }
|
||||
};
|
||||
|
||||
/// Writes a textual representation of |h| to |s| and returns |s|.
/// Declared for SponsoredObject, the type this header defines. The previous declaration
/// named BookingDataset::Hotel, which is not declared here.
ostream & operator<<(ostream & s, SponsoredObject const & h);
|
Loading…
Add table
Reference in a new issue