Switch from inheritance to templates.

This commit is contained in:
Sergey Magidovich 2016-09-01 13:27:27 +03:00
parent 75afc71cf8
commit 22d138efd4
11 changed files with 365 additions and 364 deletions

View file

@ -2,23 +2,18 @@
namespace generator
{
SponsoredDataset::ObjectId AggregatingSponsoredDataset::FindMatchingObjectId(FeatureBuilder1 const & fb) const
bool AggregatingSponsoredDataset::IsMatched(FeatureBuilder1 const & fb) const
{
// There is only one source for now.
return m_datasets[0]->FindMatchingObjectId(fb);
}
size_t AggregatingSponsoredDataset::Size() const
{
size_t count{};
for (auto const & ds : m_datasets)
count += ds->Size();
return count;
return m_bookingDataset.FindMatchingObjectId(fb) != BookingHotel::InvalidObjectId();
}
void AggregatingSponsoredDataset::BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const
{
for (auto const & ds : m_datasets)
ds->BuildOsmObjects(fn);
m_bookingDataset.BuildOsmObjects(fn);
}
size_t AggregatingSponsoredDataset::Size() const
{
return m_bookingDataset.Size();
}
} // namespace generator

View file

@ -8,22 +8,20 @@
namespace generator
{
class AggregatingSponsoredDataset : public SponsoredDataset
class AggregatingSponsoredDataset
{
public:
explicit AggregatingSponsoredDataset(feature::GenerateInfo const & info)
: m_bookingDataset(info.m_bookingDatafileName, info.m_bookingReferenceDir)
{
m_datasets.emplace_back(make_unique<BookingDataset>(info.m_bookingDatafileName,
info.m_bookingReferenceDir));
}
ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const override;
bool IsMatched(FeatureBuilder1 const & e) const;
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const;
size_t Size() const override;
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const override;
size_t Size() const;
private:
vector<unique_ptr<SponsoredDatasetBase>> m_datasets;
BookingDataset m_bookingDataset;
};
} // namespace generator;

View file

@ -10,6 +10,60 @@
namespace generator
{
namespace
{
// Returns a copy of |str| where every tab character is replaced with the two-character
// sequence "\t" (backslash, 't'), so that a raw TSV line stays readable in a message.
// All other characters are copied unchanged.
string EscapeTabs(string const & str)
{
  string escaped;
  for (char const ch : str)
  {
    if (ch != '\t')
    {
      escaped += ch;
      continue;
    }
    escaped += "\\t";
  }
  return escaped;
}
} // namespace
// BookingHotel ------------------------------------------------------------------------------------
// Builds a hotel from one tab-separated line of the booking data file.
// The line must contain exactly FieldsCount() columns laid out as described by |Fields|;
// this is enforced by the CHECK. Return values of the numeric conversions are ignored.
BookingHotel::BookingHotel(string const & src)
{
  vector<string> rec;
  strings::ParseCSVRow(src, '\t', rec);
  CHECK(rec.size() == FieldsCount(), ("Error parsing hotels.tsv line:", EscapeTabs(src)));

  // Column lookup by symbolic field name.
  auto const column = [&rec](Fields field) -> string const & { return rec[Index(field)]; };

  strings::to_uint(column(Fields::Id), m_id.Get());

  // TODO(mgsergio): Use ms::LatLon.
  strings::to_double(column(Fields::Latitude), m_lat);
  strings::to_double(column(Fields::Longtitude), m_lon);

  // Columns stored as text.
  m_name = column(Fields::Name);
  m_address = column(Fields::Address);
  m_descUrl = column(Fields::DescUrl);
  m_translations = column(Fields::Translations);

  // Numeric columns.
  strings::to_uint(column(Fields::Stars), m_stars);
  strings::to_uint(column(Fields::PriceCategory), m_priceCategory);
  strings::to_double(column(Fields::RatingBooking), m_ratingBooking);
  strings::to_double(column(Fields::RatingUsers), m_ratingUser);
  strings::to_uint(column(Fields::Type), m_type);
}
// Writes a short single-line summary of |h| (id, name, address, coordinates) to |s|.
// The fixed notation and the precision of 7 set here are not restored, so they stay in
// effect on |s| after the call.
ostream & operator<<(ostream & s, BookingHotel const & h)
{
  s << fixed << setprecision(7);
  s << "Id: " << h.m_id;
  s << "\t Name: " << h.m_name;
  s << "\t Address: " << h.m_address;
  s << "\t lat: " << h.m_lat << " lon: " << h.m_lon;
  return s;
}
// BookingDataset ----------------------------------------------------------------------------------
template <>
bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const
{
if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
@ -18,39 +72,40 @@ bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb)
return ftypes::IsHotelChecker::Instance()(fb.GetTypes());
}
void BookingDataset::BuildObject(SponsoredDataset::Object const & hotel,
template <>
void BookingDataset::BuildObject(Object const & hotel,
function<void(FeatureBuilder1 &)> const & fn) const
{
FeatureBuilder1 fb;
FeatureParams params;
fb.SetCenter(MercatorBounds::FromLatLon(hotel.lat, hotel.lon));
fb.SetCenter(MercatorBounds::FromLatLon(hotel.m_lat, hotel.m_lon));
auto & metadata = params.GetMetadata();
// TODO(mgsergio): Rename FMD_SPONSORED_ID to FMD_BOOKING_ID.
metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(hotel.id.Get()));
metadata.Set(feature::Metadata::FMD_WEBSITE, hotel.descUrl);
metadata.Set(feature::Metadata::FMD_RATING, strings::to_string(hotel.ratingUser));
metadata.Set(feature::Metadata::FMD_STARS, strings::to_string(hotel.stars));
metadata.Set(feature::Metadata::FMD_PRICE_RATE, strings::to_string(hotel.priceCategory));
metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(hotel.m_id.Get()));
metadata.Set(feature::Metadata::FMD_WEBSITE, hotel.m_descUrl);
metadata.Set(feature::Metadata::FMD_RATING, strings::to_string(hotel.m_ratingUser));
metadata.Set(feature::Metadata::FMD_STARS, strings::to_string(hotel.m_stars));
metadata.Set(feature::Metadata::FMD_PRICE_RATE, strings::to_string(hotel.m_priceCategory));
// params.AddAddress(hotel.address);
// TODO(mgsergio): addr:full ???
if (!hotel.street.empty())
fb.AddStreet(hotel.street);
if (!hotel.m_street.empty())
fb.AddStreet(hotel.m_street);
if (!hotel.houseNumber.empty())
fb.AddHouseNumber(hotel.houseNumber);
if (!hotel.m_houseNumber.empty())
fb.AddHouseNumber(hotel.m_houseNumber);
params.AddName(StringUtf8Multilang::GetLangByCode(StringUtf8Multilang::kDefaultCode),
hotel.name);
if (!hotel.translations.empty())
hotel.m_name);
if (!hotel.m_translations.empty())
{
// TODO(mgsergio): Move parsing to the hotel construction stage.
vector<string> parts;
strings::ParseCSVRow(hotel.translations, '|', parts);
CHECK_EQUAL(parts.size() % 3, 0, ("Invalid translation string:", hotel.translations));
strings::ParseCSVRow(hotel.m_translations, '|', parts);
CHECK_EQUAL(parts.size() % 3, 0, ("Invalid translation string:", hotel.m_translations));
for (auto i = 0; i < parts.size(); i += 3)
{
auto const langCode = StringUtf8Multilang::GetLangIndex(parts[i]);
@ -63,7 +118,7 @@ void BookingDataset::BuildObject(SponsoredDataset::Object const & hotel,
params.AddType(clf.GetTypeByPath({"sponsored", "booking"}));
// Matching booking.com hotel types to OpenStreetMap values.
// Booking types are listed in the closed API docs.
switch (hotel.type)
switch (hotel.m_type)
{
case 19:
case 205: params.AddType(clf.GetTypeByPath({"tourism", "motel"})); break;
@ -117,12 +172,13 @@ void BookingDataset::BuildObject(SponsoredDataset::Object const & hotel,
fn(fb);
}
template <>
BookingDataset::ObjectId BookingDataset::FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const
{
auto const name = fb.GetName(StringUtf8Multilang::kDefaultCode);
if (name.empty())
return kInvalidObjectId;
return Object::InvalidObjectId();
// Find |kMaxSelectedElements| nearest values to a point.
auto const bookingIndexes = GetNearestObjects(MercatorBounds::ToLatLon(fb.GetKeyPoint()),
@ -134,6 +190,6 @@ BookingDataset::ObjectId BookingDataset::FindMatchingObjectIdImpl(FeatureBuilder
return j;
}
return kInvalidObjectId;
return Object::InvalidObjectId();
}
} // namespace generator

View file

@ -21,25 +21,58 @@ class FeatureBuilder1;
namespace generator
{
class BookingDataset : public SponsoredDatasetBase
// TODO(mgsergio): Try to get rid of code duplication. (See OpenTableRestaurant)
struct BookingHotel
{
public:
NEWTYPE(uint32_t, ObjectId);
explicit BookingDataset(string const & dataPath, string const & addressReferencePath = string())
: SponsoredDatasetBase(dataPath, addressReferencePath)
enum class Fields
{
Id = 0,
Latitude = 1,
Longtitude = 2,
Name = 3,
Address = 4,
Stars = 5,
PriceCategory = 6,
RatingBooking = 7,
RatingUsers = 8,
DescUrl = 9,
Type = 10,
Translations = 11,
Counter
};
static constexpr ObjectId InvalidObjectId()
{
return ObjectId(numeric_limits<typename ObjectId::RepType>::max());
}
explicit BookingDataset(istream & dataSource, string const & addressReferencePath = string())
: SponsoredDatasetBase(dataSource, addressReferencePath)
{
}
explicit BookingHotel(string const & src);
bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const override;
static constexpr size_t Index(Fields field) { return static_cast<size_t>(field); }
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
bool IsAddressPartsFilled() const { return !m_street.empty() || !m_houseNumber.empty(); }
protected:
void BuildObject(Object const & hotel, function<void(FeatureBuilder1 &)> const & fn) const override;
ObjectId m_id{InvalidObjectId()};
double m_lat = 0.0;
double m_lon = 0.0;
string m_name;
string m_street;
string m_houseNumber;
ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & e) const override;
string m_address;
uint32_t m_stars = 0;
uint32_t m_priceCategory = 0;
double m_ratingBooking = 0.0;
double m_ratingUser = 0.0;
string m_descUrl;
uint32_t m_type = 0;
string m_translations;
};
ostream & operator<<(ostream & s, BookingHotel const & h);
NEWTYPE_SIMPLE_OUTPUT(BookingHotel::ObjectId);
using BookingDataset = SponsoredDataset<BookingHotel>;
} // namespace generator

View file

@ -136,10 +136,11 @@ feature::GenerateInfo GetGenerateInfo()
return info;
}
template <typename Object>
struct SampleItem
{
enum MatchStatus {Uninitialized, Yes, No};
using ObjectId = SponsoredDataset::ObjectId;
using ObjectId = typename Object::ObjectId;
SampleItem() = default;
@ -151,28 +152,30 @@ struct SampleItem
}
osm::Id m_osmId;
ObjectId m_bookingId = SponsoredDataset::kInvalidObjectId;
ObjectId m_bookingId = Object::InvalidObjectId();
MatchStatus m_match = Uninitialized;
};
SampleItem::MatchStatus ReadMatchStatus(string const & str)
template <typename Object>
typename SampleItem<Object>::MatchStatus ReadMatchStatus(string const & str)
{
if (str == "Yes")
return SampleItem::Yes;
return SampleItem<Object>::Yes;
if (str == "No")
return SampleItem::No;
return SampleItem<Object>::No;
if (str == "Uninitialized")
return SampleItem::Uninitialized;
return SampleItem<Object>::Uninitialized;
MYTHROW(ParseError, ("Can't make SampleItem::MatchStatus from string:", str));
}
SampleItem ReadSampleItem(string const & str)
template <typename Object>
SampleItem<Object> ReadSampleItem(string const & str)
{
SampleItem item;
SampleItem<Object> item;
auto const parts = strings::Tokenize(str, "\t");
CHECK_EQUAL(parts.size(), 3, ("Cant't make SampleItem from string:", str,
@ -181,21 +184,22 @@ SampleItem ReadSampleItem(string const & str)
item.m_osmId = ReadDebuggedPrintedOsmId(parts[0]);
if (!strings::to_uint(parts[1], item.m_bookingId.Get()))
MYTHROW(ParseError, ("Can't make uint32 from string:", parts[1]));
item.m_match = ReadMatchStatus(parts[2]);
item.m_match = ReadMatchStatus<Object>(parts[2]);
return item;
}
vector<SampleItem> ReadSample(istream & ist)
template <typename Object>
vector<SampleItem<Object>> ReadSample(istream & ist)
{
vector<SampleItem> result;
vector<SampleItem<Object>> result;
size_t lineNumber = 1;
try
{
for (string line; getline(ist, line); ++lineNumber)
{
result.emplace_back(ReadSampleItem(line));
result.emplace_back(ReadSampleItem<Object>(line));
}
}
catch (ParseError const & e)
@ -207,15 +211,17 @@ vector<SampleItem> ReadSample(istream & ist)
return result;
}
vector<SampleItem> ReadSampleFromFile(string const & name)
template <typename Object>
vector<SampleItem<Object>> ReadSampleFromFile(string const & name)
{
ifstream ist(name);
CHECK(ist.is_open(), ("Can't open file:", name, strerror(errno)));
return ReadSample(ist);
return ReadSample<Object>(ist);
}
template <typename Object>
void GenerateFactors(BookingDataset const & booking, map<osm::Id, FeatureBuilder1> const & features,
vector<SampleItem> const & sampleItems, ostream & ost)
vector<SampleItem<Object>> const & sampleItems, ostream & ost)
{
for (auto const & item : sampleItems)
{
@ -226,13 +232,13 @@ void GenerateFactors(BookingDataset const & booking, map<osm::Id, FeatureBuilder
auto const center = MercatorBounds::ToLatLon(feature.GetKeyPoint());
double const distanceMeters = ms::DistanceOnEarth(center.lat, center.lon,
hotel.lat, hotel.lon);
hotel.m_lat, hotel.m_lon);
auto const matched = score.IsMatched();
ost << "# ------------------------------------------" << fixed << setprecision(6)
<< endl;
ost << (matched ? 'y' : 'n') << " \t" << DebugPrint(feature.GetMostGenericOsmId())
<< "\t " << hotel.id
<< "\t " << hotel.m_id
<< "\tdistance: " << distanceMeters
<< "\tdistance score: " << score.m_linearNormDistanceScore
<< "\tname score: " << score.m_nameSimilarityScore
@ -240,8 +246,8 @@ void GenerateFactors(BookingDataset const & booking, map<osm::Id, FeatureBuilder
<< endl;
ost << "# " << PrintBuilder(feature) << endl;
ost << "# " << hotel << endl;
ost << "# URL: https://www.openstreetmap.org/?mlat=" << hotel.lat
<< "&mlon=" << hotel.lon << "#map=18/" << hotel.lat << "/" << hotel.lon << endl;
ost << "# URL: https://www.openstreetmap.org/?mlat=" << hotel.m_lat
<< "&mlon=" << hotel.m_lon << "#map=18/" << hotel.m_lat << "/" << hotel.m_lon << endl;
}
}
} // namespace
@ -278,7 +284,7 @@ int main(int argc, char * argv[])
return make_unique<Emitter>(booking, features);
});
auto const sample = ReadSampleFromFile(FLAGS_sample);
auto const sample = ReadSampleFromFile<BookingHotel>(FLAGS_sample);
LOG(LINFO, ("Sample size is", sample.size()));
{
ofstream ost(FLAGS_factors);

View file

@ -131,12 +131,12 @@ BookingMatchScore Match(BookingDataset::Object const & h, FeatureBuilder1 const
BookingMatchScore score;
auto const fbCenter = MercatorBounds::ToLatLon(fb.GetKeyPoint());
auto const distance = ms::DistanceOnEarth(fbCenter.lat, fbCenter.lon, h.lat, h.lon);
auto const distance = ms::DistanceOnEarth(fbCenter.lat, fbCenter.lon, h.m_lat, h.m_lon);
score.m_linearNormDistanceScore = GetLinearNormDistanceScore(distance);
// TODO(mgsergio): Check all translations and use the best one.
score.m_nameSimilarityScore =
GetNameSimilarityScore(h.name, fb.GetName(StringUtf8Multilang::kDefaultCode));
GetNameSimilarityScore(h.m_name, fb.GetName(StringUtf8Multilang::kDefaultCode));
return score;
}

View file

@ -37,7 +37,6 @@ SOURCES += \
region_meta.cpp \
routing_generator.cpp \
search_index_builder.cpp \
sponsored_dataset.cpp \
srtm_parser.cpp \
statistics.cpp \
tesselator.cpp \
@ -76,6 +75,7 @@ HEADERS += \
routing_generator.hpp \
search_index_builder.hpp \
sponsored_dataset.hpp \
sponsored_dataset_inl.hpp \
srtm_parser.hpp \
statistics.hpp \
tag_admixer.hpp \

View file

@ -349,8 +349,7 @@ public:
[](Place const & p1, Place const & p2) { return p1.IsEqual(p2); },
[](Place const & p1, Place const & p2) { return p1.IsBetterThan(p2); });
}
else if (m_dataset.FindMatchingObjectId(fb) !=
generator::SponsoredDatasetBase::kInvalidObjectId)
else if (m_dataset.IsMatched(fb))
{
m_skippedElements << DebugPrint(fb.GetMostGenericOsmId()) << endl;

View file

@ -1,213 +0,0 @@
#include "generator/sponsored_dataset.hpp"
#include "platform/local_country_file.hpp"
#include "platform/local_country_file_utils.hpp"
#include "platform/platform.hpp"
#include "geometry/distance_on_sphere.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
#include "std/fstream.hpp"
#include "std/iostream.hpp"
#include "std/limits.hpp"
namespace generator
{
namespace
{
// Produces a printable version of |str|: each tab character becomes the two characters
// '\' and 't', everything else is copied as is.
string EscapeTabs(string const & str)
{
  string result;
  for (auto const symbol : str)
  {
    if (symbol == '\t')
      result.append("\\t");
    else
      result.push_back(symbol);
  }
  return result;
}
} // namespace
// Registers every local map found in the platform's writable directory in |m_index| and
// then creates the reverse geocoder on top of that index.
// A map whose registration throws RootException trips a CHECK.
SponsoredDataset::AddressMatcher::AddressMatcher()
{
  Platform & pl = GetPlatform();

  vector<platform::LocalCountryFile> mwms;
  platform::FindAllLocalMapsInDirectoryAndCleanup(pl.WritableDir(), 0 /* version */,
                                                  -1 /* latestVersion */, mwms);

  for (auto const & mwm : mwms)
  {
    LOG(LINFO, ("Found mwm:", mwm));
    try
    {
      m_index.RegisterMap(mwm);
    }
    catch (RootException const &)
    {
      CHECK(false, ("Bad mwm file:", mwm));
    }
  }

  m_coder = make_unique<search::ReverseGeocoder>(m_index);
}
// Asks the reverse geocoder for an address near the coordinates of |object| and stores the
// reported street name and house number in it, overwriting the previous values.
void SponsoredDataset::AddressMatcher::operator()(Object & object)
{
  auto const center = MercatorBounds::FromLatLon(object.lat, object.lon);

  search::ReverseGeocoder::Address nearby;
  m_coder->GetNearbyAddress(center, nearby);

  object.street = nearby.GetStreetName();
  object.houseNumber = nearby.GetHouseNumber();
}
// Builds an object from one tab-separated line of the data file.
// The line must contain exactly FieldsCount() columns in the order given by |Fields|;
// this is enforced by the CHECK. Return values of the numeric conversions are ignored.
SponsoredDataset::Object::Object(string const & src)
{
  vector<string> rec;
  strings::ParseCSVRow(src, '\t', rec);
  CHECK(rec.size() == FieldsCount(), ("Error parsing hotels.tsv line:", EscapeTabs(src)));

  // Column lookup by symbolic field name.
  auto const column = [&rec](Fields field) -> string const & { return rec[Index(field)]; };

  strings::to_uint(column(Fields::Id), id.Get());
  strings::to_double(column(Fields::Latitude), lat);
  strings::to_double(column(Fields::Longtitude), lon);

  // Columns stored as text.
  name = column(Fields::Name);
  address = column(Fields::Address);
  descUrl = column(Fields::DescUrl);
  translations = column(Fields::Translations);

  // Numeric columns.
  strings::to_uint(column(Fields::Stars), stars);
  strings::to_uint(column(Fields::PriceCategory), priceCategory);
  strings::to_double(column(Fields::RatingBooking), ratingBooking);
  strings::to_double(column(Fields::RatingUsers), ratingUser);
  strings::to_uint(column(Fields::Type), type);
}
// Writes a short single-line summary of |h| (id, name, address, coordinates) to |s|.
// The fixed notation and the precision of 7 set here are not restored afterwards.
ostream & operator<<(ostream & s, SponsoredDataset::Object const & h)
{
  s << fixed << setprecision(7);
  s << "Id: " << h.id;
  s << "\t Name: " << h.name;
  s << "\t Address: " << h.address;
  s << "\t lat: " << h.lat << " lon: " << h.lon;
  return s;
}
// Sentinel id meaning "no object": the largest value the underlying representation type
// of ObjectId can hold.
SponsoredDataset::ObjectId const SponsoredDataset::kInvalidObjectId =
    ObjectId(numeric_limits<ObjectId::RepType>::max());
// Loads the dataset from the file at |dataPath|.
// An empty |dataPath| leaves the dataset empty without any diagnostics. A file that can not
// be opened is reported with LOG(LERROR) and the dataset is left empty as well.
// |addressReferencePath| is handed over to LoadData() unchanged.
SponsoredDatasetBase::SponsoredDatasetBase(string const & dataPath, string const & addressReferencePath)
{
  if (dataPath.empty())
    return;

  ifstream input(dataPath);
  if (input.is_open())
  {
    LoadData(input, addressReferencePath);
    return;
  }

  LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
}
// Loads the dataset from an already opened stream; both arguments are forwarded to
// LoadData() as is.
SponsoredDatasetBase::SponsoredDatasetBase(istream & dataSource, string const & addressReferencePath)
{
  this->LoadData(dataSource, addressReferencePath);
}
// Returns read-only access to the stored object with the given |id|.
// The id must be present in |m_hotels|: a lookup miss fails the CHECK below.
SponsoredDataset::Object const & SponsoredDatasetBase::GetObjectById(SponsoredDataset::ObjectId id) const
{
auto const it = m_hotels.find(id);
// NOTE(review): the condition text presumably ends up in the failure message, so it is
// best left untouched — confirm against the CHECK macro definition.
CHECK(it != end(m_hotels), ("Got wrong object id:", id));
return it->second;
}
// Returns mutable access to the stored object with the given |id|.
// The id must be present in |m_hotels|: a lookup miss fails the CHECK below.
SponsoredDataset::Object & SponsoredDatasetBase::GetObjectById(SponsoredDataset::ObjectId id)
{
auto const it = m_hotels.find(id);
// NOTE(review): the condition text presumably ends up in the failure message, so it is
// best left untouched — confirm against the CHECK macro definition.
CHECK(it != end(m_hotels), ("Got wrong object id:", id));
return it->second;
}
// Calls BuildObject() with |fn| for every stored object, in the iteration order of
// |m_hotels|.
void SponsoredDatasetBase::BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const
{
  for (auto const & idAndObject : m_hotels)
  {
    auto const & object = idAndObject.second;
    BuildObject(object, fn);
  }
}
// Returns the id of the dataset object matching |fb|.
// The actual search (FindMatchingObjectIdImpl) is attempted only when
// NecessaryMatchingConditionHolds(fb) is true; otherwise kInvalidObjectId is returned.
SponsoredDatasetBase::ObjectId SponsoredDatasetBase::FindMatchingObjectId(FeatureBuilder1 const & fb) const
{
  if (!NecessaryMatchingConditionHolds(fb))
    return kInvalidObjectId;

  return FindMatchingObjectIdImpl(fb);
}
// Collects ids of the objects the r-tree reports as nearest to |latLon|; at most |limit|
// candidates are requested from the tree.
// When |maxDistance| is non-zero, candidates whose ms::DistanceOnEarth() from |latLon|
// exceeds it are dropped; zero switches the distance filter off.
vector<SponsoredDataset::ObjectId> SponsoredDatasetBase::GetNearestObjects(
    ms::LatLon const & latLon, size_t const limit,
    double const maxDistance /* = 0.0 */) const
{
  namespace bgi = boost::geometry::index;

  bool const filterByDistance = maxDistance != 0.0;
  auto const query = bgi::nearest(TPoint(latLon.lat, latLon.lon), limit);
  auto const last = bgi::qend(m_rtree);

  vector<ObjectId> indexes;
  for (auto it = bgi::qbegin(m_rtree, query); it != last; ++it)
  {
    TValue const & v = *it;
    auto const & object = GetObjectById(v.second);
    double const dist = ms::DistanceOnEarth(latLon.lat, latLon.lon, object.lat, object.lon);
    if (filterByDistance && dist > maxDistance /* max distance in meters */)
      continue;
    indexes.emplace_back(v.second);
  }
  return indexes;
}
// Replaces the current content of the dataset with the objects read from |src|.
//
// Every line of |src| is parsed into an Object and stored under its id. When several lines
// carry the same id, only the first one is kept (map::emplace does not overwrite).
// If |addressReferencePath| is not empty, an AddressMatcher is run over every object to
// fill its street / house number, and statistics are logged.
// Finally the r-tree is rebuilt from the coordinates of the stored objects.
void SponsoredDatasetBase::LoadData(istream & src, string const & addressReferencePath)
{
  m_hotels.clear();
  m_rtree.clear();

  for (string line; getline(src, line);)
  {
    Object hotel(line);
    // Copy the key out first, then move the object itself into the map: this spares a deep
    // copy of every string field per input line.
    auto const hotelId = hotel.id;
    m_hotels.emplace(hotelId, std::move(hotel));
  }

  if (!addressReferencePath.empty())
  {
    LOG(LINFO, ("Reference addresses for booking objects", addressReferencePath));
    Platform & platform = GetPlatform();
    string const backupPath = platform.WritableDir();

    // NOTE(review): AddressMatcher's constructor searches platform.WritableDir() for maps, so
    // this redirect is presumably what makes it pick up the reference maps — confirm.
    platform.SetWritableDirForTests(addressReferencePath);

    AddressMatcher addressMatcher;

    size_t matchedNum = 0;
    size_t emptyAddr = 0;
    for (auto & item : m_hotels)
    {
      auto & object = item.second;
      addressMatcher(object);

      if (object.address.empty())
        ++emptyAddr;
      if (object.IsAddressPartsFilled())
        ++matchedNum;
    }

    LOG(LINFO,
        ("Num of hotels:", m_hotels.size(), "matched:", matchedNum, "empty addresses:", emptyAddr));

    // Put back the writable dir that was in effect before the redirect above.
    platform.SetWritableDirForTests(backupPath);
  }

  for (auto const & item : m_hotels)
  {
    auto const & hotel = item.second;
    // The box is degenerate: both corners are the object's own point.
    TBox b(TPoint(hotel.lat, hotel.lon), TPoint(hotel.lat, hotel.lon));
    m_rtree.insert(make_pair(b, hotel.id));
  }
}
} // namespace generator

View file

@ -4,6 +4,10 @@
#include "search/reverse_geocoder.hpp"
#include "platform/local_country_file.hpp"
#include "platform/local_country_file_utils.hpp"
#include "platform/platform.hpp"
#include "base/newtype.hpp"
#include "std/function.hpp"
@ -20,88 +24,34 @@ class FeatureBuilder1;
namespace generator
{
template <typename SponsoredObject>
class SponsoredDataset
{
public:
NEWTYPE(uint32_t, ObjectId);
static double constexpr kDistanceLimitInMeters = 150;
static size_t constexpr kMaxSelectedElements = 3;
static ObjectId const kInvalidObjectId;
struct Object
{
enum class Fields
{
Id = 0,
Latitude = 1,
Longtitude = 2,
Name = 3,
Address = 4,
Stars = 5,
PriceCategory = 6,
RatingBooking = 7,
RatingUsers = 8,
DescUrl = 9,
Type = 10,
Translations = 11,
Counter
};
ObjectId id{kInvalidObjectId};
double lat = 0.0;
double lon = 0.0;
string name;
string address;
string street;
string houseNumber;
uint32_t stars = 0;
uint32_t priceCategory = 0;
double ratingBooking = 0.0;
double ratingUser = 0.0;
string descUrl;
uint32_t type = 0;
string translations;
static constexpr size_t Index(Fields field) { return static_cast<size_t>(field); }
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
explicit Object(string const & src);
inline bool IsAddressPartsFilled() const { return !street.empty() || !houseNumber.empty(); }
};
using Object = SponsoredObject;
private:
class AddressMatcher
{
Index m_index;
unique_ptr<search::ReverseGeocoder> m_coder;
public:
AddressMatcher();
void operator()(Object & object);
private:
Index m_index;
unique_ptr<search::ReverseGeocoder> m_coder;
};
virtual ~SponsoredDataset() = default;
/// @return an id of a matched object or kInvalidObjectId on failure.
virtual ObjectId FindMatchingObjectId(FeatureBuilder1 const & fb) const = 0;
virtual size_t Size() const = 0;
virtual void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const = 0;
};
ostream & operator<<(ostream & s, SponsoredDataset::Object const & h);
NEWTYPE_SIMPLE_OUTPUT(SponsoredDataset::ObjectId);
class SponsoredDatasetBase : public SponsoredDataset
{
public:
explicit SponsoredDatasetBase(string const & dataPath, string const & addressReferencePath = string());
explicit SponsoredDatasetBase(istream & dataSource, string const & addressReferencePath = string());
using ObjectId = typename Object::ObjectId;
size_t Size() const override { return m_hotels.size(); }
static double constexpr kDistanceLimitInMeters = 150;
static size_t constexpr kMaxSelectedElements = 3;
explicit SponsoredDataset(string const & dataPath, string const & addressReferencePath = string());
explicit SponsoredDataset(istream & dataSource, string const & addressReferencePath = string());
size_t Size() const { return m_objects.size(); }
Object const & GetObjectById(ObjectId id) const;
Object & GetObjectById(ObjectId id);
@ -110,13 +60,13 @@ public:
/// @return true if |fb| satisfies some necessary conditions to match one or several
/// objects from dataset.
virtual bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const = 0;
ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const override;
bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const;
ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const;
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const override;
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const;
protected:
map<ObjectId, Object> m_hotels;
map<ObjectId, Object> m_objects;
using TPoint = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
using TBox = boost::geometry::model::box<TPoint>;
@ -125,11 +75,14 @@ protected:
// Create the rtree using default constructor.
boost::geometry::index::rtree<TValue, boost::geometry::index::quadratic<16>> m_rtree;
virtual void BuildObject(Object const & object, function<void(FeatureBuilder1 &)> const & fn) const = 0;
void BuildObject(Object const & object,
function<void(FeatureBuilder1 &)> const & fn) const;
void LoadData(istream & src, string const & addressReferencePath);
/// @return an id of a matched object or Object::InvalidObjectId() on failure.
virtual ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const = 0;
ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const;
};
} // namespace generator
#include "generator/sponsored_dataset_inl.hpp" // SponsoredDataset implementation.

View file

@ -0,0 +1,174 @@
#include "generator/sponsored_dataset.hpp"
#include "geometry/distance_on_sphere.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
#include "std/fstream.hpp"
#include "std/iostream.hpp"
#include "std/limits.hpp"
namespace generator
{
// Registers every local map found in the platform's writable directory in |m_index| and
// then creates the reverse geocoder on top of that index.
// A map whose registration throws RootException trips a CHECK.
template <typename SponsoredObject>
SponsoredDataset<SponsoredObject>::AddressMatcher::AddressMatcher()
{
  Platform & pl = GetPlatform();

  vector<platform::LocalCountryFile> mwms;
  platform::FindAllLocalMapsInDirectoryAndCleanup(pl.WritableDir(), 0 /* version */,
                                                  -1 /* latestVersion */, mwms);

  for (auto const & mwm : mwms)
  {
    LOG(LINFO, ("Found mwm:", mwm));
    try
    {
      m_index.RegisterMap(mwm);
    }
    catch (RootException const &)
    {
      CHECK(false, ("Bad mwm file:", mwm));
    }
  }

  m_coder = make_unique<search::ReverseGeocoder>(m_index);
}
// Asks the reverse geocoder for an address near the coordinates of |object| and stores the
// reported street name and house number in it, overwriting the previous values.
template <typename SponsoredObject>
void SponsoredDataset<SponsoredObject>::AddressMatcher::operator()(Object & object)
{
  auto const center = MercatorBounds::FromLatLon(object.m_lat, object.m_lon);

  search::ReverseGeocoder::Address nearby;
  m_coder->GetNearbyAddress(center, nearby);

  object.m_street = nearby.GetStreetName();
  object.m_houseNumber = nearby.GetHouseNumber();
}
// Loads the dataset from the file at |dataPath|.
// An empty |dataPath| leaves the dataset empty without any diagnostics. A file that can not
// be opened is reported with LOG(LERROR) and the dataset is left empty as well.
// |addressReferencePath| is handed over to LoadData() unchanged.
template <typename SponsoredObject>
SponsoredDataset<SponsoredObject>::SponsoredDataset(string const & dataPath, string const & addressReferencePath)
{
  if (dataPath.empty())
    return;

  ifstream input(dataPath);
  if (input.is_open())
  {
    LoadData(input, addressReferencePath);
    return;
  }

  LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
}
// Loads the dataset from an already opened stream; both arguments are forwarded to
// LoadData() as is.
template <typename SponsoredObject>
SponsoredDataset<SponsoredObject>::SponsoredDataset(istream & dataSource, string const & addressReferencePath)
{
  this->LoadData(dataSource, addressReferencePath);
}
// Returns read-only access to the stored object with the given |id|.
// The id must be present in |m_objects|: a lookup miss fails the CHECK below.
template <typename SponsoredObject>
typename SponsoredDataset<SponsoredObject>::Object const &
SponsoredDataset<SponsoredObject>::GetObjectById(ObjectId id) const
{
auto const it = m_objects.find(id);
// NOTE(review): the condition text presumably ends up in the failure message, so it is
// best left untouched — confirm against the CHECK macro definition.
CHECK(it != end(m_objects), ("Got wrong object id:", id));
return it->second;
}
// Returns mutable access to the stored object with the given |id|.
// The id must be present in |m_objects|: a lookup miss fails the CHECK below.
template <typename SponsoredObject>
typename SponsoredDataset<SponsoredObject>::Object &
SponsoredDataset<SponsoredObject>::GetObjectById(ObjectId id)
{
auto const it = m_objects.find(id);
// NOTE(review): the condition text presumably ends up in the failure message, so it is
// best left untouched — confirm against the CHECK macro definition.
CHECK(it != end(m_objects), ("Got wrong object id:", id));
return it->second;
}
// Calls BuildObject() with |fn| for every stored object, in the iteration order of
// |m_objects|.
template <typename SponsoredObject>
void SponsoredDataset<SponsoredObject>::BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const
{
  for (auto const & idAndObject : m_objects)
  {
    auto const & object = idAndObject.second;
    BuildObject(object, fn);
  }
}
// Returns the id of the dataset object matching |fb|.
// The actual search (FindMatchingObjectIdImpl) is attempted only when
// NecessaryMatchingConditionHolds(fb) is true; otherwise Object::InvalidObjectId() is
// returned.
template <typename SponsoredObject>
typename SponsoredDataset<SponsoredObject>::ObjectId
SponsoredDataset<SponsoredObject>::FindMatchingObjectId(FeatureBuilder1 const & fb) const
{
  if (!NecessaryMatchingConditionHolds(fb))
    return Object::InvalidObjectId();

  return FindMatchingObjectIdImpl(fb);
}
// Collects ids of the objects the r-tree reports as nearest to |latLon|; at most |limit|
// candidates are requested from the tree.
// When |maxDistance| is non-zero, candidates whose ms::DistanceOnEarth() from |latLon|
// exceeds it are dropped; zero switches the distance filter off.
template <typename SponsoredObject>
vector<typename SponsoredDataset<SponsoredObject>::ObjectId>
SponsoredDataset<SponsoredObject>::GetNearestObjects(ms::LatLon const & latLon, size_t const limit,
                                                     double const maxDistance /* = 0.0 */) const
{
  namespace bgi = boost::geometry::index;

  bool const filterByDistance = maxDistance != 0.0;
  auto const query = bgi::nearest(TPoint(latLon.lat, latLon.lon), limit);
  auto const last = bgi::qend(m_rtree);

  vector<ObjectId> indexes;
  for (auto it = bgi::qbegin(m_rtree, query); it != last; ++it)
  {
    TValue const & v = *it;
    auto const & object = GetObjectById(v.second);
    double const dist = ms::DistanceOnEarth(latLon.lat, latLon.lon, object.m_lat, object.m_lon);
    if (filterByDistance && dist > maxDistance /* max distance in meters */)
      continue;
    indexes.emplace_back(v.second);
  }
  return indexes;
}
// Replaces the current content of the dataset with the objects read from |src|.
//
// Every line of |src| is parsed into an Object and stored under its id. When several lines
// carry the same id, only the first one is kept (map::emplace does not overwrite).
// If |addressReferencePath| is not empty, an AddressMatcher is run over every object to
// fill its street / house number, and statistics are logged.
// Finally the r-tree is rebuilt from the coordinates of the stored objects.
template <typename SponsoredObject>
void SponsoredDataset<SponsoredObject>::LoadData(istream & src, string const & addressReferencePath)
{
  m_objects.clear();
  m_rtree.clear();

  for (string line; getline(src, line);)
  {
    Object hotel(line);
    // Copy the key out first, then move the object itself into the map: this spares a deep
    // copy of every string field per input line.
    auto const id = hotel.m_id;
    m_objects.emplace(id, std::move(hotel));
  }

  if (!addressReferencePath.empty())
  {
    LOG(LINFO, ("Reference addresses for booking objects", addressReferencePath));
    Platform & platform = GetPlatform();
    string const backupPath = platform.WritableDir();

    // TODO(mgsergio): What is this for?
    // NOTE(review): AddressMatcher's constructor searches platform.WritableDir() for maps, so
    // this redirect is presumably what makes it pick up the reference maps — confirm.
    platform.SetWritableDirForTests(addressReferencePath);

    AddressMatcher addressMatcher;

    size_t matchedNum = 0;
    size_t emptyAddr = 0;
    for (auto & item : m_objects)
    {
      auto & object = item.second;
      addressMatcher(object);

      if (object.m_address.empty())
        ++emptyAddr;
      if (object.IsAddressPartsFilled())
        ++matchedNum;
    }

    // TODO(mgsergio): Fix names.
    LOG(LINFO,
        ("Num of hotels:", m_objects.size(), "matched:", matchedNum, "empty addresses:", emptyAddr));

    // TODO(mgsergio): What is this for?
    // Puts back the writable dir that was in effect before the redirect above.
    platform.SetWritableDirForTests(backupPath);
  }

  for (auto const & item : m_objects)
  {
    auto const & hotel = item.second;
    // The box is degenerate: both corners are the object's own point.
    TBox b(TPoint(hotel.m_lat, hotel.m_lon), TPoint(hotel.m_lat, hotel.m_lon));
    m_rtree.insert(make_pair(b, hotel.m_id));
  }
}
} // namespace generator