forked from organicmaps/organicmaps
Switch from inheritance to templates.
This commit is contained in:
parent
75afc71cf8
commit
22d138efd4
11 changed files with 365 additions and 364 deletions
|
@ -2,23 +2,18 @@
|
|||
|
||||
namespace generator
|
||||
{
|
||||
SponsoredDataset::ObjectId AggregatingSponsoredDataset::FindMatchingObjectId(FeatureBuilder1 const & fb) const
|
||||
bool AggregatingSponsoredDataset::IsMatched(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
// There is only one source for now.
|
||||
return m_datasets[0]->FindMatchingObjectId(fb);
|
||||
}
|
||||
|
||||
size_t AggregatingSponsoredDataset::Size() const
|
||||
{
|
||||
size_t count{};
|
||||
for (auto const & ds : m_datasets)
|
||||
count += ds->Size();
|
||||
return count;
|
||||
return m_bookingDataset.FindMatchingObjectId(fb) != BookingHotel::InvalidObjectId();
|
||||
}
|
||||
|
||||
void AggregatingSponsoredDataset::BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const
|
||||
{
|
||||
for (auto const & ds : m_datasets)
|
||||
ds->BuildOsmObjects(fn);
|
||||
m_bookingDataset.BuildOsmObjects(fn);
|
||||
}
|
||||
|
||||
size_t AggregatingSponsoredDataset::Size() const
|
||||
{
|
||||
return m_bookingDataset.Size();
|
||||
}
|
||||
} // namespace generator
|
||||
|
|
|
@ -8,22 +8,20 @@
|
|||
|
||||
namespace generator
|
||||
{
|
||||
class AggregatingSponsoredDataset : public SponsoredDataset
|
||||
class AggregatingSponsoredDataset
|
||||
{
|
||||
public:
|
||||
explicit AggregatingSponsoredDataset(feature::GenerateInfo const & info)
|
||||
: m_bookingDataset(info.m_bookingDatafileName, info.m_bookingReferenceDir)
|
||||
{
|
||||
m_datasets.emplace_back(make_unique<BookingDataset>(info.m_bookingDatafileName,
|
||||
info.m_bookingReferenceDir));
|
||||
}
|
||||
|
||||
ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const override;
|
||||
bool IsMatched(FeatureBuilder1 const & e) const;
|
||||
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const;
|
||||
|
||||
size_t Size() const override;
|
||||
|
||||
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const override;
|
||||
size_t Size() const;
|
||||
|
||||
private:
|
||||
vector<unique_ptr<SponsoredDatasetBase>> m_datasets;
|
||||
BookingDataset m_bookingDataset;
|
||||
};
|
||||
} // namespace generator;
|
||||
|
|
|
@ -10,6 +10,60 @@
|
|||
|
||||
namespace generator
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Returns a copy of |str| in which every tab character is replaced by the
// two-character sequence backslash + 't'. All other characters are copied
// unchanged.
string EscapeTabs(string const & str)
{
  string escaped;
  for (auto it = str.begin(); it != str.end(); ++it)
  {
    if (*it != '\t')
      escaped.push_back(*it);
    else
      escaped.append("\\t");
  }
  return escaped;
}
|
||||
} // namespace
|
||||
|
||||
// BookingHotel ------------------------------------------------------------------------------------
|
||||
|
||||
// Builds a hotel from one tab-separated line |src|.
// The line must split into exactly FieldsCount() fields, otherwise the CHECK
// below fails and reports the offending line with its tabs escaped.
// The results of the numeric conversions are not inspected here.
BookingHotel::BookingHotel(string const & src)
{
  vector<string> rec;
  strings::ParseCSVRow(src, '\t', rec);
  CHECK(rec.size() == FieldsCount(), ("Error parsing hotels.tsv line:", EscapeTabs(src)));

  // Shorthand for reading a column by its symbolic name.
  auto const field = [&rec](Fields f) -> string const & { return rec[Index(f)]; };

  // Plain text columns.
  m_name = field(Fields::Name);
  m_address = field(Fields::Address);
  m_descUrl = field(Fields::DescUrl);
  m_translations = field(Fields::Translations);

  // Identifier and position.
  strings::to_uint(field(Fields::Id), m_id.Get());
  // TODO(mgsergio): Use ms::LatLon.
  strings::to_double(field(Fields::Latitude), m_lat);
  strings::to_double(field(Fields::Longtitude), m_lon);

  // Remaining numeric columns.
  strings::to_uint(field(Fields::Stars), m_stars);
  strings::to_uint(field(Fields::PriceCategory), m_priceCategory);
  strings::to_double(field(Fields::RatingBooking), m_ratingBooking);
  strings::to_double(field(Fields::RatingUsers), m_ratingUser);
  strings::to_uint(field(Fields::Type), m_type);
}
|
||||
|
||||
// Writes the id, name, address and coordinates of |h| to |s| on a single line.
// Note: the stream is switched to fixed notation with precision 7 and is left
// in that state when the function returns.
ostream & operator<<(ostream & s, BookingHotel const & h)
{
  s << fixed << setprecision(7);
  s << "Id: " << h.m_id;
  s << "\t Name: " << h.m_name;
  s << "\t Address: " << h.m_address;
  s << "\t lat: " << h.m_lat << " lon: " << h.m_lon;
  return s;
}
|
||||
|
||||
// BookingDataset ----------------------------------------------------------------------------------
|
||||
|
||||
template <>
|
||||
bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
|
||||
|
@ -18,39 +72,40 @@ bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb)
|
|||
return ftypes::IsHotelChecker::Instance()(fb.GetTypes());
|
||||
}
|
||||
|
||||
void BookingDataset::BuildObject(SponsoredDataset::Object const & hotel,
|
||||
template <>
|
||||
void BookingDataset::BuildObject(Object const & hotel,
|
||||
function<void(FeatureBuilder1 &)> const & fn) const
|
||||
{
|
||||
FeatureBuilder1 fb;
|
||||
FeatureParams params;
|
||||
|
||||
fb.SetCenter(MercatorBounds::FromLatLon(hotel.lat, hotel.lon));
|
||||
fb.SetCenter(MercatorBounds::FromLatLon(hotel.m_lat, hotel.m_lon));
|
||||
|
||||
auto & metadata = params.GetMetadata();
|
||||
// TODO(mgsergio): Rename FMD_SPONSORED_ID to FMD_BOOKING_ID.
|
||||
metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(hotel.id.Get()));
|
||||
metadata.Set(feature::Metadata::FMD_WEBSITE, hotel.descUrl);
|
||||
metadata.Set(feature::Metadata::FMD_RATING, strings::to_string(hotel.ratingUser));
|
||||
metadata.Set(feature::Metadata::FMD_STARS, strings::to_string(hotel.stars));
|
||||
metadata.Set(feature::Metadata::FMD_PRICE_RATE, strings::to_string(hotel.priceCategory));
|
||||
metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(hotel.m_id.Get()));
|
||||
metadata.Set(feature::Metadata::FMD_WEBSITE, hotel.m_descUrl);
|
||||
metadata.Set(feature::Metadata::FMD_RATING, strings::to_string(hotel.m_ratingUser));
|
||||
metadata.Set(feature::Metadata::FMD_STARS, strings::to_string(hotel.m_stars));
|
||||
metadata.Set(feature::Metadata::FMD_PRICE_RATE, strings::to_string(hotel.m_priceCategory));
|
||||
|
||||
// params.AddAddress(hotel.address);
|
||||
// TODO(mgsergio): addr:full ???
|
||||
|
||||
if (!hotel.street.empty())
|
||||
fb.AddStreet(hotel.street);
|
||||
if (!hotel.m_street.empty())
|
||||
fb.AddStreet(hotel.m_street);
|
||||
|
||||
if (!hotel.houseNumber.empty())
|
||||
fb.AddHouseNumber(hotel.houseNumber);
|
||||
if (!hotel.m_houseNumber.empty())
|
||||
fb.AddHouseNumber(hotel.m_houseNumber);
|
||||
|
||||
params.AddName(StringUtf8Multilang::GetLangByCode(StringUtf8Multilang::kDefaultCode),
|
||||
hotel.name);
|
||||
if (!hotel.translations.empty())
|
||||
hotel.m_name);
|
||||
if (!hotel.m_translations.empty())
|
||||
{
|
||||
// TODO(mgsergio): Move parsing to the hotel construction stage.
|
||||
vector<string> parts;
|
||||
strings::ParseCSVRow(hotel.translations, '|', parts);
|
||||
CHECK_EQUAL(parts.size() % 3, 0, ("Invalid translation string:", hotel.translations));
|
||||
strings::ParseCSVRow(hotel.m_translations, '|', parts);
|
||||
CHECK_EQUAL(parts.size() % 3, 0, ("Invalid translation string:", hotel.m_translations));
|
||||
for (auto i = 0; i < parts.size(); i += 3)
|
||||
{
|
||||
auto const langCode = StringUtf8Multilang::GetLangIndex(parts[i]);
|
||||
|
@ -63,7 +118,7 @@ void BookingDataset::BuildObject(SponsoredDataset::Object const & hotel,
|
|||
params.AddType(clf.GetTypeByPath({"sponsored", "booking"}));
|
||||
// Matching booking.com hotel types to OpenStreetMap values.
|
||||
// Booking types are listed in the closed API docs.
|
||||
switch (hotel.type)
|
||||
switch (hotel.m_type)
|
||||
{
|
||||
case 19:
|
||||
case 205: params.AddType(clf.GetTypeByPath({"tourism", "motel"})); break;
|
||||
|
@ -117,12 +172,13 @@ void BookingDataset::BuildObject(SponsoredDataset::Object const & hotel,
|
|||
fn(fb);
|
||||
}
|
||||
|
||||
template <>
|
||||
BookingDataset::ObjectId BookingDataset::FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
auto const name = fb.GetName(StringUtf8Multilang::kDefaultCode);
|
||||
|
||||
if (name.empty())
|
||||
return kInvalidObjectId;
|
||||
return Object::InvalidObjectId();
|
||||
|
||||
// Find |kMaxSelectedElements| nearest values to a point.
|
||||
auto const bookingIndexes = GetNearestObjects(MercatorBounds::ToLatLon(fb.GetKeyPoint()),
|
||||
|
@ -134,6 +190,6 @@ BookingDataset::ObjectId BookingDataset::FindMatchingObjectIdImpl(FeatureBuilder
|
|||
return j;
|
||||
}
|
||||
|
||||
return kInvalidObjectId;
|
||||
return Object::InvalidObjectId();
|
||||
}
|
||||
} // namespace generator
|
||||
|
|
|
@ -21,25 +21,58 @@ class FeatureBuilder1;
|
|||
|
||||
namespace generator
|
||||
{
|
||||
class BookingDataset : public SponsoredDatasetBase
|
||||
// TODO(mgsergio): Try to get rid of code duplication. (See OpenTableRestaurant)
|
||||
struct BookingHotel
|
||||
{
|
||||
public:
|
||||
NEWTYPE(uint32_t, ObjectId);
|
||||
|
||||
explicit BookingDataset(string const & dataPath, string const & addressReferencePath = string())
|
||||
: SponsoredDatasetBase(dataPath, addressReferencePath)
|
||||
enum class Fields
|
||||
{
|
||||
Id = 0,
|
||||
Latitude = 1,
|
||||
Longtitude = 2,
|
||||
Name = 3,
|
||||
Address = 4,
|
||||
Stars = 5,
|
||||
PriceCategory = 6,
|
||||
RatingBooking = 7,
|
||||
RatingUsers = 8,
|
||||
DescUrl = 9,
|
||||
Type = 10,
|
||||
Translations = 11,
|
||||
Counter
|
||||
};
|
||||
|
||||
static constexpr ObjectId InvalidObjectId()
|
||||
{
|
||||
return ObjectId(numeric_limits<typename ObjectId::RepType>::max());
|
||||
}
|
||||
|
||||
explicit BookingDataset(istream & dataSource, string const & addressReferencePath = string())
|
||||
: SponsoredDatasetBase(dataSource, addressReferencePath)
|
||||
{
|
||||
}
|
||||
explicit BookingHotel(string const & src);
|
||||
|
||||
bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const override;
|
||||
static constexpr size_t Index(Fields field) { return static_cast<size_t>(field); }
|
||||
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
|
||||
bool IsAddressPartsFilled() const { return !m_street.empty() || !m_houseNumber.empty(); }
|
||||
|
||||
protected:
|
||||
void BuildObject(Object const & hotel, function<void(FeatureBuilder1 &)> const & fn) const override;
|
||||
ObjectId m_id{InvalidObjectId()};
|
||||
double m_lat = 0.0;
|
||||
double m_lon = 0.0;
|
||||
string m_name;
|
||||
string m_street;
|
||||
string m_houseNumber;
|
||||
|
||||
ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & e) const override;
|
||||
string m_address;
|
||||
uint32_t m_stars = 0;
|
||||
uint32_t m_priceCategory = 0;
|
||||
double m_ratingBooking = 0.0;
|
||||
double m_ratingUser = 0.0;
|
||||
string m_descUrl;
|
||||
uint32_t m_type = 0;
|
||||
string m_translations;
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream & s, BookingHotel const & h);
|
||||
|
||||
NEWTYPE_SIMPLE_OUTPUT(BookingHotel::ObjectId);
|
||||
using BookingDataset = SponsoredDataset<BookingHotel>;
|
||||
} // namespace generator
|
||||
|
|
|
@ -136,10 +136,11 @@ feature::GenerateInfo GetGenerateInfo()
|
|||
return info;
|
||||
}
|
||||
|
||||
template <typename Object>
|
||||
struct SampleItem
|
||||
{
|
||||
enum MatchStatus {Uninitialized, Yes, No};
|
||||
using ObjectId = SponsoredDataset::ObjectId;
|
||||
using ObjectId = typename Object::ObjectId;
|
||||
|
||||
SampleItem() = default;
|
||||
|
||||
|
@ -151,28 +152,30 @@ struct SampleItem
|
|||
}
|
||||
|
||||
osm::Id m_osmId;
|
||||
ObjectId m_bookingId = SponsoredDataset::kInvalidObjectId;
|
||||
ObjectId m_bookingId = Object::InvalidObjectId();
|
||||
|
||||
MatchStatus m_match = Uninitialized;
|
||||
};
|
||||
|
||||
SampleItem::MatchStatus ReadMatchStatus(string const & str)
|
||||
template <typename Object>
|
||||
typename SampleItem<Object>::MatchStatus ReadMatchStatus(string const & str)
|
||||
{
|
||||
if (str == "Yes")
|
||||
return SampleItem::Yes;
|
||||
return SampleItem<Object>::Yes;
|
||||
|
||||
if (str == "No")
|
||||
return SampleItem::No;
|
||||
return SampleItem<Object>::No;
|
||||
|
||||
if (str == "Uninitialized")
|
||||
return SampleItem::Uninitialized;
|
||||
return SampleItem<Object>::Uninitialized;
|
||||
|
||||
MYTHROW(ParseError, ("Can't make SampleItem::MatchStatus from string:", str));
|
||||
}
|
||||
|
||||
SampleItem ReadSampleItem(string const & str)
|
||||
template <typename Object>
|
||||
SampleItem<Object> ReadSampleItem(string const & str)
|
||||
{
|
||||
SampleItem item;
|
||||
SampleItem<Object> item;
|
||||
|
||||
auto const parts = strings::Tokenize(str, "\t");
|
||||
CHECK_EQUAL(parts.size(), 3, ("Cant't make SampleItem from string:", str,
|
||||
|
@ -181,21 +184,22 @@ SampleItem ReadSampleItem(string const & str)
|
|||
item.m_osmId = ReadDebuggedPrintedOsmId(parts[0]);
|
||||
if (!strings::to_uint(parts[1], item.m_bookingId.Get()))
|
||||
MYTHROW(ParseError, ("Can't make uint32 from string:", parts[1]));
|
||||
item.m_match = ReadMatchStatus(parts[2]);
|
||||
item.m_match = ReadMatchStatus<Object>(parts[2]);
|
||||
|
||||
return item;
|
||||
}
|
||||
|
||||
vector<SampleItem> ReadSample(istream & ist)
|
||||
template <typename Object>
|
||||
vector<SampleItem<Object>> ReadSample(istream & ist)
|
||||
{
|
||||
vector<SampleItem> result;
|
||||
vector<SampleItem<Object>> result;
|
||||
|
||||
size_t lineNumber = 1;
|
||||
try
|
||||
{
|
||||
for (string line; getline(ist, line); ++lineNumber)
|
||||
{
|
||||
result.emplace_back(ReadSampleItem(line));
|
||||
result.emplace_back(ReadSampleItem<Object>(line));
|
||||
}
|
||||
}
|
||||
catch (ParseError const & e)
|
||||
|
@ -207,15 +211,17 @@ vector<SampleItem> ReadSample(istream & ist)
|
|||
return result;
|
||||
}
|
||||
|
||||
vector<SampleItem> ReadSampleFromFile(string const & name)
|
||||
template <typename Object>
|
||||
vector<SampleItem<Object>> ReadSampleFromFile(string const & name)
|
||||
{
|
||||
ifstream ist(name);
|
||||
CHECK(ist.is_open(), ("Can't open file:", name, strerror(errno)));
|
||||
return ReadSample(ist);
|
||||
return ReadSample<Object>(ist);
|
||||
}
|
||||
|
||||
template <typename Object>
|
||||
void GenerateFactors(BookingDataset const & booking, map<osm::Id, FeatureBuilder1> const & features,
|
||||
vector<SampleItem> const & sampleItems, ostream & ost)
|
||||
vector<SampleItem<Object>> const & sampleItems, ostream & ost)
|
||||
{
|
||||
for (auto const & item : sampleItems)
|
||||
{
|
||||
|
@ -226,13 +232,13 @@ void GenerateFactors(BookingDataset const & booking, map<osm::Id, FeatureBuilder
|
|||
|
||||
auto const center = MercatorBounds::ToLatLon(feature.GetKeyPoint());
|
||||
double const distanceMeters = ms::DistanceOnEarth(center.lat, center.lon,
|
||||
hotel.lat, hotel.lon);
|
||||
hotel.m_lat, hotel.m_lon);
|
||||
auto const matched = score.IsMatched();
|
||||
|
||||
ost << "# ------------------------------------------" << fixed << setprecision(6)
|
||||
<< endl;
|
||||
ost << (matched ? 'y' : 'n') << " \t" << DebugPrint(feature.GetMostGenericOsmId())
|
||||
<< "\t " << hotel.id
|
||||
<< "\t " << hotel.m_id
|
||||
<< "\tdistance: " << distanceMeters
|
||||
<< "\tdistance score: " << score.m_linearNormDistanceScore
|
||||
<< "\tname score: " << score.m_nameSimilarityScore
|
||||
|
@ -240,8 +246,8 @@ void GenerateFactors(BookingDataset const & booking, map<osm::Id, FeatureBuilder
|
|||
<< endl;
|
||||
ost << "# " << PrintBuilder(feature) << endl;
|
||||
ost << "# " << hotel << endl;
|
||||
ost << "# URL: https://www.openstreetmap.org/?mlat=" << hotel.lat
|
||||
<< "&mlon=" << hotel.lon << "#map=18/" << hotel.lat << "/" << hotel.lon << endl;
|
||||
ost << "# URL: https://www.openstreetmap.org/?mlat=" << hotel.m_lat
|
||||
<< "&mlon=" << hotel.m_lon << "#map=18/" << hotel.m_lat << "/" << hotel.m_lon << endl;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
@ -278,7 +284,7 @@ int main(int argc, char * argv[])
|
|||
return make_unique<Emitter>(booking, features);
|
||||
});
|
||||
|
||||
auto const sample = ReadSampleFromFile(FLAGS_sample);
|
||||
auto const sample = ReadSampleFromFile<BookingHotel>(FLAGS_sample);
|
||||
LOG(LINFO, ("Sample size is", sample.size()));
|
||||
{
|
||||
ofstream ost(FLAGS_factors);
|
||||
|
|
|
@ -131,12 +131,12 @@ BookingMatchScore Match(BookingDataset::Object const & h, FeatureBuilder1 const
|
|||
BookingMatchScore score;
|
||||
|
||||
auto const fbCenter = MercatorBounds::ToLatLon(fb.GetKeyPoint());
|
||||
auto const distance = ms::DistanceOnEarth(fbCenter.lat, fbCenter.lon, h.lat, h.lon);
|
||||
auto const distance = ms::DistanceOnEarth(fbCenter.lat, fbCenter.lon, h.m_lat, h.m_lon);
|
||||
score.m_linearNormDistanceScore = GetLinearNormDistanceScore(distance);
|
||||
|
||||
// TODO(mgsergio): Check all translations and use the best one.
|
||||
score.m_nameSimilarityScore =
|
||||
GetNameSimilarityScore(h.name, fb.GetName(StringUtf8Multilang::kDefaultCode));
|
||||
GetNameSimilarityScore(h.m_name, fb.GetName(StringUtf8Multilang::kDefaultCode));
|
||||
|
||||
return score;
|
||||
}
|
||||
|
|
|
@ -37,7 +37,6 @@ SOURCES += \
|
|||
region_meta.cpp \
|
||||
routing_generator.cpp \
|
||||
search_index_builder.cpp \
|
||||
sponsored_dataset.cpp \
|
||||
srtm_parser.cpp \
|
||||
statistics.cpp \
|
||||
tesselator.cpp \
|
||||
|
@ -76,6 +75,7 @@ HEADERS += \
|
|||
routing_generator.hpp \
|
||||
search_index_builder.hpp \
|
||||
sponsored_dataset.hpp \
|
||||
sponsored_dataset_inl.hpp \
|
||||
srtm_parser.hpp \
|
||||
statistics.hpp \
|
||||
tag_admixer.hpp \
|
||||
|
|
|
@ -349,8 +349,7 @@ public:
|
|||
[](Place const & p1, Place const & p2) { return p1.IsEqual(p2); },
|
||||
[](Place const & p1, Place const & p2) { return p1.IsBetterThan(p2); });
|
||||
}
|
||||
else if (m_dataset.FindMatchingObjectId(fb) !=
|
||||
generator::SponsoredDatasetBase::kInvalidObjectId)
|
||||
else if (m_dataset.IsMatched(fb))
|
||||
{
|
||||
m_skippedElements << DebugPrint(fb.GetMostGenericOsmId()) << endl;
|
||||
|
||||
|
|
|
@ -1,213 +0,0 @@
|
|||
#include "generator/sponsored_dataset.hpp"
|
||||
|
||||
#include "platform/local_country_file.hpp"
|
||||
#include "platform/local_country_file_utils.hpp"
|
||||
#include "platform/platform.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/fstream.hpp"
|
||||
#include "std/iostream.hpp"
|
||||
#include "std/limits.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Produces a copy of |str| where each tab is spelled out as the two characters
// '\' and 't'; everything between tabs is copied verbatim.
string EscapeTabs(string const & str)
{
  string out;
  string::size_type from = 0;
  for (;;)
  {
    auto const tab = str.find('\t', from);
    if (tab == string::npos)
    {
      // No more tabs: copy the tail and stop.
      out.append(str, from, string::npos);
      break;
    }
    out.append(str, from, tab - from);
    out += "\\t";
    from = tab + 1;
  }
  return out;
}
|
||||
} // namespace
|
||||
|
||||
// Registers every local map found in the platform's writable directory with
// m_index and then creates the reverse geocoder on top of that index.
// A map that throws RootException while being registered trips a CHECK.
SponsoredDataset::AddressMatcher::AddressMatcher()
{
  vector<platform::LocalCountryFile> localFiles;
  platform::FindAllLocalMapsInDirectoryAndCleanup(GetPlatform().WritableDir(), 0 /* version */,
                                                  -1 /* latestVersion */, localFiles);

  for (auto const & localFile : localFiles)
  {
    LOG(LINFO, ("Found mwm:", localFile));
    try
    {
      m_index.RegisterMap(localFile);
    }
    catch (RootException const &)
    {
      CHECK(false, ("Bad mwm file:", localFile));
    }
  }

  m_coder = make_unique<search::ReverseGeocoder>(m_index);
}
|
||||
|
||||
// Asks the reverse geocoder for an address near |object|'s coordinates and
// stores the resulting street name and house number in |object|.
void SponsoredDataset::AddressMatcher::operator()(Object & object)
{
  auto const center = MercatorBounds::FromLatLon(object.lat, object.lon);

  search::ReverseGeocoder::Address nearby;
  m_coder->GetNearbyAddress(center, nearby);

  object.houseNumber = nearby.GetHouseNumber();
  object.street = nearby.GetStreetName();
}
|
||||
|
||||
// Builds an object from one tab-separated line |src|.
// The line must split into exactly FieldsCount() fields, otherwise the CHECK
// below fails and reports the line with its tabs escaped.
// The results of the numeric conversions are not inspected here.
SponsoredDataset::Object::Object(string const & src)
{
  vector<string> rec;
  strings::ParseCSVRow(src, '\t', rec);
  CHECK(rec.size() == FieldsCount(), ("Error parsing hotels.tsv line:", EscapeTabs(src)));

  // Shorthand for reading a column by its symbolic name.
  auto const field = [&rec](Fields f) -> string const & { return rec[Index(f)]; };

  // Plain text columns.
  name = field(Fields::Name);
  address = field(Fields::Address);
  descUrl = field(Fields::DescUrl);
  translations = field(Fields::Translations);

  // Identifier and position.
  strings::to_uint(field(Fields::Id), id.Get());
  strings::to_double(field(Fields::Latitude), lat);
  strings::to_double(field(Fields::Longtitude), lon);

  // Remaining numeric columns.
  strings::to_uint(field(Fields::Stars), stars);
  strings::to_uint(field(Fields::PriceCategory), priceCategory);
  strings::to_double(field(Fields::RatingBooking), ratingBooking);
  strings::to_double(field(Fields::RatingUsers), ratingUser);
  strings::to_uint(field(Fields::Type), type);
}
|
||||
|
||||
// Writes the id, name, address and coordinates of |h| to |s| on a single line.
// Note: the stream is switched to fixed notation with precision 7 and is left
// in that state when the function returns.
ostream & operator<<(ostream & s, SponsoredDataset::Object const & h)
{
  s << fixed << setprecision(7);
  s << "Id: " << h.id;
  s << "\t Name: " << h.name;
  s << "\t Address: " << h.address;
  s << "\t lat: " << h.lat << " lon: " << h.lon;
  return s;
}
|
||||
|
||||
// Sentinel id: the largest value representable by the id's underlying type.
SponsoredDataset::ObjectId const SponsoredDataset::kInvalidObjectId(
    numeric_limits<SponsoredDataset::ObjectId::RepType>::max());
|
||||
|
||||
// Loads the dataset from the file at |dataPath|.
// An empty |dataPath| leaves the dataset empty. A file that cannot be opened
// is logged as an error and also leaves the dataset empty.
SponsoredDatasetBase::SponsoredDatasetBase(string const & dataPath, string const & addressReferencePath)
{
  if (!dataPath.empty())
  {
    ifstream input(dataPath);
    if (input.is_open())
    {
      LoadData(input, addressReferencePath);
    }
    else
    {
      LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
    }
  }
}
|
||||
|
||||
// Loads the dataset from an already opened stream; all work is delegated to
// LoadData().
SponsoredDatasetBase::SponsoredDatasetBase(istream & dataSource, string const & addressReferencePath)
{
  this->LoadData(dataSource, addressReferencePath);
}
|
||||
|
||||
// Read-only lookup of the object stored under |id|.
// An unknown |id| fails the CHECK below.
SponsoredDataset::Object const & SponsoredDatasetBase::GetObjectById(SponsoredDataset::ObjectId id) const
{
  auto const it = m_hotels.find(id);
  CHECK(it != end(m_hotels), ("Got wrong object id:", id));

  SponsoredDataset::Object const & found = it->second;
  return found;
}
|
||||
|
||||
// Mutable lookup of the object stored under |id|.
// An unknown |id| fails the CHECK below.
SponsoredDataset::Object & SponsoredDatasetBase::GetObjectById(SponsoredDataset::ObjectId id)
{
  auto const it = m_hotels.find(id);
  CHECK(it != end(m_hotels), ("Got wrong object id:", id));

  SponsoredDataset::Object & found = it->second;
  return found;
}
|
||||
|
||||
// Calls BuildObject() with |fn| for every stored object, in the iteration
// order of m_hotels.
void SponsoredDatasetBase::BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const
{
  for (auto it = m_hotels.begin(); it != m_hotels.end(); ++it)
    BuildObject(it->second, fn);
}
|
||||
|
||||
// Returns the id produced by FindMatchingObjectIdImpl() for |fb|, or
// kInvalidObjectId when |fb| does not pass NecessaryMatchingConditionHolds().
SponsoredDatasetBase::ObjectId SponsoredDatasetBase::FindMatchingObjectId(FeatureBuilder1 const & fb) const
{
  // Skip the matching step when the precondition does not hold.
  if (!NecessaryMatchingConditionHolds(fb))
    return kInvalidObjectId;

  return FindMatchingObjectIdImpl(fb);
}
|
||||
|
||||
// Queries the r-tree for the nearest entries to |latLon| (the query is given
// |limit| as its count) and returns the ids of those entries.
// When |maxDistance| is non-zero, entries whose distance to |latLon| exceeds
// it (in meters) are dropped; zero disables that filter.
vector<SponsoredDataset::ObjectId> SponsoredDatasetBase::GetNearestObjects(
    ms::LatLon const & latLon, size_t const limit,
    double const maxDistance /* = 0.0 */) const
{
  namespace bgi = boost::geometry::index;

  vector<ObjectId> indexes;

  // Appends the id of |v| unless the distance filter rejects it.
  auto const collect = [this, &latLon, &indexes, maxDistance](TValue const & v)
  {
    auto const & object = GetObjectById(v.second);
    double const dist = ms::DistanceOnEarth(latLon.lat, latLon.lon, object.lat, object.lon);
    bool const tooFar = maxDistance != 0.0 && dist > maxDistance /* max distance in meters */;
    if (!tooFar)
      indexes.emplace_back(v.second);
  };

  TPoint const center(latLon.lat, latLon.lon);
  for_each(bgi::qbegin(m_rtree, bgi::nearest(center, limit)), bgi::qend(m_rtree), collect);

  return indexes;
}
|
||||
|
||||
// Replaces the dataset contents with the objects parsed from |src|, one object
// per line, and rebuilds the r-tree from their coordinates.
// When |addressReferencePath| is not empty, the platform's writable directory
// is pointed at it while an AddressMatcher is run over every object, and is
// then set back to its previous value.
void SponsoredDatasetBase::LoadData(istream & src, string const & addressReferencePath)
{
  m_hotels.clear();
  m_rtree.clear();

  string line;
  while (getline(src, line))
  {
    Object hotel(line);
    m_hotels.emplace(hotel.id, hotel);
  }

  if (!addressReferencePath.empty())
  {
    LOG(LINFO, ("Reference addresses for booking objects", addressReferencePath));
    Platform & pl = GetPlatform();
    string const backupPath = pl.WritableDir();
    pl.SetWritableDirForTests(addressReferencePath);

    // Constructed only after the writable directory has been redirected.
    AddressMatcher addressMatcher;

    size_t matchedNum = 0;
    size_t emptyAddr = 0;
    for (auto & item : m_hotels)
    {
      addressMatcher(item.second);

      if (item.second.address.empty())
        ++emptyAddr;
      if (item.second.IsAddressPartsFilled())
        ++matchedNum;
    }
    LOG(LINFO,
        ("Num of hotels:", m_hotels.size(), "matched:", matchedNum, "empty addresses:", emptyAddr));
    pl.SetWritableDirForTests(backupPath);
  }

  // Each object is indexed as a degenerate box: both corners are its own point.
  for (auto const & item : m_hotels)
  {
    auto const & hotel = item.second;
    TPoint const pt(hotel.lat, hotel.lon);
    m_rtree.insert(make_pair(TBox(pt, pt), hotel.id));
  }
}
|
||||
} // namespace generator
|
|
@ -4,6 +4,10 @@
|
|||
|
||||
#include "search/reverse_geocoder.hpp"
|
||||
|
||||
#include "platform/local_country_file.hpp"
|
||||
#include "platform/local_country_file_utils.hpp"
|
||||
#include "platform/platform.hpp"
|
||||
|
||||
#include "base/newtype.hpp"
|
||||
|
||||
#include "std/function.hpp"
|
||||
|
@ -20,88 +24,34 @@ class FeatureBuilder1;
|
|||
|
||||
namespace generator
|
||||
{
|
||||
template <typename SponsoredObject>
|
||||
class SponsoredDataset
|
||||
{
|
||||
public:
|
||||
NEWTYPE(uint32_t, ObjectId);
|
||||
|
||||
static double constexpr kDistanceLimitInMeters = 150;
|
||||
static size_t constexpr kMaxSelectedElements = 3;
|
||||
static ObjectId const kInvalidObjectId;
|
||||
|
||||
struct Object
|
||||
{
|
||||
enum class Fields
|
||||
{
|
||||
Id = 0,
|
||||
Latitude = 1,
|
||||
Longtitude = 2,
|
||||
Name = 3,
|
||||
Address = 4,
|
||||
Stars = 5,
|
||||
PriceCategory = 6,
|
||||
RatingBooking = 7,
|
||||
RatingUsers = 8,
|
||||
DescUrl = 9,
|
||||
Type = 10,
|
||||
Translations = 11,
|
||||
|
||||
Counter
|
||||
};
|
||||
|
||||
ObjectId id{kInvalidObjectId};
|
||||
double lat = 0.0;
|
||||
double lon = 0.0;
|
||||
string name;
|
||||
string address;
|
||||
string street;
|
||||
string houseNumber;
|
||||
uint32_t stars = 0;
|
||||
uint32_t priceCategory = 0;
|
||||
double ratingBooking = 0.0;
|
||||
double ratingUser = 0.0;
|
||||
string descUrl;
|
||||
uint32_t type = 0;
|
||||
string translations;
|
||||
|
||||
static constexpr size_t Index(Fields field) { return static_cast<size_t>(field); }
|
||||
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
|
||||
explicit Object(string const & src);
|
||||
|
||||
inline bool IsAddressPartsFilled() const { return !street.empty() || !houseNumber.empty(); }
|
||||
};
|
||||
using Object = SponsoredObject;
|
||||
|
||||
private:
|
||||
class AddressMatcher
|
||||
{
|
||||
Index m_index;
|
||||
unique_ptr<search::ReverseGeocoder> m_coder;
|
||||
|
||||
public:
|
||||
AddressMatcher();
|
||||
void operator()(Object & object);
|
||||
|
||||
private:
|
||||
Index m_index;
|
||||
unique_ptr<search::ReverseGeocoder> m_coder;
|
||||
};
|
||||
|
||||
virtual ~SponsoredDataset() = default;
|
||||
|
||||
/// @return an id of a matched object or kInvalidObjectId on failure.
|
||||
virtual ObjectId FindMatchingObjectId(FeatureBuilder1 const & fb) const = 0;
|
||||
|
||||
virtual size_t Size() const = 0;
|
||||
|
||||
virtual void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const = 0;
|
||||
};
|
||||
|
||||
ostream & operator<<(ostream & s, SponsoredDataset::Object const & h);
|
||||
|
||||
NEWTYPE_SIMPLE_OUTPUT(SponsoredDataset::ObjectId);
|
||||
|
||||
class SponsoredDatasetBase : public SponsoredDataset
|
||||
{
|
||||
public:
|
||||
explicit SponsoredDatasetBase(string const & dataPath, string const & addressReferencePath = string());
|
||||
explicit SponsoredDatasetBase(istream & dataSource, string const & addressReferencePath = string());
|
||||
using ObjectId = typename Object::ObjectId;
|
||||
|
||||
size_t Size() const override { return m_hotels.size(); }
|
||||
static double constexpr kDistanceLimitInMeters = 150;
|
||||
static size_t constexpr kMaxSelectedElements = 3;
|
||||
|
||||
explicit SponsoredDataset(string const & dataPath, string const & addressReferencePath = string());
|
||||
explicit SponsoredDataset(istream & dataSource, string const & addressReferencePath = string());
|
||||
|
||||
size_t Size() const { return m_objects.size(); }
|
||||
|
||||
Object const & GetObjectById(ObjectId id) const;
|
||||
Object & GetObjectById(ObjectId id);
|
||||
|
@ -110,13 +60,13 @@ public:
|
|||
|
||||
/// @return true if |fb| satisfies some necessary conditions to match one or several
|
||||
/// objects from dataset.
|
||||
virtual bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const = 0;
|
||||
ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const override;
|
||||
bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const;
|
||||
ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const;
|
||||
|
||||
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const override;
|
||||
void BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const;
|
||||
|
||||
protected:
|
||||
map<ObjectId, Object> m_hotels;
|
||||
map<ObjectId, Object> m_objects;
|
||||
|
||||
using TPoint = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
|
||||
using TBox = boost::geometry::model::box<TPoint>;
|
||||
|
@ -125,11 +75,14 @@ protected:
|
|||
// Create the rtree using default constructor.
|
||||
boost::geometry::index::rtree<TValue, boost::geometry::index::quadratic<16>> m_rtree;
|
||||
|
||||
virtual void BuildObject(Object const & object, function<void(FeatureBuilder1 &)> const & fn) const = 0;
|
||||
void BuildObject(Object const & object,
|
||||
function<void(FeatureBuilder1 &)> const & fn) const;
|
||||
|
||||
void LoadData(istream & src, string const & addressReferencePath);
|
||||
|
||||
/// @return an id of a matched object or Object::InvalidObjectId() on failure.
|
||||
virtual ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const = 0;
|
||||
ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const;
|
||||
};
|
||||
} // namespace generator
|
||||
|
||||
#include "generator/sponsored_dataset_inl.hpp" // SponsoredDataset implementation.
|
||||
|
|
174
generator/sponsored_dataset_inl.hpp
Normal file
174
generator/sponsored_dataset_inl.hpp
Normal file
|
@ -0,0 +1,174 @@
|
|||
#include "generator/sponsored_dataset.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/fstream.hpp"
|
||||
#include "std/iostream.hpp"
|
||||
#include "std/limits.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
template <typename SponsoredObject>
|
||||
SponsoredDataset<SponsoredObject>::AddressMatcher::AddressMatcher()
|
||||
{
|
||||
vector<platform::LocalCountryFile> localFiles;
|
||||
|
||||
Platform & platform = GetPlatform();
|
||||
platform::FindAllLocalMapsInDirectoryAndCleanup(platform.WritableDir(), 0 /* version */,
|
||||
-1 /* latestVersion */, localFiles);
|
||||
|
||||
for (platform::LocalCountryFile const & localFile : localFiles)
|
||||
{
|
||||
LOG(LINFO, ("Found mwm:", localFile));
|
||||
try
|
||||
{
|
||||
m_index.RegisterMap(localFile);
|
||||
}
|
||||
catch (RootException const & ex)
|
||||
{
|
||||
CHECK(false, ("Bad mwm file:", localFile));
|
||||
}
|
||||
}
|
||||
|
||||
m_coder = make_unique<search::ReverseGeocoder>(m_index);
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
void SponsoredDataset<SponsoredObject>::AddressMatcher::operator()(Object & object)
|
||||
{
|
||||
search::ReverseGeocoder::Address addr;
|
||||
m_coder->GetNearbyAddress(MercatorBounds::FromLatLon(object.m_lat, object.m_lon), addr);
|
||||
object.m_street = addr.GetStreetName();
|
||||
object.m_houseNumber = addr.GetHouseNumber();
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
SponsoredDataset<SponsoredObject>::SponsoredDataset(string const & dataPath, string const & addressReferencePath)
|
||||
{
|
||||
if (dataPath.empty())
|
||||
return;
|
||||
|
||||
ifstream dataSource(dataPath);
|
||||
if (!dataSource.is_open())
|
||||
{
|
||||
LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
|
||||
return;
|
||||
}
|
||||
|
||||
LoadData(dataSource, addressReferencePath);
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
SponsoredDataset<SponsoredObject>::SponsoredDataset(istream & dataSource, string const & addressReferencePath)
|
||||
{
|
||||
LoadData(dataSource, addressReferencePath);
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
typename SponsoredDataset<SponsoredObject>::Object const &
|
||||
SponsoredDataset<SponsoredObject>::GetObjectById(ObjectId id) const
|
||||
{
|
||||
auto const it = m_objects.find(id);
|
||||
CHECK(it != end(m_objects), ("Got wrong object id:", id));
|
||||
return it->second;
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
typename SponsoredDataset<SponsoredObject>::Object &
|
||||
SponsoredDataset<SponsoredObject>::GetObjectById(ObjectId id)
|
||||
{
|
||||
auto const it = m_objects.find(id);
|
||||
CHECK(it != end(m_objects), ("Got wrong object id:", id));
|
||||
return it->second;
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
void SponsoredDataset<SponsoredObject>::BuildOsmObjects(function<void(FeatureBuilder1 &)> const & fn) const
|
||||
{
|
||||
for (auto const & item : m_objects)
|
||||
BuildObject(item.second, fn);
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
typename SponsoredDataset<SponsoredObject>::ObjectId
|
||||
SponsoredDataset<SponsoredObject>::FindMatchingObjectId(FeatureBuilder1 const & fb) const
|
||||
{
|
||||
if (NecessaryMatchingConditionHolds(fb))
|
||||
return FindMatchingObjectIdImpl(fb);
|
||||
return Object::InvalidObjectId();
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
vector<typename SponsoredDataset<SponsoredObject>::ObjectId>
|
||||
SponsoredDataset<SponsoredObject>::GetNearestObjects(ms::LatLon const & latLon, size_t const limit,
|
||||
double const maxDistance /* = 0.0 */) const
|
||||
{
|
||||
namespace bgi = boost::geometry::index;
|
||||
|
||||
vector<ObjectId> indexes;
|
||||
for_each(bgi::qbegin(m_rtree, bgi::nearest(TPoint(latLon.lat, latLon.lon), limit)),
|
||||
bgi::qend(m_rtree), [this, &latLon, &indexes, maxDistance](TValue const & v)
|
||||
{
|
||||
auto const & object = GetObjectById(v.second);
|
||||
double const dist = ms::DistanceOnEarth(latLon.lat, latLon.lon, object.m_lat, object.m_lon);
|
||||
if (maxDistance != 0.0 && dist > maxDistance /* max distance in meters */)
|
||||
return;
|
||||
|
||||
indexes.emplace_back(v.second);
|
||||
});
|
||||
|
||||
return indexes;
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
void SponsoredDataset<SponsoredObject>::LoadData(istream & src, string const & addressReferencePath)
|
||||
{
|
||||
m_objects.clear();
|
||||
m_rtree.clear();
|
||||
|
||||
for (string line; getline(src, line);)
|
||||
{
|
||||
Object hotel(line);
|
||||
m_objects.emplace(hotel.m_id, hotel);
|
||||
}
|
||||
|
||||
if (!addressReferencePath.empty())
|
||||
{
|
||||
LOG(LINFO, ("Reference addresses for booking objects", addressReferencePath));
|
||||
Platform & platform = GetPlatform();
|
||||
string const backupPath = platform.WritableDir();
|
||||
// TODO(mgsergio): What is this for?
|
||||
platform.SetWritableDirForTests(addressReferencePath);
|
||||
|
||||
AddressMatcher addressMatcher;
|
||||
|
||||
size_t matchedNum = 0;
|
||||
size_t emptyAddr = 0;
|
||||
for (auto & item : m_objects)
|
||||
{
|
||||
auto & object = item.second;
|
||||
addressMatcher(object);
|
||||
|
||||
if (object.m_address.empty())
|
||||
++emptyAddr;
|
||||
if (object.IsAddressPartsFilled())
|
||||
++matchedNum;
|
||||
}
|
||||
// TODO(mgsergio): Fix names.
|
||||
LOG(LINFO,
|
||||
("Num of hotels:", m_objects.size(), "matched:", matchedNum, "empty addresses:", emptyAddr));
|
||||
// TODO(mgsergio): What is this for?
|
||||
platform.SetWritableDirForTests(backupPath);
|
||||
}
|
||||
|
||||
for (auto const & item : m_objects)
|
||||
{
|
||||
auto const & hotel = item.second;
|
||||
TBox b(TPoint(hotel.m_lat, hotel.m_lon), TPoint(hotel.m_lat, hotel.m_lon));
|
||||
m_rtree.insert(make_pair(b, hotel.m_id));
|
||||
}
|
||||
}
|
||||
} // namespace generator
|
Loading…
Add table
Reference in a new issue