diff --git a/generator/aggregating_sponsored_dataset.cpp b/generator/aggregating_sponsored_dataset.cpp index b49845a19e..16a4c3b9b4 100644 --- a/generator/aggregating_sponsored_dataset.cpp +++ b/generator/aggregating_sponsored_dataset.cpp @@ -2,23 +2,18 @@ namespace generator { -SponsoredDataset::ObjectId AggregatingSponsoredDataset::FindMatchingObjectId(FeatureBuilder1 const & fb) const +bool AggregatingSponsoredDataset::IsMatched(FeatureBuilder1 const & fb) const { - // There is only one source for now. - return m_datasets[0]->FindMatchingObjectId(fb); -} - -size_t AggregatingSponsoredDataset::Size() const -{ - size_t count{}; - for (auto const & ds : m_datasets) - count += ds->Size(); - return count; + return m_bookingDataset.FindMatchingObjectId(fb) != BookingHotel::InvalidObjectId(); } void AggregatingSponsoredDataset::BuildOsmObjects(function const & fn) const { - for (auto const & ds : m_datasets) - ds->BuildOsmObjects(fn); + m_bookingDataset.BuildOsmObjects(fn); +} + +size_t AggregatingSponsoredDataset::Size() const +{ + return m_bookingDataset.Size(); } } // namespace generator diff --git a/generator/aggregating_sponsored_dataset.hpp b/generator/aggregating_sponsored_dataset.hpp index d9f90098fb..268800f1f5 100644 --- a/generator/aggregating_sponsored_dataset.hpp +++ b/generator/aggregating_sponsored_dataset.hpp @@ -8,22 +8,20 @@ namespace generator { -class AggregatingSponsoredDataset : public SponsoredDataset +class AggregatingSponsoredDataset { public: explicit AggregatingSponsoredDataset(feature::GenerateInfo const & info) + : m_bookingDataset(info.m_bookingDatafileName, info.m_bookingReferenceDir) { - m_datasets.emplace_back(make_unique(info.m_bookingDatafileName, - info.m_bookingReferenceDir)); } - ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const override; + bool IsMatched(FeatureBuilder1 const & e) const; + void BuildOsmObjects(function const & fn) const; - size_t Size() const override; - - void BuildOsmObjects(function const & fn) const override; + size_t Size() const; private: - vector> m_datasets; + BookingDataset m_bookingDataset; }; } // namespace generator; diff --git a/generator/booking_dataset.cpp b/generator/booking_dataset.cpp index b81800ecd6..40b887b8af 100644 --- a/generator/booking_dataset.cpp +++ b/generator/booking_dataset.cpp @@ -10,6 +10,60 @@ namespace generator { +namespace +{ +string EscapeTabs(string const & str) +{ + stringstream ss; + for (char c : str) + { + if (c == '\t') + ss << "\\t"; + else + ss << c; + } + return ss.str(); +} +} // namespace + +// BookingHotel ------------------------------------------------------------------------------------ + +BookingHotel::BookingHotel(string const & src) +{ + vector rec; + strings::ParseCSVRow(src, '\t', rec); + CHECK(rec.size() == FieldsCount(), ("Error parsing hotels.tsv line:", EscapeTabs(src))); + + strings::to_uint(rec[Index(Fields::Id)], m_id.Get()); + // TODO(mgsergio): Use ms::LatLon. + strings::to_double(rec[Index(Fields::Latitude)], m_lat); + strings::to_double(rec[Index(Fields::Longtitude)], m_lon); + + m_name = rec[Index(Fields::Name)]; + m_address = rec[Index(Fields::Address)]; + + strings::to_uint(rec[Index(Fields::Stars)], m_stars); + strings::to_uint(rec[Index(Fields::PriceCategory)], m_priceCategory); + strings::to_double(rec[Index(Fields::RatingBooking)], m_ratingBooking); + strings::to_double(rec[Index(Fields::RatingUsers)], m_ratingUser); + + m_descUrl = rec[Index(Fields::DescUrl)]; + + strings::to_uint(rec[Index(Fields::Type)], m_type); + + m_translations = rec[Index(Fields::Translations)]; +} + +ostream & operator<<(ostream & s, BookingHotel const & h) +{ + s << fixed << setprecision(7); + return s << "Id: " << h.m_id << "\t Name: " << h.m_name << "\t Address: " << h.m_address + << "\t lat: " << h.m_lat << " lon: " << h.m_lon; +} + +// BookingDataset ---------------------------------------------------------------------------------- + +template <> bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const { if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty()) @@ -18,39 +72,40 @@ bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) return ftypes::IsHotelChecker::Instance()(fb.GetTypes()); } -void BookingDataset::BuildObject(SponsoredDataset::Object const & hotel, +template <> +void BookingDataset::BuildObject(Object const & hotel, function const & fn) const { FeatureBuilder1 fb; FeatureParams params; - fb.SetCenter(MercatorBounds::FromLatLon(hotel.lat, hotel.lon)); + fb.SetCenter(MercatorBounds::FromLatLon(hotel.m_lat, hotel.m_lon)); auto & metadata = params.GetMetadata(); // TODO(mgsergio): Rename FMD_SPONSORED_ID to FMD_BOOKING_ID. - metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(hotel.id.Get())); - metadata.Set(feature::Metadata::FMD_WEBSITE, hotel.descUrl); - metadata.Set(feature::Metadata::FMD_RATING, strings::to_string(hotel.ratingUser)); - metadata.Set(feature::Metadata::FMD_STARS, strings::to_string(hotel.stars)); - metadata.Set(feature::Metadata::FMD_PRICE_RATE, strings::to_string(hotel.priceCategory)); + metadata.Set(feature::Metadata::FMD_SPONSORED_ID, strings::to_string(hotel.m_id.Get())); + metadata.Set(feature::Metadata::FMD_WEBSITE, hotel.m_descUrl); + metadata.Set(feature::Metadata::FMD_RATING, strings::to_string(hotel.m_ratingUser)); + metadata.Set(feature::Metadata::FMD_STARS, strings::to_string(hotel.m_stars)); + metadata.Set(feature::Metadata::FMD_PRICE_RATE, strings::to_string(hotel.m_priceCategory)); // params.AddAddress(hotel.address); // TODO(mgsergio): addr:full ??? - if (!hotel.street.empty()) - fb.AddStreet(hotel.street); + if (!hotel.m_street.empty()) + fb.AddStreet(hotel.m_street); - if (!hotel.houseNumber.empty()) - fb.AddHouseNumber(hotel.houseNumber); + if (!hotel.m_houseNumber.empty()) + fb.AddHouseNumber(hotel.m_houseNumber); params.AddName(StringUtf8Multilang::GetLangByCode(StringUtf8Multilang::kDefaultCode), - hotel.name); - if (!hotel.translations.empty()) + hotel.m_name); + if (!hotel.m_translations.empty()) { // TODO(mgsergio): Move parsing to the hotel costruction stage. vector parts; - strings::ParseCSVRow(hotel.translations, '|', parts); - CHECK_EQUAL(parts.size() % 3, 0, ("Invalid translation string:", hotel.translations)); + strings::ParseCSVRow(hotel.m_translations, '|', parts); + CHECK_EQUAL(parts.size() % 3, 0, ("Invalid translation string:", hotel.m_translations)); for (auto i = 0; i < parts.size(); i += 3) { auto const langCode = StringUtf8Multilang::GetLangIndex(parts[i]); @@ -63,7 +118,7 @@ void BookingDataset::BuildObject(SponsoredDataset::Object const & hotel, params.AddType(clf.GetTypeByPath({"sponsored", "booking"})); // Matching booking.com hotel types to OpenStreetMap values. // Booking types are listed in the closed API docs. - switch (hotel.type) + switch (hotel.m_type) { case 19: case 205: params.AddType(clf.GetTypeByPath({"tourism", "motel"})); break; @@ -117,12 +172,13 @@ void BookingDataset::BuildObject(SponsoredDataset::Object const & hotel, fn(fb); } +template <> BookingDataset::ObjectId BookingDataset::FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const { auto const name = fb.GetName(StringUtf8Multilang::kDefaultCode); if (name.empty()) - return kInvalidObjectId; + return Object::InvalidObjectId(); // Find |kMaxSelectedElements| nearest values to a point. auto const bookingIndexes = GetNearestObjects(MercatorBounds::ToLatLon(fb.GetKeyPoint()), @@ -134,6 +190,6 @@ BookingDataset::ObjectId BookingDataset::FindMatchingObjectIdImpl(FeatureBuilder return j; } - return kInvalidObjectId; + return Object::InvalidObjectId(); } } // namespace generator diff --git a/generator/booking_dataset.hpp b/generator/booking_dataset.hpp index 4d0ab6fa34..ee0780bf13 100644 --- a/generator/booking_dataset.hpp +++ b/generator/booking_dataset.hpp @@ -21,25 +21,58 @@ class FeatureBuilder1; namespace generator { -class BookingDataset : public SponsoredDatasetBase +// TODO(mgsergio): Try to get rid of code deuplication. (See OpenTableRestaurant) +struct BookingHotel { -public: + NEWTYPE(uint32_t, ObjectId); - explicit BookingDataset(string const & dataPath, string const & addressReferencePath = string()) - : SponsoredDatasetBase(dataPath, addressReferencePath) + enum class Fields { + Id = 0, + Latitude = 1, + Longtitude = 2, + Name = 3, + Address = 4, + Stars = 5, + PriceCategory = 6, + RatingBooking = 7, + RatingUsers = 8, + DescUrl = 9, + Type = 10, + Translations = 11, + Counter + }; + + static constexpr ObjectId InvalidObjectId() + { + return ObjectId(numeric_limits::max()); } - explicit BookingDataset(istream & dataSource, string const & addressReferencePath = string()) - : SponsoredDatasetBase(dataSource, addressReferencePath) - { - } + explicit BookingHotel(string const & src); - bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const override; + static constexpr size_t Index(Fields field) { return static_cast(field); } + static constexpr size_t FieldsCount() { return static_cast(Fields::Counter); } + bool IsAddressPartsFilled() const { return !m_street.empty() || !m_houseNumber.empty(); } -protected: - void BuildObject(Object const & hotel, function const & fn) const override; + ObjectId m_id{InvalidObjectId()}; + double m_lat = 0.0; + double m_lon = 0.0; + string m_name; + string m_street; + string m_houseNumber; - ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & e) const override; + string m_address; + uint32_t m_stars = 0; + uint32_t m_priceCategory = 0; + double m_ratingBooking = 0.0; + double m_ratingUser = 0.0; + string m_descUrl; + uint32_t m_type = 0; + string m_translations; }; + +ostream & operator<<(ostream & s, BookingHotel const & h); + +NEWTYPE_SIMPLE_OUTPUT(BookingHotel::ObjectId); +using BookingDataset = SponsoredDataset; } // namespace generator diff --git a/generator/booking_quality_check/booking_quality_check.cpp b/generator/booking_quality_check/booking_quality_check.cpp index 066046fb34..d6f291e838 100644 --- a/generator/booking_quality_check/booking_quality_check.cpp +++ b/generator/booking_quality_check/booking_quality_check.cpp @@ -136,10 +136,11 @@ feature::GenerateInfo GetGenerateInfo() return info; } +template struct SampleItem { enum MatchStatus {Uninitialized, Yes, No}; - using ObjectId = SponsoredDataset::ObjectId; + using ObjectId = typename Object::ObjectId; SampleItem() = default; @@ -151,28 +152,30 @@ struct SampleItem } osm::Id m_osmId; - ObjectId m_bookingId = SponsoredDataset::kInvalidObjectId; + ObjectId m_bookingId = Object::InvalidObjectId(); MatchStatus m_match = Uninitialized; }; -SampleItem::MatchStatus ReadMatchStatus(string const & str) +template +typename SampleItem::MatchStatus ReadMatchStatus(string const & str) { if (str == "Yes") - return SampleItem::Yes; + return SampleItem::Yes; if (str == "No") - return SampleItem::No; + return SampleItem::No; if (str == "Uninitialized") - return SampleItem::Uninitialized; + return SampleItem::Uninitialized; MYTHROW(ParseError, ("Can't make SampleItem::MatchStatus from string:", str)); } -SampleItem ReadSampleItem(string const & str) +template +SampleItem ReadSampleItem(string const & str) { - SampleItem item; + SampleItem item; auto const parts = strings::Tokenize(str, "\t"); CHECK_EQUAL(parts.size(), 3, ("Cant't make SampleItem from string:", str, @@ -181,21 +184,22 @@ SampleItem ReadSampleItem(string const & str) item.m_osmId = ReadDebuggedPrintedOsmId(parts[0]); if (!strings::to_uint(parts[1], item.m_bookingId.Get())) MYTHROW(ParseError, ("Can't make uint32 from string:", parts[1])); - item.m_match = ReadMatchStatus(parts[2]); + item.m_match = ReadMatchStatus(parts[2]); return item; } -vector ReadSample(istream & ist) +template +vector> ReadSample(istream & ist) { - vector result; + vector> result; size_t lineNumber = 1; try { for (string line; getline(ist, line); ++lineNumber) { - result.emplace_back(ReadSampleItem(line)); + result.emplace_back(ReadSampleItem(line)); } } catch (ParseError const & e) @@ -207,15 +211,17 @@ vector ReadSample(istream & ist) return result; } -vector ReadSampleFromFile(string const & name) +template +vector> ReadSampleFromFile(string const & name) { ifstream ist(name); CHECK(ist.is_open(), ("Can't open file:", name, strerror(errno))); - return ReadSample(ist); + return ReadSample(ist); } +template void GenerateFactors(BookingDataset const & booking, map const & features, - vector const & sampleItems, ostream & ost) + vector> const & sampleItems, ostream & ost) { for (auto const & item : sampleItems) { @@ -226,13 +232,13 @@ void GenerateFactors(BookingDataset const & booking, map(booking, features); }); - auto const sample = ReadSampleFromFile(FLAGS_sample); + auto const sample = ReadSampleFromFile(FLAGS_sample); LOG(LINFO, ("Sample size is", sample.size())); { ofstream ost(FLAGS_factors); diff --git a/generator/booking_scoring.cpp b/generator/booking_scoring.cpp index d07fed0930..7d4adc8bab 100644 --- a/generator/booking_scoring.cpp +++ b/generator/booking_scoring.cpp @@ -131,12 +131,12 @@ BookingMatchScore Match(BookingDataset::Object const & h, FeatureBuilder1 const BookingMatchScore score; auto const fbCenter = MercatorBounds::ToLatLon(fb.GetKeyPoint()); - auto const distance = ms::DistanceOnEarth(fbCenter.lat, fbCenter.lon, h.lat, h.lon); + auto const distance = ms::DistanceOnEarth(fbCenter.lat, fbCenter.lon, h.m_lat, h.m_lon); score.m_linearNormDistanceScore = GetLinearNormDistanceScore(distance); // TODO(mgsergio): Check all translations and use the best one. score.m_nameSimilarityScore = - GetNameSimilarityScore(h.name, fb.GetName(StringUtf8Multilang::kDefaultCode)); + GetNameSimilarityScore(h.m_name, fb.GetName(StringUtf8Multilang::kDefaultCode)); return score; } diff --git a/generator/generator.pro b/generator/generator.pro index 35cae7dbd6..38d6ee2f2b 100644 --- a/generator/generator.pro +++ b/generator/generator.pro @@ -37,7 +37,6 @@ SOURCES += \ region_meta.cpp \ routing_generator.cpp \ search_index_builder.cpp \ - sponsored_dataset.cpp \ srtm_parser.cpp \ statistics.cpp \ tesselator.cpp \ @@ -76,6 +75,7 @@ HEADERS += \ routing_generator.hpp \ search_index_builder.hpp \ sponsored_dataset.hpp \ + sponsored_dataset_inl.hpp \ srtm_parser.hpp \ statistics.hpp \ tag_admixer.hpp \ diff --git a/generator/osm_source.cpp b/generator/osm_source.cpp index 8972e98b91..7088efd5db 100644 --- a/generator/osm_source.cpp +++ b/generator/osm_source.cpp @@ -349,8 +349,7 @@ public: [](Place const & p1, Place const & p2) { return p1.IsEqual(p2); }, [](Place const & p1, Place const & p2) { return p1.IsBetterThan(p2); }); } - else if (m_dataset.FindMatchingObjectId(fb) != - generator::SponsoredDatasetBase::kInvalidObjectId) + else if (m_dataset.IsMatched(fb)) { m_skippedElements << DebugPrint(fb.GetMostGenericOsmId()) << endl; diff --git a/generator/sponsored_dataset.cpp b/generator/sponsored_dataset.cpp deleted file mode 100644 index 4a7f885ea7..0000000000 --- a/generator/sponsored_dataset.cpp +++ /dev/null @@ -1,213 +0,0 @@ -#include "generator/sponsored_dataset.hpp" - -#include "platform/local_country_file.hpp" -#include "platform/local_country_file_utils.hpp" -#include "platform/platform.hpp" - -#include "geometry/distance_on_sphere.hpp" - -#include "base/logging.hpp" -#include "base/string_utils.hpp" - -#include "std/fstream.hpp" -#include "std/iostream.hpp" -#include "std/limits.hpp" - -namespace generator -{ -namespace -{ -string EscapeTabs(string const & str) -{ - stringstream ss; - for (char c : str) - { - if (c == '\t') - ss << "\\t"; - else - ss << c; - } - return ss.str(); -} -} // namespace - -SponsoredDataset::AddressMatcher::AddressMatcher() -{ - vector localFiles; - - Platform & platform = GetPlatform(); - platform::FindAllLocalMapsInDirectoryAndCleanup(platform.WritableDir(), 0 /* version */, - -1 /* latestVersion */, localFiles); - - for (platform::LocalCountryFile const & localFile : localFiles) - { - LOG(LINFO, ("Found mwm:", localFile)); - try - { - m_index.RegisterMap(localFile); - } - catch (RootException const & ex) - { - CHECK(false, ("Bad mwm file:", localFile)); - } - } - - m_coder = make_unique(m_index); -} - -void SponsoredDataset::AddressMatcher::operator()(Object & object) -{ - search::ReverseGeocoder::Address addr; - m_coder->GetNearbyAddress(MercatorBounds::FromLatLon(object.lat, object.lon), addr); - object.street = addr.GetStreetName(); - object.houseNumber = addr.GetHouseNumber(); -} - -SponsoredDataset::Object::Object(string const & src) -{ - vector rec; - strings::ParseCSVRow(src, '\t', rec); - CHECK(rec.size() == FieldsCount(), ("Error parsing hotels.tsv line:", EscapeTabs(src))); - - strings::to_uint(rec[Index(Fields::Id)], id.Get()); - strings::to_double(rec[Index(Fields::Latitude)], lat); - strings::to_double(rec[Index(Fields::Longtitude)], lon); - - name = rec[Index(Fields::Name)]; - address = rec[Index(Fields::Address)]; - - strings::to_uint(rec[Index(Fields::Stars)], stars); - strings::to_uint(rec[Index(Fields::PriceCategory)], priceCategory); - strings::to_double(rec[Index(Fields::RatingBooking)], ratingBooking); - strings::to_double(rec[Index(Fields::RatingUsers)], ratingUser); - - descUrl = rec[Index(Fields::DescUrl)]; - - strings::to_uint(rec[Index(Fields::Type)], type); - - translations = rec[Index(Fields::Translations)]; -} - -ostream & operator<<(ostream & s, SponsoredDataset::Object const & h) -{ - s << fixed << setprecision(7); - return s << "Id: " << h.id << "\t Name: " << h.name << "\t Address: " << h.address - << "\t lat: " << h.lat << " lon: " << h.lon; -} - -SponsoredDataset::ObjectId const SponsoredDataset::kInvalidObjectId = - SponsoredDataset::ObjectId(numeric_limits::max()); - -SponsoredDatasetBase::SponsoredDatasetBase(string const & dataPath, string const & addressReferencePath) -{ - if (dataPath.empty()) - return; - - ifstream dataSource(dataPath); - if (!dataSource.is_open()) - { - LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno))); - return; - } - - LoadData(dataSource, addressReferencePath); -} - -SponsoredDatasetBase::SponsoredDatasetBase(istream & dataSource, string const & addressReferencePath) -{ - LoadData(dataSource, addressReferencePath); -} - -SponsoredDataset::Object const & SponsoredDatasetBase::GetObjectById(SponsoredDataset::ObjectId id) const -{ - auto const it = m_hotels.find(id); - CHECK(it != end(m_hotels), ("Got wrong object id:", id)); - return it->second; -} - -SponsoredDataset::Object & SponsoredDatasetBase::GetObjectById(SponsoredDataset::ObjectId id) -{ - auto const it = m_hotels.find(id); - CHECK(it != end(m_hotels), ("Got wrong object id:", id)); - return it->second; -} - -void SponsoredDatasetBase::BuildOsmObjects(function const & fn) const -{ - for (auto const & item : m_hotels) - BuildObject(item.second, fn); -} - -SponsoredDatasetBase::ObjectId SponsoredDatasetBase::FindMatchingObjectId(FeatureBuilder1 const & fb) const -{ - if (NecessaryMatchingConditionHolds(fb)) - return FindMatchingObjectIdImpl(fb); - return kInvalidObjectId; -} - -vector SponsoredDatasetBase::GetNearestObjects( - ms::LatLon const & latLon, size_t const limit, - double const maxDistance /* = 0.0 */) const -{ - namespace bgi = boost::geometry::index; - - vector indexes; - for_each(bgi::qbegin(m_rtree, bgi::nearest(TPoint(latLon.lat, latLon.lon), limit)), - bgi::qend(m_rtree), [this, &latLon, &indexes, maxDistance](TValue const & v) - { - auto const & object = GetObjectById(v.second); - double const dist = ms::DistanceOnEarth(latLon.lat, latLon.lon, object.lat, object.lon); - if (maxDistance != 0.0 && dist > maxDistance /* max distance in meters */) - return; - - indexes.emplace_back(v.second); - }); - - return indexes; -} - -void SponsoredDatasetBase::LoadData(istream & src, string const & addressReferencePath) -{ - m_hotels.clear(); - m_rtree.clear(); - - for (string line; getline(src, line);) - { - Object hotel(line); - m_hotels.emplace(hotel.id, hotel); - } - - if (!addressReferencePath.empty()) - { - LOG(LINFO, ("Reference addresses for booking objects", addressReferencePath)); - Platform & platform = GetPlatform(); - string const backupPath = platform.WritableDir(); - platform.SetWritableDirForTests(addressReferencePath); - - AddressMatcher addressMatcher; - - size_t matchedNum = 0; - size_t emptyAddr = 0; - for (auto & item : m_hotels) - { - auto & object = item.second; - addressMatcher(object); - - if (object.address.empty()) - ++emptyAddr; - if (object.IsAddressPartsFilled()) - ++matchedNum; - } - LOG(LINFO, - ("Num of hotels:", m_hotels.size(), "matched:", matchedNum, "empty addresses:", emptyAddr)); - platform.SetWritableDirForTests(backupPath); - } - - for (auto const & item : m_hotels) - { - auto const & hotel = item.second; - TBox b(TPoint(hotel.lat, hotel.lon), TPoint(hotel.lat, hotel.lon)); - m_rtree.insert(make_pair(b, hotel.id)); - } -} -} // namespace generator diff --git a/generator/sponsored_dataset.hpp b/generator/sponsored_dataset.hpp index e24b9066af..99871c3b27 100644 --- a/generator/sponsored_dataset.hpp +++ b/generator/sponsored_dataset.hpp @@ -4,6 +4,10 @@ #include "search/reverse_geocoder.hpp" +#include "platform/local_country_file.hpp" +#include "platform/local_country_file_utils.hpp" +#include "platform/platform.hpp" + #include "base/newtype.hpp" #include "std/function.hpp" @@ -20,88 +24,34 @@ class FeatureBuilder1; namespace generator { +template class SponsoredDataset { public: - NEWTYPE(uint32_t, ObjectId); - - static double constexpr kDistanceLimitInMeters = 150; - static size_t constexpr kMaxSelectedElements = 3; - static ObjectId const kInvalidObjectId; - - struct Object - { - enum class Fields - { - Id = 0, - Latitude = 1, - Longtitude = 2, - Name = 3, - Address = 4, - Stars = 5, - PriceCategory = 6, - RatingBooking = 7, - RatingUsers = 8, - DescUrl = 9, - Type = 10, - Translations = 11, - - Counter - }; - - ObjectId id{kInvalidObjectId}; - double lat = 0.0; - double lon = 0.0; - string name; - string address; - string street; - string houseNumber; - uint32_t stars = 0; - uint32_t priceCategory = 0; - double ratingBooking = 0.0; - double ratingUser = 0.0; - string descUrl; - uint32_t type = 0; - string translations; - - static constexpr size_t Index(Fields field) { return static_cast(field); } - static constexpr size_t FieldsCount() { return static_cast(Fields::Counter); } - explicit Object(string const & src); - - inline bool IsAddressPartsFilled() const { return !street.empty() || !houseNumber.empty(); } - }; + using Object = SponsoredObject; +private: class AddressMatcher { - Index m_index; - unique_ptr m_coder; - public: AddressMatcher(); void operator()(Object & object); + + private: + Index m_index; + unique_ptr m_coder; }; - virtual ~SponsoredDataset() = default; - - /// @return an id of a matched object or kInvalidObjectId on failure. - virtual ObjectId FindMatchingObjectId(FeatureBuilder1 const & fb) const = 0; - - virtual size_t Size() const = 0; - - virtual void BuildOsmObjects(function const & fn) const = 0; -}; - -ostream & operator<<(ostream & s, SponsoredDataset::Object const & h); - -NEWTYPE_SIMPLE_OUTPUT(SponsoredDataset::ObjectId); - -class SponsoredDatasetBase : public SponsoredDataset -{ public: - explicit SponsoredDatasetBase(string const & dataPath, string const & addressReferencePath = string()); - explicit SponsoredDatasetBase(istream & dataSource, string const & addressReferencePath = string()); + using ObjectId = typename Object::ObjectId; - size_t Size() const override { return m_hotels.size(); } + static double constexpr kDistanceLimitInMeters = 150; + static size_t constexpr kMaxSelectedElements = 3; + + explicit SponsoredDataset(string const & dataPath, string const & addressReferencePath = string()); + explicit SponsoredDataset(istream & dataSource, string const & addressReferencePath = string()); + + size_t Size() const { return m_objects.size(); } Object const & GetObjectById(ObjectId id) const; Object & GetObjectById(ObjectId id); @@ -110,13 +60,13 @@ public: /// @return true if |fb| satisfies some necesary conditions to match one or serveral /// objects from dataset. - virtual bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const = 0; - ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const override; + bool NecessaryMatchingConditionHolds(FeatureBuilder1 const & fb) const; + ObjectId FindMatchingObjectId(FeatureBuilder1 const & e) const; - void BuildOsmObjects(function const & fn) const override; + void BuildOsmObjects(function const & fn) const; protected: - map m_hotels; + map m_objects; using TPoint = boost::geometry::model::point; using TBox = boost::geometry::model::box; @@ -125,11 +75,14 @@ protected: // Create the rtree using default constructor. boost::geometry::index::rtree> m_rtree; - virtual void BuildObject(Object const & object, function const & fn) const = 0; + void BuildObject(Object const & object, + function const & fn) const; void LoadData(istream & src, string const & addressReferencePath); /// @return an id of a matched object or kInvalidObjectId on failure. - virtual ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const = 0; + ObjectId FindMatchingObjectIdImpl(FeatureBuilder1 const & fb) const; }; } // namespace generator + +#include "generator/sponsored_dataset_inl.hpp" // SponsoredDataset implementation. diff --git a/generator/sponsored_dataset_inl.hpp b/generator/sponsored_dataset_inl.hpp new file mode 100644 index 0000000000..da0dcf34ad --- /dev/null +++ b/generator/sponsored_dataset_inl.hpp @@ -0,0 +1,174 @@ +#include "generator/sponsored_dataset.hpp" + +#include "geometry/distance_on_sphere.hpp" + +#include "base/logging.hpp" +#include "base/string_utils.hpp" + +#include "std/fstream.hpp" +#include "std/iostream.hpp" +#include "std/limits.hpp" + +namespace generator +{ +template +SponsoredDataset::AddressMatcher::AddressMatcher() +{ + vector localFiles; + + Platform & platform = GetPlatform(); + platform::FindAllLocalMapsInDirectoryAndCleanup(platform.WritableDir(), 0 /* version */, + -1 /* latestVersion */, localFiles); + + for (platform::LocalCountryFile const & localFile : localFiles) + { + LOG(LINFO, ("Found mwm:", localFile)); + try + { + m_index.RegisterMap(localFile); + } + catch (RootException const & ex) + { + CHECK(false, ("Bad mwm file:", localFile)); + } + } + + m_coder = make_unique(m_index); +} + +template +void SponsoredDataset::AddressMatcher::operator()(Object & object) +{ + search::ReverseGeocoder::Address addr; + m_coder->GetNearbyAddress(MercatorBounds::FromLatLon(object.m_lat, object.m_lon), addr); + object.m_street = addr.GetStreetName(); + object.m_houseNumber = addr.GetHouseNumber(); +} + +template +SponsoredDataset::SponsoredDataset(string const & dataPath, string const & addressReferencePath) +{ + if (dataPath.empty()) + return; + + ifstream dataSource(dataPath); + if (!dataSource.is_open()) + { + LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno))); + return; + } + + LoadData(dataSource, addressReferencePath); +} + +template +SponsoredDataset::SponsoredDataset(istream & dataSource, string const & addressReferencePath) +{ + LoadData(dataSource, addressReferencePath); +} + +template +typename SponsoredDataset::Object const & +SponsoredDataset::GetObjectById(ObjectId id) const +{ + auto const it = m_objects.find(id); + CHECK(it != end(m_objects), ("Got wrong object id:", id)); + return it->second; +} + +template +typename SponsoredDataset::Object & +SponsoredDataset::GetObjectById(ObjectId id) +{ + auto const it = m_objects.find(id); + CHECK(it != end(m_objects), ("Got wrong object id:", id)); + return it->second; +} + +template +void SponsoredDataset::BuildOsmObjects(function const & fn) const +{ + for (auto const & item : m_objects) + BuildObject(item.second, fn); +} + +template +typename SponsoredDataset::ObjectId +SponsoredDataset::FindMatchingObjectId(FeatureBuilder1 const & fb) const +{ + if (NecessaryMatchingConditionHolds(fb)) + return FindMatchingObjectIdImpl(fb); + return Object::InvalidObjectId(); +} + +template +vector::ObjectId> +SponsoredDataset::GetNearestObjects(ms::LatLon const & latLon, size_t const limit, + double const maxDistance /* = 0.0 */) const +{ + namespace bgi = boost::geometry::index; + + vector indexes; + for_each(bgi::qbegin(m_rtree, bgi::nearest(TPoint(latLon.lat, latLon.lon), limit)), + bgi::qend(m_rtree), [this, &latLon, &indexes, maxDistance](TValue const & v) + { + auto const & object = GetObjectById(v.second); + double const dist = ms::DistanceOnEarth(latLon.lat, latLon.lon, object.m_lat, object.m_lon); + if (maxDistance != 0.0 && dist > maxDistance /* max distance in meters */) + return; + + indexes.emplace_back(v.second); + }); + + return indexes; +} + +template +void SponsoredDataset::LoadData(istream & src, string const & addressReferencePath) +{ + m_objects.clear(); + m_rtree.clear(); + + for (string line; getline(src, line);) + { + Object hotel(line); + m_objects.emplace(hotel.m_id, hotel); + } + + if (!addressReferencePath.empty()) + { + LOG(LINFO, ("Reference addresses for booking objects", addressReferencePath)); + Platform & platform = GetPlatform(); + string const backupPath = platform.WritableDir(); + // TODO(mgsergio): What is this for? + platform.SetWritableDirForTests(addressReferencePath); + + AddressMatcher addressMatcher; + + size_t matchedNum = 0; + size_t emptyAddr = 0; + for (auto & item : m_objects) + { + auto & object = item.second; + addressMatcher(object); + + if (object.m_address.empty()) + ++emptyAddr; + if (object.IsAddressPartsFilled()) + ++matchedNum; + } + // TODO(mgsergio): Fix names. + LOG(LINFO, + ("Num of hotels:", m_objects.size(), "matched:", matchedNum, "empty addresses:", emptyAddr)); + // TODO(mgsergio): What is this for? + platform.SetWritableDirForTests(backupPath); + } + + for (auto const & item : m_objects) + { + auto const & hotel = item.second; + TBox b(TPoint(hotel.m_lat, hotel.m_lon), TPoint(hotel.m_lat, hotel.m_lon)); + m_rtree.insert(make_pair(b, hotel.m_id)); + } +} +} // namespace generator