From 531389165278edef3a16e8a749b28981a8170a1c Mon Sep 17 00:00:00 2001 From: Arsentiy Milchakov Date: Fri, 27 Apr 2018 16:11:06 +0300 Subject: [PATCH] [generator] added possibility to exclude some booking hotels --- base/newtype.hpp | 12 ++- base/string_utils.hpp | 12 +++ data/booking_excluded.txt | 0 defines.hpp | 2 + generator/generate_info.hpp | 2 - generator/generator_tests/CMakeLists.txt | 1 + .../sponsored_storage_tests.cpp | 37 ++++++++++ generator/generator_tool/generator_tool.cpp | 4 - generator/osm_source.cpp | 4 +- generator/sponsored_dataset.hpp | 5 +- generator/sponsored_dataset_inl.hpp | 32 +------- generator/sponsored_object_storage.hpp | 74 ++++++++++++------- generator/viator_dataset.cpp | 2 +- 13 files changed, 115 insertions(+), 72 deletions(-) create mode 100644 data/booking_excluded.txt create mode 100644 generator/generator_tests/sponsored_storage_tests.cpp diff --git a/base/newtype.hpp b/base/newtype.hpp index 644f29314e..50a65c8c3b 100644 --- a/base/newtype.hpp +++ b/base/newtype.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -13,7 +14,7 @@ using IsConvertibleGuard = std::enable_if_t::value } // namespace impl /// Creates a typesafe alias to a given numeric Type. -template +template > class NewType { static_assert(std::is_integral::value || std::is_floating_point::value, @@ -138,6 +139,15 @@ public: NewType operator|(NewType const & o) const { return NewType(m_value | o.m_value); } NewType operator&(NewType const & o) const { return NewType(m_value & o.m_value); } + struct Hash + { + size_t operator()(NewType const & v) const + { + Hasher h; + return h(v.Get()); + } + }; + private: Type m_value; }; diff --git a/base/string_utils.hpp b/base/string_utils.hpp index fe9e99a6c1..1b2f308de0 100644 --- a/base/string_utils.hpp +++ b/base/string_utils.hpp @@ -376,6 +376,18 @@ std::string to_string(T t) return ss.str(); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, int & i) { return to_int(s, i); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, unsigned int & i) { return to_uint(s, i); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, uint64_t & i) { return to_uint64(s, i); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, int64_t & i) { return to_int64(s, i); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, float & f) { return to_float(s, f); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, double & d) { return to_double(s, d); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, std::string & result) +{ + result = s; + return true; +} + namespace impl { template diff --git a/data/booking_excluded.txt b/data/booking_excluded.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/defines.hpp b/defines.hpp index c9043c3b18..bcc9c5abd6 100644 --- a/defines.hpp +++ b/defines.hpp @@ -102,4 +102,6 @@ #define LOCALIZATION_DESCRIPTION_SUFFIX " Description" +#define BOOKING_EXCLUDED_FILE "booking_excluded.txt" + auto constexpr kInvalidRatingValue = -1.0f; diff --git a/generator/generate_info.hpp b/generator/generate_info.hpp index ed53c53e37..7388bd8a66 100644 --- a/generator/generate_info.hpp +++ b/generator/generate_info.hpp @@ -44,9 +44,7 @@ struct GenerateInfo std::string m_osmFileName; std::string m_bookingDatafileName; - std::string m_bookingReferenceDir; std::string m_opentableDatafileName; - std::string m_opentableReferenceDir; std::string m_viatorDatafileName; std::shared_ptr m_boundariesTable; diff --git a/generator/generator_tests/CMakeLists.txt b/generator/generator_tests/CMakeLists.txt index da83ea4e53..37233ad01f 100644 --- a/generator/generator_tests/CMakeLists.txt +++ b/generator/generator_tests/CMakeLists.txt @@ -19,6 +19,7 @@ set( source_data.cpp source_data.hpp source_to_element_test.cpp + sponsored_storage_tests.cpp srtm_parser_test.cpp tag_admixer_test.cpp tesselator_test.cpp diff --git a/generator/generator_tests/sponsored_storage_tests.cpp b/generator/generator_tests/sponsored_storage_tests.cpp new file mode 100644 index 0000000000..4dbe48c27d --- /dev/null +++ b/generator/generator_tests/sponsored_storage_tests.cpp @@ -0,0 +1,37 @@ +#include "testing/testing.hpp" + +#include "generator/booking_dataset.hpp" +#include "generator/sponsored_object_storage.hpp" + +#include "platform/platform_tests_support/scoped_file.hpp" + +#include "coding/file_name_utils.hpp" + +using platform::tests_support::ScopedFile; + +double const kDummyDistanseForTesting = 1.0; +size_t const kDummyCountOfObjectsForTesting = 1; +std::string const kExcludedContent = "100\n200\n300"; +std::string const kExcludedIdsFileName = "excluded_for_testing.txt"; + +namespace +{ +UNIT_TEST(LoadExcludedIds) +{ + ScopedFile sf(kExcludedIdsFileName, kExcludedContent); + + generator::SponsoredObjectStorage storage( + kDummyDistanseForTesting, kDummyCountOfObjectsForTesting); + + auto const & path = my::JoinPath(GetPlatform().WritableDir(), kExcludedIdsFileName); + auto const excludedIds = storage.LoadExcludedIds(path); + generator::BookingHotel::ObjectId id; + TEST_EQUAL(excludedIds.size(), 3, ()); + id.Set(100); + TEST(excludedIds.find(id) != excludedIds.cend(), ()); + id.Set(200); + TEST(excludedIds.find(id) != excludedIds.cend(), ()); + id.Set(300); + TEST(excludedIds.find(id) != excludedIds.cend(), ()); +} +} // namespace diff --git a/generator/generator_tool/generator_tool.cpp b/generator/generator_tool/generator_tool.cpp index 5bba9e4f5b..497d9ff75d 100644 --- a/generator/generator_tool/generator_tool.cpp +++ b/generator/generator_tool/generator_tool.cpp @@ -121,9 +121,7 @@ DEFINE_string(transit_path, "", "Path to directory with transit graphs in json." // Sponsored-related. DEFINE_string(booking_data, "", "Path to booking data in .tsv format."); -DEFINE_string(booking_reference_path, "", "Path to mwm dataset for booking addresses matching."); DEFINE_string(opentable_data, "", "Path to opentable data in .tsv format."); -DEFINE_string(opentable_reference_path, "", "Path to mwm dataset for opentable addresses matching."); DEFINE_string(viator_data, "", "Path to viator data in .tsv format."); // UGC @@ -188,9 +186,7 @@ int main(int argc, char ** argv) genInfo.m_failOnCoasts = FLAGS_fail_on_coasts; genInfo.m_preloadCache = FLAGS_preload_cache; genInfo.m_bookingDatafileName = FLAGS_booking_data; - genInfo.m_bookingReferenceDir = FLAGS_booking_reference_path; genInfo.m_opentableDatafileName = FLAGS_opentable_data; - genInfo.m_opentableReferenceDir = FLAGS_opentable_reference_path; genInfo.m_viatorDatafileName = FLAGS_viator_data; genInfo.m_boundariesTable = make_shared(); diff --git a/generator/osm_source.cpp b/generator/osm_source.cpp index 5b7f601e5b..18c5d2f42d 100644 --- a/generator/osm_source.cpp +++ b/generator/osm_source.cpp @@ -332,8 +332,8 @@ public: MainFeaturesEmitter(feature::GenerateInfo const & info) : m_skippedElementsPath(info.GetIntermediateFileName("skipped_elements", ".lst")) , m_failOnCoasts(info.m_failOnCoasts) - , m_bookingDataset(info.m_bookingDatafileName, info.m_bookingReferenceDir) - , m_opentableDataset(info.m_opentableDatafileName, info.m_opentableReferenceDir) + , m_bookingDataset(info.m_bookingDatafileName) + , m_opentableDataset(info.m_opentableDatafileName) , m_viatorDataset(info.m_viatorDatafileName) , m_boundariesTable(info.m_boundariesTable) { diff --git a/generator/sponsored_dataset.hpp b/generator/sponsored_dataset.hpp index 3b8bad6eb7..45322bdc08 100644 --- a/generator/sponsored_dataset.hpp +++ b/generator/sponsored_dataset.hpp @@ -21,8 +21,7 @@ public: static double constexpr kDistanceLimitInMeters = 150; static size_t constexpr kMaxSelectedElements = 3; - explicit SponsoredDataset(std::string const & dataPath, - std::string const & addressReferencePath = std::string()); + explicit SponsoredDataset(std::string const & dataPath); /// @return true if |fb| satisfies some necessary conditions to match one or serveral /// objects from dataset. @@ -39,8 +38,6 @@ public: SponsoredObjectStorage const & GetStorage() const { return m_storage; } private: - void InitStorage(); - void BuildObject(Object const & object, std::function const & fn) const; diff --git a/generator/sponsored_dataset_inl.hpp b/generator/sponsored_dataset_inl.hpp index 2380a12892..73c3dcfc22 100644 --- a/generator/sponsored_dataset_inl.hpp +++ b/generator/sponsored_dataset_inl.hpp @@ -38,38 +38,10 @@ private: // SponsoredDataset -------------------------------------------------------------------------------- template -SponsoredDataset::SponsoredDataset(std::string const & dataPath, - std::string const & addressReferencePath) +SponsoredDataset::SponsoredDataset(std::string const & dataPath) : m_storage(kDistanceLimitInMeters, kMaxSelectedElements) { - InitStorage(); - m_storage.LoadData(dataPath, addressReferencePath); -} - -template -void SponsoredDataset::InitStorage() -{ - using Container = typename SponsoredObjectStorage::ObjectsContainer; - - m_storage.SetFillObjects([](Container & objects) { - AddressMatcher addressMatcher; - - size_t matchedCount = 0; - size_t emptyCount = 0; - for (auto & item : objects) - { - auto & object = item.second; - addressMatcher(object); - - if (object.m_address.empty()) - ++emptyCount; - if (object.HasAddresParts()) - ++matchedCount; - } - - LOG(LINFO, ("Num of objects:", objects.size(), "matched:", matchedCount, - "empty addresses:", emptyCount)); - }); + m_storage.LoadData(dataPath); } template diff --git a/generator/sponsored_object_storage.hpp b/generator/sponsored_object_storage.hpp index 0f9a46d096..b6b82c6fe7 100644 --- a/generator/sponsored_object_storage.hpp +++ b/generator/sponsored_object_storage.hpp @@ -5,12 +5,16 @@ #include "geometry/distance_on_sphere.hpp" #include "geometry/latlon.hpp" +#include "coding/file_name_utils.hpp" + #include "base/logging.hpp" +#include "base/string_utils.hpp" #include #include #include #include +#include #include #include "boost/geometry.hpp" @@ -18,6 +22,8 @@ #include "boost/geometry/geometries/point.hpp" #include "boost/geometry/index/rtree.hpp" +#include "defines.hpp" + namespace generator { template @@ -26,7 +32,7 @@ class SponsoredObjectStorage public: using ObjectId = typename Object::ObjectId; using ObjectsContainer = std::map; - using FillObject = std::function; + using ExcludedIdsContainer = std::unordered_set; SponsoredObjectStorage(double distanceLimitMeters, size_t maxSelectedElements) : m_distanceLimitMeters(distanceLimitMeters) @@ -54,12 +60,7 @@ public: return m_objects.size(); } - void SetFillObjects(FillObject const & fn) - { - m_fillObject = fn; - } - - void LoadData(std::string const & dataPath, std::string const & addressReferencePath) + void LoadData(std::string const & dataPath) { if (dataPath.empty()) return; @@ -71,10 +72,42 @@ public: return; } - LoadData(dataSource, addressReferencePath); + auto const excludedIdsPath = my::JoinPath(GetPlatform().ResourcesDir(), BOOKING_EXCLUDED_FILE); + + LoadData(dataSource, LoadExcludedIds(excludedIdsPath)); } - void LoadData(std::istream & src, std::string const & addressReferencePath) + ExcludedIdsContainer LoadExcludedIds(std::string const & excludedIdsPath) + { + if (excludedIdsPath.empty()) + return {}; + + std::ifstream source(excludedIdsPath); + if (!source) + { + LOG(LERROR, ("Error while opening", excludedIdsPath, ":", strerror(errno))); + return {}; + } + + ExcludedIdsContainer result; + for (std::string line; std::getline(source, line);) + { + ObjectId id{Object::InvalidObjectId()}; + + if (!strings::to_any(line, id.Get())) + { + LOG(LWARNING, ("Incorrect excluded sponsored id:", line)); + continue; + } + + if (id != Object::InvalidObjectId()) + result.emplace(id); + } + + return result; + } + + void LoadData(std::istream & src, ExcludedIdsContainer const & excludedIds) { m_objects.clear(); m_rtree.clear(); @@ -82,25 +115,11 @@ public: for (std::string line; std::getline(src, line);) { Object object(line); - if (object.m_id != Object::InvalidObjectId()) + if (object.m_id != Object::InvalidObjectId() && + excludedIds.find(object.m_id) == excludedIds.cend()) + { m_objects.emplace(object.m_id, object); - } - - // Try to get object address from existing MWMs. - if (!addressReferencePath.empty()) - { - LOG(LINFO, ("Reference addresses for sponsored objects", addressReferencePath)); - Platform & platform = GetPlatform(); - std::string const backupPath = platform.WritableDir(); - - // MWMs can be loaded only from a writebledir or from a resourcedir, - // changig resourcedir can lead to problems with classificator, so - // we change writebledir. - platform.SetWritableDirForTests(addressReferencePath); - - m_fillObject(m_objects); - - platform.SetWritableDirForTests(backupPath); + } } for (auto const & item : m_objects) @@ -159,6 +178,5 @@ private: double const m_distanceLimitMeters; size_t const m_maxSelectedElements; - FillObject m_fillObject; }; } // namespace generator diff --git a/generator/viator_dataset.cpp b/generator/viator_dataset.cpp index 7a83bbb58c..12155d87d4 100644 --- a/generator/viator_dataset.cpp +++ b/generator/viator_dataset.cpp @@ -59,7 +59,7 @@ ViatorDataset::ViatorDataset(std::string const & dataPath) LoadIndex(m_index); m_cityFinder = make_unique(m_index); - m_storage.LoadData(dataPath, ""); + m_storage.LoadData(dataPath); } ViatorCity::ObjectId ViatorDataset::FindMatchingObjectId(FeatureBuilder1 const & fb) const