diff --git a/indexer/ftypes_matcher.cpp b/indexer/ftypes_matcher.cpp
index 276e963994..4275dba571 100644
--- a/indexer/ftypes_matcher.cpp
+++ b/indexer/ftypes_matcher.cpp
@@ -550,6 +550,12 @@ char const * IsHotelChecker::GetHotelTypeTag(Type type)
   UNREACHABLE();
 }
 
+// Registers the classificator type sponsored-booking as the only type of this checker.
+IsBookingHotelChecker::IsBookingHotelChecker()
+{
+  m_types.push_back(classif().GetTypeByPath({"sponsored", "booking"}));
+}
+
 IsWifiChecker::IsWifiChecker()
 {
   m_types.push_back(classif().GetTypeByPath({"internet_access", "wlan"}));
diff --git a/indexer/ftypes_matcher.hpp b/indexer/ftypes_matcher.hpp
index c2adbe18f9..b77a45d722 100644
--- a/indexer/ftypes_matcher.hpp
+++ b/indexer/ftypes_matcher.hpp
@@ -303,6 +303,16 @@ private:
   std::array<std::pair<uint32_t, Type>, base::Underlying(Type::Count)> m_sortedTypes;
 };
 
+// Checker holding the single classificator type sponsored-booking.
+// The constructor is private; use IsBookingHotelChecker::Instance() to get the checker.
+class IsBookingHotelChecker : public BaseChecker
+{
+  IsBookingHotelChecker();
+
+public:
+  DECLARE_CHECKER_INSTANCE(IsBookingHotelChecker);
+};
+
 // WiFi is a type in classificator.txt,
 // it should be checked for filling metadata in MapObject.
 class IsWifiChecker : public BaseChecker
diff --git a/search/search_quality/CMakeLists.txt b/search/search_quality/CMakeLists.txt
index 4f89f6df5e..72a65159a9 100644
--- a/search/search_quality/CMakeLists.txt
+++ b/search/search_quality/CMakeLists.txt
@@ -17,6 +17,7 @@ if (NOT SKIP_DESKTOP)
   add_subdirectory(assessment_tool)
 endif()
 
+add_subdirectory(booking_dataset_generator)
 add_subdirectory(features_collector_tool)
 add_subdirectory(search_quality_tool)
 omim_add_test_subdirectory(search_quality_tests)
diff --git a/search/search_quality/booking_dataset_generator/CMakeLists.txt b/search/search_quality/booking_dataset_generator/CMakeLists.txt
new file mode 100644
index 0000000000..362e589a1f
--- /dev/null
+++ b/search/search_quality/booking_dataset_generator/CMakeLists.txt
@@ -0,0 +1,35 @@
+project(booking_dataset_generator)
+
+include_directories(${OMIM_ROOT}/3party/gflags/src)
+
+set(SRC booking_dataset_generator.cpp)
+
+omim_add_executable(${PROJECT_NAME} ${SRC})
+
+omim_link_libraries(
+  ${PROJECT_NAME}
+  search
+  search_quality
+  storage
+  editor
+  indexer
+  platform
+  mwm_diff
+  bsdiff
+  geometry
+  coding
+  base
+  oauthcpp
+  gflags
+  jansson
+  protobuf
+  stats_client
+  minizip
+  succinct
+  opening_hours
+  pugixml
+  icu
+  ${Qt5Core_LIBRARIES}
+  ${Qt5Network_LIBRARIES}
+  ${LIBZ}
+)
diff --git a/search/search_quality/booking_dataset_generator/booking_dataset_generator.cpp b/search/search_quality/booking_dataset_generator/booking_dataset_generator.cpp
new file mode 100644
index 0000000000..4e69efbc0a
--- /dev/null
+++ b/search/search_quality/booking_dataset_generator/booking_dataset_generator.cpp
@@ -0,0 +1,225 @@
+#include "search/result.hpp"
+#include "search/search_quality/helpers.hpp"
+#include "search/search_quality/sample.hpp"
+#include "search/utils.hpp"
+
+#include "indexer/classificator_loader.hpp"
+#include "indexer/data_source.hpp"
+#include "indexer/feature_algo.hpp"
+#include "indexer/ftypes_matcher.hpp"
+#include "indexer/scales.hpp"
+
+#include "storage/country_info_getter.hpp"
+#include "storage/storage.hpp"
+#include "storage/storage_defines.hpp"
+
+#include "coding/file_name_utils.hpp"
+
+#include "platform/local_country_file.hpp"
+#include "platform/local_country_file_utils.hpp"
+#include "platform/platform.hpp"
+
+#include "geometry/mercator.hpp"
+
+#include "base/macros.hpp"
+#include "base/string_utils.hpp"
+
+// NOTE(review): confirm this standard header list; it covers the std names used below.
+#include <cmath>
+#include <fstream>
+#include <limits>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "3party/gflags/src/gflags/gflags.h"
+
+#include "defines.hpp"
+
+using namespace search;
+using namespace std;
+using namespace storage;
+
+DEFINE_string(data_path, "", "Path to data directory (resources dir)");
+DEFINE_string(mwm_path, "", "Path to mwm files (writable dir)");
+DEFINE_string(out_path, "samples.json", "Path to output samples file");
+
+// Builds a one-sample JSON-lines string whose query is the name of |hotel| and whose
+// only expected result is |hotel| itself, marked as Vital. |userPos| is used both as
+// the user position and as the center of the sample viewport.
+// Returns an empty string when |hotel| has neither an English nor a default name.
+string GetSampleString(FeatureType & hotel, m2::PointD const & userPos)
+{
+  double constexpr kViewportRadiusM = 1000.0;
+
+  string hotelName;
+  if (!hotel.GetName(StringUtf8Multilang::kEnglishCode, hotelName) &&
+      !hotel.GetName(StringUtf8Multilang::kDefaultCode, hotelName))
+  {
+    LOG(LINFO, ("Cannot get name for", hotel.GetID()));
+    return "";
+  }
+
+  Sample sample;
+  // NOTE(review): the trailing space presumably marks the last token as complete
+  // (not a prefix) -- confirm against the query tokenizer.
+  sample.m_query = strings::MakeUniString(hotelName + " ");
+  sample.m_locale = "en";
+  sample.m_pos = userPos;
+  sample.m_viewport = MercatorBounds::RectByCenterXYAndSizeInMeters(userPos, kViewportRadiusM);
+  sample.m_results.push_back(Sample::Result::Build(hotel, Sample::Result::Relevance::Vital));
+
+  string json;
+  Sample::SerializeToJSONLines({sample}, json);
+  return json;
+}
+
+int main(int argc, char * argv[])
+{
+  ChangeMaxNumberOfOpenFiles(kMaxOpenFiles);
+  CheckLocale();
+
+  google::SetUsageMessage("Booking dataset generator.");
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  Platform & platform = GetPlatform();
+
+  string countriesFile = COUNTRIES_FILE;
+  if (!FLAGS_data_path.empty())
+  {
+    platform.SetResourceDir(FLAGS_data_path);
+    countriesFile = base::JoinPath(FLAGS_data_path, COUNTRIES_FILE);
+  }
+
+  if (!FLAGS_mwm_path.empty())
+    platform.SetWritableDirForTests(FLAGS_mwm_path);
+
+  ofstream out(FLAGS_out_path);
+  if (!out.is_open())
+  {
+    LOG(LERROR, ("Can't open output file", FLAGS_out_path));
+    return -1;
+  }
+
+  LOG(LINFO, ("writable dir =", platform.WritableDir()));
+  LOG(LINFO, ("resources dir =", platform.ResourcesDir()));
+
+  auto didDownload = [](CountryId const &, shared_ptr<platform::LocalCountryFile> const &) {};
+  auto willDelete = [](CountryId const &, shared_ptr<platform::LocalCountryFile> const &) {
+    return false;
+  };
+
+  // NOTE(review): |storage| and |infoGetter| are not referenced below; confirm
+  // whether their initialization is still required by this tool.
+  Storage storage(countriesFile);
+  storage.Init(didDownload, willDelete);
+  auto infoGetter = CountryInfoReader::CreateCountryInfoReader(platform);
+  infoGetter->InitAffiliationsInfo(&storage.GetAffiliations());
+
+  classificator::Load();
+  FrozenDataSource dataSource;
+
+  vector<platform::LocalCountryFile> mwms;
+  platform::FindAllLocalMapsAndCleanup(numeric_limits<int64_t>::max() /* the latest version */,
+                                       mwms);
+  for (auto & mwm : mwms)
+  {
+    mwm.SyncWithDisk();
+    dataSource.RegisterMap(mwm);
+  }
+
+  auto const & hotelChecker = ftypes::IsBookingHotelChecker::Instance();
+
+  // Maximum airport-to-hotel distance, also used as the hotel-to-user distance below.
+  double constexpr kDistanceToHotelM = 1e5;
+  // Offset passed to GetSmPoint() for both of its distance arguments. With d / sqrt(2) on
+  // each axis the resulting point is presumably d = kDistanceToHotelM away from the source.
+  double const kAxisOffsetToHotelM = kDistanceToHotelM / sqrt(2.0);
+
+  // For all airports from World.mwm (international or other important airports) and all
+  // hotels which are closer than 100 km from airport we create sample with query=|hotel name| and
+  // viewport and position in the airport.
+  // A hotel that is close to several airports is written once for every such airport.
+  std::set<FeatureID> hotelsNextToAirport;
+  {
+    auto const handle = FindWorld(dataSource);
+    if (!handle.IsAlive())
+    {
+      LOG(LERROR, ("Cannot find World.mwm"));
+      return -1;
+    }
+
+    auto const & airportChecker = ftypes::IsAirportChecker::Instance();
+    FeaturesLoaderGuard const guard(dataSource, handle.GetId());
+    for (uint32_t i = 0; i < guard.GetNumFeatures(); ++i)
+    {
+      auto airport = guard.GetFeatureByIndex(i);
+      // Guard against a null result before dereferencing.
+      if (!airport || !airportChecker(*airport))
+        continue;
+
+      auto const airportPos = feature::GetCenter(*airport);
+      auto addHotel = [&](FeatureType & hotel) {
+        if (!hotelChecker(hotel))
+          return;
+
+        if (MercatorBounds::DistanceOnEarth(airportPos, feature::GetCenter(hotel)) >
+            kDistanceToHotelM)
+        {
+          return;
+        }
+
+        string const json = GetSampleString(hotel, airportPos);
+        if (json.empty())
+          return;
+        out << json;
+        hotelsNextToAirport.insert(hotel.GetID());
+      };
+
+      dataSource.ForEachInRect(
+          addHotel, MercatorBounds::RectByCenterXYAndSizeInMeters(airportPos, kDistanceToHotelM),
+          scales::GetUpperScale());
+    }
+    LOG(LINFO, (hotelsNextToAirport.size(), "hotels have nearby airport."));
+  }
+
+  // For all hotels without an airport nearby we set user position 100km away from hotel.
+  vector<shared_ptr<MwmInfo>> infos;
+  dataSource.GetMwmsInfo(infos);
+  for (auto const & info : infos)
+  {
+    auto handle = dataSource.GetMwmHandleById(MwmSet::MwmId(info));
+    if (!handle.IsAlive())
+    {
+      LOG(LERROR, ("Mwm reading error", info));
+      return -1;
+    }
+
+    FeaturesLoaderGuard const guard(dataSource, handle.GetId());
+    for (uint32_t i = 0; i < guard.GetNumFeatures(); ++i)
+    {
+      auto hotel = guard.GetFeatureByIndex(i);
+      if (!hotel || !hotelChecker(*hotel))
+        continue;
+      if (hotelsNextToAirport.count(hotel->GetID()) != 0)
+        continue;
+
+      auto const userPos = MercatorBounds::GetSmPoint(feature::GetCenter(*hotel),
+                                                      kAxisOffsetToHotelM, kAxisOffsetToHotelM);
+      string const json = GetSampleString(*hotel, userPos);
+      if (!json.empty())
+        out << json;
+    }
+  }
+
+  // Report write failures (e.g. a full disk) instead of silently producing a truncated file.
+  out.flush();
+  if (!out)
+  {
+    LOG(LERROR, ("Can't write to output file", FLAGS_out_path));
+    return -1;
+  }
+
+  return 0;
+}