forked from organicmaps/organicmaps
[search_quality] Booking dataset generation tool.
This commit is contained in:
parent
c08ce405d3
commit
58d86f929f
5 changed files with 250 additions and 0 deletions
|
@ -550,6 +550,11 @@ char const * IsHotelChecker::GetHotelTypeTag(Type type)
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
// Registers the single classificator type {"sponsored", "booking"} as the
// only type this checker matches.
IsBookingHotelChecker::IsBookingHotelChecker()
{
  auto const bookingType = classif().GetTypeByPath({"sponsored", "booking"});
  m_types.push_back(bookingType);
}
|
||||
|
||||
IsWifiChecker::IsWifiChecker()
|
||||
{
|
||||
m_types.push_back(classif().GetTypeByPath({"internet_access", "wlan"}));
|
||||
|
|
|
@ -303,6 +303,14 @@ private:
|
|||
std::array<std::pair<uint32_t, Type>, base::Underlying(Type::Count)> m_sortedTypes;
|
||||
};
|
||||
|
||||
class IsBookingHotelChecker : public BaseChecker
|
||||
{
|
||||
IsBookingHotelChecker();
|
||||
|
||||
public:
|
||||
DECLARE_CHECKER_INSTANCE(IsBookingHotelChecker);
|
||||
};
|
||||
|
||||
// WiFi is a type in classificator.txt,
|
||||
// it should be checked for filling metadata in MapObject.
|
||||
class IsWifiChecker : public BaseChecker
|
||||
|
|
|
@ -17,6 +17,7 @@ if (NOT SKIP_DESKTOP)
|
|||
add_subdirectory(assessment_tool)
|
||||
endif()
|
||||
|
||||
add_subdirectory(booking_dataset_generator)
|
||||
add_subdirectory(features_collector_tool)
|
||||
add_subdirectory(search_quality_tool)
|
||||
omim_add_test_subdirectory(search_quality_tests)
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
# Build script for the booking dataset generation tool.
project(booking_dataset_generator)

# Make the bundled gflags headers reachable from the tool's sources.
include_directories(${OMIM_ROOT}/3party/gflags/src)

# The tool consists of a single translation unit.
set(
  SRC
  booking_dataset_generator.cpp
)

omim_add_executable(${PROJECT_NAME} ${SRC})

# Libraries are listed in the same order as before; keep the order intact
# when editing, since static link order may matter.
omim_link_libraries(
  ${PROJECT_NAME}
  # Project libraries.
  search
  search_quality
  storage
  editor
  indexer
  platform
  mwm_diff
  bsdiff
  geometry
  coding
  base
  # Third-party libraries.
  oauthcpp
  gflags
  jansson
  protobuf
  stats_client
  minizip
  succinct
  opening_hours
  pugixml
  icu
  # External / system libraries.
  ${Qt5Core_LIBRARIES}
  ${Qt5Network_LIBRARIES}
  ${LIBZ}
)
|
|
@ -0,0 +1,201 @@
|
|||
#include "search/result.hpp"
#include "search/search_quality/helpers.hpp"
#include "search/search_quality/sample.hpp"
#include "search/utils.hpp"

#include "indexer/classificator_loader.hpp"
#include "indexer/data_source.hpp"
#include "indexer/feature_algo.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "indexer/scales.hpp"

#include "storage/country_info_getter.hpp"
#include "storage/storage.hpp"
#include "storage/storage_defines.hpp"

#include "coding/file_name_utils.hpp"

#include "platform/local_country_file.hpp"
#include "platform/local_country_file_utils.hpp"
#include "platform/platform.hpp"

#include "geometry/mercator.hpp"

#include "base/macros.hpp"
#include "base/string_utils.hpp"

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "3party/gflags/src/gflags/gflags.h"

#include "defines.hpp"
|
||||
|
||||
using namespace search;
|
||||
using namespace std;
|
||||
using namespace storage;
|
||||
|
||||
// Command-line flags of the tool (gflags).
// --data_path: resources directory; when empty the platform default is kept.
DEFINE_string(data_path, "", "Path to data directory (resources dir)");
// --mwm_path: writable directory holding the mwm files; when empty the platform default is kept.
DEFINE_string(mwm_path, "", "Path to mwm files (writable dir)");
// --out_path: file the generated samples are written to.
DEFINE_string(out_path, "samples.json", "Path to output samples file");
|
||||
|
||||
// Builds a single search-quality sample for |hotel| and returns it serialized
// by Sample::SerializeToJSONLines.
//
// The query is the hotel name (English name preferred, default name as a
// fallback) followed by a space; the locale is "en"; the user position is
// |userPos| and the viewport is a rectangle centered at |userPos|. The hotel
// itself is the only expected result and is marked as Vital.
//
// Returns an empty string (and logs the feature id) when the hotel has
// neither an English nor a default name.
string GetSampleString(FeatureType & hotel, m2::PointD const & userPos)
{
  double constexpr kViewportRadiusM = 1000.0;

  string name;
  // Short-circuit keeps the original lookup order: English first, then default.
  bool const hasName = hotel.GetName(StringUtf8Multilang::kEnglishCode, name) ||
                       hotel.GetName(StringUtf8Multilang::kDefaultCode, name);
  if (!hasName)
  {
    LOG(LINFO, ("Cannot get name for", hotel.GetID()));
    return "";
  }

  Sample sample;
  sample.m_locale = "en";
  sample.m_query = strings::MakeUniString(name + " ");
  sample.m_pos = userPos;
  sample.m_viewport = MercatorBounds::RectByCenterXYAndSizeInMeters(userPos, kViewportRadiusM);

  auto const expected = Sample::Result::Build(hotel, Sample::Result::Relevance::Vital);
  sample.m_results.push_back(expected);

  string serialized;
  Sample::SerializeToJSONLines({sample}, serialized);
  return serialized;
}
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
ChangeMaxNumberOfOpenFiles(kMaxOpenFiles);
|
||||
CheckLocale();
|
||||
|
||||
google::SetUsageMessage("Booking dataset generator.");
|
||||
google::ParseCommandLineFlags(&argc, &argv, true);
|
||||
|
||||
Platform & platform = GetPlatform();
|
||||
|
||||
string countriesFile = COUNTRIES_FILE;
|
||||
if (!FLAGS_data_path.empty())
|
||||
{
|
||||
platform.SetResourceDir(FLAGS_data_path);
|
||||
countriesFile = base::JoinPath(FLAGS_data_path, COUNTRIES_FILE);
|
||||
}
|
||||
|
||||
if (!FLAGS_mwm_path.empty())
|
||||
platform.SetWritableDirForTests(FLAGS_mwm_path);
|
||||
|
||||
ofstream out;
|
||||
out.open(FLAGS_out_path);
|
||||
if (!out.is_open())
|
||||
{
|
||||
LOG(LERROR, ("Can't open output file", FLAGS_out_path));
|
||||
return -1;
|
||||
}
|
||||
|
||||
LOG(LINFO, ("writable dir =", platform.WritableDir()));
|
||||
LOG(LINFO, ("resources dir =", platform.ResourcesDir()));
|
||||
|
||||
auto didDownload = [](CountryId const &, shared_ptr<platform::LocalCountryFile> const &) {};
|
||||
auto willDelete = [](CountryId const &, shared_ptr<platform::LocalCountryFile> const &) {
|
||||
return false;
|
||||
};
|
||||
|
||||
Storage storage(countriesFile);
|
||||
storage.Init(didDownload, willDelete);
|
||||
auto infoGetter = CountryInfoReader::CreateCountryInfoReader(platform);
|
||||
infoGetter->InitAffiliationsInfo(&storage.GetAffiliations());
|
||||
|
||||
classificator::Load();
|
||||
FrozenDataSource dataSource;
|
||||
|
||||
vector<platform::LocalCountryFile> mwms;
|
||||
platform::FindAllLocalMapsAndCleanup(numeric_limits<int64_t>::max() /* the latest version */,
|
||||
mwms);
|
||||
for (auto & mwm : mwms)
|
||||
{
|
||||
mwm.SyncWithDisk();
|
||||
dataSource.RegisterMap(mwm);
|
||||
}
|
||||
|
||||
auto const & hotelChecker = ftypes::IsBookingHotelChecker::Instance();
|
||||
|
||||
// For all airports from World.mwm (international or other important airports) and all
|
||||
// hotels which are closer than 100 km from airport we create sample with query=|hotel name| and
|
||||
// viewport and position in the airport.
|
||||
double constexpr kDistanceToHotelM = 1e5;
|
||||
std::set<FeatureID> hotelsNextToAirport;
|
||||
{
|
||||
auto const handle = FindWorld(dataSource);
|
||||
if (!handle.IsAlive())
|
||||
{
|
||||
LOG(LERROR, ("Cannot find World.mwm"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
auto const & airportChecker = ftypes::IsAirportChecker::Instance();
|
||||
FeaturesLoaderGuard const guard(dataSource, handle.GetId());
|
||||
for (uint32_t i = 0; i < guard.GetNumFeatures(); ++i)
|
||||
{
|
||||
auto airport = guard.GetFeatureByIndex(i);
|
||||
if (!airportChecker(*airport))
|
||||
continue;
|
||||
|
||||
auto const airportPos = feature::GetCenter(*airport);
|
||||
auto addHotel = [&](FeatureType & hotel) {
|
||||
if (!hotelChecker(hotel))
|
||||
return;
|
||||
|
||||
if (MercatorBounds::DistanceOnEarth(airportPos, feature::GetCenter(hotel)) >
|
||||
kDistanceToHotelM)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
string json = GetSampleString(hotel, airportPos);
|
||||
if (json.empty())
|
||||
return;
|
||||
out << json;
|
||||
hotelsNextToAirport.insert(hotel.GetID());
|
||||
};
|
||||
|
||||
dataSource.ForEachInRect(
|
||||
addHotel, MercatorBounds::RectByCenterXYAndSizeInMeters(airportPos, kDistanceToHotelM),
|
||||
scales::GetUpperScale());
|
||||
}
|
||||
LOG(LINFO, (hotelsNextToAirport.size(), "hotels have nearby airport."));
|
||||
}
|
||||
|
||||
// For all hotels without an airport nearby we set user position 100km away from hotel.
|
||||
vector<shared_ptr<MwmInfo>> infos;
|
||||
dataSource.GetMwmsInfo(infos);
|
||||
for (auto const & info : infos)
|
||||
{
|
||||
auto handle = dataSource.GetMwmHandleById(MwmSet::MwmId(info));
|
||||
if (!handle.IsAlive())
|
||||
{
|
||||
LOG(LERROR, ("Mwm reading error", info));
|
||||
return -1;
|
||||
}
|
||||
FeaturesLoaderGuard const guard(dataSource, handle.GetId());
|
||||
for (uint32_t i = 0; i < guard.GetNumFeatures(); ++i)
|
||||
{
|
||||
auto hotel = guard.GetFeatureByIndex(i);
|
||||
if (!hotelChecker(*hotel))
|
||||
continue;
|
||||
if (hotelsNextToAirport.count(hotel->GetID()) != 0)
|
||||
continue;
|
||||
|
||||
static double kRadiusToHotelM = kDistanceToHotelM / sqrt(2.0);
|
||||
string json = GetSampleString(
|
||||
*hotel,
|
||||
MercatorBounds::GetSmPoint(feature::GetCenter(*hotel), kRadiusToHotelM, kRadiusToHotelM));
|
||||
|
||||
if (!json.empty())
|
||||
out << json;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Reference in a new issue