forked from organicmaps/organicmaps
[search_quality] Add address mode for booking_dataset_generator tool.
This commit is contained in:
parent
4d6b881baa
commit
d8ab055070
1 changed files with 124 additions and 13 deletions
|
@ -27,7 +27,9 @@
|
|||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
@ -40,23 +42,32 @@ using namespace search;
|
|||
using namespace std;
|
||||
using namespace storage;
|
||||
|
||||
DEFINE_string(data_path, "", "Path to data directory (resources dir)");
|
||||
DEFINE_string(mwm_path, "", "Path to mwm files (writable dir)");
|
||||
DEFINE_string(out_path, "samples.json", "Path to output samples file");
|
||||
DEFINE_string(data_path, "", "Path to data directory (resources dir).");
|
||||
DEFINE_string(mwm_path, "", "Path to mwm files (writable dir).");
|
||||
DEFINE_string(out_path, "samples.json", "Path to output samples file.");
|
||||
DEFINE_string(dataset_type, "name",
|
||||
"Dataset type: name (search hotel by name) or address (search hotel by address).");
|
||||
DEFINE_string(address_dataset_path, "", "Path to address dataset.");
|
||||
|
||||
string GetSampleString(FeatureType & hotel, m2::PointD const & userPos)
|
||||
string GetSampleString(FeatureType & hotel, m2::PointD const & userPos, string const & address)
|
||||
{
|
||||
Sample sample;
|
||||
string hotelName;
|
||||
double constexpr kViewportRadiusM = 1000.0;
|
||||
if (!hotel.GetName(StringUtf8Multilang::kEnglishCode, hotelName) &&
|
||||
!hotel.GetName(StringUtf8Multilang::kDefaultCode, hotelName))
|
||||
if (!address.empty())
|
||||
{
|
||||
LOG(LINFO, ("Cannot get name for", hotel.GetID()));
|
||||
return "";
|
||||
sample.m_query = strings::MakeUniString(address + " ");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!hotel.GetName(StringUtf8Multilang::kEnglishCode, hotelName) &&
|
||||
!hotel.GetName(StringUtf8Multilang::kDefaultCode, hotelName))
|
||||
{
|
||||
LOG(LINFO, ("Cannot get name for", hotel.GetID()));
|
||||
return "";
|
||||
}
|
||||
sample.m_query = strings::MakeUniString(hotelName + " ");
|
||||
}
|
||||
|
||||
sample.m_query = strings::MakeUniString(hotelName + " ");
|
||||
sample.m_locale = "en";
|
||||
sample.m_pos = userPos;
|
||||
sample.m_viewport = MercatorBounds::RectByCenterXYAndSizeInMeters(userPos, kViewportRadiusM);
|
||||
|
@ -66,6 +77,56 @@ string GetSampleString(FeatureType & hotel, m2::PointD const & userPos)
|
|||
return json;
|
||||
}
|
||||
|
||||
// Column indices of one row of the address dataset. The values are used directly as indices into
// the vector of parsed columns, so the order here must match the column order of the file.
enum class Fields : uint8_t
{
  SponsoredId = 0,  // Column 0: sponsored id, used as the lookup key.
  Address,          // Column 1: street address.
  Zip,              // Column 2: postal code.
  City,             // Column 3: city.
  District,         // Column 4: district.
  Country,          // Column 5: country.
  Count             // Number of columns expected in a row (6); not a real column.
};
|
||||
|
||||
// Builds a single-line address from one parsed row of the address dataset.
//
// |fields| holds the columns of the row, indexed by the Fields enum. The result has the form
// "<address>[, <district>], <zip>, <city>, <country>"; the district part is omitted when the
// district column equals the literal "None".
//
// Returns an empty string when the address column is empty, or when |fields| has fewer than
// Fields::Count elements (so a short row can never be indexed out of range).
string CreateAddress(vector<string> const & fields)
{
  if (fields.size() < base::Underlying(Fields::Count))
    return {};

  auto const & street = fields[base::Underlying(Fields::Address)];
  if (street.empty())
    return {};

  string result = street;

  // Bind by reference: the column is only read, there is no need to copy it.
  auto const & district = fields[base::Underlying(Fields::District)];
  if (district != "None")
    result.append(", ").append(district);

  result.append(", ").append(fields[base::Underlying(Fields::Zip)]);
  result.append(", ").append(fields[base::Underlying(Fields::City)]);
  result.append(", ").append(fields[base::Underlying(Fields::Country)]);
  return result;
}
|
||||
|
||||
// Reads the tab-separated address dataset |filename| and returns a map from sponsored id to the
// address string built by CreateAddress().
//
// Returns an empty map when |filename| is empty or when the file cannot be opened; the latter is
// reported via LOG(LERROR) instead of being silently treated as an empty dataset. Rows for which
// CreateAddress() returns an empty string are skipped.
//
// Every row must have exactly Fields::Count columns, and repeated rows for the same id must yield
// the same address; both conditions are enforced with CHECK_EQUAL.
map<string, string> ParseAddressDataset(string const & filename)
{
  if (filename.empty())
    return {};

  ifstream data(filename);
  if (!data.is_open())
  {
    LOG(LERROR, ("Cannot open address dataset:", filename));
    return {};
  }

  map<string, string> result;
  for (string line; getline(data, line);)
  {
    vector<string> fields;
    strings::ParseCSVRow(line, '\t', fields);
    CHECK_EQUAL(fields.size(), base::Underlying(Fields::Count), ());

    auto const & id = fields[base::Underlying(Fields::SponsoredId)];
    auto const address = CreateAddress(fields);
    if (address.empty())
      continue;

    auto const ret = result.emplace(id, address);
    // Hotel may appear several times. When the id is already present, emplace() leaves the map
    // untouched and returns an iterator to the existing entry, so compare against it directly
    // rather than looking the id up a second time.
    if (!ret.second)
      CHECK_EQUAL(ret.first->second, address, ());
  }
  return result;
}
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
ChangeMaxNumberOfOpenFiles(kMaxOpenFiles);
|
||||
|
@ -74,6 +135,20 @@ int main(int argc, char * argv[])
|
|||
google::SetUsageMessage("Booking dataset generator.");
|
||||
google::ParseCommandLineFlags(&argc, &argv, true);
|
||||
|
||||
if (FLAGS_dataset_type != "name" && FLAGS_dataset_type != "address")
|
||||
{
|
||||
LOG(LERROR, ("Wrong dataset type:", FLAGS_dataset_type, ". Supported types: name, address"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
auto const generateAddress = FLAGS_dataset_type == "address";
|
||||
|
||||
if (generateAddress && FLAGS_address_dataset_path.empty())
|
||||
{
|
||||
LOG(LERROR, ("Set address_dataset_path."));
|
||||
return -1;
|
||||
}
|
||||
|
||||
SetPlatformDirs(FLAGS_data_path, FLAGS_mwm_path);
|
||||
|
||||
classificator::Load();
|
||||
|
@ -91,11 +166,29 @@ int main(int argc, char * argv[])
|
|||
|
||||
auto const & hotelChecker = ftypes::IsBookingHotelChecker::Instance();
|
||||
|
||||
map<string, string> addressData;
|
||||
if (generateAddress)
|
||||
{
|
||||
addressData = ParseAddressDataset(FLAGS_address_dataset_path);
|
||||
}
|
||||
|
||||
auto const getAddress = [&](FeatureType & hotel) -> string {
|
||||
auto const id = hotel.GetMetadata().Get(feature::Metadata::FMD_SPONSORED_ID);
|
||||
if (id.empty())
|
||||
return {};
|
||||
|
||||
auto const it = addressData.find(id);
|
||||
if (it == addressData.end())
|
||||
return {};
|
||||
|
||||
return it->second;
|
||||
};
|
||||
|
||||
// For all airports from World.mwm (international or other important airports) and all
|
||||
// hotels which are closer than 100 km from airport we create sample with query=|hotel name| and
|
||||
// viewport and position in the airport.
|
||||
double constexpr kDistanceToHotelM = 1e5;
|
||||
std::set<FeatureID> hotelsNextToAirport;
|
||||
set<FeatureID> hotelsNextToAirport;
|
||||
{
|
||||
auto const handle = indexer::FindWorld(dataSource);
|
||||
if (!handle.IsAlive())
|
||||
|
@ -123,7 +216,15 @@ int main(int argc, char * argv[])
|
|||
return;
|
||||
}
|
||||
|
||||
string json = GetSampleString(hotel, airportPos);
|
||||
string address;
|
||||
if (generateAddress)
|
||||
{
|
||||
address = getAddress(hotel);
|
||||
if (address.empty())
|
||||
return;
|
||||
}
|
||||
|
||||
string const json = GetSampleString(hotel, airportPos, address);
|
||||
if (json.empty())
|
||||
return;
|
||||
out << json;
|
||||
|
@ -154,13 +255,23 @@ int main(int argc, char * argv[])
|
|||
auto hotel = guard.GetFeatureByIndex(i);
|
||||
if (!hotelChecker(*hotel))
|
||||
continue;
|
||||
|
||||
if (hotelsNextToAirport.count(hotel->GetID()) != 0)
|
||||
continue;
|
||||
|
||||
string address;
|
||||
if (generateAddress)
|
||||
{
|
||||
address = getAddress(*hotel);
|
||||
if (address.empty())
|
||||
continue;
|
||||
}
|
||||
|
||||
static double kRadiusToHotelM = kDistanceToHotelM / sqrt(2.0);
|
||||
string json = GetSampleString(
|
||||
*hotel,
|
||||
MercatorBounds::GetSmPoint(feature::GetCenter(*hotel), kRadiusToHotelM, kRadiusToHotelM));
|
||||
MercatorBounds::GetSmPoint(feature::GetCenter(*hotel), kRadiusToHotelM, kRadiusToHotelM),
|
||||
address);
|
||||
|
||||
if (!json.empty())
|
||||
out << json;
|
||||
|
|
Loading…
Add table
Reference in a new issue