forked from organicmaps/organicmaps
[booking] Support data from booking.com
This commit is contained in:
parent
1ac780c71d
commit
706e4467f3
9 changed files with 355 additions and 1 deletions
252
generator/booking_dataset.cpp
Normal file
252
generator/booking_dataset.cpp
Normal file
|
@ -0,0 +1,252 @@
|
|||
#include "generator/booking_dataset.hpp"
|
||||
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "std/fstream.hpp"
|
||||
#include "std/iostream.hpp"
|
||||
#include "std/sstream.hpp"
|
||||
|
||||
// Builds a hotel description from one tab-separated record |src|.
// Columns are read in the order given by BookingHotel::Fields. Columns that are
// absent from |src| stay empty, and an empty numeric column yields zero because
// strtoul/strtod return zero when no conversion can be performed.
BookingDataset::BookingHotel::BookingHotel(string const & src)
{
  vector<string> columns(FieldsCount());

  stringstream stream(src);
  string token;
  for (size_t pos = 0; getline(stream, token, '\t') && pos < columns.size(); ++pos)
    columns[pos] = token;

  // Small accessors so that every assignment below reads as "field <- column".
  auto const text = [&](Fields f) -> string const & { return columns[Index(f)]; };
  auto const asUint = [&](Fields f) -> uint32_t
  {
    return static_cast<uint32_t>(strtoul(text(f).c_str(), nullptr, 10));
  };
  auto const asDouble = [&](Fields f) -> double { return strtod(text(f).c_str(), nullptr); };

  id = asUint(Fields::Id);

  lat = asDouble(Fields::Latitude);
  lon = asDouble(Fields::Longtitude);
  name = text(Fields::Name);
  address = text(Fields::Address);

  stars = asUint(Fields::Stars);
  priceCategory = asUint(Fields::PriceCategory);

  ratingBooking = asDouble(Fields::RatingBooking);
  ratingUser = asDouble(Fields::RatingUsers);

  descUrl = text(Fields::DescUrl);

  type = asUint(Fields::Type);
}
|
||||
|
||||
// Streams a short human-readable summary of |h| (its name and coordinates) into |s|
// and returns |s| so that calls can be chained.
ostream & operator<<(ostream & s, BookingDataset::BookingHotel const & h)
{
  s << "Name: " << h.name;
  s << " lon: " << h.lon;
  s << " lat: " << h.lat;
  return s;
}
|
||||
|
||||
// Replaces the contents of m_hotels with the records read from the file at |path|,
// one hotel per line. An empty |path| leaves the dataset empty.
void BookingDataset::LoadBookingHotels(string const & path)
{
  m_hotels.clear();

  if (!path.empty())
  {
    ifstream input(path);
    string line;
    while (getline(input, line))
      m_hotels.emplace_back(line);
  }
}
|
||||
|
||||
// Loads the hotels from |dataPath| and indexes them spatially.
// Every hotel is inserted into the r-tree as a degenerate box located at its
// (lon, lat) point; the stored value is the hotel's position in m_hotels.
BookingDataset::BookingDataset(string const & dataPath)
{
  LoadBookingHotels(dataPath);

  for (size_t i = 0; i < m_hotels.size(); ++i)
  {
    BookingHotel const & hotel = m_hotels[i];
    TPoint const corner(hotel.lon, hotel.lat);
    m_rtree.insert(TValue(TBox(corner, corner), i));
  }
}
|
||||
|
||||
// Returns true when |value| is exactly equal (case-sensitive) to one of the
// accommodation kinds listed below, and false otherwise.
bool CheckForValues(string const & value)
{
  static char const * const kKnownValues[] = {"hotel",       "apartment", "camp_site", "chalet",
                                              "guest_house", "hostel",    "motel",     "resort"};

  for (char const * known : kKnownValues)
  {
    if (value.compare(known) == 0)
      return true;
  }
  return false;
}
|
||||
|
||||
// Decides whether the OSM element |e| looks like a hotel that is already present
// in the booking dataset.
//
// The element must carry a non-empty "name" tag. The nearest hotels from the
// r-tree that lie within the distance limit become candidates; the element
// matches as soon as any token of its normalized name is within a small edit
// distance of any token of a candidate's normalized name.
//
// Returns true on a match, false otherwise (including when |e| has no name or no
// hotel is close enough).
bool BookingDataset::MatchWithBooking(OsmElement const & e) const
{
  namespace bgi = boost::geometry::index;

  unsigned const kMaxCandidates = 3;
  double const kMaxDistanceMeters = 150.0;
  // Two tokens are considered equal when their edit distance is below this value.
  size_t const kEditDistanceLimit = 3;

  // Use the first "name" tag of the element.
  string name;
  for (auto const & tag : e.Tags())
  {
    if (tag.key == "name")
    {
      name = tag.value;
      break;
    }
  }

  if (name.empty())
    return false;

  // Collect indexes of the nearest hotels that are close enough to |e|.
  vector<size_t> candidates;
  for_each(bgi::qbegin(m_rtree, bgi::nearest(TPoint(e.lon, e.lat), kMaxCandidates)),
           bgi::qend(m_rtree), [&](TValue const & v)
  {
    auto const & hotel = m_hotels[v.second];
    // NOTE(review): ms::DistanceOnEarth is declared as (lat1, lon1, lat2, lon2) in
    // degrees; the previous call passed longitude first, which distorts the result.
    // Confirm against geometry/distance_on_sphere.hpp.
    double const dist = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon);
    if (dist > kMaxDistanceMeters)
      return;

    candidates.push_back(v.second);
  });

  if (candidates.empty())
    return false;

  // Match name.
  vector<strings::UniString> osmTokens;
  NormalizeAndTokenizeString(name, osmTokens, search::Delimiters());

  for (size_t const hotelIndex : candidates)
  {
    vector<strings::UniString> bookingTokens;
    NormalizeAndTokenizeString(m_hotels[hotelIndex].name, bookingTokens, search::Delimiters());

    for (auto const & osmToken : osmTokens)
    {
      for (auto const & bookingToken : bookingTokens)
      {
        size_t const distance = strings::EditDistance(osmToken.begin(), osmToken.end(),
                                                      bookingToken.begin(), bookingToken.end());
        // One sufficiently similar token pair is enough; stop at the first hit.
        if (distance < kEditDistanceLimit)
          return true;
      }
    }
  }

  return false;
}
|
||||
|
||||
// Returns true when |e| is a node whose first "tourism" tag with an accepted
// accommodation value (see CheckForValues) also matches a hotel from the booking
// dataset (see MatchWithBooking). Elements of other types, elements without tags
// and elements without such a tag yield false.
bool BookingDataset::Filter(OsmElement const & e) const
{
  if (e.type != OsmElement::EntityType::Node || e.Tags().empty())
    return false;

  for (auto const & tag : e.Tags())
  {
    if (tag.key != "tourism" || !CheckForValues(tag.value))
      continue;

    // Only the first qualifying tag decides the outcome.
    // TODO: Need to write file with dropped osm features.
    return MatchWithBooking(e);
  }

  return false;
}
|
||||
|
||||
// Synthesizes one OSM node per loaded hotel and passes a pointer to it to |fn|.
// Each node gets the hotel's coordinates plus name, sponsored reference and rating,
// website, stars, price rate, full address and a "tourism" tag derived from the
// hotel's numeric type code. The element passed to |fn| lives only for the
// duration of that call.
void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) const
{
  // Translates a numeric hotel type code into the value of the "tourism" tag.
  // Codes that are not listed fall back to "hotel".
  auto const tourismValue = [](uint32_t typeCode) -> char const *
  {
    switch (typeCode)
    {
    case 19:
    case 205: return "motel";

    case 21:
    case 206:
    case 212: return "resort";

    case 3:
    case 23:
    case 24:
    case 25:
    case 202:
    case 207:
    case 208:
    case 209:
    case 210:
    case 216:
    case 220:
    case 223: return "guest_house";

    case 14:
    case 204:
    case 213:
    case 218:
    case 219:
    case 222:
    case 226: return "hotel";

    case 211:
    case 224:
    case 228: return "chalet";

    case 13:
    case 203:
    case 225: return "hostel";

    case 2:
    case 201:
    case 215:
    case 221:
    case 227: return "apartment";

    case 214: return "camp_site";

    default: return "hotel";
    }
  };

  for (auto const & hotel : m_hotels)
  {
    OsmElement node;
    node.type = OsmElement::EntityType::Node;
    node.id = 1;

    node.lon = hotel.lon;
    node.lat = hotel.lat;

    node.AddTag("name", hotel.name);
    node.AddTag("ref:sponsored", strings::to_string(hotel.id));
    node.AddTag("website", hotel.descUrl);
    node.AddTag("rating:sponsored", strings::to_string(hotel.ratingUser));
    node.AddTag("stars", strings::to_string(hotel.stars));
    node.AddTag("price_rate", strings::to_string(hotel.priceCategory));
    node.AddTag("addr:full", hotel.address);
    node.AddTag("tourism", tourismValue(hotel.type));

    fn(&node);
  }
}
|
70
generator/booking_dataset.hpp
Normal file
70
generator/booking_dataset.hpp
Normal file
|
@ -0,0 +1,70 @@
|
|||
#pragma once
|
||||
|
||||
#include "generator/osm_element.hpp"
|
||||
|
||||
#include "boost/geometry.hpp"
|
||||
#include "boost/geometry/geometries/point.hpp"
|
||||
#include "boost/geometry/geometries/box.hpp"
|
||||
#include "boost/geometry/index/rtree.hpp"
|
||||
|
||||
#include "std/function.hpp"
|
||||
#include "std/string.hpp"
|
||||
|
||||
class BookingDataset
|
||||
{
|
||||
public:
|
||||
struct BookingHotel
|
||||
{
|
||||
enum class Fields : size_t
|
||||
{
|
||||
Id = 0,
|
||||
Latitude = 1,
|
||||
Longtitude = 2,
|
||||
Name = 3,
|
||||
Address = 4,
|
||||
Stars = 5,
|
||||
PriceCategory = 6,
|
||||
RatingBooking = 7,
|
||||
RatingUsers = 8,
|
||||
DescUrl = 9,
|
||||
Type = 10,
|
||||
|
||||
Counter
|
||||
};
|
||||
|
||||
uint32_t id = 0;
|
||||
double lat = 0.0;
|
||||
double lon = 0.0;
|
||||
string name;
|
||||
string address;
|
||||
uint32_t stars = 0;
|
||||
uint32_t priceCategory = 0;
|
||||
double ratingBooking = 0.0;
|
||||
double ratingUser = 0.0;
|
||||
string descUrl;
|
||||
uint32_t type = 0;
|
||||
|
||||
constexpr size_t Index(Fields field) const { return static_cast<size_t>(field); }
|
||||
constexpr size_t FieldsCount() const { return static_cast<size_t>(Fields::Counter); }
|
||||
|
||||
BookingHotel(string const &src);
|
||||
};
|
||||
|
||||
BookingDataset(string const & dataPath);
|
||||
|
||||
bool Filter(OsmElement const & e) const;
|
||||
void BuildFeatures(function<void(OsmElement *)> const & fn) const;
|
||||
|
||||
protected:
|
||||
vector<BookingHotel> m_hotels;
|
||||
|
||||
// create the rtree using default constructor
|
||||
using TPoint = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
|
||||
using TBox = boost::geometry::model::box<TPoint>;
|
||||
using TValue = pair<TBox, size_t>;
|
||||
|
||||
boost::geometry::index::rtree<TValue, boost::geometry::index::quadratic<16>> m_rtree;
|
||||
|
||||
void LoadBookingHotels(string const & path);
|
||||
bool MatchWithBooking(OsmElement const & e) const;
|
||||
};
|
|
@ -41,6 +41,8 @@ struct GenerateInfo
|
|||
NodeStorageType m_nodeStorageType;
|
||||
OsmSourceType m_osmFileType;
|
||||
string m_osmFileName;
|
||||
|
||||
string m_bookingDatafileName;
|
||||
|
||||
uint32_t m_versionDate = 0;
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ INCLUDEPATH *= $$ROOT_DIR/3party/gflags/src \
|
|||
QT *= core
|
||||
|
||||
SOURCES += \
|
||||
booking_dataset.cpp \
|
||||
borders_generator.cpp \
|
||||
borders_loader.cpp \
|
||||
check_model.cpp \
|
||||
|
@ -37,6 +38,7 @@ SOURCES += \
|
|||
unpack_mwm.cpp \
|
||||
|
||||
HEADERS += \
|
||||
booking_dataset.hpp \
|
||||
borders_generator.hpp \
|
||||
borders_loader.hpp \
|
||||
check_model.hpp \
|
||||
|
|
|
@ -67,6 +67,7 @@ DEFINE_bool(make_cross_section, false, "Make corss section in routing file for c
|
|||
DEFINE_string(osm_file_name, "", "Input osm area file");
|
||||
DEFINE_string(osm_file_type, "xml", "Input osm area file type [xml, o5m]");
|
||||
DEFINE_string(user_resource_path, "", "User defined resource path for classificator.txt and etc.");
|
||||
DEFINE_string(booking_data, "", "Path to booking data in .tsv format");
|
||||
DEFINE_uint64(planet_version, my::SecondsSinceEpoch(), "Version as seconds since epoch, by default - now");
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
|
@ -100,6 +101,7 @@ int main(int argc, char ** argv)
|
|||
genInfo.m_osmFileName = FLAGS_osm_file_name;
|
||||
genInfo.m_failOnCoasts = FLAGS_fail_on_coasts;
|
||||
genInfo.m_preloadCache = FLAGS_preload_cache;
|
||||
genInfo.m_bookingDatafileName = FLAGS_booking_data;
|
||||
|
||||
genInfo.m_versionDate = static_cast<uint32_t>(FLAGS_planet_version);
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include "generator/osm_element.hpp"
|
||||
|
||||
#include "base/string_utils.hpp"
|
||||
#include "coding/parse_xml.hpp"
|
||||
|
||||
#include "std/cstdio.hpp"
|
||||
|
@ -63,7 +64,9 @@ void OsmElement::AddTag(string const & k, string const & v)
|
|||
SKIP_KEY("official_name");
|
||||
#undef SKIP_KEY
|
||||
|
||||
m_tags.emplace_back(k, v);
|
||||
string value = v;
|
||||
strings::Trim(value);
|
||||
m_tags.emplace_back(k, value);
|
||||
}
|
||||
|
||||
string OsmElement::ToString(string const & shift) const
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include "generator/booking_dataset.hpp"
|
||||
#include "generator/coastlines_generator.hpp"
|
||||
#include "generator/feature_generator.hpp"
|
||||
#include "generator/intermediate_data.hpp"
|
||||
|
@ -511,12 +512,19 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info)
|
|||
TagAdmixer tagAdmixer(info.GetIntermediateFileName("ways", ".csv"),
|
||||
info.GetIntermediateFileName("towns", ".csv"));
|
||||
TagReplacer tagReplacer(GetPlatform().ResourcesDir() + REPLACED_TAGS_FILE);
|
||||
|
||||
// If info.m_bookingDatafileName is empty then no data will be loaded.
|
||||
BookingDataset bookingDataset(info.m_bookingDatafileName);
|
||||
|
||||
// Here we can add new tags to element!!!
|
||||
auto const fn = [&](OsmElement * e)
|
||||
{
|
||||
tagReplacer(e);
|
||||
tagAdmixer(e);
|
||||
|
||||
if (bookingDataset.Filter(*e))
|
||||
return;
|
||||
|
||||
parser.EmitElement(e);
|
||||
};
|
||||
|
||||
|
@ -533,6 +541,12 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info)
|
|||
|
||||
LOG(LINFO, ("Processing", info.m_osmFileName, "done."));
|
||||
|
||||
if (!info.m_bookingDatafileName.empty())
|
||||
{
|
||||
bookingDataset.BuildFeatures([&](OsmElement * e) { parser.EmitElement(e); });
|
||||
LOG(LINFO, ("Processing booking data from", info.m_bookingDatafileName, "done."));
|
||||
}
|
||||
|
||||
parser.Finish();
|
||||
|
||||
// Stop if coasts are not merged and FLAG_fail_on_coasts is set
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#pragma once
|
||||
#include "base/stl_add.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "std/algorithm.hpp"
|
||||
|
|
|
@ -57,6 +57,8 @@
|
|||
677E2A161CAACC5F001DC42A /* tag_admixer.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 677E2A111CAACC5F001DC42A /* tag_admixer.hpp */; };
|
||||
677E2A171CAACC5F001DC42A /* towns_dumper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 677E2A121CAACC5F001DC42A /* towns_dumper.cpp */; };
|
||||
677E2A181CAACC5F001DC42A /* towns_dumper.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 677E2A131CAACC5F001DC42A /* towns_dumper.hpp */; };
|
||||
67A0FEBE1CEB467F008F2A61 /* booking_dataset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 67A0FEBC1CEB467F008F2A61 /* booking_dataset.cpp */; };
|
||||
67A0FEBF1CEB467F008F2A61 /* booking_dataset.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 67A0FEBD1CEB467F008F2A61 /* booking_dataset.hpp */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
|
@ -113,6 +115,8 @@
|
|||
677E2A111CAACC5F001DC42A /* tag_admixer.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = tag_admixer.hpp; sourceTree = "<group>"; };
|
||||
677E2A121CAACC5F001DC42A /* towns_dumper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = towns_dumper.cpp; sourceTree = "<group>"; };
|
||||
677E2A131CAACC5F001DC42A /* towns_dumper.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = towns_dumper.hpp; sourceTree = "<group>"; };
|
||||
67A0FEBC1CEB467F008F2A61 /* booking_dataset.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = booking_dataset.cpp; sourceTree = "<group>"; };
|
||||
67A0FEBD1CEB467F008F2A61 /* booking_dataset.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = booking_dataset.hpp; sourceTree = "<group>"; };
|
||||
67F0F6761B8C9DCE003F52FF /* osm_xml_source.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = osm_xml_source.hpp; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
|
@ -199,6 +203,8 @@
|
|||
670B84BB1A8CDB0000CE4492 /* osm_source.hpp */,
|
||||
6764B8921ADD6A3300DD8B15 /* osm_o5m_source.hpp */,
|
||||
67F0F6761B8C9DCE003F52FF /* osm_xml_source.hpp */,
|
||||
67A0FEBC1CEB467F008F2A61 /* booking_dataset.cpp */,
|
||||
67A0FEBD1CEB467F008F2A61 /* booking_dataset.hpp */,
|
||||
);
|
||||
name = generator;
|
||||
path = ../../generator;
|
||||
|
@ -227,6 +233,7 @@
|
|||
675340741A3F2A7400A0A8C3 /* generate_info.hpp in Headers */,
|
||||
677E2A161CAACC5F001DC42A /* tag_admixer.hpp in Headers */,
|
||||
675340861A3F2A7400A0A8C3 /* tesselator.hpp in Headers */,
|
||||
67A0FEBF1CEB467F008F2A61 /* booking_dataset.hpp in Headers */,
|
||||
6753405F1A3F2A7400A0A8C3 /* borders_loader.hpp in Headers */,
|
||||
675340801A3F2A7400A0A8C3 /* polygonizer.hpp in Headers */,
|
||||
675340941C5231BA002CF0D9 /* search_index_builder.hpp in Headers */,
|
||||
|
@ -309,6 +316,7 @@
|
|||
675340811A3F2A7400A0A8C3 /* routing_generator.cpp in Sources */,
|
||||
675340931C5231BA002CF0D9 /* search_index_builder.cpp in Sources */,
|
||||
6753406E1A3F2A7400A0A8C3 /* feature_merger.cpp in Sources */,
|
||||
67A0FEBE1CEB467F008F2A61 /* booking_dataset.cpp in Sources */,
|
||||
6753408D1A3F2A7400A0A8C3 /* osm_element.cpp in Sources */,
|
||||
6726C1D51A4AFEF4005EEA39 /* osm2meta.cpp in Sources */,
|
||||
6753405E1A3F2A7400A0A8C3 /* borders_loader.cpp in Sources */,
|
||||
|
|
Loading…
Add table
Reference in a new issue