diff --git a/generator/booking_dataset.cpp b/generator/booking_dataset.cpp new file mode 100644 index 0000000000..4a488cda86 --- /dev/null +++ b/generator/booking_dataset.cpp @@ -0,0 +1,252 @@ +#include "generator/booking_dataset.hpp" + +#include "base/string_utils.hpp" + +#include "geometry/distance_on_sphere.hpp" + +#include "indexer/search_delimiters.hpp" +#include "indexer/search_string_utils.hpp" + +#include "std/fstream.hpp" +#include "std/iostream.hpp" +#include "std/sstream.hpp" + +BookingDataset::BookingHotel::BookingHotel(string const & src) +{ + stringstream ss(src); + string elem; + vector rec(FieldsCount()); + for (size_t i = 0; getline(ss, elem, '\t') && i < rec.size(); ++i) + rec[i] = elem; + + id = static_cast(strtoul(rec[Index(Fields::Id)].c_str(), nullptr, 10)); + + lat = strtod(rec[Index(Fields::Latitude)].c_str(), nullptr); + lon = strtod(rec[Index(Fields::Longtitude)].c_str(), nullptr); + name = rec[Index(Fields::Name)]; + address = rec[Index(Fields::Address)]; + + stars = rec[Index(Fields::Stars)].empty() + ? 0 + : static_cast(strtoul(rec[Index(Fields::Stars)].c_str(), nullptr, 10)); + + priceCategory = + rec[Index(Fields::PriceCategory)].empty() + ? 0 + : static_cast(strtoul(rec[Index(Fields::PriceCategory)].c_str(), nullptr, 10)); + + ratingBooking = rec[Index(Fields::RatingBooking)].empty() + ? 0 + : strtod(rec[Index(Fields::RatingBooking)].c_str(), nullptr); + + ratingUser = rec[Index(Fields::RatingUsers)].empty() + ? 0 + : strtod(rec[Index(Fields::RatingUsers)].c_str(), nullptr); + + descUrl = rec[Index(Fields::DescUrl)]; + + type = rec[Index(Fields::Type)].empty() + ? 0 + : static_cast(strtoul(rec[Index(Fields::Type)].c_str(), nullptr, 10)); +} + +ostream & operator<<(ostream & s, BookingDataset::BookingHotel const & h) +{ + return s << "Name: " << h.name << " lon: " << h.lon << " lat: " << h.lat; +} + +void BookingDataset::LoadBookingHotels(string const & path) +{ + m_hotels.clear(); + + if(path.empty()) + return; + + ifstream src(path); + for (string elem; getline(src, elem);) + m_hotels.emplace_back(elem); +} + +BookingDataset::BookingDataset(string const & dataPath) +{ + LoadBookingHotels(dataPath); + + size_t counter = 0; + for (auto const & hotel : m_hotels) + { + TBox b(TPoint(hotel.lon, hotel.lat), TPoint(hotel.lon, hotel.lat)); + m_rtree.insert(std::make_pair(b, counter++)); + } +} + +bool CheckForValues(string const & value) +{ + for (char const * val : + {"hotel", "apartment", "camp_site", "chalet", "guest_house", "hostel", "motel", "resort"}) + { + if (value == val) + return true; + } + return false; +} + +bool BookingDataset::MatchWithBooking(OsmElement const & e) const +{ + string name; + for (auto const & tag : e.Tags()) + { + if (tag.key == "name") + { + name = tag.value; + break; + } + } + + if (name.empty()) + return false; + + // Find 3 nearest values to a point. + vector result; + for_each(boost::geometry::index::qbegin(m_rtree, + boost::geometry::index::nearest(TPoint(e.lon, e.lat), 3)), + boost::geometry::index::qend(m_rtree), [&](TValue const & v) + { + auto const & hotel = m_hotels[v.second]; + double dist = ms::DistanceOnEarth(e.lon, e.lat, hotel.lon, hotel.lat); + if (dist > 150 /* max distance in meters */) + return; + + result.emplace_back(v); + }); + + if (result.empty()) + return false; + + // Match name. + vector osmTokens; + NormalizeAndTokenizeString(name, osmTokens, search::Delimiters()); + + // cout << "\n------------- " << name << endl; + + bool matched = false; + for (auto const & e : result) + { + vector bookingTokens; + NormalizeAndTokenizeString(m_hotels[e.second].name, bookingTokens, search::Delimiters()); + + map>> weightPair; + + for (size_t j = 0; j < osmTokens.size(); ++j) + { + for (size_t i = 0; i < bookingTokens.size(); ++i) + { + size_t distance = strings::EditDistance(osmTokens[j].begin(), osmTokens[j].end(), + bookingTokens[i].begin(), bookingTokens[i].end()); + if (distance < 3) + weightPair[distance].emplace_back(i, j); + } + } + + if (!weightPair.empty()) + { + // cout << m_hotels[e.second] << endl; + matched = true; + } + } + return matched; +} + +bool BookingDataset::Filter(OsmElement const & e) const +{ + if (e.type != OsmElement::EntityType::Node) + return false; + + if (e.Tags().empty()) + return false; + + bool matched = false; + for (auto const & tag : e.Tags()) + { + if (tag.key == "tourism" && CheckForValues(tag.value)) + { + matched = MatchWithBooking(e); + break; + } + } + + // TODO: Need to write file with dropped osm features. + + return matched; +} + +void BookingDataset::BuildFeatures(function const & fn) const +{ + for (auto const & hotel : m_hotels) + { + OsmElement e; + e.type = OsmElement::EntityType::Node; + e.id = 1; + + e.lon = hotel.lon; + e.lat = hotel.lat; + + e.AddTag("name", hotel.name); + e.AddTag("ref:sponsored", strings::to_string(hotel.id)); + e.AddTag("website", hotel.descUrl); + e.AddTag("rating:sponsored", strings::to_string(hotel.ratingUser)); + e.AddTag("stars", strings::to_string(hotel.stars)); + e.AddTag("price_rate", strings::to_string(hotel.priceCategory)); + e.AddTag("addr:full", hotel.address); + + switch (hotel.type) + { + case 19: + case 205: e.AddTag("tourism", "motel"); break; + + case 21: + case 206: + case 212: e.AddTag("tourism", "resort"); break; + + case 3: + case 23: + case 24: + case 25: + case 202: + case 207: + case 208: + case 209: + case 210: + case 216: + case 220: + case 223: e.AddTag("tourism", "guest_house"); break; + + case 14: + case 204: + case 213: + case 218: + case 219: + case 226: + case 222: e.AddTag("tourism", "hotel"); break; + + case 211: + case 224: + case 228: e.AddTag("tourism", "chalet"); break; + + case 13: + case 225: + case 203: e.AddTag("tourism", "hostel"); break; + + case 215: + case 221: + case 227: + case 2: + case 201: e.AddTag("tourism", "apartment"); break; + + case 214: e.AddTag("tourism", "camp_site"); break; + + default: e.AddTag("tourism", "hotel"); break; + } + + fn(&e); + } +} diff --git a/generator/booking_dataset.hpp b/generator/booking_dataset.hpp new file mode 100644 index 0000000000..55ffdd575a --- /dev/null +++ b/generator/booking_dataset.hpp @@ -0,0 +1,70 @@ +#pragma once + +#include "generator/osm_element.hpp" + +#include "boost/geometry.hpp" +#include "boost/geometry/geometries/point.hpp" +#include "boost/geometry/geometries/box.hpp" +#include "boost/geometry/index/rtree.hpp" + +#include "std/function.hpp" +#include "std/string.hpp" + +class BookingDataset +{ +public: + struct BookingHotel + { + enum class Fields : size_t + { + Id = 0, + Latitude = 1, + Longtitude = 2, + Name = 3, + Address = 4, + Stars = 5, + PriceCategory = 6, + RatingBooking = 7, + RatingUsers = 8, + DescUrl = 9, + Type = 10, + + Counter + }; + + uint32_t id = 0; + double lat = 0.0; + double lon = 0.0; + string name; + string address; + uint32_t stars = 0; + uint32_t priceCategory = 0; + double ratingBooking = 0.0; + double ratingUser = 0.0; + string descUrl; + uint32_t type = 0; + + constexpr size_t Index(Fields field) const { return static_cast(field); } + constexpr size_t FieldsCount() const { return static_cast(Fields::Counter); } + + BookingHotel(string const &src); + }; + + BookingDataset(string const & dataPath); + + bool Filter(OsmElement const & e) const; + void BuildFeatures(function const & fn) const; + +protected: + vector m_hotels; + + // create the rtree using default constructor + using TPoint = boost::geometry::model::point; + using TBox = boost::geometry::model::box; + using TValue = pair; + + boost::geometry::index::rtree> m_rtree; + + void LoadBookingHotels(string const & path); + bool MatchWithBooking(OsmElement const & e) const; +}; diff --git a/generator/generate_info.hpp b/generator/generate_info.hpp index 0713d1bf64..6acca8fa80 100644 --- a/generator/generate_info.hpp +++ b/generator/generate_info.hpp @@ -41,6 +41,8 @@ struct GenerateInfo NodeStorageType m_nodeStorageType; OsmSourceType m_osmFileType; string m_osmFileName; + + string m_bookingDatafileName; uint32_t m_versionDate = 0; diff --git a/generator/generator.pro b/generator/generator.pro index 42725c1953..083c7d3bf8 100644 --- a/generator/generator.pro +++ b/generator/generator.pro @@ -14,6 +14,7 @@ INCLUDEPATH *= $$ROOT_DIR/3party/gflags/src \ QT *= core SOURCES += \ + booking_dataset.cpp \ borders_generator.cpp \ borders_loader.cpp \ check_model.cpp \ @@ -37,6 +38,7 @@ SOURCES += \ unpack_mwm.cpp \ HEADERS += \ + booking_dataset.hpp \ borders_generator.hpp \ borders_loader.hpp \ check_model.hpp \ diff --git a/generator/generator_tool/generator_tool.cpp b/generator/generator_tool/generator_tool.cpp index cea7fc874d..9c884917a4 100644 --- a/generator/generator_tool/generator_tool.cpp +++ b/generator/generator_tool/generator_tool.cpp @@ -67,6 +67,7 @@ DEFINE_bool(make_cross_section, false, "Make corss section in routing file for c DEFINE_string(osm_file_name, "", "Input osm area file"); DEFINE_string(osm_file_type, "xml", "Input osm area file type [xml, o5m]"); DEFINE_string(user_resource_path, "", "User defined resource path for classificator.txt and etc."); +DEFINE_string(booking_data, "", "Path to booking data in .tsv format"); DEFINE_uint64(planet_version, my::SecondsSinceEpoch(), "Version as seconds since epoch, by default - now"); int main(int argc, char ** argv) @@ -100,6 +101,7 @@ int main(int argc, char ** argv) genInfo.m_osmFileName = FLAGS_osm_file_name; genInfo.m_failOnCoasts = FLAGS_fail_on_coasts; genInfo.m_preloadCache = FLAGS_preload_cache; + genInfo.m_bookingDatafileName = FLAGS_booking_data; genInfo.m_versionDate = static_cast(FLAGS_planet_version); diff --git a/generator/osm_element.cpp b/generator/osm_element.cpp index cf22bf41b2..1c65dd599d 100644 --- a/generator/osm_element.cpp +++ b/generator/osm_element.cpp @@ -1,5 +1,6 @@ #include "generator/osm_element.hpp" +#include "base/string_utils.hpp" #include "coding/parse_xml.hpp" #include "std/cstdio.hpp" @@ -63,7 +64,9 @@ void OsmElement::AddTag(string const & k, string const & v) SKIP_KEY("official_name"); #undef SKIP_KEY - m_tags.emplace_back(k, v); + string value = v; + strings::Trim(value); + m_tags.emplace_back(k, value); } string OsmElement::ToString(string const & shift) const diff --git a/generator/osm_source.cpp b/generator/osm_source.cpp index 8d564d5aae..d454abefab 100644 --- a/generator/osm_source.cpp +++ b/generator/osm_source.cpp @@ -1,3 +1,4 @@ +#include "generator/booking_dataset.hpp" #include "generator/coastlines_generator.hpp" #include "generator/feature_generator.hpp" #include "generator/intermediate_data.hpp" @@ -511,12 +512,19 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info) TagAdmixer tagAdmixer(info.GetIntermediateFileName("ways", ".csv"), info.GetIntermediateFileName("towns", ".csv")); TagReplacer tagReplacer(GetPlatform().ResourcesDir() + REPLACED_TAGS_FILE); + + // If info.m_bookingDatafileName is empty then no data will be loaded. + BookingDataset bookingDataset(info.m_bookingDatafileName); // Here we can add new tags to element!!! auto const fn = [&](OsmElement * e) { tagReplacer(e); tagAdmixer(e); + + if (bookingDataset.Filter(*e)) + return; + parser.EmitElement(e); }; @@ -533,6 +541,12 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info) LOG(LINFO, ("Processing", info.m_osmFileName, "done.")); + if (!info.m_bookingDatafileName.empty()) + { + bookingDataset.BuildFeatures([&](OsmElement * e) { parser.EmitElement(e); }); + LOG(LINFO, ("Processing booking data from", info.m_bookingDatafileName, "done.")); + } + parser.Finish(); // Stop if coasts are not merged and FLAG_fail_on_coasts is set diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp index 6734d33625..87e529e0d0 100644 --- a/indexer/search_string_utils.hpp +++ b/indexer/search_string_utils.hpp @@ -1,4 +1,5 @@ #pragma once +#include "base/stl_add.hpp" #include "base/string_utils.hpp" #include "std/algorithm.hpp" diff --git a/xcode/generator/generator.xcodeproj/project.pbxproj b/xcode/generator/generator.xcodeproj/project.pbxproj index 9c5e8ae064..73a6567f2e 100644 --- a/xcode/generator/generator.xcodeproj/project.pbxproj +++ b/xcode/generator/generator.xcodeproj/project.pbxproj @@ -57,6 +57,8 @@ 677E2A161CAACC5F001DC42A /* tag_admixer.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 677E2A111CAACC5F001DC42A /* tag_admixer.hpp */; }; 677E2A171CAACC5F001DC42A /* towns_dumper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 677E2A121CAACC5F001DC42A /* towns_dumper.cpp */; }; 677E2A181CAACC5F001DC42A /* towns_dumper.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 677E2A131CAACC5F001DC42A /* towns_dumper.hpp */; }; + 67A0FEBE1CEB467F008F2A61 /* booking_dataset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 67A0FEBC1CEB467F008F2A61 /* booking_dataset.cpp */; }; + 67A0FEBF1CEB467F008F2A61 /* booking_dataset.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 67A0FEBD1CEB467F008F2A61 /* booking_dataset.hpp */; }; /* End PBXBuildFile section */ /* Begin PBXFileReference section */ @@ -113,6 +115,8 @@ 677E2A111CAACC5F001DC42A /* tag_admixer.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = tag_admixer.hpp; sourceTree = ""; }; 677E2A121CAACC5F001DC42A /* towns_dumper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = towns_dumper.cpp; sourceTree = ""; }; 677E2A131CAACC5F001DC42A /* towns_dumper.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = towns_dumper.hpp; sourceTree = ""; }; + 67A0FEBC1CEB467F008F2A61 /* booking_dataset.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = booking_dataset.cpp; sourceTree = ""; }; + 67A0FEBD1CEB467F008F2A61 /* booking_dataset.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = booking_dataset.hpp; sourceTree = ""; }; 67F0F6761B8C9DCE003F52FF /* osm_xml_source.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = osm_xml_source.hpp; sourceTree = ""; }; /* End PBXFileReference section */ @@ -199,6 +203,8 @@ 670B84BB1A8CDB0000CE4492 /* osm_source.hpp */, 6764B8921ADD6A3300DD8B15 /* osm_o5m_source.hpp */, 67F0F6761B8C9DCE003F52FF /* osm_xml_source.hpp */, + 67A0FEBC1CEB467F008F2A61 /* booking_dataset.cpp */, + 67A0FEBD1CEB467F008F2A61 /* booking_dataset.hpp */, ); name = generator; path = ../../generator; @@ -227,6 +233,7 @@ 675340741A3F2A7400A0A8C3 /* generate_info.hpp in Headers */, 677E2A161CAACC5F001DC42A /* tag_admixer.hpp in Headers */, 675340861A3F2A7400A0A8C3 /* tesselator.hpp in Headers */, + 67A0FEBF1CEB467F008F2A61 /* booking_dataset.hpp in Headers */, 6753405F1A3F2A7400A0A8C3 /* borders_loader.hpp in Headers */, 675340801A3F2A7400A0A8C3 /* polygonizer.hpp in Headers */, 675340941C5231BA002CF0D9 /* search_index_builder.hpp in Headers */, @@ -309,6 +316,7 @@ 675340811A3F2A7400A0A8C3 /* routing_generator.cpp in Sources */, 675340931C5231BA002CF0D9 /* search_index_builder.cpp in Sources */, 6753406E1A3F2A7400A0A8C3 /* feature_merger.cpp in Sources */, + 67A0FEBE1CEB467F008F2A61 /* booking_dataset.cpp in Sources */, 6753408D1A3F2A7400A0A8C3 /* osm_element.cpp in Sources */, 6726C1D51A4AFEF4005EEA39 /* osm2meta.cpp in Sources */, 6753405E1A3F2A7400A0A8C3 /* borders_loader.cpp in Sources */,