diff --git a/generator/generate_info.hpp b/generator/generate_info.hpp index 57308bfaca..f141fd9741 100644 --- a/generator/generate_info.hpp +++ b/generator/generate_info.hpp @@ -49,6 +49,8 @@ struct GenerateInfo std::string m_opentableDatafileName; std::string m_viatorDatafileName; + std::string m_popularPlacesFilename; + std::shared_ptr m_boundariesTable; uint32_t m_versionDate = 0; diff --git a/generator/generator_tool/generator_tool.cpp b/generator/generator_tool/generator_tool.cpp index 399113ab83..5318052369 100644 --- a/generator/generator_tool/generator_tool.cpp +++ b/generator/generator_tool/generator_tool.cpp @@ -139,8 +139,13 @@ DEFINE_string(booking_data, "", "Path to booking data in .tsv format."); DEFINE_string(opentable_data, "", "Path to opentable data in .tsv format."); DEFINE_string(viator_data, "", "Path to viator data in .tsv format."); -DEFINE_string(ugc_data, "", "Input UGC source database file name"); -DEFINE_string(popular_places_data, "", "Input Popular Places source file name"); +DEFINE_string(ugc_data, "", "Input UGC source database file name."); + +DEFINE_bool(generate_popular_places, false, "Generate popular places section."); +DEFINE_string(popular_places_data, "", + "Input Popular Places source file name. Needed both for World intermediate features " + "generation (2nd pass for World) and popular places section generation (5th pass for " + "countries)."); // Printing stuff. 
DEFINE_bool(calc_statistics, false, "Calculate feature statistics for specified mwm bucket files."); @@ -211,6 +216,7 @@ int main(int argc, char ** argv) genInfo.m_bookingDatafileName = FLAGS_booking_data; genInfo.m_opentableDatafileName = FLAGS_opentable_data; genInfo.m_viatorDatafileName = FLAGS_viator_data; + genInfo.m_popularPlacesFilename = FLAGS_popular_places_data; genInfo.m_boundariesTable = make_shared(); genInfo.m_versionDate = static_cast(FLAGS_planet_version); @@ -514,9 +520,10 @@ int main(int argc, char ** argv) } } - if (!FLAGS_popular_places_data.empty()) + if (FLAGS_generate_popular_places) { - if (!BuildPopularPlacesMwmSection(FLAGS_popular_places_data, datFile, osmToFeatureFilename)) + if (!BuildPopularPlacesMwmSection(genInfo.m_popularPlacesFilename, datFile, + osmToFeatureFilename)) { LOG(LCRITICAL, ("Error generating popular places mwm section.")); } diff --git a/generator/popular_places_section_builder.cpp b/generator/popular_places_section_builder.cpp index cd7e10eba4..73dac9ce1b 100644 --- a/generator/popular_places_section_builder.cpp +++ b/generator/popular_places_section_builder.cpp @@ -12,20 +12,15 @@ #include "indexer/ftraits.hpp" #include "indexer/rank_table.hpp" -#include "base/geo_object_id.hpp" #include "base/string_utils.hpp" #include #include -#include #include #include -namespace +namespace generator { -using PopularityIndex = uint8_t; -using PopularPlaces = std::unordered_map; - void LoadPopularPlaces(std::string const & srcFilename, PopularPlaces & places) { coding::CSVReader reader; @@ -68,10 +63,7 @@ void LoadPopularPlaces(std::string const & srcFilename, PopularPlaces & places) } }); } -} // namespace -namespace generator -{ bool BuildPopularPlacesMwmSection(std::string const & srcFilename, std::string const & mwmFile, std::string const & osmToFeatureFilename) { diff --git a/generator/popular_places_section_builder.hpp b/generator/popular_places_section_builder.hpp index 2cb467493f..cdc71118bc 100644 --- 
a/generator/popular_places_section_builder.hpp +++ b/generator/popular_places_section_builder.hpp @@ -1,9 +1,17 @@ #pragma once +#include "base/geo_object_id.hpp" + #include +#include namespace generator { +using PopularityIndex = uint8_t; +using PopularPlaces = std::unordered_map; + +void LoadPopularPlaces(std::string const & srcFilename, PopularPlaces & places); + bool BuildPopularPlacesMwmSection(std::string const & srcFilename, std::string const & mwmFile, std::string const & osmToFeatureFilename); } // namespace generator diff --git a/generator/world_map_generator.hpp b/generator/world_map_generator.hpp index 0ea83cbcac..c54fb66eb3 100644 --- a/generator/world_map_generator.hpp +++ b/generator/world_map_generator.hpp @@ -2,6 +2,9 @@ #include "generator/feature_merger.hpp" #include "generator/generate_info.hpp" +#include "generator/popular_places_section_builder.hpp" + +#include "search/utils.hpp" #include "indexer/classificator.hpp" #include "indexer/scales.hpp" @@ -14,6 +17,7 @@ #include "base/logging.hpp" +#include #include #include #include @@ -246,6 +250,7 @@ class WorldMapGenerator FeatureTypesProcessor m_typesCorrector; FeatureMergeProcessor m_merger; WaterBoundaryChecker m_boundaryChecker; + generator::PopularPlaces m_popularPlaces; public: explicit WorldMapGenerator(feature::GenerateInfo const & info) @@ -264,18 +269,37 @@ public: char const * arr2[] = {"boundary", "administrative", "4", "state"}; m_typesCorrector.SetDontNormalizeType(arr2); + + if (!info.m_popularPlacesFilename.empty()) + generator::LoadPopularPlaces(info.m_popularPlacesFilename, m_popularPlaces); + else + LOG(LWARNING, ("popular_places_data option not set. 
Popular attractions will not be added to World.mwm")); } void operator()(FeatureBuilder1 fb) { - if (!m_worldBucket.NeedPushToWorld(fb)) + auto const isPopularAttraction = IsPopularAttraction(fb); + + if (!m_worldBucket.NeedPushToWorld(fb) && !isPopularAttraction) return; m_worldBucket.CalcStatistics(fb); if (!m_boundaryChecker.IsBoundaries(fb)) { - PushFeature(fb); + // Save original feature iff it is a popular attraction before PushFeature(fb) modifies fb. + auto originalFeature = isPopularAttraction ? fb : FeatureBuilder1(); + + if (PushFeature(fb) || !isPopularAttraction) + return; + + // We push GEOM_POINT with all the same tags, names and center instead of GEOM_WAY/GEOM_AREA + // because we do not need geometry for attractions (just search index and placepage data) + // and want to avoid size checks applied to areas. + if (originalFeature.GetGeomType() != feature::GEOM_POINT) + originalFeature.SetCenter(originalFeature.GetGeometryCenter()); + + m_worldBucket.PushSure(originalFeature); return; } @@ -285,7 +309,7 @@ public: PushFeature(f); } - void PushFeature(FeatureBuilder1 & fb) + bool PushFeature(FeatureBuilder1 & fb) { switch (fb.GetGeomType()) { @@ -294,7 +318,7 @@ public: MergedFeatureBuilder1 * p = m_typesCorrector(fb); if (p) m_merger(p); - return; + return false; } case feature::GEOM_AREA: { @@ -302,17 +326,56 @@ public: // Added approx 4Mb of data to the World.mwm auto const & geometry = fb.GetOuterGeometry(); if (GetPolygonArea(geometry.begin(), geometry.end()) < 0.01) - return; + return false; } default: break; } if (feature::PreprocessForWorldMap(fb)) + { m_worldBucket.PushSure(fb); + return true; + } + + return false; } void DoMerge() { m_merger.DoMerge(m_worldBucket); } + +private: + // Returns true when fb is a named feature having an "attractions" category type and a + // popularity in m_popularPlaces of at least kPopularityThreshold. + bool IsPopularAttraction(FeatureBuilder1 const & fb) const + { + if (fb.GetName().empty()) + return false; + + // Static: the category type list is built once, not once per processed feature. + static auto const attractionTypes = + search::GetCategoryTypes("attractions", "en", GetDefaultCategories()); + ASSERT(std::is_sorted(attractionTypes.begin(), 
attractionTypes.end()), ()); + auto const & featureTypes = fb.GetTypes(); + // No capture: attractionTypes has static storage duration. + if (!std::any_of(featureTypes.begin(), featureTypes.end(), [](uint32_t t) { + return std::binary_search(attractionTypes.begin(), attractionTypes.end(), t); + })) + { + return false; + } + + auto const it = m_popularPlaces.find(fb.GetMostGenericOsmId()); + if (it == m_popularPlaces.end()) + return false; + + // todo(@t.yan): adjust + uint8_t const kPopularityThreshold = 40; + if (it->second < kPopularityThreshold) + return false; + + // todo(@t.yan): maybe check place has wikipedia link. + return true; + } }; template diff --git a/tools/unix/generate_planet.sh b/tools/unix/generate_planet.sh index ab6dbcdc86..d1c00f5f37 100755 --- a/tools/unix/generate_planet.sh +++ b/tools/unix/generate_planet.sh @@ -466,6 +466,8 @@ if [ "$MODE" == "features" ]; then [ -f "$BOOKING_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --booking_data=$BOOKING_FILE" [ -f "$OPENTABLE_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --opentable_data=$OPENTABLE_FILE" [ -f "$VIATOR_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --viator_data=$VIATOR_FILE" + # Add popular_places_data to add popular attractions to World.mwm and World.mwm search index. 
+ # [ -f "$POPULAR_PLACES_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --popular_places_data=$POPULAR_PLACES_FILE" "$GENERATOR_TOOL" --intermediate_data_path="$INTDIR/" \ --node_storage=$NODE_STORAGE \ --osm_file_type=o5m \ @@ -514,7 +516,7 @@ if [ "$MODE" == "mwm" ]; then PARAMS_WITH_SEARCH="$PARAMS --generate_search_index --cities_boundaries_data=$CITIES_BOUNDARIES_DATA --make_city_roads" [ -n "${SRTM_PATH-}" -a -d "${SRTM_PATH-}" ] && PARAMS_WITH_SEARCH="$PARAMS_WITH_SEARCH --srtm_path=$SRTM_PATH" [ -f "$UGC_FILE" ] && PARAMS_WITH_SEARCH="$PARAMS_WITH_SEARCH --ugc_data=$UGC_FILE" - [ -f "$POPULAR_PLACES_FILE" ] && PARAMS_WITH_SEARCH="$PARAMS_WITH_SEARCH --popular_places_data=$POPULAR_PLACES_FILE" + [ -f "$POPULAR_PLACES_FILE" ] && PARAMS_WITH_SEARCH="$PARAMS_WITH_SEARCH --popular_places_data=$POPULAR_PLACES_FILE --generate_popular_places" for file in "$INTDIR"/tmp/*.mwm.tmp; do if [[ "$file" != *minsk-pass* && "$file" != *World* ]]; then BASENAME="$(basename "$file" .mwm.tmp)"