[search][generator] Add popular places to world

This commit is contained in:
tatiana-yan 2018-08-29 18:59:51 +03:00 committed by mpimenov
parent c8c4622c71
commit be0c445239
6 changed files with 89 additions and 19 deletions

View file

@ -49,6 +49,8 @@ struct GenerateInfo
std::string m_opentableDatafileName;
std::string m_viatorDatafileName;
std::string m_popularPlacesFilename;
std::shared_ptr<generator::OsmIdToBoundariesTable> m_boundariesTable;
uint32_t m_versionDate = 0;

View file

@ -139,8 +139,13 @@ DEFINE_string(booking_data, "", "Path to booking data in .tsv format.");
DEFINE_string(opentable_data, "", "Path to opentable data in .tsv format.");
DEFINE_string(viator_data, "", "Path to viator data in .tsv format.");
DEFINE_string(ugc_data, "", "Input UGC source database file name");
DEFINE_string(popular_places_data, "", "Input Popular Places source file name");
DEFINE_string(ugc_data, "", "Input UGC source database file name.");
DEFINE_bool(generate_popular_places, false, "Generate popular places section.");
DEFINE_string(popular_places_data, "",
"Input Popular Places source file name. Needed both for World intermediate features "
"generation (2nd pass for World) and popular places section generation (5th pass for "
"countries).");
// Printing stuff.
DEFINE_bool(calc_statistics, false, "Calculate feature statistics for specified mwm bucket files.");
@ -211,6 +216,7 @@ int main(int argc, char ** argv)
genInfo.m_bookingDatafileName = FLAGS_booking_data;
genInfo.m_opentableDatafileName = FLAGS_opentable_data;
genInfo.m_viatorDatafileName = FLAGS_viator_data;
genInfo.m_popularPlacesFilename = FLAGS_popular_places_data;
genInfo.m_boundariesTable = make_shared<generator::OsmIdToBoundariesTable>();
genInfo.m_versionDate = static_cast<uint32_t>(FLAGS_planet_version);
@ -514,9 +520,10 @@ int main(int argc, char ** argv)
}
}
if (!FLAGS_popular_places_data.empty())
if (FLAGS_generate_popular_places)
{
if (!BuildPopularPlacesMwmSection(FLAGS_popular_places_data, datFile, osmToFeatureFilename))
if (!BuildPopularPlacesMwmSection(genInfo.m_popularPlacesFilename, datFile,
osmToFeatureFilename))
{
LOG(LCRITICAL, ("Error generating popular places mwm section."));
}

View file

@ -12,20 +12,15 @@
#include "indexer/ftraits.hpp"
#include "indexer/rank_table.hpp"
#include "base/geo_object_id.hpp"
#include "base/string_utils.hpp"
#include <cstdint>
#include <limits>
#include <unordered_map>
#include <utility>
#include <vector>
namespace
namespace generator
{
using PopularityIndex = uint8_t;
using PopularPlaces = std::unordered_map<base::GeoObjectId, PopularityIndex>;
void LoadPopularPlaces(std::string const & srcFilename, PopularPlaces & places)
{
coding::CSVReader reader;
@ -68,10 +63,7 @@ void LoadPopularPlaces(std::string const & srcFilename, PopularPlaces & places)
}
});
}
} // namespace
namespace generator
{
bool BuildPopularPlacesMwmSection(std::string const & srcFilename, std::string const & mwmFile,
std::string const & osmToFeatureFilename)
{

View file

@ -1,9 +1,17 @@
#pragma once

#include "base/geo_object_id.hpp"

#include <cstdint>
#include <string>
#include <unordered_map>

namespace generator
{
// Popularity value of a single place, stored as an unsigned 8-bit integer.
using PopularityIndex = uint8_t;
// Maps an object id to the popularity value of that object.
using PopularPlaces = std::unordered_map<base::GeoObjectId, PopularityIndex>;

// Fills |places| from the popular places source file |srcFilename|.
// NOTE(review): how rows are parsed and how duplicates or malformed rows are handled is defined
// in the .cpp file; confirm there before relying on it.
void LoadPopularPlaces(std::string const & srcFilename, PopularPlaces & places);

// Generates the popular places section for |mwmFile| using the popular places source file
// |srcFilename| and |osmToFeatureFilename|.
// Returns false on failure; the generator tool reports that as a critical error.
// NOTE(review): |osmToFeatureFilename| presumably names the osm id to feature id mapping file;
// confirm against the implementation.
bool BuildPopularPlacesMwmSection(std::string const & srcFilename, std::string const & mwmFile,
                                  std::string const & osmToFeatureFilename);
}  // namespace generator

View file

@ -2,6 +2,9 @@
#include "generator/feature_merger.hpp"
#include "generator/generate_info.hpp"
#include "generator/popular_places_section_builder.hpp"
#include "search/utils.hpp"
#include "indexer/classificator.hpp"
#include "indexer/scales.hpp"
@ -14,6 +17,7 @@
#include "base/logging.hpp"
#include <algorithm>
#include <cstdint>
#include <map>
#include <sstream>
@ -246,6 +250,7 @@ class WorldMapGenerator
FeatureTypesProcessor m_typesCorrector;
FeatureMergeProcessor m_merger;
WaterBoundaryChecker m_boundaryChecker;
generator::PopularPlaces m_popularPlaces;
public:
explicit WorldMapGenerator(feature::GenerateInfo const & info)
@ -264,18 +269,37 @@ public:
char const * arr2[] = {"boundary", "administrative", "4", "state"};
m_typesCorrector.SetDontNormalizeType(arr2);
if (!info.m_popularPlacesFilename.empty())
generator::LoadPopularPlaces(info.m_popularPlacesFilename, m_popularPlaces);
else
LOG(LWARNING, ("popular_places_data option not set. Popular atractions will not be added to World.mwm"));
}
void operator()(FeatureBuilder1 fb)
{
if (!m_worldBucket.NeedPushToWorld(fb))
auto const isPopularAttraction = IsPopularAttraction(fb);
if (!m_worldBucket.NeedPushToWorld(fb) && !isPopularAttraction)
return;
m_worldBucket.CalcStatistics(fb);
if (!m_boundaryChecker.IsBoundaries(fb))
{
PushFeature(fb);
// Save original feature iff it is a popular attraction before PushFeature(fb) modifies fb.
auto originalFeature = isPopularAttraction ? fb : FeatureBuilder1();
if (PushFeature(fb) || !isPopularAttraction)
return;
// We push GEOM_POINT with all the same tags, names and center instead of GEOM_WAY/GEOM_AREA
// because we do not need geometry for attractions (just search index and placepage data)
// and want to avoid size checks applied to areas.
if (originalFeature.GetGeomType() != feature::GEOM_POINT)
originalFeature.SetCenter(originalFeature.GetGeometryCenter());
m_worldBucket.PushSure(originalFeature);
return;
}
@ -285,7 +309,7 @@ public:
PushFeature(f);
}
void PushFeature(FeatureBuilder1 & fb)
bool PushFeature(FeatureBuilder1 & fb)
{
switch (fb.GetGeomType())
{
@ -294,7 +318,7 @@ public:
MergedFeatureBuilder1 * p = m_typesCorrector(fb);
if (p)
m_merger(p);
return;
return false;
}
case feature::GEOM_AREA:
{
@ -302,17 +326,52 @@ public:
// Added approx 4Mb of data to the World.mwm
auto const & geometry = fb.GetOuterGeometry();
if (GetPolygonArea(geometry.begin(), geometry.end()) < 0.01)
return;
return false;
}
default:
break;
}
if (feature::PreprocessForWorldMap(fb))
{
m_worldBucket.PushSure(fb);
return true;
}
return false;
}
void DoMerge() { m_merger.DoMerge(m_worldBucket); }
private:
// Returns true iff |fb| is treated as a popular attraction: it has a non-empty name, at least one
// of its types belongs to the English "attractions" search category, and its popularity stored in
// |m_popularPlaces| is not below the threshold.
bool IsPopularAttraction(FeatureBuilder1 const & fb) const
{
  if (fb.GetName().empty())
    return false;

  // The category type list does not depend on |fb|, so it is built (and checked for sortedness)
  // only once instead of once per processed feature. Initialization of a function-local static
  // is thread-safe.
  static auto const attractionTypes = [] {
    auto types = search::GetCategoryTypes("attractions", "en", GetDefaultCategories());
    // std::binary_search below requires a sorted range.
    ASSERT(std::is_sorted(types.begin(), types.end()), ());
    return types;
  }();

  // A variable with static storage duration is used inside the lambda without being captured.
  auto const & featureTypes = fb.GetTypes();
  if (!std::any_of(featureTypes.begin(), featureTypes.end(), [](uint32_t t) {
        return std::binary_search(attractionTypes.begin(), attractionTypes.end(), t);
      }))
  {
    return false;
  }

  auto const it = m_popularPlaces.find(fb.GetMostGenericOsmId());
  if (it == m_popularPlaces.end())
    return false;

  // todo(@t.yan): adjust
  // Places with a popularity value below this threshold are not considered popular.
  generator::PopularityIndex const kPopularityThreshold = 40;
  if (it->second < kPopularityThreshold)
    return false;

  // todo(@t.yan): maybe check place has wikipedia link.
  return true;
}
};
template <class FeatureOut>

View file

@ -466,6 +466,8 @@ if [ "$MODE" == "features" ]; then
[ -f "$BOOKING_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --booking_data=$BOOKING_FILE"
[ -f "$OPENTABLE_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --opentable_data=$OPENTABLE_FILE"
[ -f "$VIATOR_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --viator_data=$VIATOR_FILE"
# Uncomment the line below to pass --popular_places_data, which adds popular attractions to World.mwm and its search index.
# [ -f "$POPULAR_PLACES_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --popular_places_data=$POPULAR_PLACES_FILE"
"$GENERATOR_TOOL" --intermediate_data_path="$INTDIR/" \
--node_storage=$NODE_STORAGE \
--osm_file_type=o5m \
@ -514,7 +516,7 @@ if [ "$MODE" == "mwm" ]; then
PARAMS_WITH_SEARCH="$PARAMS --generate_search_index --cities_boundaries_data=$CITIES_BOUNDARIES_DATA --make_city_roads"
[ -n "${SRTM_PATH-}" -a -d "${SRTM_PATH-}" ] && PARAMS_WITH_SEARCH="$PARAMS_WITH_SEARCH --srtm_path=$SRTM_PATH"
[ -f "$UGC_FILE" ] && PARAMS_WITH_SEARCH="$PARAMS_WITH_SEARCH --ugc_data=$UGC_FILE"
[ -f "$POPULAR_PLACES_FILE" ] && PARAMS_WITH_SEARCH="$PARAMS_WITH_SEARCH --popular_places_data=$POPULAR_PLACES_FILE"
[ -f "$POPULAR_PLACES_FILE" ] && PARAMS_WITH_SEARCH="$PARAMS_WITH_SEARCH --popular_places_data=$POPULAR_PLACES_FILE --generate_popular_places"
for file in "$INTDIR"/tmp/*.mwm.tmp; do
if [[ "$file" != *minsk-pass* && "$file" != *World* ]]; then
BASENAME="$(basename "$file" .mwm.tmp)"