From 1453a18f1bbea5450c4b270fcb3075a7db1cb979 Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Mon, 23 Oct 2017 16:49:30 +0300 Subject: [PATCH] [search][generator] Fixed cities boundaries generation. --- base/base_tests/clustering_map_tests.cpp | 98 +++++++++++++++++++ base/clustering_map.hpp | 50 ++++++++++ generator/cities_boundaries_builder.cpp | 90 +++++++++++++++++ generator/cities_boundaries_builder.hpp | 3 + generator/generator_tool/generator_tool.cpp | 31 ++++-- map/framework.cpp | 3 + search/cities_boundaries_table.cpp | 3 + search/engine.cpp | 2 - search/engine.hpp | 7 +- search/processor.cpp | 8 +- .../test_search_engine.hpp | 2 +- tools/unix/generate_planet.sh | 18 +++- 12 files changed, 294 insertions(+), 21 deletions(-) diff --git a/base/base_tests/clustering_map_tests.cpp b/base/base_tests/clustering_map_tests.cpp index 7203a4fcc4..e1f9332492 100644 --- a/base/base_tests/clustering_map_tests.cpp +++ b/base/base_tests/clustering_map_tests.cpp @@ -3,6 +3,7 @@ #include "base/clustering_map.hpp" #include +#include #include #include #include @@ -23,6 +24,42 @@ template > class ClusteringMapAdapter { public: + struct Cluster + { + Cluster(Key const & key, Value const & value): m_keys({key}), m_values({value}) {} + + Cluster(vector const & keys, vector const & values) : m_keys(keys), m_values(values) + { + sort(m_keys.begin(), m_keys.end()); + sort(m_values.begin(), m_values.end()); + } + + bool operator<(Cluster const & rhs) const + { + if (m_keys != rhs.m_keys) + return m_keys < rhs.m_keys; + return m_values < rhs.m_values; + } + + bool operator==(Cluster const & rhs) const + { + return m_keys == rhs.m_keys && m_values == rhs.m_values; + } + + friend string DebugPrint(Cluster const & cluster) + { + ostringstream os; + os << "Cluster ["; + os << "keys: " << DebugPrint(cluster.m_keys) << ", "; + os << "values: " << DebugPrint(cluster.m_values); + os << "]"; + return os.str(); + } + + vector m_keys; + vector m_values; + }; + template void Append(Key const & key, V && value) { @@ -33,6 +70,17 @@ public: vector Get(Key const & key) { return Sort(m_m.Get(key)); } + vector Clusters() + { + vector clusters; + + m_m.ForEachCluster([&](vector const & keys, vector const & values) { + clusters.emplace_back(keys, values); + }); + sort(clusters.begin(), clusters.end()); + return clusters; + } + private: ClusteringMap m_m; }; @@ -81,4 +129,54 @@ UNIT_TEST(ClusteringMap_Smoke) TEST_EQUAL(m.Get(5), vector({"alpha", "beta", "gamma"}), ()); } } + +UNIT_TEST(ClusteringMap_ForEach) +{ + using Map = ClusteringMapAdapter; + using Cluster = Map::Cluster; + + { + Map m; + auto const clusters = m.Clusters(); + TEST(clusters.empty(), (clusters)); + } + + { + Map m; + m.Append(0, "Hello"); + m.Append(1, "World!"); + m.Append(2, "alpha"); + m.Append(3, "beta"); + m.Append(4, "gamma"); + + { + vector const expected = {{Cluster{0, "Hello"}, Cluster{1, "World!"}, + Cluster{2, "alpha"}, Cluster{3, "beta"}, + Cluster{4, "gamma"}}}; + TEST_EQUAL(expected, m.Clusters(), ()); + } + + m.Union(0, 1); + { + vector const expected = {{Cluster{{0, 1}, {"Hello", "World!"}}, Cluster{2, "alpha"}, + Cluster{3, "beta"}, Cluster{4, "gamma"}}}; + TEST_EQUAL(expected, m.Clusters(), ()); + } + + m.Union(2, 3); + m.Union(3, 4); + { + vector const expected = { + {Cluster{{0, 1}, {"Hello", "World!"}}, Cluster{{2, 3, 4}, {"alpha", "beta", "gamma"}}}}; + TEST_EQUAL(expected, m.Clusters(), ()); + } + + m.Union(0, 3); + { + vector const expected = { + {Cluster{{0, 1, 2, 3, 4}, {"Hello", "World!", "alpha", "beta", "gamma"}}}}; + TEST_EQUAL(expected, m.Clusters(), ()); + } + } +} } // namespace diff --git a/base/clustering_map.hpp b/base/clustering_map.hpp index 5df48da0dc..f6ff9db60c 100644 --- a/base/clustering_map.hpp +++ b/base/clustering_map.hpp @@ -24,6 +24,8 @@ public: // * m is the total number of values in the map // * α() is the inverse Ackermann function // * F is the complexity of find() in unordered_map + // + // Also, it's assumed that complexity to compare two keys is O(1). // Appends |value| to the list of values in the cluster // corresponding to |key|. @@ -61,6 +63,54 @@ public: return entry.m_values; } + // Complexity: O(n * log(n)). + template + void ForEachCluster(Fn && fn) + { + struct EntryWithKey : public std::less + { + EntryWithKey(Entry const * entry, Key const & key) : m_entry(entry), m_key(key) {} + + bool operator<(EntryWithKey const & rhs) const + { + return std::less::operator()(m_entry, rhs.m_entry); + } + + Entry const * m_entry; + Key m_key; + }; + + std::vector eks; + for (auto const & kv : m_table) + { + auto const & key = kv.first; + auto const & entry = GetRoot(key); + eks.emplace_back(&entry, key); + } + std::sort(eks.begin(), eks.end()); + + std::equal_to equal; + size_t i = 0; + while (i < eks.size()) + { + std::vector keys; + keys.push_back(eks[i].m_key); + + size_t j = i + 1; + while (j < eks.size() && equal(eks[i].m_entry, eks[j].m_entry)) + { + keys.push_back(eks[j].m_key); + ++j; + } + + fn(keys, eks[i].m_entry->m_values); + + i = j; + } + } + + void Clear() { m_table.clear(); } + private: struct Entry { diff --git a/generator/cities_boundaries_builder.cpp b/generator/cities_boundaries_builder.cpp index d60f73a9fe..6d93cedf9a 100644 --- a/generator/cities_boundaries_builder.cpp +++ b/generator/cities_boundaries_builder.cpp @@ -15,6 +15,10 @@ #include "platform/local_country_file.hpp" +#include "coding/file_reader.hpp" +#include "coding/file_writer.hpp" +#include "coding/reader.hpp" + #include "base/assert.hpp" #include "base/checked_cast.hpp" #include "base/logging.hpp" @@ -136,4 +140,90 @@ bool BuildCitiesBoundariesForTesting(string const & dataPath, TestIdToBoundaries return my::make_unique(move(mapping)); }); } + +bool SerializeBoundariesTable(std::string const & path, OsmIdToBoundariesTable & table) +{ + vector> allIds; + vector> allBoundaries; + table.ForEachCluster([&](vector const & ids, vector const & boundaries) { + allIds.push_back(ids); + allBoundaries.push_back(boundaries); + }); + + CHECK_EQUAL(allIds.size(), allBoundaries.size(), ()); + + try + { + FileWriter sink(path); + + indexer::CitiesBoundariesSerDes::Serialize(sink, allBoundaries); + + for (auto const & ids : allIds) + { + WriteToSink(sink, static_cast(ids.size())); + for (auto const & id : ids) + WriteToSink(sink, id.EncodedId()); + } + + return true; + } + catch (Writer::Exception const & e) + { + LOG(LERROR, ("Can't serialize boundaries table:", e.what())); + return false; + } +} + +bool DeserializeBoundariesTable(std::string const & path, OsmIdToBoundariesTable & table) +{ + vector> allIds; + vector> allBoundaries; + + try + { + FileReader reader(path); + NonOwningReaderSource source(reader); + + double precision; + indexer::CitiesBoundariesSerDes::Deserialize(source, allBoundaries, precision); + + auto const n = allBoundaries.size(); + allIds.resize(n); + + for (auto & ids : allIds) + { + auto const m = ReadPrimitiveFromSource(source); + ids.resize(m); + CHECK(m != 0, ()); + + for (auto & id : ids) + { + auto const encodedId = ReadPrimitiveFromSource(source); + id = osm::Id(encodedId); + } + } + } + catch (Reader::Exception const & e) + { + LOG(LERROR, ("Can't deserialize boundaries table:", e.what())); + return false; + } + + CHECK_EQUAL(allBoundaries.size(), allIds.size(), ()); + table.Clear(); + for (size_t i = 0; i < allBoundaries.size(); ++i) + { + auto const & ids = allIds[i]; + CHECK(!ids.empty(), ()); + auto const & id = ids.front(); + + for (auto const & b : allBoundaries[i]) + table.Append(id, b); + + for (size_t j = 1; j < ids.size(); ++j) + table.Union(id, ids[j]); + } + + return true; +} } // namespace generator diff --git a/generator/cities_boundaries_builder.hpp b/generator/cities_boundaries_builder.hpp index 6b1b013478..0abd7eb18a 100644 --- a/generator/cities_boundaries_builder.hpp +++ b/generator/cities_boundaries_builder.hpp @@ -17,4 +17,7 @@ using TestIdToBoundariesTable = base::ClusteringMap(); + genInfo.m_boundariesTable = make_shared(); genInfo.m_versionDate = static_cast(FLAGS_planet_version); @@ -224,6 +226,17 @@ int main(int argc, char ** argv) genInfo.m_bucketNames.push_back(WORLD_FILE_NAME); genInfo.m_bucketNames.push_back(WORLD_COASTS_FILE_NAME); } + + if (FLAGS_dump_cities_boundaries) + { + CHECK(!FLAGS_cities_boundaries_path.empty(), ()); + LOG(LINFO, ("Dumping cities boundaries to", FLAGS_cities_boundaries_path)); + if (!generator::SerializeBoundariesTable(FLAGS_cities_boundaries_path, + *genInfo.m_boundariesTable)) + { + LOG(LCRITICAL, ("Error serializing boundaries table to", FLAGS_cities_boundaries_path)); + } + } } else { @@ -295,13 +308,13 @@ int main(int argc, char ** argv) if (FLAGS_generate_cities_boundaries) { + CHECK(!FLAGS_cities_boundaries_path.empty(), ()); LOG(LINFO, ("Generating cities boundaries for", datFile)); - CHECK(genInfo.m_boundariesTable, ()); - if (!generator::BuildCitiesBoundaries(datFile, osmToFeatureFilename, - *genInfo.m_boundariesTable)) - { + generator::OsmIdToBoundariesTable table; + if (!generator::DeserializeBoundariesTable(FLAGS_cities_boundaries_path, table)) + LOG(LCRITICAL, ("Error deserializing boundaries table")); + if (!generator::BuildCitiesBoundaries(datFile, osmToFeatureFilename, table)) LOG(LCRITICAL, ("Error generating cities boundaries.")); - } } if (!FLAGS_srtm_path.empty()) diff --git a/map/framework.cpp b/map/framework.cpp index 18043ae75b..2207c8ee9f 100644 --- a/map/framework.cpp +++ b/map/framework.cpp @@ -455,6 +455,9 @@ Framework::Framework(FrameworkParams const & params) RegisterAllMaps(); LOG(LDEBUG, ("Maps initialized")); + // Need to reload cities boundaries because maps in indexer were updated. + m_searchEngine->LoadCitiesBoundaries(); + // Init storage with needed callback. m_storage.Init( bind(&Framework::OnCountryFileDownloaded, this, _1, _2), diff --git a/search/cities_boundaries_table.cpp b/search/cities_boundaries_table.cpp index 97ef366bb5..bd1cff62e3 100644 --- a/search/cities_boundaries_table.cpp +++ b/search/cities_boundaries_table.cpp @@ -34,7 +34,10 @@ bool CitiesBoundariesTable::Load() { auto handle = FindWorld(m_index); if (!handle.IsAlive()) + { + LOG(LWARNING, ("Can't find World map file.")); return false; + } // Skip if table was already loaded from this file. if (handle.GetId() == m_mwmId) diff --git a/search/engine.cpp b/search/engine.cpp index 99ac9f380c..9e8bce1d26 100644 --- a/search/engine.cpp +++ b/search/engine.cpp @@ -110,8 +110,6 @@ Engine::Engine(Index & index, CategoriesHolder const & categories, m_threads.reserve(params.m_numThreads); for (size_t i = 0; i < params.m_numThreads; ++i) m_threads.emplace_back(&Engine::MainLoop, this, ref(m_contexts[i])); - - LoadCitiesBoundaries(); } Engine::~Engine() diff --git a/search/engine.hpp b/search/engine.hpp index 779db4f474..5d16a2c4dc 100644 --- a/search/engine.hpp +++ b/search/engine.hpp @@ -106,9 +106,8 @@ public: // Posts request to clear caches to the queue. void ClearCaches(); - // Posts request to reload cities boundaries tables. Must be used - // for testing only. - void LoadCitiesBoundariesForTesting() { return LoadCitiesBoundaries(); } + // Posts request to reload cities boundaries tables. + void LoadCitiesBoundaries(); private: struct Message @@ -144,8 +143,6 @@ private: unique_ptr m_processor; }; - void LoadCitiesBoundaries(); - // *ALL* following methods are executed on the m_threads threads. // This method executes tasks from a common pool (|tasks|) in a FIFO diff --git a/search/processor.cpp b/search/processor.cpp index 2f4d1a78ec..da192a371f 100644 --- a/search/processor.cpp +++ b/search/processor.cpp @@ -391,7 +391,13 @@ void Processor::SetViewportByIndex(m2::RectD const & viewport, size_t idx, bool void Processor::ClearCache(size_t ind) { m_viewport[ind].MakeEmpty(); } -void Processor::LoadCitiesBoundaries() { m_citiesBoundaries.Load(); } +void Processor::LoadCitiesBoundaries() +{ + if (m_citiesBoundaries.Load()) + LOG(LINFO, ("Loaded cities boundaries")); + else + LOG(LWARNING, ("Can't load cities boundaries")); +} Locales Processor::GetCategoryLocales() const { diff --git a/search/search_tests_support/test_search_engine.hpp b/search/search_tests_support/test_search_engine.hpp index 58955383bd..c67c252001 100644 --- a/search/search_tests_support/test_search_engine.hpp +++ b/search/search_tests_support/test_search_engine.hpp @@ -33,7 +33,7 @@ public: void SetLocale(string const & locale) { m_engine.SetLocale(locale); } - void LoadCitiesBoundaries() { m_engine.LoadCitiesBoundariesForTesting(); } + void LoadCitiesBoundaries() { m_engine.LoadCitiesBoundaries(); } weak_ptr Search(search::SearchParams const & params, m2::RectD const & viewport); diff --git a/tools/unix/generate_planet.sh b/tools/unix/generate_planet.sh index d91e3cac13..c4651315dd 100755 --- a/tools/unix/generate_planet.sh +++ b/tools/unix/generate_planet.sh @@ -183,6 +183,7 @@ OPENTABLE_SCRIPT="$PYTHON_SCRIPTS_PATH/opentable_restaurants.py" OPENTABLE_FILE="${OPENTABLE_FILE:-$INTDIR/restaurants.csv}" VIATOR_SCRIPT="$PYTHON_SCRIPTS_PATH/viator_cities.py" VIATOR_FILE="${VIATOR_FILE:-$INTDIR/viator.csv}" +CITIES_BOUNDARIES_FILE="${CITIES_BOUNDARIES_FILE:-$INTDIR/cities_boundaries}" TESTING_SCRIPT="$SCRIPTS_PATH/test_planet.sh" PYTHON="$(which python2.7)" MWM_VERSION_FORMAT="%s" @@ -460,8 +461,15 @@ if [ "$MODE" == "features" ]; then [ -f "$BOOKING_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --booking_data=$BOOKING_FILE" [ -f "$OPENTABLE_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --opentable_data=$OPENTABLE_FILE" [ -f "$VIATOR_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --viator_data=$VIATOR_FILE" - "$GENERATOR_TOOL" --intermediate_data_path="$INTDIR/" --node_storage=$NODE_STORAGE --osm_file_type=o5m --osm_file_name="$PLANET" \ - --data_path="$TARGET" --user_resource_path="$DATA_PATH/" $PARAMS_SPLIT 2>> "$PLANET_LOG" + "$GENERATOR_TOOL" --intermediate_data_path="$INTDIR/" \ + --node_storage=$NODE_STORAGE \ + --osm_file_type=o5m \ + --osm_file_name="$PLANET" \ + --data_path="$TARGET" \ + --user_resource_path="$DATA_PATH/" \ + --dump_cities_boundaries \ + --cities_boundaries_path="$CITIES_BOUNDARIES_FILE" \ + $PARAMS_SPLIT 2>> "$PLANET_LOG" MODE=mwm fi @@ -487,7 +495,11 @@ if [ "$MODE" == "mwm" ]; then if [ -n "$OPT_WORLD" ]; then ( "$GENERATOR_TOOL" $PARAMS --output=World 2>> "$LOG_PATH/World.log" - "$GENERATOR_TOOL" --data_path="$TARGET" --user_resource_path="$DATA_PATH/" -generate_search_index --output=World 2>> "$LOG_PATH/World.log" + "$GENERATOR_TOOL" --data_path="$TARGET" --user_resource_path="$DATA_PATH/" \ + --generate_search_index \ + --generate_cities_boundaries \ + --cities_boundaries_path="$CITIES_BOUNDARIES_FILE" \ + --output=World 2>> "$LOG_PATH/World.log" ) & "$GENERATOR_TOOL" $PARAMS --output=WorldCoasts 2>> "$LOG_PATH/WorldCoasts.log" & fi