From d368af33a371abadf74a1f183f7b667943145d19 Mon Sep 17 00:00:00 2001
From: Maxim Pimenov
Date: Fri, 3 Aug 2018 17:21:57 +0300
Subject: [PATCH] [geocoder] A command line interface.

---

Notes:
    Summary of what the hunks below do:
      * Adds geocoder/geocoder_cli, a gflags-based executable. With
        --queries_path it processes every non-empty (trimmed) line of that
        file as a query; otherwise it reads queries interactively from stdin
        until EOF or "q" / ":q" / "quit". --top limits how many results are
        printed per query (-1 prints all).
      * Moves add_subdirectory(geocoder) in the root CMakeLists.txt to the
        if (PLATFORM_DESKTOP) section.
      * Adds ParsingStats::m_numLoaded, incremented for each indexed entry and
        logged as "Entries indexed:".
      * Labels the certainty in DebugPrint(Result) and drops the unreachable
        "break;" after each "return" in ToString(Type).

    NOTE(review): this copy was rebuilt from a whitespace-collapsed text in
    which angle-bracketed tokens were missing. The <...> include names,
    vector<Result>, static_cast<size_t> and all context-line indentation are
    reconstructions -- confirm against the upstream commit, or apply with
    `git apply --ignore-whitespace`.

 CMakeLists.txt                          |  2 +-
 generator/generator_tool/CMakeLists.txt |  2 +-
 geocoder/CMakeLists.txt                 |  1 +
 geocoder/geocoder_cli/CMakeLists.txt    | 23 +++++++
 geocoder/geocoder_cli/geocoder_cli.cpp  | 89 +++++++++++++++++++++++++
 geocoder/hierarchy.cpp                  |  2 +
 geocoder/hierarchy.hpp                  |  3 +
 geocoder/result.cpp                     |  2 +-
 geocoder/types.cpp                      | 16 ++---
 9 files changed, 129 insertions(+), 11 deletions(-)
 create mode 100644 geocoder/geocoder_cli/CMakeLists.txt
 create mode 100644 geocoder/geocoder_cli/geocoder_cli.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c592c983f7..1097920e4e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -272,7 +272,6 @@ add_subdirectory(drape)
 add_subdirectory(drape_frontend)
 add_subdirectory(editor)
 add_subdirectory(generator/mwm_diff)
-add_subdirectory(geocoder)
 add_subdirectory(geometry)
 add_subdirectory(indexer)
 add_subdirectory(kml)
@@ -299,6 +298,7 @@ if (PLATFORM_DESKTOP)
   endif()
   add_subdirectory(feature_list)
   add_subdirectory(generator)
+  add_subdirectory(geocoder)
   add_subdirectory(openlr)
   add_subdirectory(track_analyzing)
   if (NOT SKIP_DESKTOP)
diff --git a/generator/generator_tool/CMakeLists.txt b/generator/generator_tool/CMakeLists.txt
index a06c28ba4c..6d90ba299f 100644
--- a/generator/generator_tool/CMakeLists.txt
+++ b/generator/generator_tool/CMakeLists.txt
@@ -1,6 +1,6 @@
 project(generator_tool)
 
-include_directories(${OMIM_PATH}/3party/gflags/src)
+include_directories(${OMIM_ROOT}/3party/gflags/src)
 
 set(SRC generator_tool.cpp)
 
diff --git a/geocoder/CMakeLists.txt b/geocoder/CMakeLists.txt
index 7b1c8bf69d..88065b006c 100644
--- a/geocoder/CMakeLists.txt
+++ b/geocoder/CMakeLists.txt
@@ -16,4 +16,5 @@ set(
 
 omim_add_library(${PROJECT_NAME} ${SRC})
 
+add_subdirectory(geocoder_cli)
 omim_add_test_subdirectory(geocoder_tests)
diff --git a/geocoder/geocoder_cli/CMakeLists.txt b/geocoder/geocoder_cli/CMakeLists.txt
new file mode 100644
index 0000000000..1080e7d468
--- /dev/null
+++ b/geocoder/geocoder_cli/CMakeLists.txt
@@ -0,0 +1,23 @@
+project(geocoder_cli)
+
+include_directories(${OMIM_ROOT}/3party/gflags/src)
+
+set(
+  SRC
+  geocoder_cli.cpp
+)
+
+omim_add_executable(${PROJECT_NAME} ${SRC})
+
+omim_link_libraries(
+  ${PROJECT_NAME}
+  geocoder
+  indexer
+  platform
+  coding
+  base
+  stats_client
+  jansson
+  ${LIBZ}
+  gflags
+)
diff --git a/geocoder/geocoder_cli/geocoder_cli.cpp b/geocoder/geocoder_cli/geocoder_cli.cpp
new file mode 100644
index 0000000000..629d830cfe
--- /dev/null
+++ b/geocoder/geocoder_cli/geocoder_cli.cpp
@@ -0,0 +1,89 @@
+#include "geocoder/geocoder.hpp"
+#include "geocoder/result.hpp"
+
+#include "base/string_utils.hpp"
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "3party/gflags/src/gflags/gflags.h"
+
+using namespace geocoder;
+using namespace std;
+
+DEFINE_string(hierarchy_path, "", "Path to the hierarchy file for the geocoder");
+DEFINE_string(queries_path, "", "Path to the file with queries");
+DEFINE_int32(top, 5, "Number of top results to show for every query, -1 to show all results");
+
+void PrintResults(vector<Result> const & results)
+{
+  cout << "Found results: " << results.size() << endl;
+  if (results.empty())
+    return;
+  cout << "Top results:" << endl;
+  for (size_t i = 0; i < results.size(); ++i)
+  {
+    if (FLAGS_top >= 0 && i >= FLAGS_top)
+      break;
+    cout << " " << DebugPrint(results[i]) << endl;
+  }
+}
+
+void ProcessQueriesFromFile(string const & path)
+{
+  ifstream stream(path.c_str());
+  CHECK(stream.is_open(), ("Can't open", path));
+
+  Geocoder geocoder(FLAGS_hierarchy_path);
+
+  vector<Result> results;
+  string s;
+  while (getline(stream, s))
+  {
+    strings::Trim(s);
+    if (s.empty())
+      continue;
+
+    cout << s << endl;
+    geocoder.ProcessQuery(s, results);
+    PrintResults(results);
+    cout << endl;
+  }
+}
+
+void ProcessQueriesFromCommandLine()
+{
+  Geocoder geocoder(FLAGS_hierarchy_path);
+
+  string query;
+  vector<Result> results;
+  while (true)
+  {
+    cout << "> ";
+    if (!getline(cin, query))
+      break;
+    if (query == "q" || query == ":q" || query == "quit")
+      break;
+    geocoder.ProcessQuery(query, results);
+    PrintResults(results);
+  }
+}
+
+int main(int argc, char * argv[])
+{
+  ios_base::sync_with_stdio(false);
+
+  google::SetUsageMessage("Geocoder command line interface.");
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  if (!FLAGS_queries_path.empty())
+  {
+    ProcessQueriesFromFile(FLAGS_queries_path);
+    return 0;
+  }
+
+  ProcessQueriesFromCommandLine();
+  return 0;
+}
diff --git a/geocoder/hierarchy.cpp b/geocoder/hierarchy.cpp
index 4fbc8a2172..b3def3cf72 100644
--- a/geocoder/hierarchy.cpp
+++ b/geocoder/hierarchy.cpp
@@ -118,12 +118,14 @@ Hierarchy::Hierarchy(string const & pathToJsonHierarchy)
     // todo(@m) Index it by name too.
     if (entry.m_type != Type::Count)
     {
+      ++stats.m_numLoaded;
       size_t const t = static_cast<size_t>(entry.m_type);
       m_entries[entry.m_address[t]].emplace_back(entry);
     }
   }
 
   LOG(LINFO, ("Finished reading the hierarchy. Stats:"));
+  LOG(LINFO, ("Entries indexed:", stats.m_numLoaded));
   LOG(LINFO, ("Corrupted json lines:", stats.m_badJsons));
   LOG(LINFO, ("Unreadable base::GeoObjectIds:", stats.m_badOsmIds));
   LOG(LINFO, ("Entries with duplicate address parts:", stats.m_duplicateAddresses));
diff --git a/geocoder/hierarchy.hpp b/geocoder/hierarchy.hpp
index f9f45605ff..77b66f7793 100644
--- a/geocoder/hierarchy.hpp
+++ b/geocoder/hierarchy.hpp
@@ -22,6 +22,9 @@ class Hierarchy
 public:
   struct ParsingStats
   {
+    // Number of entries that the hierarchy was constructed from.
+    uint64_t m_numLoaded = 0;
+
     // Number of corrupted json lines.
     uint64_t m_badJsons = 0;
 
diff --git a/geocoder/result.cpp b/geocoder/result.cpp
index fc649754c3..842bcfc8e3 100644
--- a/geocoder/result.cpp
+++ b/geocoder/result.cpp
@@ -9,7 +9,7 @@ namespace geocoder
 string DebugPrint(Result const & result)
 {
   ostringstream oss;
-  oss << DebugPrint(result.m_osmId) << " " << result.m_certainty;
+  oss << DebugPrint(result.m_osmId) << " certainty=" << result.m_certainty;
   return oss.str();
 }
 }  // namespace geocoder
diff --git a/geocoder/types.cpp b/geocoder/types.cpp
index 5c5733b49e..cd61053f04 100644
--- a/geocoder/types.cpp
+++ b/geocoder/types.cpp
@@ -10,14 +10,14 @@ string ToString(Type type)
 {
   switch (type)
   {
-  case Type::Country: return "country"; break;
-  case Type::Region: return "region"; break;
-  case Type::Subregion: return "subregion"; break;
-  case Type::Locality: return "locality"; break;
-  case Type::Sublocality: return "sublocality"; break;
-  case Type::Suburb: return "suburb"; break;
-  case Type::Building: return "building"; break;
-  case Type::Count: return "count"; break;
+  case Type::Country: return "country";
+  case Type::Region: return "region";
+  case Type::Subregion: return "subregion";
+  case Type::Locality: return "locality";
+  case Type::Sublocality: return "sublocality";
+  case Type::Suburb: return "suburb";
+  case Type::Building: return "building";
+  case Type::Count: return "count";
   }
   CHECK_SWITCH();
 }