diff --git a/CMakeLists.txt b/CMakeLists.txt index bc60227582..ed9ed4c73b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -324,7 +324,6 @@ if (PLATFORM_DESKTOP) endif() add_subdirectory(feature_list) add_subdirectory(generator) - add_subdirectory(geocoder) add_subdirectory(openlr) add_subdirectory(track_analyzing) add_subdirectory(track_generator) diff --git a/geocoder/CMakeLists.txt b/geocoder/CMakeLists.txt deleted file mode 100644 index 277d94629d..0000000000 --- a/geocoder/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -project(geocoder) - -include_directories(${OMIM_ROOT}/3party/jansson/src) - -set( - SRC - geocoder.cpp - geocoder.hpp - hierarchy.cpp - hierarchy.hpp - hierarchy_reader.cpp - hierarchy_reader.hpp - index.cpp - index.hpp - name_dictionary.cpp - name_dictionary.hpp - result.cpp - result.hpp - types.cpp - types.hpp -) - -omim_add_library(${PROJECT_NAME} ${SRC}) - -add_subdirectory(geocoder_cli) -omim_add_test_subdirectory(geocoder_tests) diff --git a/geocoder/geocoder.cpp b/geocoder/geocoder.cpp deleted file mode 100644 index 4d6c675e7a..0000000000 --- a/geocoder/geocoder.cpp +++ /dev/null @@ -1,469 +0,0 @@ -#include "geocoder/geocoder.hpp" - -#include "geocoder/hierarchy_reader.hpp" - -#include "search/house_numbers_matcher.hpp" - -#include "indexer/search_string_utils.hpp" - -#include "base/assert.hpp" -#include "base/logging.hpp" -#include "base/scope_guard.hpp" -#include "base/stl_helpers.hpp" -#include "base/string_utils.hpp" -#include "base/timer.hpp" - -#include -#include -#include -#include -#include - -#include - -using namespace std; - -namespace -{ -size_t const kMaxResults = 100; - -// While Result's |m_certainty| is deliberately vaguely defined, -// current implementation is a log-prob type measure of our belief -// that the labeling of tokens is correct, provided the labeling is -// possible with respect to the IsParentTo relation on entries. -// In other words, non-scaled post-probabilities are -// log(Prob(Country|token)) ~ 10 -// log(Prob(Region|token)) ~ 5 -// etc. -// The greater their sum, the more likely it is that we guessed the -// token types right. -// -// The reasoning is as follows. A naïve weighing would look how many query tokens -// are covered with the current parse and assign this fraction to certainty. -// Turns out, it works badly since a single matched long street in the query -// (i.e., wrong city, wrong region, wrong locality, correct street) can shadow a more -// relevant result (correct city, correct locality, wrong street) in the case where -// the database does not contain an exact match. So let's make some parts of the -// query heavier (heuristically). This turns out to work more predictable. -double GetWeight(geocoder::Type t) -{ - switch (t) - { - case geocoder::Type::Country: return 10.0; - case geocoder::Type::Region: return 5.0; - case geocoder::Type::Subregion: return 4.0; - case geocoder::Type::Locality: return 3.0; - case geocoder::Type::Suburb: return 3.0; - case geocoder::Type::Sublocality: return 2.0; - case geocoder::Type::Street: return 1.0; - case geocoder::Type::Building: return 0.1; - case geocoder::Type::Count: return 0.0; - } - UNREACHABLE(); -} - -// todo(@m) This is taken from search/geocoder.hpp. Refactor. -struct ScopedMarkTokens -{ - using Type = geocoder::Type; - - // The range is [l, r). - ScopedMarkTokens(geocoder::Geocoder::Context & context, Type type, size_t l, size_t r) - : m_context(context), m_type(type), m_l(l), m_r(r) - { - CHECK_LESS_OR_EQUAL(l, r, ()); - CHECK_LESS_OR_EQUAL(r, context.GetNumTokens(), ()); - - for (size_t i = m_l; i < m_r; ++i) - m_context.MarkToken(i, m_type); - } - - ~ScopedMarkTokens() - { - for (size_t i = m_l; i < m_r; ++i) - m_context.MarkToken(i, Type::Count); - } - - geocoder::Geocoder::Context & m_context; - Type const m_type; - size_t m_l; - size_t m_r; -}; - -geocoder::Type NextType(geocoder::Type type) -{ - CHECK_NOT_EQUAL(type, geocoder::Type::Count, ()); - auto t = static_cast(type); - return static_cast(t + 1); -} - -strings::UniString MakeHouseNumber(geocoder::Tokens const & tokens) -{ - return strings::MakeUniString(strings::JoinStrings(tokens, " ")); -} -} // namespace - -namespace geocoder -{ -// Geocoder::Context ------------------------------------------------------------------------------- -Geocoder::Context::Context(string const & query) : m_beam(kMaxResults) -{ - search::NormalizeAndTokenizeAsUtf8(query, m_tokens); - m_tokenTypes.assign(m_tokens.size(), Type::Count); - m_numUsedTokens = 0; -} - -vector & Geocoder::Context::GetTokenTypes() { return m_tokenTypes; } - -size_t Geocoder::Context::GetNumTokens() const { return m_tokens.size(); } - -size_t Geocoder::Context::GetNumUsedTokens() const -{ - CHECK_LESS_OR_EQUAL(m_numUsedTokens, m_tokens.size(), ()); - return m_numUsedTokens; -} - -Type Geocoder::Context::GetTokenType(size_t id) const -{ - CHECK_LESS(id, m_tokenTypes.size(), ()); - return m_tokenTypes[id]; -} - -string const & Geocoder::Context::GetToken(size_t id) const -{ - CHECK_LESS(id, m_tokens.size(), ()); - return m_tokens[id]; -} - -void Geocoder::Context::MarkToken(size_t id, Type type) -{ - CHECK_LESS(id, m_tokens.size(), ()); - bool wasUsed = m_tokenTypes[id] != Type::Count; - m_tokenTypes[id] = type; - bool nowUsed = m_tokenTypes[id] != Type::Count; - - if (wasUsed && !nowUsed) - --m_numUsedTokens; - if (!wasUsed && nowUsed) - ++m_numUsedTokens; -} - -bool Geocoder::Context::IsTokenUsed(size_t id) const -{ - CHECK_LESS(id, m_tokens.size(), ()); - return m_tokenTypes[id] != Type::Count; -} - -bool Geocoder::Context::AllTokensUsed() const { return m_numUsedTokens == m_tokens.size(); } - -void Geocoder::Context::AddResult(base::GeoObjectId const & osmId, double certainty, Type type, - vector const & tokenIds, vector const & allTypes) -{ - m_beam.Add(BeamKey(osmId, type, tokenIds, allTypes), certainty); -} - -void Geocoder::Context::FillResults(vector & results) const -{ - results.clear(); - results.reserve(m_beam.GetEntries().size()); - - set seen; - bool const hasPotentialHouseNumber = !m_houseNumberPositionsInQuery.empty(); - for (auto const & e : m_beam.GetEntries()) - { - if (!seen.insert(e.m_key.m_osmId).second) - continue; - - if (hasPotentialHouseNumber && !IsGoodForPotentialHouseNumberAt(e.m_key, m_houseNumberPositionsInQuery)) - continue; - - results.emplace_back(e.m_key.m_osmId, e.m_value /* certainty */); - } - - if (!results.empty()) - { - auto const by = results.front().m_certainty; - for (auto & r : results) - { - r.m_certainty /= by; - ASSERT_GREATER_OR_EQUAL(r.m_certainty, 0.0, ()); - ASSERT_LESS_OR_EQUAL(r.m_certainty, 1.0, ()); - } - } - - ASSERT(is_sorted(results.rbegin(), results.rend(), base::LessBy(&Result::m_certainty)), ()); - ASSERT_LESS_OR_EQUAL(results.size(), kMaxResults, ()); -} - -vector & Geocoder::Context::GetLayers() { return m_layers; } - -vector const & Geocoder::Context::GetLayers() const { return m_layers; } - -void Geocoder::Context::MarkHouseNumberPositionsInQuery(vector const & tokenIds) -{ - m_houseNumberPositionsInQuery.insert(tokenIds.begin(), tokenIds.end()); -} - -bool Geocoder::Context::IsGoodForPotentialHouseNumberAt(BeamKey const & beamKey, - set const & tokenIds) const -{ - if (beamKey.m_tokenIds.size() == m_tokens.size()) - return true; - - if (IsBuildingWithAddress(beamKey)) - return true; - - // Pass street, locality or region with number in query address parts. - if (HasLocalityOrRegion(beamKey) && ContainsTokenIds(beamKey, tokenIds)) - return true; - - return false; -} - -bool Geocoder::Context::IsBuildingWithAddress(BeamKey const & beamKey) const -{ - if (beamKey.m_type != Type::Building) - return false; - - bool gotLocality = false; - bool gotStreet = false; - bool gotBuilding = false; - for (Type t : beamKey.m_allTypes) - { - if (t == Type::Region || t == Type::Subregion || t == Type::Locality) - gotLocality = true; - if (t == Type::Street) - gotStreet = true; - if (t == Type::Building) - gotBuilding = true; - } - return gotLocality && gotStreet && gotBuilding; -} - -bool Geocoder::Context::HasLocalityOrRegion(BeamKey const & beamKey) const -{ - for (Type t : beamKey.m_allTypes) - { - if (t == Type::Region || t == Type::Subregion || t == Type::Locality) - return true; - } - - return false; -} - -bool Geocoder::Context::ContainsTokenIds(BeamKey const & beamKey, set const & needTokenIds) const -{ - auto const & keyTokenIds = beamKey.m_tokenIds; - return base::Includes(keyTokenIds.begin(), keyTokenIds.end(), needTokenIds.begin(), needTokenIds.end()); -} - -// Geocoder ---------------------------------------------------------------------------------------- -Geocoder::Geocoder(string const & pathToJsonHierarchy, unsigned int loadThreadsCount) - : Geocoder{HierarchyReader{pathToJsonHierarchy}.Read(loadThreadsCount), loadThreadsCount} -{ -} - -Geocoder::Geocoder(istream & jsonHierarchy, unsigned int loadThreadsCount) - : Geocoder{HierarchyReader{jsonHierarchy}.Read(loadThreadsCount), loadThreadsCount} -{ -} - -Geocoder::Geocoder(Hierarchy && hierarchy, unsigned int loadThreadsCount) - : m_hierarchy(move(hierarchy)), m_index(m_hierarchy, loadThreadsCount) -{ -} - -void Geocoder::ProcessQuery(string const & query, vector & results) const -{ -#if defined(DEBUG) - base::Timer timer; - SCOPE_GUARD(printDuration, [&timer]() { - LOG(LINFO, ("Total geocoding time:", timer.ElapsedSeconds(), "seconds")); - }); -#endif - - Context ctx(query); - Go(ctx, Type::Country); - ctx.FillResults(results); -} - -Hierarchy const & Geocoder::GetHierarchy() const { return m_hierarchy; } - -Index const & Geocoder::GetIndex() const { return m_index; } - -void Geocoder::Go(Context & ctx, Type type) const -{ - if (ctx.GetNumTokens() == 0) - return; - - if (ctx.AllTokensUsed()) - return; - - if (type == Type::Count) - return; - - Tokens subquery; - vector subqueryTokenIds; - for (size_t i = 0; i < ctx.GetNumTokens(); ++i) - { - subquery.clear(); - subqueryTokenIds.clear(); - for (size_t j = i; j < ctx.GetNumTokens(); ++j) - { - if (ctx.IsTokenUsed(j)) - break; - - subquery.push_back(ctx.GetToken(j)); - subqueryTokenIds.push_back(j); - - Layer curLayer; - curLayer.m_type = type; - - // Buildings are indexed separately. - if (type == Type::Building) - { - FillBuildingsLayer(ctx, subquery, subqueryTokenIds, curLayer); - } - else - { - FillRegularLayer(ctx, type, subquery, curLayer); - } - - if (curLayer.m_entries.empty()) - continue; - - ScopedMarkTokens mark(ctx, type, i, j + 1); - boost::optional streetSynonymMark; - - double certainty = 0; - vector tokenIds; - vector allTypes; - for (size_t tokId = 0; tokId < ctx.GetNumTokens(); ++tokId) - { - auto const t = ctx.GetTokenType(tokId); - if (type == Type::Street && t == Type::Count && !streetSynonymMark) - { - if (search::IsStreetSynonym(strings::MakeUniString(ctx.GetToken(tokId)))) - streetSynonymMark.emplace(ctx, Type::Street, tokId, tokId + 1); - } - - certainty += GetWeight(t); - if (t != Type::Count) - { - tokenIds.push_back(tokId); - allTypes.push_back(t); - } - } - - for (auto const & docId : curLayer.m_entries) - ctx.AddResult(m_index.GetDoc(docId).m_osmId, certainty, type, tokenIds, allTypes); - - ctx.GetLayers().emplace_back(move(curLayer)); - SCOPE_GUARD(pop, [&] { ctx.GetLayers().pop_back(); }); - - Go(ctx, NextType(type)); - } - } - - Go(ctx, NextType(type)); -} - -void Geocoder::FillBuildingsLayer(Context & ctx, Tokens const & subquery, vector const & subqueryTokenIds, - Layer & curLayer) const -{ - if (ctx.GetLayers().empty()) - return; - - auto const & subqueryHN = MakeHouseNumber(subquery); - - if (!search::house_numbers::LooksLikeHouseNumber(subqueryHN, false /* isPrefix */)) - return; - - for_each(ctx.GetLayers().rbegin(), ctx.GetLayers().rend(), [&, this] (auto const & layer) { - if (layer.m_type != Type::Street && layer.m_type != Type::Locality) - return; - - // We've already filled a street/location layer and now see something that resembles - // a house number. While it still can be something else (a zip code, for example) - // let's stay on the safer side and mark the tokens as potential house number. - ctx.MarkHouseNumberPositionsInQuery(subqueryTokenIds); - - for (auto const & docId : layer.m_entries) - { - m_index.ForEachRelatedBuilding(docId, [&](Index::DocId const & buildingDocId) { - auto const & bld = m_index.GetDoc(buildingDocId); - auto const & multipleHN = bld.GetNormalizedMultipleNames( - Type::Building, m_hierarchy.GetNormalizedNameDictionary()); - auto const & realHN = multipleHN.GetMainName(); - auto const & realHNUniStr = strings::MakeUniString(realHN); - if (search::house_numbers::HouseNumbersMatch(realHNUniStr, subqueryHN, - false /* queryIsPrefix */)) - { - curLayer.m_entries.emplace_back(buildingDocId); - } - }); - } - }); -} - -void Geocoder::FillRegularLayer(Context const & ctx, Type type, Tokens const & subquery, - Layer & curLayer) const -{ - m_index.ForEachDocId(subquery, [&](Index::DocId const & docId) { - auto const & d = m_index.GetDoc(docId); - if (d.m_type != type) - return; - - if (ctx.GetLayers().empty() || HasParent(ctx.GetLayers(), d)) - { - if (type > Type::Locality && !IsRelevantLocalityMember(ctx, d, subquery)) - return; - - curLayer.m_entries.emplace_back(docId); - } - }); -} - -bool Geocoder::HasParent(vector const & layers, Hierarchy::Entry const & e) const -{ - CHECK(!layers.empty(), ()); - auto const & layer = layers.back(); - for (auto const & docId : layer.m_entries) - { - // Note that the relationship is somewhat inverted: every ancestor - // is stored in the address but the nodes have no information - // about their children. - if (m_hierarchy.IsParentTo(m_index.GetDoc(docId), e)) - return true; - } - return false; -} - -bool Geocoder::IsRelevantLocalityMember(Context const & ctx, Hierarchy::Entry const & member, - Tokens const & subquery) const -{ - auto const isNumeric = subquery.size() == 1 && strings::IsASCIINumeric(subquery.front()); - return !isNumeric || HasMemberLocalityInMatching(ctx, member); -} - -bool Geocoder::HasMemberLocalityInMatching(Context const & ctx, Hierarchy::Entry const & member) const -{ - for (auto const & layer : ctx.GetLayers()) - { - auto const layerType = layer.m_type; - if (layerType > Type::Locality) - break; - if (layerType != Type::Locality) - continue; - - for (auto const docId : layer.m_entries) - { - auto const & matchedEntry = m_index.GetDoc(docId); - if (m_hierarchy.IsParentTo(matchedEntry, member)) - return true; - } - } - - return false; -} -} // namespace geocoder diff --git a/geocoder/geocoder.hpp b/geocoder/geocoder.hpp deleted file mode 100644 index 007246881f..0000000000 --- a/geocoder/geocoder.hpp +++ /dev/null @@ -1,159 +0,0 @@ -#pragma once - -#include "geocoder/hierarchy.hpp" -#include "geocoder/index.hpp" -#include "geocoder/result.hpp" -#include "geocoder/types.hpp" - -#include "base/beam.hpp" -#include "base/geo_object_id.hpp" -#include "base/stl_helpers.hpp" -#include "base/string_utils.hpp" - -#include -#include -#include -#include -#include -#include -#include - -namespace geocoder -{ -// This class performs geocoding by using the data that we are currently unable -// to distribute to mobile devices. Therefore, the class is intended to be used -// on the server side. -// On the other hand, the design is largely experimental and when the dust -// settles we may reuse some parts of it in the offline mobile application. -// In this case, a partial merge with search/ and in particular with -// search/geocoder.hpp is possible. -// -// Geocoder receives a search query and returns the osm ids of the features -// that match it. Currently, the only data source for the geocoder is -// the hierarchy of features, that is, for every feature that can be found -// the geocoder expects to have the total information about this feature -// in the region subdivision graph (e.g., country, city, street that contain a -// certain house). This hierarchy is to be obtained elsewhere. -// -// Note that search index, locality index, scale index, and, generally, mwm -// features are currently not used at all. -class Geocoder -{ -public: - // A Layer contains all entries matched by a subquery of consecutive tokens. - struct Layer - { - Type m_type = Type::Count; - std::vector m_entries; - }; - - // This class is very similar to the one we use in search/. - // See search/geocoder_context.hpp. - class Context - { - public: - struct BeamKey - { - BeamKey(base::GeoObjectId osmId, Type type, std::vector const & tokenIds, - std::vector const & allTypes) - : m_osmId(osmId) - , m_type(type) - , m_tokenIds{tokenIds} - , m_allTypes(allTypes) - { - base::SortUnique(m_allTypes); - } - - base::GeoObjectId m_osmId; - Type m_type; - std::vector m_tokenIds; - std::vector m_allTypes; - }; - - Context(std::string const & query); - - void Clear(); - - std::vector & GetTokenTypes(); - size_t GetNumTokens() const; - size_t GetNumUsedTokens() const; - - Type GetTokenType(size_t id) const; - - std::string const & GetToken(size_t id) const; - - void MarkToken(size_t id, Type type); - - // Returns true if |token| is marked as used. - bool IsTokenUsed(size_t id) const; - - // Returns true iff all tokens are used. - bool AllTokensUsed() const; - - void AddResult(base::GeoObjectId const & osmId, double certainty, Type type, - std::vector const & tokenIds, std::vector const & allTypes); - - void FillResults(std::vector & results) const; - - std::vector & GetLayers(); - - std::vector const & GetLayers() const; - - void MarkHouseNumberPositionsInQuery(std::vector const & tokenIds); - - private: - bool IsGoodForPotentialHouseNumberAt(BeamKey const & beamKey, std::set const & tokenIds) const; - bool IsBuildingWithAddress(BeamKey const & beamKey) const; - bool HasLocalityOrRegion(BeamKey const & beamKey) const; - bool ContainsTokenIds(BeamKey const & beamKey, std::set const & needTokenIds) const; - - Tokens m_tokens; - std::vector m_tokenTypes; - - size_t m_numUsedTokens = 0; - - // |m_houseNumberPositionsInQuery| has indexes of query tokens which are placed on - // context-dependent positions of house number. - // The rationale is that we must only emit buildings in this case - // and implement a fallback to a more powerful geocoder if we - // could not find a building. - std::set m_houseNumberPositionsInQuery; - - // The highest value of certainty for a fixed amount of - // the most relevant retrieved osm ids. - base::Beam m_beam; - - std::vector m_layers; - }; - - explicit Geocoder(std::string const & pathToJsonHierarchy, unsigned int loadThreadsCount = 1); - explicit Geocoder(std::istream & jsonHierarchy, unsigned int loadThreadsCount = 1); - - void ProcessQuery(std::string const & query, std::vector & results) const; - - Hierarchy const & GetHierarchy() const; - - Index const & GetIndex() const; - -private: - explicit Geocoder(Hierarchy && hierarchy, unsigned int loadThreadsCount); - - void Go(Context & ctx, Type type) const; - - void FillBuildingsLayer(Context & ctx, Tokens const & subquery, std::vector const & subqueryTokenIds, - Layer & curLayer) const; - void FillRegularLayer(Context const & ctx, Type type, Tokens const & subquery, - Layer & curLayer) const; - - // Returns whether any of the paths through |layers| can be extended - // by appending |e|. - bool HasParent(std::vector const & layers, Hierarchy::Entry const & e) const; - bool IsRelevantLocalityMember(Context const & ctx, Hierarchy::Entry const & member, - Tokens const & subquery) const; - bool HasMemberLocalityInMatching(Context const & ctx, Hierarchy::Entry const & member) const; - - Hierarchy m_hierarchy; - - Index m_index; -}; -} // namespace geocoder diff --git a/geocoder/geocoder_cli/CMakeLists.txt b/geocoder/geocoder_cli/CMakeLists.txt deleted file mode 100644 index d2e5655ab6..0000000000 --- a/geocoder/geocoder_cli/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -project(geocoder_cli) - -include_directories(${OMIM_ROOT}/3party/gflags/src) - -set( - SRC - geocoder_cli.cpp -) - -omim_add_executable(${PROJECT_NAME} ${SRC}) - -omim_link_libraries( - ${PROJECT_NAME} - geocoder - search - indexer - platform - coding - base - stats_client - jansson - ${LIBZ} - gflags -) diff --git a/geocoder/geocoder_cli/geocoder_cli.cpp b/geocoder/geocoder_cli/geocoder_cli.cpp deleted file mode 100644 index b8ac491472..0000000000 --- a/geocoder/geocoder_cli/geocoder_cli.cpp +++ /dev/null @@ -1,109 +0,0 @@ -#include "geocoder/geocoder.hpp" -#include "geocoder/result.hpp" - -#include "base/internal/message.hpp" -#include "base/string_utils.hpp" - -#include -#include -#include -#include - -#include "3party/gflags/src/gflags/gflags.h" - -using namespace geocoder; -using namespace std; - -DEFINE_string(hierarchy_path, "", "Path to the hierarchy file for the geocoder"); -DEFINE_string(queries_path, "", "Path to the file with queries"); -DEFINE_int32(top, 5, "Number of top results to show for every query, -1 to show all results"); - -void PrintResults(Hierarchy const & hierarchy, vector const & results) -{ - cout << "Found results: " << results.size() << endl; - if (results.empty()) - return; - cout << "Top results:" << endl; - - auto const & dictionary = hierarchy.GetNormalizedNameDictionary(); - for (size_t i = 0; i < results.size(); ++i) - { - if (FLAGS_top >= 0 && static_cast(i) >= FLAGS_top) - break; - cout << " " << DebugPrint(results[i]); - if (auto const && e = hierarchy.GetEntryForOsmId(results[i].m_osmId)) - { - cout << " ["; - auto const * delimiter = ""; - for (size_t i = 0; i < static_cast(Type::Count); ++i) - { - if (e->m_normalizedAddress[i] != NameDictionary::kUnspecifiedPosition) - { - auto type = static_cast(i); - auto multipleNames = e->GetNormalizedMultipleNames(type, dictionary); - cout << delimiter << ToString(type) << ": " << multipleNames.GetMainName(); - delimiter = ", "; - } - } - cout << "]"; - } - cout << endl; - } -} - -void ProcessQueriesFromFile(string const & path) -{ - ifstream stream(path.c_str()); - CHECK(stream.is_open(), ("Can't open", path)); - - Geocoder geocoder(FLAGS_hierarchy_path); - - vector results; - string s; - while (getline(stream, s)) - { - strings::Trim(s); - if (s.empty()) - continue; - - cout << s << endl; - geocoder.ProcessQuery(s, results); - PrintResults(geocoder.GetHierarchy(), results); - cout << endl; - } -} - -void ProcessQueriesFromCommandLine() -{ - Geocoder geocoder(FLAGS_hierarchy_path); - - string query; - vector results; - while (true) - { - cout << "> "; - if (!getline(cin, query)) - break; - if (query == "q" || query == ":q" || query == "quit") - break; - geocoder.ProcessQuery(query, results); - PrintResults(geocoder.GetHierarchy(), results); - } -} - -int main(int argc, char * argv[]) -{ - ios_base::sync_with_stdio(false); - - google::SetUsageMessage("Geocoder command line interface."); - google::ParseCommandLineFlags(&argc, &argv, true); - - if (!FLAGS_queries_path.empty()) - { - ProcessQueriesFromFile(FLAGS_queries_path); - return 0; - } - - ProcessQueriesFromCommandLine(); - return 0; -} diff --git a/geocoder/geocoder_tests/CMakeLists.txt b/geocoder/geocoder_tests/CMakeLists.txt deleted file mode 100644 index a59bd7e1ac..0000000000 --- a/geocoder/geocoder_tests/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -project(geocoder_tests) - -set( - SRC - geocoder_tests.cpp -) - -omim_add_test(${PROJECT_NAME} ${SRC}) - -omim_link_libraries( - ${PROJECT_NAME} - platform_tests_support - geocoder - search - indexer - platform - coding - base - stats_client - jansson - ${LIBZ} -) - -link_qt5_core(${PROJECT_NAME}) diff --git a/geocoder/geocoder_tests/geocoder_tests.cpp b/geocoder/geocoder_tests/geocoder_tests.cpp deleted file mode 100644 index 863b665753..0000000000 --- a/geocoder/geocoder_tests/geocoder_tests.cpp +++ /dev/null @@ -1,354 +0,0 @@ -#include "testing/testing.hpp" - -#include "geocoder/geocoder.hpp" -#include "geocoder/hierarchy_reader.hpp" - -#include "indexer/search_string_utils.hpp" - -#include "platform/platform_tests_support/scoped_file.hpp" - -#include "base/geo_object_id.hpp" -#include "base/math.hpp" -#include "base/stl_helpers.hpp" - -#include -#include -#include -#include - -using namespace platform::tests_support; -using namespace std; - -namespace -{ -using Id = base::GeoObjectId; - -double const kCertaintyEps = 1e-6; -string const kRegionsData = R"#( -C00000000004B279 {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-80.1142033187951, 21.55511095]}, "properties": {"locales": {"default": {"name": "Cuba", "address": {"country": "Cuba"}}}, "rank": 2}} -C0000000001C4CA7 {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-78.7260117405499, 21.74300205]}, "properties": {"locales": {"default": {"name": "Ciego de Ávila", "address": {"region": "Ciego de Ávila", "country": "Cuba"}}}, "rank": 4}} -C00000000059D6B5 {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-78.9263054493181, 22.08185765]}, "properties": {"locales": {"default": {"name": "Florencia", "address": {"subregion": "Florencia", "region": "Ciego de Ávila", "country": "Cuba"}}}, "rank": 6}} -)#"; -} // namespace - -namespace geocoder -{ -void TestGeocoder(Geocoder & geocoder, string const & query, vector && expected) -{ - vector actual; - geocoder.ProcessQuery(query, actual); - TEST_EQUAL(actual.size(), expected.size(), (query, actual, expected)); - sort(actual.begin(), actual.end(), base::LessBy(&Result::m_osmId)); - sort(expected.begin(), expected.end(), base::LessBy(&Result::m_osmId)); - for (size_t i = 0; i < actual.size(); ++i) - { - TEST(actual[i].m_certainty >= 0.0 && actual[i].m_certainty <= 1.0, - (query, actual[i].m_certainty)); - TEST_EQUAL(actual[i].m_osmId, expected[i].m_osmId, (query)); - TEST(base::AlmostEqualAbs(actual[i].m_certainty, expected[i].m_certainty, kCertaintyEps), - (query, actual[i].m_certainty, expected[i].m_certainty)); - } -} - -UNIT_TEST(Geocoder_Smoke) -{ - ScopedFile const regionsJsonFile("regions.jsonl", kRegionsData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - base::GeoObjectId const florenciaId(0xc00000000059d6b5); - base::GeoObjectId const cubaId(0xc00000000004b279); - - TestGeocoder(geocoder, "florencia", {{florenciaId, 1.0}}); - TestGeocoder(geocoder, "cuba florencia", {{florenciaId, 1.0}, {cubaId, 0.714286}}); - TestGeocoder(geocoder, "florencia somewhere in cuba", {{cubaId, 0.714286}, {florenciaId, 1.0}}); -} - -UNIT_TEST(Geocoder_Hierarchy) -{ - ScopedFile const regionsJsonFile("regions.jsonl", kRegionsData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - auto const & hierarchy = geocoder.GetHierarchy(); - auto const & dictionary = hierarchy.GetNormalizedNameDictionary(); - - vector entries; - geocoder.GetIndex().ForEachDocId({("florencia")}, [&](Index::DocId const & docId) { - entries.emplace_back(geocoder.GetIndex().GetDoc(docId)); - }); - - TEST_EQUAL(entries.size(), 1, ()); - TEST_EQUAL(entries[0].GetNormalizedMultipleNames(Type::Country, dictionary).GetMainName(), "cuba", - ()); - TEST_EQUAL(entries[0].GetNormalizedMultipleNames(Type::Region, dictionary).GetMainName(), - "ciego de avila", ()); - TEST_EQUAL(entries[0].GetNormalizedMultipleNames(Type::Subregion, dictionary).GetMainName(), - "florencia", ()); -} - -UNIT_TEST(Geocoder_EnglishNames) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}, "en": {"address": {"locality": "Moscow"}}}}} -11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица Новый Арбат"}}, "en": {"address": {"locality": "Moscow", "street": "New Arbat Avenue"}}}}} -)#"; - - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - TestGeocoder(geocoder, "Moscow, New Arbat", {{Id{0x11}, 1.0}, {Id{0x10}, 0.6}}); -} - -UNIT_TEST(Geocoder_OnlyBuildings) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"locality": "Some Locality"}}}}} - -21 {"properties": {"locales": {"default": {"address": {"street": "Good", "locality": "Some Locality"}}}}} -22 {"properties": {"locales": {"default": {"address": {"building": "5", "street": "Good", "locality": "Some Locality"}}}}} - -31 {"properties": {"locales": {"default": {"address": {"street": "Bad", "locality": "Some Locality"}}}}} -32 {"properties": {"locales": {"default": {"address": {"building": "10", "street": "Bad", "locality": "Some Locality"}}}}} - -40 {"properties": {"locales": {"default": {"address": {"street": "MaybeNumbered", "locality": "Some Locality"}}}}} -41 {"properties": {"locales": {"default": {"address": {"street": "MaybeNumbered-3", "locality": "Some Locality"}}}}} -42 {"properties": {"locales": {"default": {"address": {"building": "3", "street": "MaybeNumbered", "locality": "Some Locality"}}}}} -)#"; - - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - base::GeoObjectId const localityId(0x10); - base::GeoObjectId const goodStreetId(0x21); - base::GeoObjectId const badStreetId(0x31); - base::GeoObjectId const building5(0x22); - base::GeoObjectId const building10(0x32); - - TestGeocoder(geocoder, "some locality", {{localityId, 1.0}}); - TestGeocoder(geocoder, "some locality good", {{goodStreetId, 1.0}, {localityId, 0.857143}}); - TestGeocoder(geocoder, "some locality bad", {{badStreetId, 1.0}, {localityId, 0.857143}}); - - TestGeocoder(geocoder, "some locality good 5", {{building5, 1.0}}); - TestGeocoder(geocoder, "some locality bad 10", {{building10, 1.0}}); - - // There is a building "10" on Bad Street but we should not return it. - // Another possible resolution would be to return just "Good Street" (relaxed matching) - // but at the time of writing the goal is to either have an exact match or no match at all. - TestGeocoder(geocoder, "some locality good 10", {}); - - // Sometimes we may still emit a non-building. - // In this case it happens because all query tokens are used. - base::GeoObjectId const numberedStreet(0x41); - base::GeoObjectId const houseOnANonNumberedStreet(0x42); - TestGeocoder(geocoder, "some locality maybenumbered 3", - {{numberedStreet, 1.0}, {houseOnANonNumberedStreet, 0.8875}}); -} - -UNIT_TEST(Geocoder_MismatchedLocality) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"locality": "Moscow"}}}}} -11 {"properties": {"locales": {"default": {"address": {"locality": "Paris"}}}}} - -21 {"properties": {"locales": {"default": {"address": {"street": "Krymskaya", "locality": "Moscow"}}}}} -22 {"properties": {"locales": {"default": {"address": {"building": "2", "street": "Krymskaya", "locality": "Moscow"}}}}} - -31 {"properties": {"locales": {"default": {"address": {"street": "Krymskaya", "locality": "Paris"}}}}} -32 {"properties": {"locales": {"default": {"address": {"building": "3", "street": "Krymskaya", "locality": "Paris"}}}}} -)#"; - - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - base::GeoObjectId const building2(0x22); - - TestGeocoder(geocoder, "Moscow Krymskaya 2", {{building2, 1.0}}); - - // "Krymskaya 3" looks almost like a match to "Paris-Krymskaya-3" but we should not emit it. - TestGeocoder(geocoder, "Moscow Krymskaya 3", {}); -} - -// Geocoder_StreetWithNumber* ---------------------------------------------------------------------- -UNIT_TEST(Geocoder_StreetWithNumberInCity) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}} -11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}}} - -20 {"properties": {"locales": {"default": {"address": {"locality": "Краснокамск"}}}}} -28 {"properties": {"locales": {"default": {"address": {"locality": "Краснокамск", "street": "улица 1905 года"}}}}} -)#"; - - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - TestGeocoder(geocoder, "Москва, улица 1905 года", {{Id{0x11}, 1.0}}); -} - -UNIT_TEST(Geocoder_StreetWithNumberInClassifiedCity) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}} -11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}}} -)#"; - - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - TestGeocoder(geocoder, "город Москва, улица 1905 года", {{Id{0x11}, 1.0}}); -} - -UNIT_TEST(Geocoder_StreetWithNumberInAnyCity) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}} -11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}}} - -20 {"properties": {"locales": {"default": {"address": {"locality": "Краснокамск"}}}}} -28 {"properties": {"locales": {"default": {"address": {"locality": "Краснокамск", "street": "улица 1905 года"}}}}} -)#"; - - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - TestGeocoder(geocoder, "улица 1905 года", {{Id{0x11}, 1.0}, {Id{0x28}, 1.0}}); -} - -UNIT_TEST(Geocoder_StreetWithNumberAndWithoutStreetSynonym) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}} -11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}}} -)#"; - - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - TestGeocoder(geocoder, "Москва, 1905 года", {{Id{0x11}, 1.0}}); -} - -UNIT_TEST(Geocoder_UntypedStreetWithNumberAndStreetSynonym) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}} -13 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "8 Марта"}}}}} -)#"; - - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - TestGeocoder(geocoder, "Москва, улица 8 Марта", {{Id{0x13}, 1.0}}); -} - -UNIT_TEST(Geocoder_StreetWithTwoNumbers) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}} -12 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "4-я улица 8 Марта"}}}}} - -13 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 8 Марта"}}}}} -)#"; - - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - TestGeocoder(geocoder, "Москва, 4-я улица 8 Марта", {{Id{0x12}, 1.0}}); -} - -UNIT_TEST(Geocoder_BuildingOnStreetWithNumber) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"locality": "Москва"}}}}} -13 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 8 Марта"}}}}} -15 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "street": "улица 8 Марта", "building": "4"}}}}} -)#"; - - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - TestGeocoder(geocoder, "Москва, улица 8 Марта, 4", {{Id{0x15}, 1.0}}); -} - -//-------------------------------------------------------------------------------------------------- -UNIT_TEST(Geocoder_LocalityBuilding) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"locality": "Zelenograd"}}}}} -22 {"properties": {"locales": {"default": {"address": {"building": "2", "locality": "Zelenograd"}}}}} -31 {"properties": {"locales": {"default": {"address": {"street": "Krymskaya", "locality": "Zelenograd"}}}}} -32 {"properties": {"locales": {"default": {"address": {"building": "2", "street": "Krymskaya", "locality": "Zelenograd"}}}}} -)#"; - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - base::GeoObjectId const building2(0x22); - TestGeocoder(geocoder, "Zelenograd 2", {{building2, 1.0}}); -} - -// Geocoder_Subregion* ----------------------------------------------------------------------------- -UNIT_TEST(Geocoder_SubregionInLocality) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"region": "Москва"}}}, "rank": 2}} -11 {"properties": {"locales": {"default": {"address": {"locality": "Москва", "region": "Москва"}}}, "rank": 4}} -12 {"properties": {"locales": {"default": {"address": {"subregion": "Северный административный округ", "locality": "Москва", "region": "Москва"}}}, "rank": 3}} -)#"; - - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - TestGeocoder(geocoder, "Северный административный округ", {{Id{0x12}, 1.0}}); - TestGeocoder(geocoder, "Москва, Северный административный округ", - {{Id{0x12}, 1.0}, {Id{0x10}, 0.294118}, {Id{0x11}, 0.176471}}); - TestGeocoder(geocoder, "Москва", {{Id{0x10}, 1.0}, {Id{0x11}, 0.6}}); -} - -// Geocoder_NumericalSuburb* ---------------------------------------------------------------------- -UNIT_TEST(Geocoder_NumericalSuburbRelevance) -{ - string const kData = R"#( -10 {"properties": {"locales": {"default": {"address": {"region": "Metro Manila"}}}}} -11 {"properties": {"locales": {"default": {"address": {"locality": "Caloocan", "region": "Metro Manila"}}}}} -12 {"properties": {"locales": {"default": {"address": {"suburb": "60", "locality": "Caloocan", "region": "Metro Manila"}}}}} -20 {"properties": {"locales": {"default": {"address": {"locality": "Белгород"}}}}} -21 {"properties": {"locales": {"default": {"address": {"street": "Щорса", "locality": "Белгород"}}}}} -22 {"properties": {"locales": {"default": {"address": {"building": "60", "street": "Щорса", "locality": "Белгород"}}}}} -)#"; - - ScopedFile const regionsJsonFile("regions.jsonl", kData); - Geocoder geocoder(regionsJsonFile.GetFullPath()); - - TestGeocoder(geocoder, "Caloocan, 60", {{Id{0x12}, 1.0}}); - TestGeocoder(geocoder, "60", {}); - TestGeocoder(geocoder, "Metro Manila, 60", {{Id{0x10}, 1.0}}); - TestGeocoder(geocoder, "Белгород, Щорса, 60", {{Id{0x22}, 1.0}}); -} - -//-------------------------------------------------------------------------------------------------- -UNIT_TEST(Geocoder_EmptyFileConcurrentRead) -{ - ScopedFile const regionsJsonFile("regions.jsonl", ""); - Geocoder geocoder(regionsJsonFile.GetFullPath(), 8 /* reader threads */); - - TEST_EQUAL(geocoder.GetHierarchy().GetEntries().size(), 0, ()); -} - -UNIT_TEST(Geocoder_BigFileConcurrentRead) -{ - int const kEntryCount = 100000; - - stringstream s; - for (int i = 0; i < kEntryCount; ++i) - { - s << setw(16) << setfill('0') << hex << uppercase << i << " " - << "{" - << R"("type": "Feature",)" - << R"("geometry": {"type": "Point", "coordinates": [0, 0]},)" - << R"("properties": {"locales": {"default": {)" - << R"("name": ")" << i << R"(", "address": {"country": ")" << i << R"("}}}, "rank": 2})" - << "}\n"; - } - - ScopedFile const regionsJsonFile("regions.jsonl", s.str()); - Geocoder geocoder(regionsJsonFile.GetFullPath(), 8 /* reader threads */); - - TEST_EQUAL(geocoder.GetHierarchy().GetEntries().size(), kEntryCount, ()); -} -} // namespace geocoder diff --git a/geocoder/hierarchy.cpp b/geocoder/hierarchy.cpp deleted file mode 100644 index a67ff79b18..0000000000 --- a/geocoder/hierarchy.cpp +++ /dev/null @@ -1,237 +0,0 @@ -#include "geocoder/hierarchy.hpp" - -#include "indexer/search_string_utils.hpp" - -#include "base/exception.hpp" -#include "base/logging.hpp" -#include "base/macros.hpp" -#include "base/stl_helpers.hpp" -#include "base/string_utils.hpp" - -#include -#include - -using namespace std; - -namespace geocoder -{ -// Hierarchy::Entry -------------------------------------------------------------------------------- -bool Hierarchy::Entry::DeserializeFromJSON(string const & jsonStr, - NameDictionaryBuilder & normalizedNameDictionaryBuilder, - ParsingStats & stats) -{ - try - { - base::Json root(jsonStr.c_str()); - return DeserializeFromJSONImpl(root.get(), jsonStr, normalizedNameDictionaryBuilder, stats); - } - catch (base::Json::Exception const & e) - { - LOG(LDEBUG, ("Can't parse entry:", e.Msg(), jsonStr)); - } - return false; -} - -// todo(@m) Factor out to geojson.hpp? Add geojson to myjansson? -bool Hierarchy::Entry::DeserializeFromJSONImpl( - json_t * const root, string const & jsonStr, - NameDictionaryBuilder & normalizedNameDictionaryBuilder, ParsingStats & stats) -{ - if (!json_is_object(root)) - { - ++stats.m_badJsons; - MYTHROW(base::Json::Exception, ("Not a json object.")); - } - - if (!DeserializeAddressFromJSON(root, normalizedNameDictionaryBuilder, stats)) - return false; - - auto const defaultLocale = base::GetJSONObligatoryFieldByPath(root, "properties", "locales", - "default"); - FromJSONObjectOptionalField(defaultLocale, "name", m_name); - if (m_name.empty()) - ++stats.m_emptyNames; - - if (m_type == Type::Count) - { - LOG(LDEBUG, ("No address in an hierarchy entry:", jsonStr)); - ++stats.m_emptyAddresses; - } - return true; -} - -bool Hierarchy::Entry::DeserializeAddressFromJSON( - json_t * const root, NameDictionaryBuilder & normalizedNameDictionaryBuilder, - ParsingStats & stats) -{ - auto const properties = base::GetJSONObligatoryField(root, "properties"); - auto const locales = base::GetJSONObligatoryField(properties, "locales"); - m_normalizedAddress= {}; - for (size_t i = 0; i < static_cast(Type::Count); ++i) - { - Type const type = static_cast(i); - MultipleNames multipleNames; - if (!FetchAddressFieldNames(locales, type, multipleNames, normalizedNameDictionaryBuilder, - stats)) - { - return false; - } - - if (!multipleNames.GetMainName().empty()) - { - m_normalizedAddress[i] = normalizedNameDictionaryBuilder.Add(move(multipleNames)); - m_type = static_cast(i); - } - } - - if (auto const rank = FromJSONObjectOptional(properties, "rank")) - { - auto const type = RankToType(*rank); - if (type != Type::Count && - m_normalizedAddress[static_cast(type)] != NameDictionary::kUnspecifiedPosition) - { - m_type = type; - } - } - - auto const & subregion = m_normalizedAddress[static_cast(Type::Subregion)]; - auto const & locality = m_normalizedAddress[static_cast(Type::Locality)]; - if (m_type == Type::Street && locality == NameDictionary::kUnspecifiedPosition && - subregion == NameDictionary::kUnspecifiedPosition) - { - ++stats.m_noLocalityStreets; - return false; - } - if (m_type == Type::Building && locality == NameDictionary::kUnspecifiedPosition && - subregion == NameDictionary::kUnspecifiedPosition) - { - ++stats.m_noLocalityBuildings; - return false; - } - - return true; -} - -// static -bool Hierarchy::Entry::FetchAddressFieldNames( - json_t * const locales, Type type, MultipleNames & multipleNames, - NameDictionaryBuilder & normalizedNameDictionaryBuilder, ParsingStats & stats) -{ - char const * localeName = nullptr; - json_t * localisedNames = nullptr; - string const & levelKey = ToString(type); - Tokens tokens; - json_object_foreach(locales, localeName, localisedNames) - { - auto const address = base::GetJSONObligatoryField(localisedNames, "address"); - auto const levelJson = base::GetJSONOptionalField(address, levelKey); - if (!levelJson) - continue; - - if (base::JSONIsNull(levelJson)) - return false; - - string levelValue; - FromJSON(levelJson, levelValue); - if (levelValue.empty()) - continue; - - search::NormalizeAndTokenizeAsUtf8(levelValue, tokens); - if (tokens.empty()) - continue; - - auto normalizedValue = strings::JoinStrings(tokens, " "); - static std::string defaultLocale = "default"; - if (localeName == defaultLocale) - multipleNames.SetMainName(normalizedValue); - else - multipleNames.AddAltName(normalizedValue); - } - - return true; -} - -MultipleNames const & Hierarchy::Entry::GetNormalizedMultipleNames( - Type type, NameDictionary const & normalizedNameDictionary) const -{ - auto const & addressField = m_normalizedAddress[static_cast(type)]; - return normalizedNameDictionary.Get(addressField); -} - -// static -Type Hierarchy::Entry::RankToType(uint8_t rank) -{ - switch (rank) - { - case 1: - return Type::Country; - case 2: - return Type::Region; - case 3: - return Type::Subregion; - case 4: - return Type::Locality; - } - - return Type::Count; -} - -// Hierarchy --------------------------------------------------------------------------------------- -Hierarchy::Hierarchy(vector && entries, NameDictionary && normalizedNameDictionary) - : m_entries{move(entries)} - , m_normalizedNameDictionary{move(normalizedNameDictionary)} -{ - if (!is_sorted(m_entries.begin(), m_entries.end())) - { - LOG(LINFO, ("Sorting entries...")); - sort(m_entries.begin(), m_entries.end()); - } -} - -vector const & Hierarchy::GetEntries() const -{ - return m_entries; -} - -NameDictionary const & Hierarchy::GetNormalizedNameDictionary() const -{ - return m_normalizedNameDictionary; -} - -Hierarchy::Entry const * Hierarchy::GetEntryForOsmId(base::GeoObjectId const & osmId) const -{ - auto const cmp = [](Hierarchy::Entry const & e, base::GeoObjectId const & id) { - return e.m_osmId < id; - }; - - auto const it = lower_bound(m_entries.begin(), m_entries.end(), osmId, cmp); - - if (it == m_entries.end() || it->m_osmId != osmId) - return nullptr; - - return &(*it); -} - -bool Hierarchy::IsParentTo(Hierarchy::Entry const & entry, Hierarchy::Entry const & toEntry) const -{ - for (size_t i = 0; i < static_cast(geocoder::Type::Count); ++i) - { - if (entry.m_normalizedAddress[i] == NameDictionary::kUnspecifiedPosition) - continue; - - if (toEntry.m_normalizedAddress[i] == NameDictionary::kUnspecifiedPosition) - return false; - - auto const pos1 = entry.m_normalizedAddress[i]; - auto const pos2 = toEntry.m_normalizedAddress[i]; - if (pos1 == pos2) - continue; - - auto const & name1 = m_normalizedNameDictionary.Get(pos1).GetMainName(); - auto const & name2 = m_normalizedNameDictionary.Get(pos2).GetMainName(); - if (name1 != name2) - return false; - } - return true; -} -} // namespace geocoder diff --git a/geocoder/hierarchy.hpp b/geocoder/hierarchy.hpp deleted file mode 100644 index 02cfcdb487..0000000000 --- a/geocoder/hierarchy.hpp +++ /dev/null @@ -1,105 +0,0 @@ -#pragma once - -#include "geocoder/name_dictionary.hpp" -#include "geocoder/types.hpp" - -#include "base/geo_object_id.hpp" - -#include -#include -#include -#include -#include - -#include "3party/jansson/myjansson.hpp" - -namespace geocoder -{ -class Hierarchy -{ -public: - struct ParsingStats - { - // Number of entries that the hierarchy was constructed from. - uint64_t m_numLoaded = 0; - - // Number of corrupted json lines. - uint64_t m_badJsons = 0; - - // Number of entries with unreadable base::GeoObjectIds. - uint64_t m_badOsmIds = 0; - - // Number of base::GeoObjectsIds that occur as keys in at least two entries. - uint64_t m_duplicateOsmIds = 0; - - // Number of entries with duplicate subfields in the address field. - uint64_t m_duplicateAddresses = 0; - - // Number of entries whose address field either does - // not exist or consists of empty lines. - uint64_t m_emptyAddresses = 0; - - // Number of entries without the name field or with an empty one. - uint64_t m_emptyNames = 0; - - // Number of street entries without a locality name. - uint64_t m_noLocalityStreets = 0; - - // Number of building entries without a locality name. - uint64_t m_noLocalityBuildings = 0; - - // Number of entries whose names do not match the most - // specific parts of their addresses. - // This is expected from POIs but not from regions or streets. - uint64_t m_mismatchedNames = 0; - }; - - // A single entry in the hierarchy directed acyclic graph. - // Currently, this is more or less the "properties"-"address" - // part of the geojson entry. - struct Entry - { - bool DeserializeFromJSON(std::string const & jsonStr, - NameDictionaryBuilder & normalizedNameDictionaryBuilder, - ParsingStats & stats); - bool DeserializeFromJSONImpl(json_t * const root, std::string const & jsonStr, - NameDictionaryBuilder & normalizedNameDictionaryBuilder, - ParsingStats & stats); - bool DeserializeAddressFromJSON(json_t * const root, - NameDictionaryBuilder & normalizedNameDictionaryBuilder, - ParsingStats & stats); - static bool FetchAddressFieldNames(json_t * const locales, Type type, - MultipleNames & multipleNames, - NameDictionaryBuilder & normalizedNameDictionaryBuilder, - ParsingStats & stats); - // See generator::regions::LevelRegion::GetRank(). - static Type RankToType(uint8_t rank); - - MultipleNames const & GetNormalizedMultipleNames( - Type type, NameDictionary const & normalizedNameDictionary) const; - bool operator<(Entry const & rhs) const { return m_osmId < rhs.m_osmId; } - - base::GeoObjectId m_osmId = base::GeoObjectId(base::GeoObjectId::kInvalid); - - // Original name of the entry. Useful for debugging. - std::string m_name; - - Type m_type = Type::Count; - - // The positions of entry address fields in normalized name dictionary, one per Type. - std::array(Type::Count)> m_normalizedAddress{}; - }; - - explicit Hierarchy(std::vector && entries, NameDictionary && normalizeNameDictionary); - - std::vector const & GetEntries() const; - NameDictionary const & GetNormalizedNameDictionary() const; - - Entry const * GetEntryForOsmId(base::GeoObjectId const & osmId) const; - bool IsParentTo(Hierarchy::Entry const & entry, Hierarchy::Entry const & toEntry) const; - -private: - std::vector m_entries; - NameDictionary m_normalizedNameDictionary; -}; -} // namespace geocoder diff --git a/geocoder/hierarchy_reader.cpp b/geocoder/hierarchy_reader.cpp deleted file mode 100644 index ffababcb7b..0000000000 --- a/geocoder/hierarchy_reader.cpp +++ /dev/null @@ -1,226 +0,0 @@ -#include "geocoder/hierarchy_reader.hpp" - -#include "base/logging.hpp" -#include "base/thread_pool_computational.hpp" - -#include -#include -#include -#include -#include -#include - -using namespace std; - -namespace geocoder -{ -namespace -{ -// Information will be logged for every |kLogBatch| entries. -size_t const kLogBatch = 100000; - -void operator+=(Hierarchy::ParsingStats & accumulator, Hierarchy::ParsingStats & stats) -{ - struct ValidationStats - { - uint64_t m_numLoaded, m_badJsons, m_badOsmIds, m_duplicateOsmIds, m_duplicateAddresses, - m_emptyAddresses, m_emptyNames, m_noLocalityStreets, m_noLocalityBuildings, m_mismatchedNames; - }; - static_assert(sizeof(Hierarchy::ParsingStats) == sizeof(ValidationStats), - "Hierarchy::ParsingStats has been modified"); - - accumulator.m_numLoaded += stats.m_numLoaded; - accumulator.m_badJsons += stats.m_badJsons; - accumulator.m_badOsmIds += stats.m_badOsmIds; - accumulator.m_duplicateOsmIds += stats.m_duplicateOsmIds; - accumulator.m_duplicateAddresses += stats.m_duplicateAddresses; - accumulator.m_emptyAddresses += stats.m_emptyAddresses; - accumulator.m_emptyNames += stats.m_emptyNames; - accumulator.m_noLocalityStreets += stats.m_noLocalityStreets; - accumulator.m_noLocalityBuildings += stats.m_noLocalityBuildings; - accumulator.m_mismatchedNames += stats.m_mismatchedNames; -} -} // namespace - -HierarchyReader::HierarchyReader(string const & pathToJsonHierarchy) - : m_fileStream{pathToJsonHierarchy}, m_in{m_fileStream} -{ - if (!m_fileStream) - MYTHROW(OpenException, ("Failed to open file", pathToJsonHierarchy)); -} - -HierarchyReader::HierarchyReader(istream & in) - : m_in{in} -{ -} - -Hierarchy HierarchyReader::Read(unsigned int readersCount) -{ - CHECK_GREATER_OR_EQUAL(readersCount, 1, ()); - - LOG(LINFO, ("Reading entries...")); - - vector entries; - NameDictionaryBuilder nameDictionaryBuilder; - ParsingStats stats{}; - - base::thread_pool::computational::ThreadPool threadPool{readersCount}; - list> tasks{}; - while (!m_eof || !tasks.empty()) - { - size_t const kReadBlockLineCount = 1000; - while (!m_eof && tasks.size() <= 2 * readersCount) - tasks.emplace_back(threadPool.Submit([&] { return ReadEntries(kReadBlockLineCount); })); - - CHECK(!tasks.empty(), ()); - auto & task = tasks.front(); - auto taskResult = task.get(); - tasks.pop_front(); - - auto & taskEntries = taskResult.m_entries; - auto const & taskNameDictionary = taskResult.m_nameDictionary; - for (auto & entry : taskEntries) - { - for (size_t i = 0; i < static_cast(Type::Count); ++i) - { - if (auto & position = entry.m_normalizedAddress[i]) - { - auto const & multipleNames = taskNameDictionary.Get(position); - position = nameDictionaryBuilder.Add(MultipleNames{multipleNames}); - } - } - } - move(begin(taskEntries), end(taskEntries), back_inserter(entries)); - - stats += taskResult.m_stats; - } - - if (m_totalNumLoaded % kLogBatch != 0) - LOG(LINFO, ("Read", m_totalNumLoaded, "entries")); - - LOG(LINFO, ("Sorting entries...")); - sort(begin(entries), end(entries)); - LOG(LINFO, ("Finished entries sorting")); - - CheckDuplicateOsmIds(entries, stats); - - LOG(LINFO, ("Finished reading and indexing the hierarchy. Stats:")); - LOG(LINFO, ("Entries loaded:", stats.m_numLoaded)); - LOG(LINFO, ("Corrupted json lines:", stats.m_badJsons)); - LOG(LINFO, ("Unreadable base::GeoObjectIds:", stats.m_badOsmIds)); - LOG(LINFO, ("Duplicate base::GeoObjectIds:", stats.m_duplicateOsmIds)); - LOG(LINFO, ("Entries with duplicate address parts:", stats.m_duplicateAddresses)); - LOG(LINFO, ("Entries without address:", stats.m_emptyAddresses)); - LOG(LINFO, ("Entries without names:", stats.m_emptyNames)); - LOG(LINFO, ("Street entries without a locality name:", stats.m_noLocalityStreets)); - LOG(LINFO, ("Building entries without a locality name:", stats.m_noLocalityBuildings)); - LOG(LINFO, - ("Entries whose names do not match their most specific addresses:", stats.m_mismatchedNames)); - LOG(LINFO, ("(End of stats.)")); - - return Hierarchy{move(entries), nameDictionaryBuilder.Release()}; -} - -void HierarchyReader::CheckDuplicateOsmIds(vector const & entries, - ParsingStats & stats) -{ - size_t i = 0; - while (i < entries.size()) - { - size_t j = i + 1; - while (j < entries.size() && entries[i].m_osmId == entries[j].m_osmId) - ++j; - if (j != i + 1) - { - ++stats.m_duplicateOsmIds; - LOG(LDEBUG, - ("Duplicate osm id:", SerializeId(entries[i].m_osmId.GetEncodedId()), "(", - SerializeId(entries[i].m_osmId.GetEncodedId()), ")", "occurs as a key in", - j - i, "key-value entries.")); - } - i = j; - } -} - -HierarchyReader::ParsingResult HierarchyReader::ReadEntries(size_t count) -{ - vector linesBuffer(count); - size_t bufferSize = 0; - - { - lock_guard lock(m_mutex); - - for (; bufferSize < count; ++bufferSize) - { - if (!getline(m_in, linesBuffer[bufferSize])) - { - m_eof = true; - break; - } - } - } - - return DeserializeEntries(linesBuffer, bufferSize); -} - -HierarchyReader::ParsingResult HierarchyReader::DeserializeEntries( - vector const & linesBuffer, size_t const bufferSize) -{ - vector entries; - entries.reserve(bufferSize); - NameDictionaryBuilder nameDictionaryBuilder; - ParsingStats stats; - - for (size_t i = 0; i < bufferSize; ++i) - { - auto & line = linesBuffer[i]; - - if (line.empty()) - continue; - - auto const p = line.find(' '); - uint64_t encodedId; - if (p == string::npos || !DeserializeId(line.substr(0, p), encodedId)) - { - LOG(LWARNING, ("Cannot read osm id. Line:", line)); - ++stats.m_badOsmIds; - continue; - } - auto json = line.substr(p + 1); - - Entry entry; - auto const osmId = base::GeoObjectId(encodedId); - entry.m_osmId = osmId; - - if (!entry.DeserializeFromJSON(json, nameDictionaryBuilder, stats)) - continue; - - if (entry.m_type == Type::Count) - continue; - - ++stats.m_numLoaded; - - auto totalNumLoaded = m_totalNumLoaded.fetch_add(1) + 1; - if (totalNumLoaded % kLogBatch == 0) - LOG(LINFO, ("Read", totalNumLoaded, "entries")); - - entries.push_back(move(entry)); - } - - return {move(entries), nameDictionaryBuilder.Release(), move(stats)}; -} - -// static -bool HierarchyReader::DeserializeId(string const & str, uint64_t & id) -{ - return strings::to_uint64(str, id, 16 /* base */); -} - -// static -string HierarchyReader::SerializeId(uint64_t id) -{ - stringstream s; - s << setw(16) << setfill('0') << hex << uppercase << id; - return s.str(); -} -} // namespace geocoder diff --git a/geocoder/hierarchy_reader.hpp b/geocoder/hierarchy_reader.hpp deleted file mode 100644 index 4d089a7f23..0000000000 --- a/geocoder/hierarchy_reader.hpp +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once - -#include "geocoder/hierarchy.hpp" -#include "geocoder/name_dictionary.hpp" - -#include "base/exception.hpp" -#include "base/geo_object_id.hpp" - -#include -#include -#include -#include -#include -#include -#include - -namespace geocoder -{ -class HierarchyReader -{ -public: - using Entry = Hierarchy::Entry; - using ParsingStats = Hierarchy::ParsingStats; - - DECLARE_EXCEPTION(OpenException, RootException); - - explicit HierarchyReader(std::string const & pathToJsonHierarchy); - explicit HierarchyReader(std::istream & jsonHierarchy); - - // Read hierarchy file/stream concurrently in |readersCount| threads. - Hierarchy Read(unsigned int readersCount = 1); - -private: - struct ParsingResult - { - std::vector m_entries; - NameDictionary m_nameDictionary; - ParsingStats m_stats; - }; - - ParsingResult ReadEntries(size_t count); - ParsingResult DeserializeEntries(std::vector const & linesBuffer, - std::size_t const bufferSize); - static bool DeserializeId(std::string const & str, uint64_t & id); - static std::string SerializeId(uint64_t id); - - void CheckDuplicateOsmIds(std::vector const & entries, ParsingStats & stats); - - std::ifstream m_fileStream; - std::istream & m_in; - bool m_eof{false}; - std::mutex m_mutex; - std::atomic m_totalNumLoaded{0}; -}; -} // namespace geocoder diff --git a/geocoder/index.cpp b/geocoder/index.cpp deleted file mode 100644 index 4938456128..0000000000 --- a/geocoder/index.cpp +++ /dev/null @@ -1,205 +0,0 @@ -#include "geocoder/index.hpp" - -#include "geocoder/types.hpp" - -#include "indexer/search_string_utils.hpp" - -#include "base/assert.hpp" -#include "base/logging.hpp" -#include "base/string_utils.hpp" - -#include -#include -#include -#include -#include - -using namespace std; - -namespace -{ -// Information will be logged for every |kLogBatch| docs. -size_t const kLogBatch = 100000; -} // namespace - -namespace geocoder -{ -Index::Index(Hierarchy const & hierarchy, unsigned int loadThreadsCount) - : m_docs(hierarchy.GetEntries()) - , m_hierarchy{hierarchy} -{ - CHECK_GREATER_OR_EQUAL(loadThreadsCount, 1, ()); - - LOG(LINFO, ("Indexing hierarchy entries...")); - AddEntries(); - LOG(LINFO, ("Indexing houses...")); - AddHouses(loadThreadsCount); -} - -Index::Doc const & Index::GetDoc(DocId const id) const -{ - ASSERT_LESS(static_cast(id), m_docs.size(), ()); - return m_docs[static_cast(id)]; -} - -// static -string Index::MakeIndexKey(Tokens const & tokens) -{ - if (tokens.size() == 1 || is_sorted(begin(tokens), end(tokens))) - return strings::JoinStrings(tokens, " "); - - auto indexTokens = tokens; - sort(begin(indexTokens), end(indexTokens)); - return strings::JoinStrings(indexTokens, " "); -} - -void Index::AddEntries() -{ - size_t numIndexed = 0; - auto const & dictionary = m_hierarchy.GetNormalizedNameDictionary(); - Tokens tokens; - for (DocId docId = 0; docId < static_cast(m_docs.size()); ++docId) - { - auto const & doc = m_docs[static_cast(docId)]; - // The doc is indexed only by its address. - // todo(@m) Index it by name too. - if (doc.m_type == Type::Count) - continue; - - if (doc.m_type == Type::Building) - continue; - - if (doc.m_type == Type::Street) - { - AddStreet(docId, doc); - } - else - { - for (auto const & name : doc.GetNormalizedMultipleNames(doc.m_type, dictionary)) - { - search::NormalizeAndTokenizeAsUtf8(name, tokens); - InsertToIndex(tokens, docId); - } - } - - ++numIndexed; - if (numIndexed % kLogBatch == 0) - LOG(LINFO, ("Indexed", numIndexed, "entries")); - } - - if (numIndexed % kLogBatch != 0) - LOG(LINFO, ("Indexed", numIndexed, "entries")); -} - -void Index::AddStreet(DocId const & docId, Index::Doc const & doc) -{ - CHECK_EQUAL(doc.m_type, Type::Street, ()); - - auto isStreetSynonym = [] (string const & s) { - return search::IsStreetSynonym(strings::MakeUniString(s)); - }; - - auto const & dictionary = m_hierarchy.GetNormalizedNameDictionary(); - Tokens tokens; - for (auto const & name : doc.GetNormalizedMultipleNames(Type::Street, dictionary)) - { - search::NormalizeAndTokenizeAsUtf8(name, tokens); - - if (all_of(begin(tokens), end(tokens), isStreetSynonym)) - { - if (tokens.size() > 1) - InsertToIndex(tokens, docId); - return; - } - - InsertToIndex(tokens, docId); - - for (size_t i = 0; i < tokens.size(); ++i) - { - if (!isStreetSynonym(tokens[i])) - continue; - auto addr = tokens; - addr.erase(addr.begin() + i); - InsertToIndex(addr, docId); - } - } -} - -void Index::AddHouses(unsigned int loadThreadsCount) -{ - atomic numIndexed{0}; - mutex buildingsMutex; - - vector threads(loadThreadsCount); - CHECK_GREATER(threads.size(), 0, ()); - - auto const & dictionary = m_hierarchy.GetNormalizedNameDictionary(); - - for (size_t t = 0; t < threads.size(); ++t) - { - threads[t] = thread([&, t, this]() { - size_t const size = m_docs.size() / threads.size(); - size_t docId = t * size; - size_t const docIdEnd = (t + 1 == threads.size() ? m_docs.size() : docId + size); - - for (; docId < docIdEnd; ++docId) - { - auto const & buildingDoc = GetDoc(docId); - - if (buildingDoc.m_type != Type::Building) - continue; - - auto const & street = buildingDoc.m_normalizedAddress[static_cast(Type::Street)]; - auto const & locality = - buildingDoc.m_normalizedAddress[static_cast(Type::Locality)]; - - NameDictionary::Position relation = NameDictionary::kUnspecifiedPosition; - if (street != NameDictionary::kUnspecifiedPosition) - relation = street; - else if (locality != NameDictionary::kUnspecifiedPosition) - relation = locality; - else - continue; - - auto const & relationMultipleNames = dictionary.Get(relation); - auto const & relationName = relationMultipleNames.GetMainName(); - Tokens relationNameTokens; - search::NormalizeAndTokenizeAsUtf8(relationName, relationNameTokens); - CHECK(!relationNameTokens.empty(), ()); - - bool indexed = false; - ForEachDocId(relationNameTokens, [&](DocId const & candidate) { - auto const & candidateDoc = GetDoc(candidate); - if (m_hierarchy.IsParentTo(candidateDoc, buildingDoc)) - { - indexed = true; - - lock_guard lock(buildingsMutex); - m_relatedBuildings[candidate].emplace_back(docId); - } - }); - - if (indexed) - { - auto const processedCount = numIndexed.fetch_add(1) + 1; - if (processedCount % kLogBatch == 0) - LOG(LINFO, ("Indexed", processedCount, "houses")); - } - } - }); - } - - for (auto & t : threads) - t.join(); - - if (numIndexed % kLogBatch != 0) - LOG(LINFO, ("Indexed", numIndexed, "houses")); -} - -void Index::InsertToIndex(Tokens const & tokens, DocId docId) -{ - auto & ids = m_docIdsByTokens[MakeIndexKey(tokens)]; - if (0 == count(ids.begin(), ids.end(), docId)) - ids.emplace_back(docId); -} -} // namespace geocoder diff --git a/geocoder/index.hpp b/geocoder/index.hpp deleted file mode 100644 index 1d9aeff3d2..0000000000 --- a/geocoder/index.hpp +++ /dev/null @@ -1,82 +0,0 @@ -#pragma once - -#include "geocoder/hierarchy.hpp" - -#include "base/geo_object_id.hpp" - -#include -#include -#include -#include -#include - -namespace geocoder -{ -class Index -{ -public: - using Doc = Hierarchy::Entry; - - // Number of the entry in the list of all hierarchy entries - // that the index was constructed from. - using DocId = std::vector::size_type; - - explicit Index(Hierarchy const & hierarchy, unsigned int loadThreadsCount = 1); - - Doc const & GetDoc(DocId const id) const; - - // Calls |fn| for DocIds of Docs whose names exactly match |tokens| (the order matters). - // - // todo This method (and the whole class, in fact) is in the - // prototype stage and may be too slow. Proper indexing should - // be implemented to perform this type of queries. - template - void ForEachDocId(Tokens const & tokens, Fn && fn) const - { - auto const it = m_docIdsByTokens.find(MakeIndexKey(tokens)); - if (it == m_docIdsByTokens.end()) - return; - - for (DocId const & docId : it->second) - fn(docId); - } - - // Calls |fn| for DocIds of buildings that are located on the - // street/locality whose DocId is |docId|. - template - void ForEachRelatedBuilding(DocId const & docId, Fn && fn) const - { - auto const it = m_relatedBuildings.find(docId); - if (it == m_relatedBuildings.end()) - return; - - for (DocId const & docId : it->second) - fn(docId); - } - -private: - void InsertToIndex(Tokens const & tokens, DocId docId); - - // Converts |tokens| to a single UTF-8 string that can be used - // as a key in the |m_docIdsByTokens| map. - static std::string MakeIndexKey(Tokens const & tokens); - - // Adds address information of |m_docs| to the index. - void AddEntries(); - - // Adds the street |e| (which has the id of |docId|) to the index, - // with and without synonyms of the word "street". - void AddStreet(DocId const & docId, Doc const & e); - - // Fills the |m_relatedBuildings| field. - void AddHouses(unsigned int loadThreadsCount); - - std::vector const & m_docs; - Hierarchy const & m_hierarchy; - - std::unordered_map> m_docIdsByTokens; - - // Lists of houses grouped by the streets/localities they belong to. - std::unordered_map> m_relatedBuildings; -}; -} // namespace geocoder diff --git a/geocoder/name_dictionary.cpp b/geocoder/name_dictionary.cpp deleted file mode 100644 index 560e566215..0000000000 --- a/geocoder/name_dictionary.cpp +++ /dev/null @@ -1,100 +0,0 @@ -#include "geocoder/name_dictionary.hpp" - -#include "base/assert.hpp" - -#include -#include -#include -#include - -namespace geocoder -{ -// MultipleName ------------------------------------------------------------------------------------ -MultipleNames::MultipleNames(std::string const & mainName) - : m_names{mainName} -{ } - -std::string const & MultipleNames::GetMainName() const noexcept -{ - return m_names[0]; -} - -std::vector const & MultipleNames::GetNames() const noexcept -{ - return m_names; -} - -MultipleNames::const_iterator MultipleNames::begin() const noexcept -{ - return m_names.begin(); -} - -MultipleNames::const_iterator MultipleNames::end() const noexcept -{ - return m_names.end(); -} - -void MultipleNames::SetMainName(std::string const & name) -{ - m_names[0] = name; -} - -void MultipleNames::AddAltName(std::string const & name) -{ - m_names.emplace_back(std::move(name)); - // Sort for operator==. - ASSERT_GREATER_OR_EQUAL(m_names.size(), 2, ()); - std::inplace_merge(std::next(m_names.begin()), std::prev(m_names.end()), m_names.end()); -} - -bool operator==(MultipleNames const & lhs, MultipleNames const & rhs) noexcept -{ - return lhs.m_names == rhs.m_names; -} - -bool operator!=(MultipleNames const & lhs, MultipleNames const & rhs) noexcept -{ - return !(lhs == rhs); -} - -// NameDictionary ---------------------------------------------------------------------------------- -MultipleNames const & NameDictionary::Get(Position position) const -{ - CHECK_GREATER(position, 0, ()); - CHECK_LESS_OR_EQUAL(position, m_stock.size(), ()); - return m_stock[position - 1]; -} - -NameDictionary::Position NameDictionary::Add(MultipleNames && names) -{ - CHECK(!names.GetMainName().empty(), ()); - CHECK_LESS(m_stock.size(), std::numeric_limits::max(), ()); - m_stock.push_back(std::move(names)); - return m_stock.size(); // index + 1 -} - -// NameDictionaryBuilder::Hash --------------------------------------------------------------------- -size_t NameDictionaryBuilder::Hash::operator()(MultipleNames const & names) const noexcept -{ - return std::hash{}(names.GetMainName()); -} - -// NameDictionaryBuilder ----------------------------------------------------------------------------- -NameDictionary::Position NameDictionaryBuilder::Add(MultipleNames && names) -{ - auto indexItem = m_index.find(names); - if (indexItem != m_index.end()) - return indexItem->second; - - auto p = m_dictionary.Add(std::move(names)); - auto indexEmplace = m_index.emplace(m_dictionary.Get(p), p); - CHECK(indexEmplace.second, ()); - return p; -} - -NameDictionary NameDictionaryBuilder::Release() -{ - m_index.clear(); - return std::move(m_dictionary); -} -} // namespace geocoder diff --git a/geocoder/name_dictionary.hpp b/geocoder/name_dictionary.hpp deleted file mode 100644 index fdf5b390f9..0000000000 --- a/geocoder/name_dictionary.hpp +++ /dev/null @@ -1,75 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace geocoder -{ -class MultipleNames -{ -public: - using const_iterator = std::vector::const_iterator; - - explicit MultipleNames(std::string const & mainName = {}); - - std::string const & GetMainName() const noexcept; - std::vector const & GetNames() const noexcept; - - const_iterator begin() const noexcept; - const_iterator end() const noexcept; - - void SetMainName(std::string const & name); - // Complexity: O(N-1) - a best case, O(N*log(N)) - a worst case. - void AddAltName(std::string const & name); - - friend bool operator==(MultipleNames const & lhs, MultipleNames const & rhs) noexcept; - friend bool operator!=(MultipleNames const & lhs, MultipleNames const & rhs) noexcept; - -private: - std::vector m_names; -}; - -class NameDictionary -{ -public: - // Values of Position type: kUnspecifiedPosition or >= 1. - using Position = std::uint32_t; - - static constexpr Position kUnspecifiedPosition = 0; - - NameDictionary() = default; - NameDictionary(NameDictionary &&) = default; - NameDictionary & operator=(NameDictionary &&) = default; - - NameDictionary(NameDictionary const &) = delete; - NameDictionary & operator=(NameDictionary const &) = delete; - - MultipleNames const & Get(Position position) const; - Position Add(MultipleNames && s); - -private: - std::vector m_stock; -}; - -class NameDictionaryBuilder -{ -public: - NameDictionaryBuilder() = default; - NameDictionaryBuilder(NameDictionaryBuilder const &) = delete; - NameDictionaryBuilder & operator=(NameDictionaryBuilder const &) = delete; - - NameDictionary::Position Add(MultipleNames && s); - NameDictionary Release(); - -private: - struct Hash - { - size_t operator()(MultipleNames const & names) const noexcept; - }; - - NameDictionary m_dictionary; - std::unordered_map m_index; -}; -} // namespace geocoder diff --git a/geocoder/result.cpp b/geocoder/result.cpp deleted file mode 100644 index 842bcfc8e3..0000000000 --- a/geocoder/result.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "geocoder/result.hpp" - -#include - -using namespace std; - -namespace geocoder -{ -string DebugPrint(Result const & result) -{ - ostringstream oss; - oss << DebugPrint(result.m_osmId) << " certainty=" << result.m_certainty; - return oss.str(); -} -} // namespace geocoder diff --git a/geocoder/result.hpp b/geocoder/result.hpp deleted file mode 100644 index c073125ea7..0000000000 --- a/geocoder/result.hpp +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include "base/geo_object_id.hpp" - -#include - -namespace geocoder -{ -struct Result -{ - Result() = default; - - Result(base::GeoObjectId const & osmId, double certainty) : m_osmId(osmId), m_certainty(certainty) - { - } - - // The encoded osm id of a node, way or relation. - base::GeoObjectId m_osmId = base::GeoObjectId(base::GeoObjectId::kInvalid); - - // A floating point number in the range [0.0, 1.0] - // describing the extent to which the result matches - // the query. - // 0.0 corresponds to the least probable results and - // 1.0 to the most probable. - double m_certainty = 0; -}; - -std::string DebugPrint(Result const & result); -} // namespace geocoder diff --git a/geocoder/types.cpp b/geocoder/types.cpp deleted file mode 100644 index 89602e418e..0000000000 --- a/geocoder/types.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include "geocoder/types.hpp" - -#include "base/assert.hpp" - -using namespace std; - -namespace geocoder -{ -string ToString(Type type) -{ - switch (type) - { - case Type::Country: return "country"; - case Type::Region: return "region"; - case Type::Subregion: return "subregion"; - case Type::Locality: return "locality"; - case Type::Suburb: return "suburb"; - case Type::Sublocality: return "sublocality"; - case Type::Street: return "street"; - case Type::Building: return "building"; - case Type::Count: return "count"; - } - UNREACHABLE(); -} - -string DebugPrint(Type type) -{ - return ToString(type); -} -} // namespace geocoder diff --git a/geocoder/types.hpp b/geocoder/types.hpp deleted file mode 100644 index 8b2f260e39..0000000000 --- a/geocoder/types.hpp +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include "base/string_utils.hpp" - -#include -#include - -namespace geocoder -{ -using Tokens = std::vector; - -enum class Type -{ - // It is important that the types are ordered from - // the more general to the more specific. - Country, - Region, - Subregion, - Locality, - Suburb, - Sublocality, - Street, - Building, - - Count -}; - -std::string ToString(Type type); -std::string DebugPrint(Type type); -} // namespace geocoder