From 817cc1b7654d930d0c6c1870a4971024ba36d702 Mon Sep 17 00:00:00 2001 From: Anatoly Serdtcev Date: Thu, 7 Mar 2019 15:52:00 +0300 Subject: [PATCH 1/2] [geocoder] Optimize CPU: skip streets/buildings without locality --- geocoder/hierarchy.cpp | 13 +++++++++++++ geocoder/hierarchy.hpp | 6 ++++++ geocoder/hierarchy_reader.cpp | 6 +++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/geocoder/hierarchy.cpp b/geocoder/hierarchy.cpp index d606f3459c..abccd07598 100644 --- a/geocoder/hierarchy.cpp +++ b/geocoder/hierarchy.cpp @@ -76,6 +76,19 @@ bool Hierarchy::Entry::DeserializeFromJSONImpl(json_t * const root, string const m_type = static_cast(i); } + auto const & subregion = m_address[static_cast(Type::Subregion)]; + auto const & locality = m_address[static_cast(Type::Locality)]; + if (m_type == Type::Street && locality.empty() && subregion.empty() /* if locality detection fails */) + { + ++stats.m_noLocalityStreets; + return false; + } + if (m_type == Type::Building && locality.empty() && subregion.empty() /* if locality detection fails */) + { + ++stats.m_noLocalityBuildings; + return false; + } + m_nameTokens.clear(); FromJSONObjectOptionalField(properties, "name", m_name); search::NormalizeAndTokenizeAsUtf8(m_name, m_nameTokens); diff --git a/geocoder/hierarchy.hpp b/geocoder/hierarchy.hpp index 8579db75fb..1d07ec8179 100644 --- a/geocoder/hierarchy.hpp +++ b/geocoder/hierarchy.hpp @@ -41,6 +41,12 @@ public: // Number of entries without the name field or with an empty one. uint64_t m_emptyNames = 0; + // Number of street entries without a locality name. + uint64_t m_noLocalityStreets = 0; + + // Number of building entries without a locality name. + uint64_t m_noLocalityBuildings = 0; + // Number of entries whose names do not match the most // specific parts of their addresses. // This is expected from POIs but not from regions or streets. 
diff --git a/geocoder/hierarchy_reader.cpp b/geocoder/hierarchy_reader.cpp index ae734f97ef..f79641b4a5 100644 --- a/geocoder/hierarchy_reader.cpp +++ b/geocoder/hierarchy_reader.cpp @@ -20,7 +20,7 @@ void operator+=(Hierarchy::ParsingStats & accumulator, Hierarchy::ParsingStats & struct ValidationStats { uint64_t m_numLoaded, m_badJsons, m_badOsmIds, m_duplicateOsmIds, m_duplicateAddresses, - m_emptyAddresses, m_emptyNames, m_mismatchedNames; + m_emptyAddresses, m_emptyNames, m_noLocalityStreets, m_noLocalityBuildings, m_mismatchedNames; }; static_assert(sizeof(Hierarchy::ParsingStats) == sizeof(ValidationStats), "Hierarchy::ParsingStats has been modified"); @@ -32,6 +32,8 @@ void operator+=(Hierarchy::ParsingStats & accumulator, Hierarchy::ParsingStats & accumulator.m_duplicateAddresses += stats.m_duplicateAddresses; accumulator.m_emptyAddresses += stats.m_emptyAddresses; accumulator.m_emptyNames += stats.m_emptyNames; + accumulator.m_noLocalityStreets += stats.m_noLocalityStreets; + accumulator.m_noLocalityBuildings += stats.m_noLocalityBuildings; accumulator.m_mismatchedNames += stats.m_mismatchedNames; } } // namespace @@ -82,6 +84,8 @@ Hierarchy HierarchyReader::Read(unsigned int readersCount) LOG(LINFO, ("Entries with duplicate address parts:", stats.m_duplicateAddresses)); LOG(LINFO, ("Entries without address:", stats.m_emptyAddresses)); LOG(LINFO, ("Entries without names:", stats.m_emptyNames)); + LOG(LINFO, ("Street entries without a locality name:", stats.m_noLocalityStreets)); + LOG(LINFO, ("Building entries without a localtity name:", stats.m_noLocalityBuildings)); LOG(LINFO, ("Entries whose names do not match their most specific addresses:", stats.m_mismatchedNames)); LOG(LINFO, ("(End of stats.)")); From 03d318096baad2f2b735efb416180eeb2f6e89cd Mon Sep 17 00:00:00 2001 From: Anatoly Serdtcev Date: Thu, 7 Mar 2019 16:30:30 +0300 Subject: [PATCH 2/2] [geocoder] Fix for review --- geocoder/hierarchy_reader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/geocoder/hierarchy_reader.cpp b/geocoder/hierarchy_reader.cpp index f79641b4a5..f0f5f30c82 100644 --- a/geocoder/hierarchy_reader.cpp +++ b/geocoder/hierarchy_reader.cpp @@ -85,7 +85,7 @@ Hierarchy HierarchyReader::Read(unsigned int readersCount) LOG(LINFO, ("Entries without address:", stats.m_emptyAddresses)); LOG(LINFO, ("Entries without names:", stats.m_emptyNames)); LOG(LINFO, ("Street entries without a locality name:", stats.m_noLocalityStreets)); - LOG(LINFO, ("Building entries without a localtity name:", stats.m_noLocalityBuildings)); + LOG(LINFO, ("Building entries without a locality name:", stats.m_noLocalityBuildings)); LOG(LINFO, ("Entries whose names do not match their most specific addresses:", stats.m_mismatchedNames)); LOG(LINFO, ("(End of stats.)"));