[geocoder] Optimize CPU: skip streets/buildings without locality

This commit is contained in:
Anatoly Serdtcev 2019-03-07 15:52:00 +03:00
parent 8fdaabf361
commit 817cc1b765
3 changed files with 24 additions and 1 deletions

View file

@ -76,6 +76,19 @@ bool Hierarchy::Entry::DeserializeFromJSONImpl(json_t * const root, string const
m_type = static_cast<Type>(i);
}
auto const & subregion = m_address[static_cast<size_t>(Type::Subregion)];
auto const & locality = m_address[static_cast<size_t>(Type::Locality)];
if (m_type == Type::Street && locality.empty() && subregion.empty() /* if locality detection fails */)
{
++stats.m_noLocalityStreets;
return false;
}
if (m_type == Type::Building && locality.empty() && subregion.empty() /* if locality detection fails */)
{
++stats.m_noLocalityBuildings;
return false;
}
m_nameTokens.clear();
FromJSONObjectOptionalField(properties, "name", m_name);
search::NormalizeAndTokenizeAsUtf8(m_name, m_nameTokens);

View file

@ -41,6 +41,12 @@ public:
// Number of entries without the name field or with an empty one.
uint64_t m_emptyNames = 0;
// Number of street entries that have neither a locality nor a subregion name.
uint64_t m_noLocalityStreets = 0;
// Number of building entries that have neither a locality nor a subregion name.
uint64_t m_noLocalityBuildings = 0;
// Number of entries whose names do not match the most
// specific parts of their addresses.
// This is expected from POIs but not from regions or streets.

View file

@ -20,7 +20,7 @@ void operator+=(Hierarchy::ParsingStats & accumulator, Hierarchy::ParsingStats &
struct ValidationStats
{
uint64_t m_numLoaded, m_badJsons, m_badOsmIds, m_duplicateOsmIds, m_duplicateAddresses,
m_emptyAddresses, m_emptyNames, m_mismatchedNames;
m_emptyAddresses, m_emptyNames, m_noLocalityStreets, m_noLocalityBuildings, m_mismatchedNames;
};
static_assert(sizeof(Hierarchy::ParsingStats) == sizeof(ValidationStats),
"Hierarchy::ParsingStats has been modified");
@ -32,6 +32,8 @@ void operator+=(Hierarchy::ParsingStats & accumulator, Hierarchy::ParsingStats &
accumulator.m_duplicateAddresses += stats.m_duplicateAddresses;
accumulator.m_emptyAddresses += stats.m_emptyAddresses;
accumulator.m_emptyNames += stats.m_emptyNames;
accumulator.m_noLocalityStreets += stats.m_noLocalityStreets;
accumulator.m_noLocalityBuildings += stats.m_noLocalityBuildings;
accumulator.m_mismatchedNames += stats.m_mismatchedNames;
}
} // namespace
@ -82,6 +84,8 @@ Hierarchy HierarchyReader::Read(unsigned int readersCount)
LOG(LINFO, ("Entries with duplicate address parts:", stats.m_duplicateAddresses));
LOG(LINFO, ("Entries without address:", stats.m_emptyAddresses));
LOG(LINFO, ("Entries without names:", stats.m_emptyNames));
LOG(LINFO, ("Street entries without a locality name:", stats.m_noLocalityStreets));
LOG(LINFO, ("Building entries without a locality name:", stats.m_noLocalityBuildings));
LOG(LINFO,
("Entries whose names do not match their most specific addresses:", stats.m_mismatchedNames));
LOG(LINFO, ("(End of stats.)"));