forked from organicmaps/organicmaps
[geocoder] Relax number matching for streets
This commit is contained in:
parent
03e2cdd140
commit
4c2f6be287
4 changed files with 201 additions and 60 deletions
|
@ -299,6 +299,23 @@ bool IsSortedAndUnique(Iter beg, Iter end)
|
|||
return IsSortedAndUnique(beg, end, std::less<typename std::iterator_traits<Iter>::value_type>());
|
||||
}
|
||||
|
||||
// Checks whether the sorted range [first2, last2) is included in the sorted range
// [first1, last1): every element of the second range must be matched by an equivalent
// element of the first one, and repeated elements must be matched as many times as
// they occur. Elements are compared with operator<. An empty second range is always
// included.
// Both ranges must be sorted in ascending order; this is only verified by the asserts.
// See std::includes() C++20.
template <typename Iter1, typename Iter2>
bool Includes(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2)
{
  assert(std::is_sorted(first1, last1));
  assert(std::is_sorted(first2, last2));

  for (; first2 != last2; ++first1)
  {
    // The first range is exhausted or has already moved past the wanted element.
    if (first1 == last1 || *first2 < *first1)
      return false;
    // Neither element is less than the other: the wanted element is matched.
    if (!(*first1 < *first2))
      ++first2;
  }
  return true;
}
|
||||
|
||||
struct DeleteFunctor
|
||||
{
|
||||
template <typename T>
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "base/timer.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <set>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
|
@ -138,9 +139,9 @@ bool Geocoder::Context::IsTokenUsed(size_t id) const
|
|||
bool Geocoder::Context::AllTokensUsed() const { return m_numUsedTokens == m_tokens.size(); }
|
||||
|
||||
void Geocoder::Context::AddResult(base::GeoObjectId const & osmId, double certainty, Type type,
|
||||
vector<Type> const & allTypes, bool allTokensUsed)
|
||||
vector<size_t> const & tokenIds, vector<Type> const & allTypes)
|
||||
{
|
||||
m_beam.Add(BeamKey(osmId, type, allTypes, allTokensUsed), certainty);
|
||||
m_beam.Add(BeamKey(osmId, type, tokenIds, allTypes), certainty);
|
||||
}
|
||||
|
||||
void Geocoder::Context::FillResults(vector<Result> & results) const
|
||||
|
@ -149,30 +150,13 @@ void Geocoder::Context::FillResults(vector<Result> & results) const
|
|||
results.reserve(m_beam.GetEntries().size());
|
||||
|
||||
set<base::GeoObjectId> seen;
|
||||
bool const hasPotentialHouseNumber = !m_houseNumberPositionsInQuery.empty();
|
||||
for (auto const & e : m_beam.GetEntries())
|
||||
{
|
||||
if (!seen.insert(e.m_key.m_osmId).second)
|
||||
continue;
|
||||
|
||||
bool isGoodHouseHumber = false;
|
||||
if (e.m_key.m_type == Type::Building)
|
||||
{
|
||||
bool gotLocality = false;
|
||||
bool gotStreet = false;
|
||||
bool gotBuilding = false;
|
||||
for (Type t : e.m_key.m_allTypes)
|
||||
{
|
||||
if (t == Type::Region || t == Type::Subregion || t == Type::Locality)
|
||||
gotLocality = true;
|
||||
if (t == Type::Street)
|
||||
gotStreet = true;
|
||||
if (t == Type::Building)
|
||||
gotBuilding = true;
|
||||
}
|
||||
isGoodHouseHumber = gotLocality && gotStreet && gotBuilding;
|
||||
}
|
||||
|
||||
if (m_surelyGotHouseNumber && !isGoodHouseHumber && !e.m_key.m_allTokensUsed)
|
||||
if (hasPotentialHouseNumber && !IsGoodForPotentialHouseNumberAt(e.m_key, m_houseNumberPositionsInQuery))
|
||||
continue;
|
||||
|
||||
results.emplace_back(e.m_key.m_osmId, e.m_value /* certainty */);
|
||||
|
@ -197,6 +181,64 @@ vector<Geocoder::Layer> & Geocoder::Context::GetLayers() { return m_layers; }
|
|||
|
||||
vector<Geocoder::Layer> const & Geocoder::Context::GetLayers() const { return m_layers; }
|
||||
|
||||
// Remembers the query token positions |tokenIds| as positions of a potential house
// number. Positions from successive calls accumulate in a set, so repeated positions
// are stored once.
void Geocoder::Context::MarkHouseNumberPositionsInQuery(vector<size_t> const & tokenIds)
{
  m_houseNumberPositionsInQuery.insert(tokenIds.begin(), tokenIds.end());
}
|
||||
|
||||
// Decides whether the beam entry |beamKey| is acceptable for a query in which the
// tokens at positions |tokenIds| may be a house number. The entry passes when any of
// the following holds:
//  - it recorded as many token ids as the query has tokens;
//  - it is a building with locality, street and building parts;
//  - it has a locality or region part and its token ids include all of |tokenIds|.
bool Geocoder::Context::IsGoodForPotentialHouseNumberAt(BeamKey const & beamKey,
                                                        set<size_t> const & tokenIds) const
{
  if (beamKey.m_tokenIds.size() == m_tokens.size())
    return true;

  if (IsBuildingWithAddress(beamKey))
    return true;

  // Pass street, locality or region with number in query address parts.
  return HasLocalityOrRegion(beamKey) && ContainsTokenIds(beamKey, tokenIds);
}
|
||||
|
||||
// Returns true if |beamKey| is a building whose matched types contain a
// region/subregion/locality, a street and a building.
bool Geocoder::Context::IsBuildingWithAddress(BeamKey const & beamKey) const
{
  if (beamKey.m_type != Type::Building)
    return false;

  bool gotStreet = false;
  bool gotBuilding = false;
  for (Type t : beamKey.m_allTypes)
  {
    if (t == Type::Street)
      gotStreet = true;
    if (t == Type::Building)
      gotBuilding = true;
  }

  // The region/subregion/locality check is shared with HasLocalityOrRegion().
  return HasLocalityOrRegion(beamKey) && gotStreet && gotBuilding;
}
|
||||
|
||||
// Returns true if the types matched by |beamKey| contain a region, a subregion or
// a locality.
bool Geocoder::Context::HasLocalityOrRegion(BeamKey const & beamKey) const
{
  auto const & types = beamKey.m_allTypes;
  return std::any_of(types.begin(), types.end(), [](Type t) {
    return t == Type::Region || t == Type::Subregion || t == Type::Locality;
  });
}
|
||||
|
||||
// Returns true if the token ids recorded in |beamKey| include every id from
// |needTokenIds|.
// NOTE: base::Includes() requires both ranges to be sorted. |needTokenIds| is a set;
// |beamKey.m_tokenIds| is a plain vector and is presumably filled in ascending order
// by the caller - TODO confirm.
bool Geocoder::Context::ContainsTokenIds(BeamKey const & beamKey, set<size_t> const & needTokenIds) const
{
  auto const & keyTokenIds = beamKey.m_tokenIds;
  return base::Includes(keyTokenIds.begin(), keyTokenIds.end(), needTokenIds.begin(), needTokenIds.end());
}
|
||||
|
||||
// Geocoder ----------------------------------------------------------------------------------------
|
||||
Geocoder::Geocoder(string const & pathToJsonHierarchy, unsigned int loadThreadsCount)
|
||||
: Geocoder{HierarchyReader{pathToJsonHierarchy}.Read(loadThreadsCount), loadThreadsCount}
|
||||
|
@ -243,15 +285,18 @@ void Geocoder::Go(Context & ctx, Type type) const
|
|||
return;
|
||||
|
||||
Tokens subquery;
|
||||
vector<size_t> subqueryTokenIds;
|
||||
for (size_t i = 0; i < ctx.GetNumTokens(); ++i)
|
||||
{
|
||||
subquery.clear();
|
||||
subqueryTokenIds.clear();
|
||||
for (size_t j = i; j < ctx.GetNumTokens(); ++j)
|
||||
{
|
||||
if (ctx.IsTokenUsed(j))
|
||||
break;
|
||||
|
||||
subquery.push_back(ctx.GetToken(j));
|
||||
subqueryTokenIds.push_back(j);
|
||||
|
||||
Layer curLayer;
|
||||
curLayer.m_type = type;
|
||||
|
@ -259,7 +304,7 @@ void Geocoder::Go(Context & ctx, Type type) const
|
|||
// Buildings are indexed separately.
|
||||
if (type == Type::Building)
|
||||
{
|
||||
FillBuildingsLayer(ctx, subquery, curLayer);
|
||||
FillBuildingsLayer(ctx, subquery, subqueryTokenIds, curLayer);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -273,6 +318,7 @@ void Geocoder::Go(Context & ctx, Type type) const
|
|||
boost::optional<ScopedMarkTokens> streetSynonymMark;
|
||||
|
||||
double certainty = 0;
|
||||
vector<size_t> tokenIds;
|
||||
vector<Type> allTypes;
|
||||
for (size_t tokId = 0; tokId < ctx.GetNumTokens(); ++tokId)
|
||||
{
|
||||
|
@ -285,11 +331,14 @@ void Geocoder::Go(Context & ctx, Type type) const
|
|||
|
||||
certainty += GetWeight(t);
|
||||
if (t != Type::Count)
|
||||
{
|
||||
tokenIds.push_back(tokId);
|
||||
allTypes.push_back(t);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto const & docId : curLayer.m_entries)
|
||||
ctx.AddResult(m_index.GetDoc(docId).m_osmId, certainty, type, allTypes, ctx.AllTokensUsed());
|
||||
ctx.AddResult(m_index.GetDoc(docId).m_osmId, certainty, type, tokenIds, allTypes);
|
||||
|
||||
ctx.GetLayers().emplace_back(move(curLayer));
|
||||
SCOPE_GUARD(pop, [&] { ctx.GetLayers().pop_back(); });
|
||||
|
@ -301,7 +350,8 @@ void Geocoder::Go(Context & ctx, Type type) const
|
|||
Go(ctx, NextType(type));
|
||||
}
|
||||
|
||||
void Geocoder::FillBuildingsLayer(Context & ctx, Tokens const & subquery, Layer & curLayer) const
|
||||
void Geocoder::FillBuildingsLayer(Context & ctx, Tokens const & subquery, vector<size_t> const & subqueryTokenIds,
|
||||
Layer & curLayer) const
|
||||
{
|
||||
if (ctx.GetLayers().empty())
|
||||
return;
|
||||
|
@ -317,8 +367,8 @@ void Geocoder::FillBuildingsLayer(Context & ctx, Tokens const & subquery, Layer
|
|||
|
||||
// We've already filled a street/location layer and now see something that resembles
|
||||
// a house number. While it still can be something else (a zip code, for example)
|
||||
// let's stay on the safer side and set the house number bit.
|
||||
ctx.SetHouseNumberBit();
|
||||
// let's stay on the safer side and mark the tokens as potential house number.
|
||||
ctx.MarkHouseNumberPositionsInQuery(subqueryTokenIds);
|
||||
|
||||
for (auto const & docId : layer.m_entries)
|
||||
{
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
|
@ -53,19 +54,20 @@ public:
|
|||
public:
|
||||
struct BeamKey
|
||||
{
|
||||
BeamKey(base::GeoObjectId osmId, Type type, std::vector<Type> const & allTypes, bool allTokensUsed)
|
||||
BeamKey(base::GeoObjectId osmId, Type type, std::vector<size_t> const & tokenIds,
|
||||
std::vector<Type> const & allTypes)
|
||||
: m_osmId(osmId)
|
||||
, m_type(type)
|
||||
, m_tokenIds{tokenIds}
|
||||
, m_allTypes(allTypes)
|
||||
, m_allTokensUsed(allTokensUsed)
|
||||
{
|
||||
base::SortUnique(m_allTypes);
|
||||
}
|
||||
|
||||
base::GeoObjectId m_osmId;
|
||||
Type m_type;
|
||||
std::vector<size_t> m_tokenIds;
|
||||
std::vector<Type> m_allTypes;
|
||||
bool m_allTokensUsed;
|
||||
};
|
||||
|
||||
Context(std::string const & query);
|
||||
|
@ -89,7 +91,7 @@ public:
|
|||
bool AllTokensUsed() const;
|
||||
|
||||
void AddResult(base::GeoObjectId const & osmId, double certainty, Type type,
|
||||
std::vector<Type> const & allTypes, bool allTokensUsed);
|
||||
std::vector<size_t> const & tokenIds, std::vector<Type> const & allTypes);
|
||||
|
||||
void FillResults(std::vector<Result> & results) const;
|
||||
|
||||
|
@ -97,20 +99,25 @@ public:
|
|||
|
||||
std::vector<Layer> const & GetLayers() const;
|
||||
|
||||
void SetHouseNumberBit() { m_surelyGotHouseNumber = true; }
|
||||
void MarkHouseNumberPositionsInQuery(std::vector<size_t> const & tokenIds);
|
||||
|
||||
private:
|
||||
bool IsGoodForPotentialHouseNumberAt(BeamKey const & beamKey, std::set<size_t> const & tokenIds) const;
|
||||
bool IsBuildingWithAddress(BeamKey const & beamKey) const;
|
||||
bool HasLocalityOrRegion(BeamKey const & beamKey) const;
|
||||
bool ContainsTokenIds(BeamKey const & beamKey, std::set<size_t> const & needTokenIds) const;
|
||||
|
||||
Tokens m_tokens;
|
||||
std::vector<Type> m_tokenTypes;
|
||||
|
||||
size_t m_numUsedTokens = 0;
|
||||
|
||||
// Sticky bit that records a heuristic check whether
|
||||
// the current query contains a house number.
|
||||
// |m_houseNumberPositionsInQuery| has indexes of query tokens which are placed on
|
||||
// context-dependent positions of house number.
|
||||
// The rationale is that we must only emit buildings in this case
|
||||
// and implement a fallback to a more powerful geocoder if we
|
||||
// could not find a building.
|
||||
bool m_surelyGotHouseNumber = false;
|
||||
std::set<size_t> m_houseNumberPositionsInQuery;
|
||||
|
||||
// The highest value of certainty for a fixed amount of
|
||||
// the most relevant retrieved osm ids.
|
||||
|
@ -133,8 +140,8 @@ private:
|
|||
|
||||
void Go(Context & ctx, Type type) const;
|
||||
|
||||
void FillBuildingsLayer(Context & ctx, Tokens const & subquery, Layer & curLayer) const;
|
||||
|
||||
void FillBuildingsLayer(Context & ctx, Tokens const & subquery, std::vector<size_t> const & subqueryTokenIds,
|
||||
Layer & curLayer) const;
|
||||
void FillRegularLayer(Context const & ctx, Type type, Tokens const & subquery,
|
||||
Layer & curLayer) const;
|
||||
|
||||
|
|
|
@ -20,8 +20,9 @@ using namespace std;
|
|||
|
||||
namespace
|
||||
{
|
||||
double const kCertaintyEps = 1e-6;
|
||||
using Id = base::GeoObjectId;
|
||||
|
||||
double const kCertaintyEps = 1e-6;
|
||||
string const kRegionsData = R"#(
|
||||
-4611686018427080071 {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-80.1142033187951, 21.55511095]}, "properties": {"name": "Cuba", "rank": 2, "address": {"country": "Cuba"}}}
|
||||
-4611686018425533273 {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-78.7260117405499, 21.74300205]}, "properties": {"name": "Ciego de Ávila", "rank": 4, "address": {"region": "Ciego de Ávila", "country": "Cuba"}}}
|
||||
|
@ -153,42 +154,108 @@ UNIT_TEST(Geocoder_MismatchedLocality)
|
|||
TestGeocoder(geocoder, "Moscow Krymskaya 3", {});
|
||||
}
|
||||
|
||||
UNIT_TEST(Geocoder_StreetWithNumber)
|
||||
// Geocoder_StreetWithNumber* ----------------------------------------------------------------------
|
||||
UNIT_TEST(Geocoder_StreetWithNumberInCity)
|
||||
{
|
||||
string const kData = R"#(
|
||||
10 {"properties": {"address": {"locality": "Москва"}}}
|
||||
20 {"properties": {"address": {"locality": "Краснокамск"}}}
|
||||
|
||||
11 {"properties": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}
|
||||
|
||||
12 {"properties": {"address": {"locality": "Москва", "street": "4-я улица 8 Марта"}}}
|
||||
|
||||
13 {"properties": {"address": {"locality": "Москва", "street": "8 Марта"}}}
|
||||
|
||||
21 {"properties": {"address": {"locality": "Краснокамск", "street": "улица 8 Марта"}}}
|
||||
25 {"properties": {"address": {"locality": "Краснокамск", "street": "Январская улица"}}}
|
||||
26 {"properties": {"address": {"locality": "Краснокамск", "street": "Январская улица", "building": "8"}}}
|
||||
20 {"properties": {"address": {"locality": "Краснокамск"}}}
|
||||
28 {"properties": {"address": {"locality": "Краснокамск", "street": "улица 1905 года"}}}
|
||||
)#";
|
||||
|
||||
ScopedFile const regionsJsonFile("regions.jsonl", kData);
|
||||
Geocoder geocoder(regionsJsonFile.GetFullPath());
|
||||
|
||||
using Id = base::GeoObjectId;
|
||||
TestGeocoder(geocoder, "Москва, улица 1905 года", {{Id{11}, 1.0}});
|
||||
TestGeocoder(geocoder, "Москва, 1905 года", {{Id{11}, 1.0}});
|
||||
TestGeocoder(geocoder, "Краснокамск, улица 1905 года", {});
|
||||
|
||||
TestGeocoder(geocoder, "Москва, 4-я улица 8 Марта", {{Id{12}, 1.0}});
|
||||
TestGeocoder(geocoder, "Москва, 4-я 8 Марта", {{Id{12}, 1.0}});
|
||||
|
||||
TestGeocoder(geocoder, "Москва, 8 Марта", {{Id{13}, 1.0}});
|
||||
TestGeocoder(geocoder, "Москва, улица 8 Марта", {{Id{13}, 1.0}});
|
||||
|
||||
TestGeocoder(geocoder, "Краснокамск, улица 8 Марта", {{Id{21}, 1.0}});
|
||||
TestGeocoder(geocoder, "Краснокамск, 8 Марта", {{Id{21}, 1.0}});
|
||||
TestGeocoder(geocoder, "Краснокамск, Январская 8", {{Id{26}, 1.0}});
|
||||
}
|
||||
|
||||
// The locality in the query carries an extra leading word ("город"); the street whose
// name contains a number is expected as the only result, with certainty 1.0.
UNIT_TEST(Geocoder_StreetWithNumberInClassifiedCity)
{
  string const kData = R"#(
10 {"properties": {"address": {"locality": "Москва"}}}
11 {"properties": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}
)#";

  ScopedFile const regionsJsonFile("regions.jsonl", kData);
  Geocoder geocoder(regionsJsonFile.GetFullPath());

  TestGeocoder(geocoder, "город Москва, улица 1905 года", {{Id{11}, 1.0}});
}
|
||||
|
||||
// The query names only a street with a number in its name and no locality; the
// same-named streets of both localities are expected, each with certainty 1.0.
UNIT_TEST(Geocoder_StreetWithNumberInAnyCity)
{
  string const kData = R"#(
10 {"properties": {"address": {"locality": "Москва"}}}
11 {"properties": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}

20 {"properties": {"address": {"locality": "Краснокамск"}}}
28 {"properties": {"address": {"locality": "Краснокамск", "street": "улица 1905 года"}}}
)#";

  ScopedFile const regionsJsonFile("regions.jsonl", kData);
  Geocoder geocoder(regionsJsonFile.GetFullPath());

  TestGeocoder(geocoder, "улица 1905 года", {{Id{11}, 1.0}, {Id{28}, 1.0}});
}
|
||||
|
||||
// The query omits the word "улица" that is present in the indexed street name; the
// street is still expected as the only result, with certainty 1.0.
UNIT_TEST(Geocoder_StreetWithNumberAndWithoutStreetSynonym)
{
  string const kData = R"#(
10 {"properties": {"address": {"locality": "Москва"}}}
11 {"properties": {"address": {"locality": "Москва", "street": "улица 1905 года"}}}
)#";

  ScopedFile const regionsJsonFile("regions.jsonl", kData);
  Geocoder geocoder(regionsJsonFile.GetFullPath());

  TestGeocoder(geocoder, "Москва, 1905 года", {{Id{11}, 1.0}});
}
|
||||
|
||||
// The indexed street name has no "улица" word while the query adds it; the street is
// expected as the only result, with certainty 1.0.
UNIT_TEST(Geocoder_UntypedStreetWithNumberAndStreetSynonym)
{
  string const kData = R"#(
10 {"properties": {"address": {"locality": "Москва"}}}
13 {"properties": {"address": {"locality": "Москва", "street": "8 Марта"}}}
)#";

  ScopedFile const regionsJsonFile("regions.jsonl", kData);
  Geocoder geocoder(regionsJsonFile.GetFullPath());

  TestGeocoder(geocoder, "Москва, улица 8 Марта", {{Id{13}, 1.0}});
}
|
||||
|
||||
// Two indexed streets differ only by the leading "4-я" part; the query with both
// numbers is expected to return only the longer-named street, with certainty 1.0.
UNIT_TEST(Geocoder_StreetWithTwoNumbers)
{
  string const kData = R"#(
10 {"properties": {"address": {"locality": "Москва"}}}
12 {"properties": {"address": {"locality": "Москва", "street": "4-я улица 8 Марта"}}}

13 {"properties": {"address": {"locality": "Москва", "street": "улица 8 Марта"}}}
)#";

  ScopedFile const regionsJsonFile("regions.jsonl", kData);
  Geocoder geocoder(regionsJsonFile.GetFullPath());

  TestGeocoder(geocoder, "Москва, 4-я улица 8 Марта", {{Id{12}, 1.0}});
}
|
||||
|
||||
// The street name contains a number and the query ends with a house number; the
// building, not the street, is expected as the only result, with certainty 1.0.
UNIT_TEST(Geocoder_BuildingOnStreetWithNumber)
{
  string const kData = R"#(
10 {"properties": {"address": {"locality": "Москва"}}}
13 {"properties": {"address": {"locality": "Москва", "street": "улица 8 Марта"}}}
15 {"properties": {"address": {"locality": "Москва", "street": "улица 8 Марта", "building": "4"}}}
)#";

  ScopedFile const regionsJsonFile("regions.jsonl", kData);
  Geocoder geocoder(regionsJsonFile.GetFullPath());

  TestGeocoder(geocoder, "Москва, улица 8 Марта, 4", {{Id{15}, 1.0}});
}
|
||||
|
||||
//--------------------------------------------------------------------------------------------------
|
||||
UNIT_TEST(Geocoder_LocalityBuilding)
|
||||
{
|
||||
string const kData = R"#(
|
||||
|
|
Loading…
Add table
Reference in a new issue