forked from organicmaps/organicmaps
[geocoder] Do not return anything if a house number is present but we could not find it.
This commit is contained in:
parent
43fb388d7c
commit
6b26fef83a
7 changed files with 100 additions and 52 deletions
|
@ -4,7 +4,6 @@ include_directories(${OMIM_ROOT}/3party/jansson/src)
|
|||
|
||||
set(
|
||||
SRC
|
||||
beam.cpp
|
||||
beam.hpp
|
||||
geocoder.cpp
|
||||
geocoder.hpp
|
||||
|
|
|
@ -1,32 +0,0 @@
|
|||
#include "geocoder/beam.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace geocoder
|
||||
{
|
||||
Beam::Beam(size_t capacity) : m_capacity(capacity) { m_entries.reserve(m_capacity); }
|
||||
|
||||
void Beam::Add(Key const & key, Value value)
|
||||
{
|
||||
if (PREDICT_FALSE(m_capacity == 0))
|
||||
return;
|
||||
|
||||
Entry const e(key, value);
|
||||
auto it = std::lower_bound(m_entries.begin(), m_entries.end(), e);
|
||||
|
||||
if (it == m_entries.end())
|
||||
{
|
||||
if (m_entries.size() < m_capacity)
|
||||
m_entries.emplace_back(e);
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_entries.size() == m_capacity)
|
||||
m_entries.pop_back();
|
||||
|
||||
m_entries.insert(it, e);
|
||||
}
|
||||
} // namespace geocoder
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "base/geo_object_id.hpp"
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
|
@ -9,27 +10,47 @@ namespace geocoder
|
|||
// A data structure to perform the beam search with.
|
||||
// Maintains a list of (Key, Value) pairs sorted in the decreasing
|
||||
// order of Values.
|
||||
template <typename TKey, typename TValue>
|
||||
class Beam
|
||||
{
|
||||
public:
|
||||
using Key = base::GeoObjectId;
|
||||
using Value = double;
|
||||
using Key = TKey;
|
||||
using Value = TValue;
|
||||
|
||||
struct Entry
|
||||
{
|
||||
Key m_key;
|
||||
Value m_value;
|
||||
|
||||
Entry(Key const & key, Value value) : m_key(key), m_value(value) {}
|
||||
Entry(Key const & key, Value const & value) : m_key(key), m_value(value) {}
|
||||
|
||||
bool operator<(Entry const & rhs) const { return m_value > rhs.m_value; }
|
||||
};
|
||||
|
||||
explicit Beam(size_t capacity);
|
||||
explicit Beam(size_t capacity) : m_capacity(capacity) { m_entries.reserve(m_capacity); }
|
||||
|
||||
// O(log(n) + n) for |n| entries.
|
||||
// O(|m_capacity|) in the worst case.
|
||||
void Add(Key const & key, Value value);
|
||||
void Add(Key const & key, Value value)
|
||||
{
|
||||
if (PREDICT_FALSE(m_capacity == 0))
|
||||
return;
|
||||
|
||||
Entry const e(key, value);
|
||||
auto it = std::lower_bound(m_entries.begin(), m_entries.end(), e);
|
||||
|
||||
if (it == m_entries.end())
|
||||
{
|
||||
if (m_entries.size() < m_capacity)
|
||||
m_entries.emplace_back(e);
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_entries.size() == m_capacity)
|
||||
m_entries.pop_back();
|
||||
|
||||
m_entries.insert(it, e);
|
||||
}
|
||||
|
||||
std::vector<Entry> const & GetEntries() const { return m_entries; }
|
||||
|
||||
|
|
|
@ -142,9 +142,9 @@ bool Geocoder::Context::IsTokenUsed(size_t id) const
|
|||
|
||||
bool Geocoder::Context::AllTokensUsed() const { return m_numUsedTokens == m_tokens.size(); }
|
||||
|
||||
void Geocoder::Context::AddResult(base::GeoObjectId const & osmId, double certainty)
|
||||
void Geocoder::Context::AddResult(base::GeoObjectId const & osmId, double certainty, Type type)
|
||||
{
|
||||
m_beam.Add(osmId, certainty);
|
||||
m_beam.Add(BeamKey(osmId, type), certainty);
|
||||
}
|
||||
|
||||
void Geocoder::Context::FillResults(vector<Result> & results) const
|
||||
|
@ -152,13 +152,15 @@ void Geocoder::Context::FillResults(vector<Result> & results) const
|
|||
results.clear();
|
||||
results.reserve(m_beam.GetEntries().size());
|
||||
|
||||
set<decltype(m_beam)::Key> seen;
|
||||
set<base::GeoObjectId> seen;
|
||||
for (auto const & e : m_beam.GetEntries())
|
||||
{
|
||||
if (!seen.insert(e.m_key).second)
|
||||
if (!seen.insert(e.m_key.m_osmId).second)
|
||||
continue;
|
||||
|
||||
results.emplace_back(e.m_key /* osmId */, e.m_value /* certainty */);
|
||||
if (m_surelyGotHouseNumber && e.m_key.m_type != Type::Building)
|
||||
continue;
|
||||
results.emplace_back(e.m_key.m_osmId, e.m_value /* certainty */);
|
||||
}
|
||||
|
||||
if (!results.empty())
|
||||
|
@ -245,7 +247,7 @@ void Geocoder::Go(Context & ctx, Type type) const
|
|||
|
||||
for (auto const * e : curLayer.m_entries)
|
||||
{
|
||||
ctx.AddResult(e->m_osmId, certainty);
|
||||
ctx.AddResult(e->m_osmId, certainty, type);
|
||||
}
|
||||
|
||||
ctx.GetLayers().emplace_back(move(curLayer));
|
||||
|
@ -258,8 +260,7 @@ void Geocoder::Go(Context & ctx, Type type) const
|
|||
Go(ctx, NextType(type));
|
||||
}
|
||||
|
||||
void Geocoder::FillBuildingsLayer(Context const & ctx, Tokens const & subquery,
|
||||
Layer & curLayer) const
|
||||
void Geocoder::FillBuildingsLayer(Context & ctx, Tokens const & subquery, Layer & curLayer) const
|
||||
{
|
||||
if (ctx.GetLayers().empty())
|
||||
return;
|
||||
|
@ -272,6 +273,11 @@ void Geocoder::FillBuildingsLayer(Context const & ctx, Tokens const & subquery,
|
|||
if (!search::house_numbers::LooksLikeHouseNumber(subqueryHN, false /* isPrefix */))
|
||||
return;
|
||||
|
||||
// We've already filled a street layer and now see something that resembles
|
||||
// a house number. While it still can be something else (a zip code, for example)
|
||||
// let's stay on the safer side and set the house number bit.
|
||||
ctx.SetHouseNumberBit();
|
||||
|
||||
for (auto const & se : layer.m_entries)
|
||||
{
|
||||
for (auto const & be : se->m_buildingsOnStreet)
|
||||
|
|
|
@ -48,6 +48,14 @@ public:
|
|||
class Context
|
||||
{
|
||||
public:
|
||||
struct BeamKey
|
||||
{
|
||||
BeamKey(base::GeoObjectId osmId, Type type) : m_osmId(osmId), m_type(type) {}
|
||||
|
||||
base::GeoObjectId m_osmId;
|
||||
Type m_type;
|
||||
};
|
||||
|
||||
Context(std::string const & query);
|
||||
|
||||
void Clear();
|
||||
|
@ -66,7 +74,7 @@ public:
|
|||
// Returns true iff all tokens are used.
|
||||
bool AllTokensUsed() const;
|
||||
|
||||
void AddResult(base::GeoObjectId const & osmId, double certainty);
|
||||
void AddResult(base::GeoObjectId const & osmId, double certainty, Type type);
|
||||
|
||||
void FillResults(std::vector<Result> & results) const;
|
||||
|
||||
|
@ -74,15 +82,24 @@ public:
|
|||
|
||||
std::vector<Layer> const & GetLayers() const;
|
||||
|
||||
void SetHouseNumberBit() { m_surelyGotHouseNumber = true; }
|
||||
|
||||
private:
|
||||
Tokens m_tokens;
|
||||
std::vector<Type> m_tokenTypes;
|
||||
|
||||
size_t m_numUsedTokens = 0;
|
||||
|
||||
// Sticky bit that records a heuristic check whether
|
||||
// the current query contains a house number.
|
||||
// The rationale is that we must only emit buildings in this case
|
||||
// and implement a fallback to a more powerful geocoder if we
|
||||
// could not find a building.
|
||||
bool m_surelyGotHouseNumber = false;
|
||||
|
||||
// The highest value of certainty for a fixed amount of
|
||||
// the most relevant retrieved osm ids.
|
||||
Beam m_beam;
|
||||
Beam<BeamKey, double> m_beam;
|
||||
|
||||
std::vector<Layer> m_layers;
|
||||
};
|
||||
|
@ -96,7 +113,7 @@ public:
|
|||
private:
|
||||
void Go(Context & ctx, Type type) const;
|
||||
|
||||
void FillBuildingsLayer(Context const & ctx, Tokens const & subquery, Layer & curLayer) const;
|
||||
void FillBuildingsLayer(Context & ctx, Tokens const & subquery, Layer & curLayer) const;
|
||||
|
||||
void FillRegularLayer(Context const & ctx, Type type, Tokens const & subquery,
|
||||
Layer & curLayer) const;
|
||||
|
|
|
@ -41,13 +41,14 @@ void TestGeocoder(Geocoder & geocoder, string const & query, vector<Result> && e
|
|||
{
|
||||
vector<Result> actual;
|
||||
geocoder.ProcessQuery(query, actual);
|
||||
TEST_EQUAL(actual.size(), expected.size(), (actual, expected));
|
||||
TEST_EQUAL(actual.size(), expected.size(), (query, actual, expected));
|
||||
sort(actual.begin(), actual.end(), base::LessBy(&Result::m_osmId));
|
||||
sort(expected.begin(), expected.end(), base::LessBy(&Result::m_osmId));
|
||||
for (size_t i = 0; i < actual.size(); ++i)
|
||||
{
|
||||
TEST(actual[i].m_certainty >= 0.0 && actual[i].m_certainty <= 1.0, (actual[i].m_certainty));
|
||||
TEST_EQUAL(actual[i].m_osmId, expected[i].m_osmId, ());
|
||||
TEST(actual[i].m_certainty >= 0.0 && actual[i].m_certainty <= 1.0,
|
||||
(query, actual[i].m_certainty));
|
||||
TEST_EQUAL(actual[i].m_osmId, expected[i].m_osmId, (query));
|
||||
TEST(base::AlmostEqualAbs(actual[i].m_certainty, expected[i].m_certainty, kCertaintyEps),
|
||||
(query, actual[i].m_certainty, expected[i].m_certainty));
|
||||
}
|
||||
|
@ -81,4 +82,38 @@ UNIT_TEST(Geocoder_Hierarchy)
|
|||
TEST_EQUAL((*entries)[0]->m_address[static_cast<size_t>(Type::Subregion)], Split("florencia"),
|
||||
());
|
||||
}
|
||||
|
||||
UNIT_TEST(Geocoder_OnlyBuildings)
|
||||
{
|
||||
string const kData = R"#(
|
||||
10 {"properties": {"address": {"locality": "Some Locality"}}}
|
||||
|
||||
21 {"properties": {"address": {"street": "Good", "locality": "Some Locality"}}}
|
||||
22 {"properties": {"address": {"building": "5", "street": "Good", "locality": "Some Locality"}}}
|
||||
|
||||
31 {"properties": {"address": {"street": "Bad", "locality": "Some Locality"}}}
|
||||
32 {"properties": {"address": {"building": "10", "street": "Bad", "locality": "Some Locality"}}}
|
||||
)#";
|
||||
|
||||
ScopedFile const regionsJsonFile("regions.jsonl", kData);
|
||||
Geocoder geocoder(regionsJsonFile.GetFullPath());
|
||||
|
||||
base::GeoObjectId const localityId(10);
|
||||
base::GeoObjectId const goodStreetId(21);
|
||||
base::GeoObjectId const badStreetId(31);
|
||||
base::GeoObjectId const building5(22);
|
||||
base::GeoObjectId const building10(32);
|
||||
|
||||
TestGeocoder(geocoder, "some locality", {{localityId, 1.0}});
|
||||
TestGeocoder(geocoder, "some locality good", {{goodStreetId, 1.0}, {localityId, 0.857143}});
|
||||
TestGeocoder(geocoder, "some locality bad", {{badStreetId, 1.0}, {localityId, 0.857143}});
|
||||
|
||||
TestGeocoder(geocoder, "some locality good 5", {{building5, 1.0}});
|
||||
TestGeocoder(geocoder, "some locality bad 10", {{building10, 1.0}});
|
||||
|
||||
// There is a building "10" on Bad Street but we should not return it.
|
||||
// Another possible resolution would be to return just "Good Street" (relaxed matching)
|
||||
// but at the time of writing the goal is to either have an exact match or no match at all.
|
||||
TestGeocoder(geocoder, "some locality good 10", {});
|
||||
}
|
||||
} // namespace geocoder
|
||||
|
|
|
@ -232,6 +232,8 @@ void Hierarchy::IndexHouses()
|
|||
size_t const t = static_cast<size_t>(Type::Street);
|
||||
|
||||
auto const * streetCandidates = GetEntries(be.m_address[t]);
|
||||
if (streetCandidates == nullptr)
|
||||
continue;
|
||||
|
||||
for (auto & se : *streetCandidates)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue