[geocoder] Do not return anything if a house number is present but we could not find it.

This commit is contained in:
Maxim Pimenov 2018-11-28 17:18:24 +03:00 committed by Tatiana Yan
parent 43fb388d7c
commit 6b26fef83a
7 changed files with 100 additions and 52 deletions

View file

@ -4,7 +4,6 @@ include_directories(${OMIM_ROOT}/3party/jansson/src)
set(
SRC
beam.cpp
beam.hpp
geocoder.cpp
geocoder.hpp

View file

@ -1,32 +0,0 @@
#include "geocoder/beam.hpp"
#include "base/assert.hpp"
#include "base/macros.hpp"
#include <algorithm>
namespace geocoder
{
Beam::Beam(size_t capacity) : m_capacity(capacity) { m_entries.reserve(m_capacity); }
void Beam::Add(Key const & key, Value value)
{
if (PREDICT_FALSE(m_capacity == 0))
return;
Entry const e(key, value);
auto it = std::lower_bound(m_entries.begin(), m_entries.end(), e);
if (it == m_entries.end())
{
if (m_entries.size() < m_capacity)
m_entries.emplace_back(e);
return;
}
if (m_entries.size() == m_capacity)
m_entries.pop_back();
m_entries.insert(it, e);
}
} // namespace geocoder

View file

@ -1,6 +1,7 @@
#pragma once
#include "base/geo_object_id.hpp"
#include "base/macros.hpp"
#include <vector>
@ -9,27 +10,47 @@ namespace geocoder
// A data structure to perform the beam search with.
// Maintains a list of (Key, Value) pairs sorted in the decreasing
// order of Values.
template <typename TKey, typename TValue>
class Beam
{
public:
using Key = base::GeoObjectId;
using Value = double;
using Key = TKey;
using Value = TValue;
struct Entry
{
Key m_key;
Value m_value;
Entry(Key const & key, Value value) : m_key(key), m_value(value) {}
Entry(Key const & key, Value const & value) : m_key(key), m_value(value) {}
bool operator<(Entry const & rhs) const { return m_value > rhs.m_value; }
};
explicit Beam(size_t capacity);
explicit Beam(size_t capacity) : m_capacity(capacity) { m_entries.reserve(m_capacity); }
// O(log(n) + n) for |n| entries.
// O(|m_capacity|) in the worst case.
void Add(Key const & key, Value value);
void Add(Key const & key, Value value)
{
if (PREDICT_FALSE(m_capacity == 0))
return;
Entry const e(key, value);
auto it = std::lower_bound(m_entries.begin(), m_entries.end(), e);
if (it == m_entries.end())
{
if (m_entries.size() < m_capacity)
m_entries.emplace_back(e);
return;
}
if (m_entries.size() == m_capacity)
m_entries.pop_back();
m_entries.insert(it, e);
}
std::vector<Entry> const & GetEntries() const { return m_entries; }

View file

@ -142,9 +142,9 @@ bool Geocoder::Context::IsTokenUsed(size_t id) const
bool Geocoder::Context::AllTokensUsed() const { return m_numUsedTokens == m_tokens.size(); }
void Geocoder::Context::AddResult(base::GeoObjectId const & osmId, double certainty)
void Geocoder::Context::AddResult(base::GeoObjectId const & osmId, double certainty, Type type)
{
m_beam.Add(osmId, certainty);
m_beam.Add(BeamKey(osmId, type), certainty);
}
void Geocoder::Context::FillResults(vector<Result> & results) const
@ -152,13 +152,15 @@ void Geocoder::Context::FillResults(vector<Result> & results) const
results.clear();
results.reserve(m_beam.GetEntries().size());
set<decltype(m_beam)::Key> seen;
set<base::GeoObjectId> seen;
for (auto const & e : m_beam.GetEntries())
{
if (!seen.insert(e.m_key).second)
if (!seen.insert(e.m_key.m_osmId).second)
continue;
results.emplace_back(e.m_key /* osmId */, e.m_value /* certainty */);
if (m_surelyGotHouseNumber && e.m_key.m_type != Type::Building)
continue;
results.emplace_back(e.m_key.m_osmId, e.m_value /* certainty */);
}
if (!results.empty())
@ -245,7 +247,7 @@ void Geocoder::Go(Context & ctx, Type type) const
for (auto const * e : curLayer.m_entries)
{
ctx.AddResult(e->m_osmId, certainty);
ctx.AddResult(e->m_osmId, certainty, type);
}
ctx.GetLayers().emplace_back(move(curLayer));
@ -258,8 +260,7 @@ void Geocoder::Go(Context & ctx, Type type) const
Go(ctx, NextType(type));
}
void Geocoder::FillBuildingsLayer(Context const & ctx, Tokens const & subquery,
Layer & curLayer) const
void Geocoder::FillBuildingsLayer(Context & ctx, Tokens const & subquery, Layer & curLayer) const
{
if (ctx.GetLayers().empty())
return;
@ -272,6 +273,11 @@ void Geocoder::FillBuildingsLayer(Context const & ctx, Tokens const & subquery,
if (!search::house_numbers::LooksLikeHouseNumber(subqueryHN, false /* isPrefix */))
return;
// We've already filled a street layer and now see something that resembles
// a house number. While it still can be something else (a zip code, for example)
// let's stay on the safer side and set the house number bit.
ctx.SetHouseNumberBit();
for (auto const & se : layer.m_entries)
{
for (auto const & be : se->m_buildingsOnStreet)

View file

@ -48,6 +48,14 @@ public:
class Context
{
public:
struct BeamKey
{
BeamKey(base::GeoObjectId osmId, Type type) : m_osmId(osmId), m_type(type) {}
base::GeoObjectId m_osmId;
Type m_type;
};
Context(std::string const & query);
void Clear();
@ -66,7 +74,7 @@ public:
// Returns true iff all tokens are used.
bool AllTokensUsed() const;
void AddResult(base::GeoObjectId const & osmId, double certainty);
void AddResult(base::GeoObjectId const & osmId, double certainty, Type type);
void FillResults(std::vector<Result> & results) const;
@ -74,15 +82,24 @@ public:
std::vector<Layer> const & GetLayers() const;
void SetHouseNumberBit() { m_surelyGotHouseNumber = true; }
private:
Tokens m_tokens;
std::vector<Type> m_tokenTypes;
size_t m_numUsedTokens = 0;
// Sticky bit that records a heuristic check whether
// the current query contains a house number.
// The rationale is that we must only emit buildings in this case
// and implement a fallback to a more powerful geocoder if we
// could not find a building.
bool m_surelyGotHouseNumber = false;
// The highest value of certainty for a fixed amount of
// the most relevant retrieved osm ids.
Beam m_beam;
Beam<BeamKey, double> m_beam;
std::vector<Layer> m_layers;
};
@ -96,7 +113,7 @@ public:
private:
void Go(Context & ctx, Type type) const;
void FillBuildingsLayer(Context const & ctx, Tokens const & subquery, Layer & curLayer) const;
void FillBuildingsLayer(Context & ctx, Tokens const & subquery, Layer & curLayer) const;
void FillRegularLayer(Context const & ctx, Type type, Tokens const & subquery,
Layer & curLayer) const;

View file

@ -41,13 +41,14 @@ void TestGeocoder(Geocoder & geocoder, string const & query, vector<Result> && e
{
vector<Result> actual;
geocoder.ProcessQuery(query, actual);
TEST_EQUAL(actual.size(), expected.size(), (actual, expected));
TEST_EQUAL(actual.size(), expected.size(), (query, actual, expected));
sort(actual.begin(), actual.end(), base::LessBy(&Result::m_osmId));
sort(expected.begin(), expected.end(), base::LessBy(&Result::m_osmId));
for (size_t i = 0; i < actual.size(); ++i)
{
TEST(actual[i].m_certainty >= 0.0 && actual[i].m_certainty <= 1.0, (actual[i].m_certainty));
TEST_EQUAL(actual[i].m_osmId, expected[i].m_osmId, ());
TEST(actual[i].m_certainty >= 0.0 && actual[i].m_certainty <= 1.0,
(query, actual[i].m_certainty));
TEST_EQUAL(actual[i].m_osmId, expected[i].m_osmId, (query));
TEST(base::AlmostEqualAbs(actual[i].m_certainty, expected[i].m_certainty, kCertaintyEps),
(query, actual[i].m_certainty, expected[i].m_certainty));
}
@ -81,4 +82,38 @@ UNIT_TEST(Geocoder_Hierarchy)
TEST_EQUAL((*entries)[0]->m_address[static_cast<size_t>(Type::Subregion)], Split("florencia"),
());
}
UNIT_TEST(Geocoder_OnlyBuildings)
{
string const kData = R"#(
10 {"properties": {"address": {"locality": "Some Locality"}}}
21 {"properties": {"address": {"street": "Good", "locality": "Some Locality"}}}
22 {"properties": {"address": {"building": "5", "street": "Good", "locality": "Some Locality"}}}
31 {"properties": {"address": {"street": "Bad", "locality": "Some Locality"}}}
32 {"properties": {"address": {"building": "10", "street": "Bad", "locality": "Some Locality"}}}
)#";
ScopedFile const regionsJsonFile("regions.jsonl", kData);
Geocoder geocoder(regionsJsonFile.GetFullPath());
base::GeoObjectId const localityId(10);
base::GeoObjectId const goodStreetId(21);
base::GeoObjectId const badStreetId(31);
base::GeoObjectId const building5(22);
base::GeoObjectId const building10(32);
TestGeocoder(geocoder, "some locality", {{localityId, 1.0}});
TestGeocoder(geocoder, "some locality good", {{goodStreetId, 1.0}, {localityId, 0.857143}});
TestGeocoder(geocoder, "some locality bad", {{badStreetId, 1.0}, {localityId, 0.857143}});
TestGeocoder(geocoder, "some locality good 5", {{building5, 1.0}});
TestGeocoder(geocoder, "some locality bad 10", {{building10, 1.0}});
// There is a building "10" on Bad Street but we should not return it.
// Another possible resolution would be to return just "Good Street" (relaxed matching)
// but at the time of writing the goal is to either have an exact match or no match at all.
TestGeocoder(geocoder, "some locality good 10", {});
}
} // namespace geocoder

View file

@ -232,6 +232,8 @@ void Hierarchy::IndexHouses()
size_t const t = static_cast<size_t>(Type::Street);
auto const * streetCandidates = GetEntries(be.m_address[t]);
if (streetCandidates == nullptr)
continue;
for (auto & se : *streetCandidates)
{