diff --git a/geocoder/CMakeLists.txt b/geocoder/CMakeLists.txt index 01f2cf1a35..d10f7ce489 100644 --- a/geocoder/CMakeLists.txt +++ b/geocoder/CMakeLists.txt @@ -4,7 +4,6 @@ include_directories(${OMIM_ROOT}/3party/jansson/src) set( SRC - beam.cpp beam.hpp geocoder.cpp geocoder.hpp diff --git a/geocoder/beam.cpp b/geocoder/beam.cpp deleted file mode 100644 index 996f0523cc..0000000000 --- a/geocoder/beam.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include "geocoder/beam.hpp" - -#include "base/assert.hpp" -#include "base/macros.hpp" - -#include - -namespace geocoder -{ -Beam::Beam(size_t capacity) : m_capacity(capacity) { m_entries.reserve(m_capacity); } - -void Beam::Add(Key const & key, Value value) -{ - if (PREDICT_FALSE(m_capacity == 0)) - return; - - Entry const e(key, value); - auto it = std::lower_bound(m_entries.begin(), m_entries.end(), e); - - if (it == m_entries.end()) - { - if (m_entries.size() < m_capacity) - m_entries.emplace_back(e); - return; - } - - if (m_entries.size() == m_capacity) - m_entries.pop_back(); - - m_entries.insert(it, e); -} -} // namespace geocoder diff --git a/geocoder/beam.hpp b/geocoder/beam.hpp index 68534ce125..d0728889f1 100644 --- a/geocoder/beam.hpp +++ b/geocoder/beam.hpp @@ -1,6 +1,7 @@ #pragma once #include "base/geo_object_id.hpp" +#include "base/macros.hpp" #include @@ -9,27 +10,47 @@ namespace geocoder // A data structure to perform the beam search with. // Maintains a list of (Key, Value) pairs sorted in the decreasing // order of Values. +template class Beam { public: - using Key = base::GeoObjectId; - using Value = double; + using Key = TKey; + using Value = TValue; struct Entry { Key m_key; Value m_value; - Entry(Key const & key, Value value) : m_key(key), m_value(value) {} + Entry(Key const & key, Value const & value) : m_key(key), m_value(value) {} bool operator<(Entry const & rhs) const { return m_value > rhs.m_value; } }; - explicit Beam(size_t capacity); + explicit Beam(size_t capacity) : m_capacity(capacity) { m_entries.reserve(m_capacity); } // O(log(n) + n) for |n| entries. // O(|m_capacity|) in the worst case. - void Add(Key const & key, Value value); + void Add(Key const & key, Value value) + { + if (PREDICT_FALSE(m_capacity == 0)) + return; + + Entry const e(key, value); + auto it = std::lower_bound(m_entries.begin(), m_entries.end(), e); + + if (it == m_entries.end()) + { + if (m_entries.size() < m_capacity) + m_entries.emplace_back(e); + return; + } + + if (m_entries.size() == m_capacity) + m_entries.pop_back(); + + m_entries.insert(it, e); + } std::vector const & GetEntries() const { return m_entries; } diff --git a/geocoder/geocoder.cpp b/geocoder/geocoder.cpp index 64c0cd41e3..e268eedbd9 100644 --- a/geocoder/geocoder.cpp +++ b/geocoder/geocoder.cpp @@ -142,9 +142,9 @@ bool Geocoder::Context::IsTokenUsed(size_t id) const bool Geocoder::Context::AllTokensUsed() const { return m_numUsedTokens == m_tokens.size(); } -void Geocoder::Context::AddResult(base::GeoObjectId const & osmId, double certainty) +void Geocoder::Context::AddResult(base::GeoObjectId const & osmId, double certainty, Type type) { - m_beam.Add(osmId, certainty); + m_beam.Add(BeamKey(osmId, type), certainty); } void Geocoder::Context::FillResults(vector & results) const @@ -152,13 +152,15 @@ void Geocoder::Context::FillResults(vector & results) const results.clear(); results.reserve(m_beam.GetEntries().size()); - set seen; + set seen; for (auto const & e : m_beam.GetEntries()) { - if (!seen.insert(e.m_key).second) + if (!seen.insert(e.m_key.m_osmId).second) continue; - results.emplace_back(e.m_key /* osmId */, e.m_value /* certainty */); + if (m_surelyGotHouseNumber && e.m_key.m_type != Type::Building) + continue; + results.emplace_back(e.m_key.m_osmId, e.m_value /* certainty */); } if (!results.empty()) @@ -245,7 +247,7 @@ void Geocoder::Go(Context & ctx, Type type) const for (auto const * e : curLayer.m_entries) { - ctx.AddResult(e->m_osmId, certainty); + ctx.AddResult(e->m_osmId, certainty, type); } ctx.GetLayers().emplace_back(move(curLayer)); @@ -258,8 +260,7 @@ void Geocoder::Go(Context & ctx, Type type) const Go(ctx, NextType(type)); } -void Geocoder::FillBuildingsLayer(Context const & ctx, Tokens const & subquery, - Layer & curLayer) const +void Geocoder::FillBuildingsLayer(Context & ctx, Tokens const & subquery, Layer & curLayer) const { if (ctx.GetLayers().empty()) return; @@ -272,6 +273,11 @@ void Geocoder::FillBuildingsLayer(Context const & ctx, Tokens const & subquery, if (!search::house_numbers::LooksLikeHouseNumber(subqueryHN, false /* isPrefix */)) return; + // We've already filled a street layer and now see something that resembles + // a house number. While it still can be something else (a zip code, for example) + // let's stay on the safer side and set the house number bit. + ctx.SetHouseNumberBit(); + for (auto const & se : layer.m_entries) { for (auto const & be : se->m_buildingsOnStreet) diff --git a/geocoder/geocoder.hpp b/geocoder/geocoder.hpp index e9c07b8ab9..0aaa29cf4d 100644 --- a/geocoder/geocoder.hpp +++ b/geocoder/geocoder.hpp @@ -48,6 +48,14 @@ public: class Context { public: + struct BeamKey + { + BeamKey(base::GeoObjectId osmId, Type type) : m_osmId(osmId), m_type(type) {} + + base::GeoObjectId m_osmId; + Type m_type; + }; + Context(std::string const & query); void Clear(); @@ -66,7 +74,7 @@ public: // Returns true iff all tokens are used. bool AllTokensUsed() const; - void AddResult(base::GeoObjectId const & osmId, double certainty); + void AddResult(base::GeoObjectId const & osmId, double certainty, Type type); void FillResults(std::vector & results) const; @@ -74,15 +82,24 @@ public: std::vector const & GetLayers() const; + void SetHouseNumberBit() { m_surelyGotHouseNumber = true; } + private: Tokens m_tokens; std::vector m_tokenTypes; size_t m_numUsedTokens = 0; + // Sticky bit that records a heuristic check whether + // the current query contains a house number. + // The rationale is that we must only emit buildings in this case + // and implement a fallback to a more powerful geocoder if we + // could not find a building. + bool m_surelyGotHouseNumber = false; + // The highest value of certainty for a fixed amount of // the most relevant retrieved osm ids. - Beam m_beam; + Beam m_beam; std::vector m_layers; }; @@ -96,7 +113,7 @@ public: private: void Go(Context & ctx, Type type) const; - void FillBuildingsLayer(Context const & ctx, Tokens const & subquery, Layer & curLayer) const; + void FillBuildingsLayer(Context & ctx, Tokens const & subquery, Layer & curLayer) const; void FillRegularLayer(Context const & ctx, Type type, Tokens const & subquery, Layer & curLayer) const; diff --git a/geocoder/geocoder_tests/geocoder_tests.cpp b/geocoder/geocoder_tests/geocoder_tests.cpp index 43430b9da7..455f9c6279 100644 --- a/geocoder/geocoder_tests/geocoder_tests.cpp +++ b/geocoder/geocoder_tests/geocoder_tests.cpp @@ -41,13 +41,14 @@ void TestGeocoder(Geocoder & geocoder, string const & query, vector && e { vector actual; geocoder.ProcessQuery(query, actual); - TEST_EQUAL(actual.size(), expected.size(), (actual, expected)); + TEST_EQUAL(actual.size(), expected.size(), (query, actual, expected)); sort(actual.begin(), actual.end(), base::LessBy(&Result::m_osmId)); sort(expected.begin(), expected.end(), base::LessBy(&Result::m_osmId)); for (size_t i = 0; i < actual.size(); ++i) { - TEST(actual[i].m_certainty >= 0.0 && actual[i].m_certainty <= 1.0, (actual[i].m_certainty)); - TEST_EQUAL(actual[i].m_osmId, expected[i].m_osmId, ()); + TEST(actual[i].m_certainty >= 0.0 && actual[i].m_certainty <= 1.0, + (query, actual[i].m_certainty)); + TEST_EQUAL(actual[i].m_osmId, expected[i].m_osmId, (query)); TEST(base::AlmostEqualAbs(actual[i].m_certainty, expected[i].m_certainty, kCertaintyEps), (query, actual[i].m_certainty, expected[i].m_certainty)); } @@ -81,4 +82,38 @@ UNIT_TEST(Geocoder_Hierarchy) TEST_EQUAL((*entries)[0]->m_address[static_cast(Type::Subregion)], Split("florencia"), ()); } + +UNIT_TEST(Geocoder_OnlyBuildings) +{ + string const kData = R"#( +10 {"properties": {"address": {"locality": "Some Locality"}}} + +21 {"properties": {"address": {"street": "Good", "locality": "Some Locality"}}} +22 {"properties": {"address": {"building": "5", "street": "Good", "locality": "Some Locality"}}} + +31 {"properties": {"address": {"street": "Bad", "locality": "Some Locality"}}} +32 {"properties": {"address": {"building": "10", "street": "Bad", "locality": "Some Locality"}}} +)#"; + + ScopedFile const regionsJsonFile("regions.jsonl", kData); + Geocoder geocoder(regionsJsonFile.GetFullPath()); + + base::GeoObjectId const localityId(10); + base::GeoObjectId const goodStreetId(21); + base::GeoObjectId const badStreetId(31); + base::GeoObjectId const building5(22); + base::GeoObjectId const building10(32); + + TestGeocoder(geocoder, "some locality", {{localityId, 1.0}}); + TestGeocoder(geocoder, "some locality good", {{goodStreetId, 1.0}, {localityId, 0.857143}}); + TestGeocoder(geocoder, "some locality bad", {{badStreetId, 1.0}, {localityId, 0.857143}}); + + TestGeocoder(geocoder, "some locality good 5", {{building5, 1.0}}); + TestGeocoder(geocoder, "some locality bad 10", {{building10, 1.0}}); + + // There is a building "10" on Bad Street but we should not return it. + // Another possible resolution would be to return just "Good Street" (relaxed matching) + // but at the time of writing the goal is to either have an exact match or no match at all. + TestGeocoder(geocoder, "some locality good 10", {}); +} } // namespace geocoder diff --git a/geocoder/hierarchy.cpp b/geocoder/hierarchy.cpp index 24da2d3046..0a92899221 100644 --- a/geocoder/hierarchy.cpp +++ b/geocoder/hierarchy.cpp @@ -232,6 +232,8 @@ void Hierarchy::IndexHouses() size_t const t = static_cast(Type::Street); auto const * streetCandidates = GetEntries(be.m_address[t]); + if (streetCandidates == nullptr) + continue; for (auto & se : *streetCandidates) {