[search] Added building -> street detection algorithm.

This commit is contained in:
vng 2015-11-26 15:00:52 +03:00 committed by Sergey Yershov
parent 64fae3de62
commit 5e38e4ee25
9 changed files with 203 additions and 13 deletions

View file

@ -1,7 +1,7 @@
# Generator binary
ROOT_DIR = ../..
DEPENDENCIES = generator routing storage indexer platform geometry coding base \
DEPENDENCIES = generator search routing storage indexer platform geometry coding base \
osrm gflags expat tess2 jansson protobuf tomcrypt \
succinct stats_client

View file

@ -70,4 +70,6 @@ public:
FeaturesVector const & GetVector() const { return m_vector; }
FilesContainerR::ReaderT GetReader(string const & tag) const { return m_cont.GetReader(tag); }
string const & GetFilePath() const { return m_cont.GetFileName(); }
};

View file

@ -1,11 +1,14 @@
#include "indexer/search_index_builder.hpp"
#include "search/reverse_geocoding.hpp"
#include "indexer/categories_holder.hpp"
#include "indexer/classificator.hpp"
#include "indexer/feature_algo.hpp"
#include "indexer/feature_utils.hpp"
#include "indexer/feature_visibility.hpp"
#include "indexer/features_vector.hpp"
#include "indexer/index.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_index_values.hpp"
#include "indexer/search_string_utils.hpp"
@ -260,8 +263,7 @@ public:
template <typename TKey, typename TValue>
void AddFeatureNameIndexPairs(FeaturesVectorTest & features, CategoriesHolder & categoriesHolder,
vector<pair<TKey, TValue>> & keyValuePairs,
SingleValueSerializer<TValue> const & serializer)
vector<pair<TKey, TValue>> & keyValuePairs)
{
feature::DataHeader const & header = features.GetHeader();
@ -276,20 +278,49 @@ void AddFeatureNameIndexPairs(FeaturesVectorTest & features, CategoriesHolder &
ReaderSource<ModelReaderPtr> src = features.GetReader(SEARCH_TOKENS_FILE_TAG);
uint64_t index = 0;
FeatureNameInserter<TKey, TValue> inserter(nullptr, keyValuePairs);
int8_t const lang = StringUtf8Multilang::GetLangIndex("default");
uint64_t address = 0, missing = 0;
map<size_t, size_t> bounds;
Index mwmIndex;
/// @todo Find a better solution, or legalize MakeTemporary.
mwmIndex.RegisterMap(platform::LocalCountryFile::MakeTemporary(features.GetFilePath()));
search::ReverseGeocoding rgc(&mwmIndex);
while (src.Size() > 0)
{
feature::AddressData data;
data.Deserialize(src);
inserter.m_val.m_featureId = index++;
string const street = data.Get(feature::AddressData::STREET);
string street;
search::GetStreetNameAsKey(data.Get(feature::AddressData::STREET), street);
if (!street.empty())
inserter(lang, street);
{
FeatureType ft;
features.GetVector().GetByIndex(index, ft);
using TStreet = search::ReverseGeocoding::Street;
vector<TStreet> streets;
rgc.GetNearbyStreets(ft, street, streets);
size_t const ind = rgc.GetMatchedStreetIndex(streets);
if (ind == streets.size())
{
++missing;
//LOG(LWARNING, ("No street found for address", street, ft));
}
else
{
++bounds[ind];
}
++address;
}
++index;
}
LOG(LINFO, ("Address: Matched percent", 100 * (1.0 - missing/double(address))));
LOG(LINFO, ("Address: Upper bounds", bounds));
}
} // namespace
@ -353,7 +384,7 @@ void BuildSearchIndex(FilesContainerR & container, Writer & indexWriter)
SingleValueSerializer<TValue> serializer(codingParams);
vector<pair<TKey, TValue>> searchIndexKeyValuePairs;
AddFeatureNameIndexPairs(features, categoriesHolder, searchIndexKeyValuePairs, serializer);
AddFeatureNameIndexPairs(features, categoriesHolder, searchIndexKeyValuePairs);
sort(searchIndexKeyValuePairs.begin(), searchIndexKeyValuePairs.end());
LOG(LINFO, ("End sorting strings:", timer.ElapsedSeconds()));

View file

@ -4,6 +4,7 @@
#include "indexer/classificator.hpp"
#include "indexer/feature_impl.hpp"
#include "indexer/search_string_utils.hpp"
#include "geometry/angles.hpp"
#include "geometry/distance.hpp"

View file

@ -14,9 +14,6 @@
namespace search
{
void GetStreetNameAsKey(string const & name, string & res);
class FeatureLoader
{
Index const * m_pIndex;

View file

@ -0,0 +1,117 @@
#include "reverse_geocoding.hpp"
#include "indexer/feature.hpp"
#include "indexer/feature_algo.hpp"
#include "indexer/feature_visibility.hpp"
#include "indexer/index.hpp"
#include "indexer/scales.hpp"
#include "indexer/search_string_utils.hpp"
namespace search
{
namespace
{
// Size, in meters, handed to MercatorBounds::RectByCenterXYAndSizeInMeters to build the
// street lookup rect around an address feature center.
// NOTE(review): named "radius", but whether the helper treats it as a half-size or a full
// side length is not visible here — confirm.
double constexpr kLookupRadiusM = 500.0;
// Upper limit on how many leading candidate streets GetMatchedStreetIndex examines.
size_t const kMaxStreetIndex = 16;
// Threshold for accepting a non-exact street match, expressed as edit distance in percent
// of the key length.
size_t const kPossiblePercent = 10;
/// Scans every point of the line feature |ft| (at FeatureType::BEST_GEOMETRY) and returns the
/// smallest MercatorBounds::DistanceOnEarth value between such a point and |pt|.
/// Only the points themselves are compared; the segments between them are not considered.
/// If no point is visited the result stays at numeric_limits<double>::max().
/// @todo Need to check projection here?
double CalculateMinDistance(FeatureType const & ft, m2::PointD const & pt)
{
  ASSERT_EQUAL(ft.GetFeatureType(), feature::GEOM_LINE, ());

  double minDistance = numeric_limits<double>::max();

  // Keeps the running minimum over all visited points.
  auto const trackClosest = [&minDistance, &pt](m2::PointD const & vertex)
  {
    double const current = MercatorBounds::DistanceOnEarth(vertex, pt);
    if (current < minDistance)
      minDistance = current;
  };

  ft.ForEachPoint(trackClosest, FeatureType::BEST_GEOMETRY);
  return minDistance;
}
} // namespace
/// Collects named "highway" line features located in a rect around the center of |addrFt|
/// and appends one Street entry per feature to |streets|.
/// @param addrFt   Address feature; its center is the lookup origin.
/// @param comp     Callable invoked with the street's "default"-language name; its result is
///                 stored as the entry's third field (m_editDistance).
/// @param streets  Output vector. Entries are appended, then the whole vector is sorted by
///                 ascending m_distance.
template <class TCompare>
void ReverseGeocoding::GetNearbyStreets(FeatureType const & addrFt, TCompare comp,
                                        vector<Street> & streets)
{
  m2::PointD const & addrCenter = feature::GetCenter(addrFt);
  m2::RectD const lookupRect =
      MercatorBounds::RectByCenterXYAndSizeInMeters(addrCenter, kLookupRadiusM);

  // Filters a candidate feature and, if it qualifies as a named street, records it.
  auto const collectStreet = [&streets, &addrCenter, &comp](FeatureType const & candidate)
  {
    // Streets are linear features only.
    if (candidate.GetFeatureType() != feature::GEOM_LINE)
      return;

    static feature::TypeSetChecker checker({"highway"});
    feature::TypesHolder candidateTypes(candidate);
    if (!checker.IsEqualR(candidateTypes.begin(), candidateTypes.end()))
      return;

    // Unnamed roads cannot be matched against an address street name.
    string streetName;
    static int8_t const lang = StringUtf8Multilang::GetLangIndex("default");
    if (!candidate.GetName(lang, streetName))
      return;

    ASSERT(!streetName.empty(), ());
    streets.push_back(
        {candidate.GetID(), CalculateMinDistance(candidate, addrCenter), comp(streetName)});
  };

  m_index->ForEachInRect(collectStreet, lookupRect, scales::GetUpperScale());

  // Closest streets first.
  auto const byDistance = [](Street const & lhs, Street const & rhs)
  {
    return lhs.m_distance < rhs.m_distance;
  };
  sort(streets.begin(), streets.end(), byDistance);
}
/// Collects streets near |addrFt| and scores each street name against |keyName|.
/// For every street, m_editDistance is filled with the pair
/// { edit distance between |keyName| and the street name's key form, length of |keyName| },
/// both measured on UniString representations.
/// @param addrFt   Address feature used as the lookup origin.
/// @param keyName  Street name key to compare against; used as given, it is not passed
///                 through GetStreetNameAsKey here.
/// @param streets  Output vector, filled by the templated overload.
void ReverseGeocoding::GetNearbyStreets(FeatureType const & addrFt, string const & keyName,
                                        vector<Street> & streets)
{
  strings::UniString const wantedKey = strings::MakeUniString(keyName);

  // Converts a street name to its key form and measures how far it is from the wanted key.
  auto const scoreName = [&wantedKey](string const & name) -> pair<size_t, size_t>
  {
    string nameKey;
    search::GetStreetNameAsKey(name, nameKey);
    strings::UniString const candidateKey = strings::MakeUniString(nameKey);

    size_t const distance = strings::EditDistance(wantedKey.begin(), wantedKey.end(),
                                                  candidateKey.begin(), candidateKey.end());
    return pair<size_t, size_t>(distance, wantedKey.size());
  };

  GetNearbyStreets(addrFt, scoreName, streets);
}
/// Picks the street from |streets| that best matches the address street name.
///
/// Only the first kMaxStreetIndex entries are examined. The first entry with a zero edit
/// distance is returned immediately. Otherwise the entry with the smallest relative edit
/// distance (edit distance * 100 / key length) is chosen, provided that value does not
/// exceed kPossiblePercent; ties go to the entry with the lower index.
///
/// @param streets  Candidates whose m_editDistance holds { edit distance, key length }.
/// @return Index of the matched street, or streets.size() if nothing matched.
size_t ReverseGeocoding::GetMatchedStreetIndex(vector<Street> const & streets)
{
  // Limit the possible return values.
  size_t const count = min(streets.size(), kMaxStreetIndex);

  // Try to find an exact match.
  for (size_t i = 0; i < count; ++i)
  {
    if (streets[i].m_editDistance.first == 0)
      return i;
  }

  // Try to find the best match within the kPossiblePercent limit.
  size_t res = count;
  size_t minPercent = kPossiblePercent + 1;
  for (size_t i = 0; i < count; ++i)
  {
    auto const & editDistance = streets[i].m_editDistance;

    // A zero key length gives no meaningful ratio and must not be used as a divisor.
    if (editDistance.second == 0)
      continue;

    size_t const p = editDistance.first * 100 / editDistance.second;
    // Compare against the best value seen so far, not the fixed threshold; otherwise the
    // last acceptable candidate would win instead of the best one.
    if (p < minPercent)
    {
      res = i;
      minPercent = p;
    }
  }

  return (res < count ? res : streets.size());
}
} // namespace search

View file

@ -0,0 +1,39 @@
#pragma once
#include "indexer/feature_decl.hpp"
#include "std/string.hpp"
#include "std/utility.hpp"
#include "std/vector.hpp"
class FeatureType;
class Index;
namespace search
{
/// Finds streets located near a feature and selects the one matching a street name key.
class ReverseGeocoding
{
public:
  /// A candidate street found near an address feature.
  struct Street
  {
    /// Identifier of the street feature.
    FeatureID m_id;
    /// Distance from the address feature center to the street; results are ordered by it.
    double m_distance;
    /// { edit distance between the name keys, length of the address street key }.
    pair<size_t, size_t> m_editDistance;
  };

  /// @param p  Index used for the spatial lookup. It is stored as a raw pointer, so it is
  ///           presumably expected to outlive this object — confirm with callers.
  ReverseGeocoding(Index * p) : m_index(p) {}

  /// Fills |streets| with the streets around |ft|, each scored against |keyName|.
  void GetNearbyStreets(FeatureType const & ft, string const & keyName, vector<Street> & streets);

  /// @return Index in |streets| of the matched street, or streets.size() when none matches.
  static size_t GetMatchedStreetIndex(vector<Street> const & streets);

private:
  /// Generic form: |comp| is applied to every street name and its result is stored in the
  /// corresponding Street::m_editDistance.
  template <class TCompare>
  void GetNearbyStreets(FeatureType const & ft, TCompare comp, vector<Street> & streets);

  Index * m_index;
};
} // namespace search

View file

@ -27,6 +27,7 @@ HEADERS += \
region.hpp \
result.hpp \
retrieval.hpp \
reverse_geocoding.hpp \
search_common.hpp \
search_engine.hpp \
search_query.hpp \
@ -50,6 +51,7 @@ SOURCES += \
region.cpp \
result.cpp \
retrieval.cpp \
reverse_geocoding.cpp \
search_engine.cpp \
search_query.cpp \
search_query_params.cpp \

View file

@ -7,6 +7,7 @@
#include "indexer/ftypes_matcher.hpp"
#include "indexer/index.hpp"
#include "indexer/scales.hpp"
#include "indexer/search_string_utils.hpp"
#include "platform/platform.hpp"