diff --git a/generator/generator_tool/generator_tool.pro b/generator/generator_tool/generator_tool.pro index 4c3ffe0f1f..4c3b066219 100644 --- a/generator/generator_tool/generator_tool.pro +++ b/generator/generator_tool/generator_tool.pro @@ -1,7 +1,7 @@ # Generator binary ROOT_DIR = ../.. -DEPENDENCIES = generator routing storage indexer platform geometry coding base \ +DEPENDENCIES = generator search routing storage indexer platform geometry coding base \ osrm gflags expat tess2 jansson protobuf tomcrypt \ succinct stats_client diff --git a/indexer/features_vector.hpp b/indexer/features_vector.hpp index ffc9d34861..9a1b350d49 100644 --- a/indexer/features_vector.hpp +++ b/indexer/features_vector.hpp @@ -70,4 +70,6 @@ public: FeaturesVector const & GetVector() const { return m_vector; } FilesContainerR::ReaderT GetReader(string const & tag) const { return m_cont.GetReader(tag); } + + string const & GetFilePath() const { return m_cont.GetFileName(); } }; diff --git a/indexer/search_index_builder.cpp b/indexer/search_index_builder.cpp index 75d83135d0..41dd76a877 100644 --- a/indexer/search_index_builder.cpp +++ b/indexer/search_index_builder.cpp @@ -1,11 +1,14 @@ #include "indexer/search_index_builder.hpp" +#include "search/reverse_geocoding.hpp" + #include "indexer/categories_holder.hpp" #include "indexer/classificator.hpp" #include "indexer/feature_algo.hpp" #include "indexer/feature_utils.hpp" #include "indexer/feature_visibility.hpp" #include "indexer/features_vector.hpp" +#include "indexer/index.hpp" #include "indexer/search_delimiters.hpp" #include "indexer/search_index_values.hpp" #include "indexer/search_string_utils.hpp" @@ -260,8 +263,7 @@ public: template void AddFeatureNameIndexPairs(FeaturesVectorTest & features, CategoriesHolder & categoriesHolder, - vector> & keyValuePairs, - SingleValueSerializer const & serializer) + vector> & keyValuePairs) { feature::DataHeader const & header = features.GetHeader(); @@ -276,20 +278,49 @@ void 
AddFeatureNameIndexPairs(FeaturesVectorTest & features, CategoriesHolder & ReaderSource src = features.GetReader(SEARCH_TOKENS_FILE_TAG); uint64_t index = 0; - FeatureNameInserter inserter(nullptr, keyValuePairs); - int8_t const lang = StringUtf8Multilang::GetLangIndex("default"); + uint64_t address = 0, missing = 0; + map bounds; + + Index mwmIndex; + /// @ todo Make some better solution, or legalize MakeTemporary. + mwmIndex.RegisterMap(platform::LocalCountryFile::MakeTemporary(features.GetFilePath())); + search::ReverseGeocoding rgc(&mwmIndex); while (src.Size() > 0) { feature::AddressData data; data.Deserialize(src); - inserter.m_val.m_featureId = index++; - - string const street = data.Get(feature::AddressData::STREET); + string street; + search::GetStreetNameAsKey(data.Get(feature::AddressData::STREET), street); if (!street.empty()) - inserter(lang, street); + { + FeatureType ft; + features.GetVector().GetByIndex(index, ft); + + using TStreet = search::ReverseGeocoding::Street; + vector streets; + rgc.GetNearbyStreets(ft, street, streets); + + size_t const ind = rgc.GetMatchedStreetIndex(streets); + if (ind == streets.size()) + { + ++missing; + //LOG(LWARNING, ("No street found for address", street, ft)); + } + else + { + ++bounds[ind]; + } + + ++address; + } + + ++index; } + + LOG(LINFO, ("Address: Matched percent", 100 * (1.0 - missing/double(address)))); + LOG(LINFO, ("Address: Upper bounds", bounds)); } } // namespace @@ -353,7 +384,7 @@ void BuildSearchIndex(FilesContainerR & container, Writer & indexWriter) SingleValueSerializer serializer(codingParams); vector> searchIndexKeyValuePairs; - AddFeatureNameIndexPairs(features, categoriesHolder, searchIndexKeyValuePairs, serializer); + AddFeatureNameIndexPairs(features, categoriesHolder, searchIndexKeyValuePairs); sort(searchIndexKeyValuePairs.begin(), searchIndexKeyValuePairs.end()); LOG(LINFO, ("End sorting strings:", timer.ElapsedSeconds())); diff --git a/search/house_detector.cpp 
b/search/house_detector.cpp index 9b625163a7..000372097f 100644 --- a/search/house_detector.cpp +++ b/search/house_detector.cpp @@ -4,6 +4,7 @@ #include "indexer/classificator.hpp" #include "indexer/feature_impl.hpp" +#include "indexer/search_string_utils.hpp" #include "geometry/angles.hpp" #include "geometry/distance.hpp" diff --git a/search/house_detector.hpp b/search/house_detector.hpp index 013bea68d6..911a96bcff 100644 --- a/search/house_detector.hpp +++ b/search/house_detector.hpp @@ -14,9 +14,6 @@ namespace search { -void GetStreetNameAsKey(string const & name, string & res); - - class FeatureLoader { Index const * m_pIndex; diff --git a/search/reverse_geocoding.cpp b/search/reverse_geocoding.cpp new file mode 100644 index 0000000000..1c5f008ffa --- /dev/null +++ b/search/reverse_geocoding.cpp @@ -0,0 +1,117 @@ +#include "reverse_geocoding.hpp" + +#include "indexer/feature.hpp" +#include "indexer/feature_algo.hpp" +#include "indexer/feature_visibility.hpp" +#include "indexer/index.hpp" +#include "indexer/scales.hpp" +#include "indexer/search_string_utils.hpp" + + +namespace search +{ + +namespace +{ + +double constexpr kLookupRadiusM = 500.0; +size_t const kMaxStreetIndex = 16; +size_t const kPossiblePercent = 10; + + +/// @todo Need to check projection here? 
+double CalculateMinDistance(FeatureType const & ft, m2::PointD const & pt)
+{
+  ASSERT_EQUAL(ft.GetFeatureType(), feature::GEOM_LINE, ());
+  // Minimum over the points visited by ForEachPoint; nothing is projected onto segments.
+  double res = numeric_limits<double>::max();
+  ft.ForEachPoint([&] (m2::PointD const & p)
+  {
+    double const d = MercatorBounds::DistanceOnEarth(p, pt);
+    if (d < res)
+      res = d;
+  }, FeatureType::BEST_GEOMETRY);
+
+  return res;
+}
+
+} // namespace
+
+template <class TCompare>
+void ReverseGeocoding::GetNearbyStreets(FeatureType const & addrFt, TCompare comp,
+                                        vector<Street> & streets)
+{
+  m2::PointD const & center = feature::GetCenter(addrFt);
+  m2::RectD const rect = MercatorBounds::RectByCenterXYAndSizeInMeters(center, kLookupRadiusM);
+  // Collects named "highway" line features only; every other feature is skipped.
+  auto const fn = [&](FeatureType const & ft)
+  {
+    if (ft.GetFeatureType() != feature::GEOM_LINE)
+      return;
+
+    static feature::TypeSetChecker checker({"highway"});
+    feature::TypesHolder types(ft);
+    if (!checker.IsEqualR(types.begin(), types.end()))
+      return;
+
+    string name;
+    static int8_t const lang = StringUtf8Multilang::GetLangIndex("default");
+    if (!ft.GetName(lang, name))
+      return;
+
+    ASSERT(!name.empty(), ());
+    streets.push_back({ft.GetID(), CalculateMinDistance(ft, center), comp(name)});
+  };
+
+  m_index->ForEachInRect(fn, rect, scales::GetUpperScale());
+
+  sort(streets.begin(), streets.end(), [](Street const & s1, Street const & s2)
+  {
+    return s1.m_distance < s2.m_distance;
+  });
+}
+
+void ReverseGeocoding::GetNearbyStreets(FeatureType const & addrFt, string const & keyName,
+                                        vector<Street> & streets)
+{
+  strings::UniString const uniKey1 = strings::MakeUniString(keyName);
+  // Each street gets {edit distance between keyName and its own name key, keyName length}.
+  GetNearbyStreets(addrFt, [&uniKey1](string const & name) -> pair<size_t, size_t>
+  {
+    string key;
+    search::GetStreetNameAsKey(name, key);
+    strings::UniString const uniKey2 = strings::MakeUniString(key);
+
+    return { strings::EditDistance(uniKey1.begin(), uniKey1.end(), uniKey2.begin(), uniKey2.end()),
+             uniKey1.size() };
+  }, streets);
+}
+
+size_t ReverseGeocoding::GetMatchedStreetIndex(vector<Street> const & streets)
+{
+  // Only the first kMaxStreetIndex candidates are examined.
+  size_t const count = min(streets.size(), kMaxStreetIndex);
+
+  // The first exact match wins outright.
+  for (size_t i = 0; i < count; ++i)
+    if (streets[i].m_editDistance.first == 0)
+      return i;
+
+  // Otherwise pick the lowest edit-distance percentage that is <= kPossiblePercent.
+  size_t res = count;
+  size_t minPercent = kPossiblePercent + 1;
+  for (size_t i = 0; i < count; ++i)
+  {
+    auto const & d = streets[i].m_editDistance;  // second == 0 means an empty key: never a match
+    size_t const p = (d.second == 0 ? minPercent : d.first * 100 / d.second);
+    if (p < minPercent)
+    {
+      res = i;
+      minPercent = p;
+    }
+  }
+
+  return (res < count ? res : streets.size());
+}
+
+} // namespace search
diff --git a/search/reverse_geocoding.hpp b/search/reverse_geocoding.hpp
new file mode 100644
index 0000000000..40493ec377
--- /dev/null
+++ b/search/reverse_geocoding.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include "indexer/feature_decl.hpp"
+
+#include "std/string.hpp"
+#include "std/utility.hpp"
+#include "std/vector.hpp"
+
+
+class FeatureType;
+class Index;
+
+namespace search
+{
+
+/// Looks up streets around an address feature and picks the one whose name
+/// matches the street key of that address.
+class ReverseGeocoding
+{
+  /// Not owned by this class; dereferenced when looking up nearby streets.
+  Index * m_index;
+
+public:
+  ReverseGeocoding(Index * p) : m_index(p) {}
+
+  /// A street candidate found near an address feature.
+  struct Street
+  {
+    FeatureID m_id;
+    /// Smallest distance from the address center to a point of the street.
+    double m_distance;
+    /// {edit distance between the requested key and the street's name key,
+    ///  length of the requested key}.
+    pair<size_t, size_t> m_editDistance;
+  };
+
+  /// Appends to @p streets the named "highway" line features found around @p ft,
+  /// then sorts @p streets by ascending distance.
+  /// @param keyName Street name key every candidate is compared against.
+  void GetNearbyStreets(FeatureType const & ft, string const & keyName, vector<Street> & streets);
+
+  /// @return Index in @p streets of the first exact name match, else of the closest
+  /// name within the allowed percentage; streets.size() if there is none.
+  static size_t GetMatchedStreetIndex(vector<Street> const & streets);
+
+private:
+  /// Same lookup as the public overload; @p comp maps a street name to m_editDistance.
+  template <class TCompare>
+  void GetNearbyStreets(FeatureType const & ft, TCompare comp, vector<Street> & streets);
+};
+
+} // namespace search
diff --git a/search/search.pro b/search/search.pro
index 9469eb0b99..f14202ab54 100644
--- a/search/search.pro
+++ b/search/search.pro
@@ -27,6 +27,7 @@ HEADERS += \
     region.hpp \
     result.hpp \
     retrieval.hpp \
+    reverse_geocoding.hpp \
     search_common.hpp \
     search_engine.hpp \
     search_query.hpp \
@@ -50,6 +51,7 @@ SOURCES += \
     region.cpp \
     result.cpp \
     retrieval.cpp \
+    reverse_geocoding.cpp \
     search_engine.cpp \
     search_query.cpp \
     search_query_params.cpp \
diff --git a/search/search_tests/house_detector_tests.cpp
b/search/search_tests/house_detector_tests.cpp index 8c85879d13..de78d58ae4 100644 --- a/search/search_tests/house_detector_tests.cpp +++ b/search/search_tests/house_detector_tests.cpp @@ -7,6 +7,7 @@ #include "indexer/ftypes_matcher.hpp" #include "indexer/index.hpp" #include "indexer/scales.hpp" +#include "indexer/search_string_utils.hpp" #include "platform/platform.hpp"