forked from organicmaps/organicmaps
Refactor. Add Matching by name.
This commit is contained in:
parent
6f423fbac4
commit
88d5775c23
8 changed files with 151 additions and 63 deletions
|
@ -1,11 +1,11 @@
|
|||
#include "generator/booking_dataset.hpp"
|
||||
|
||||
#include "generator/booking_scoring.hpp"
|
||||
|
||||
#include "platform/local_country_file_utils.hpp"
|
||||
#include "platform/platform.hpp"
|
||||
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
|
@ -171,45 +171,6 @@ vector<size_t> BookingDataset::GetNearestHotels(double lat, double lon, size_t l
|
|||
return indexes;
|
||||
}
|
||||
|
||||
// Placeholder: name-based matching is not implemented here, so every call reports "no match"
// regardless of |osmName| and |bookingIndexes|.
//
// The disabled draft that used to occupy this body sketched the intended approach: tokenize
// the OSM name and the name of each candidate booking hotel, compute the edit distance for
// every pair of tokens, and treat pairs whose distance is below 3 as evidence of a match.
bool BookingDataset::MatchByName(string const & osmName,
                                 vector<size_t> const & bookingIndexes) const
{
  return false;
}
|
||||
|
||||
void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) const
|
||||
{
|
||||
for (auto const & hotel : m_hotels)
|
||||
|
@ -302,13 +263,6 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
|
|||
}
|
||||
}
|
||||
|
||||
// static
|
||||
double BookingDataset::ScoreByLinearNormDistance(double distance)
|
||||
{
|
||||
distance = my::clamp(distance, 0, kDistanceLimitInMeters);
|
||||
return 1.0 - distance / kDistanceLimitInMeters;
|
||||
}
|
||||
|
||||
void BookingDataset::LoadHotels(istream & src, string const & addressReferencePath)
|
||||
{
|
||||
m_hotels.clear();
|
||||
|
@ -374,11 +328,7 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const
|
|||
|
||||
for (size_t const j : bookingIndexes)
|
||||
{
|
||||
auto const & hotel = GetHotel(j);
|
||||
double const distanceMeters = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon);
|
||||
double score = ScoreByLinearNormDistance(distanceMeters);
|
||||
matched = score > kOptimalThreshold;
|
||||
if (matched)
|
||||
if (booking_scoring::Match(GetHotel(j), e).IsMatched())
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,9 +22,6 @@ public:
|
|||
double static constexpr kDistanceLimitInMeters = 150;
|
||||
size_t static constexpr kMaxSelectedElements = 3;
|
||||
|
||||
// Calculated with tools/python/booking_hotels_quality.py
|
||||
double static constexpr kOptimalThreshold = 0.709283;
|
||||
|
||||
struct Hotel
|
||||
{
|
||||
enum class Fields
|
||||
|
@ -92,8 +89,6 @@ public:
|
|||
|
||||
void BuildFeatures(function<void(OsmElement *)> const & fn) const;
|
||||
|
||||
static double ScoreByLinearNormDistance(double distance);
|
||||
|
||||
protected:
|
||||
vector<Hotel> m_hotels;
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "generator/booking_dataset.hpp"
|
||||
#include "generator/booking_scoring.hpp"
|
||||
#include "generator/osm_source.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
@ -73,15 +74,15 @@ int main(int argc, char * argv[])
|
|||
for (size_t const j : bookingIndexes)
|
||||
{
|
||||
auto const & hotel = bookingDataset.GetHotel(j);
|
||||
double const distanceMeters = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon);
|
||||
double const score = BookingDataset::ScoreByLinearNormDistance(distanceMeters);
|
||||
auto const score = booking_scoring::Match(hotel, e);
|
||||
|
||||
bool matched = score > BookingDataset::kOptimalThreshold;
|
||||
double const distanceMeters = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon);
|
||||
bool matched = score.IsMatched();
|
||||
|
||||
outStream << "# ------------------------------------------" << fixed << setprecision(6)
|
||||
<< endl;
|
||||
outStream << (matched ? 'y' : 'n') << " \t" << i << "\t " << j
|
||||
<< " distance: " << distanceMeters << " score: " << score << endl;
|
||||
<< " distance: " << distanceMeters << " score: " << score.GetMatchingScore() << endl;
|
||||
outStream << "# " << e << endl;
|
||||
outStream << "# " << hotel << endl;
|
||||
outStream << "# URL: https://www.openstreetmap.org/?mlat=" << hotel.lat
|
||||
|
|
97
generator/booking_scoring.cpp
Normal file
97
generator/booking_scoring.cpp
Normal file
|
@ -0,0 +1,97 @@
|
|||
#include "generator/booking_scoring.hpp"
|
||||
|
||||
#include "generator/booking_dataset.hpp"
|
||||
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
#include "indexer/search_delimiters.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
|
||||
#include "base/collection_cast.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace booking_scoring
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Calculated with tools/python/booking_hotels_quality.py.
|
||||
double constexpr kOptimalThreshold = 0.151001;
|
||||
|
||||
// Compile-time predicate: ::value is true when T, after std::decay has been applied to it,
// is exactly the type U, and false otherwise.
template <typename T, typename U>
struct decay_equiv
  : std::integral_constant<bool, std::is_same<typename std::decay<T>::type, U>::value>
{
};
|
||||
|
||||
// Splits |str| into tokens with search::NormalizeAndTokenizeString (using default-constructed
// search::Delimiters) and returns the distinct tokens as a set.
set<strings::UniString> StringToSetOfWords(string const & str)
{
  vector<strings::UniString> tokens;
  search::NormalizeAndTokenizeString(str, tokens, search::Delimiters{});
  return set<strings::UniString>(tokens.begin(), tokens.end());
}
|
||||
|
||||
// TODO(mgsergio): Update existing one in base or wherever...
|
||||
// Or just use one from boost.
|
||||
struct CounterIterator
|
||||
{
|
||||
template<typename T, typename = typename enable_if<!decay_equiv<T, CounterIterator>::value>::type>
|
||||
CounterIterator & operator=(T const &) { ++m_count; return *this; }
|
||||
CounterIterator & operator++() { return *this; }
|
||||
CounterIterator & operator++(int) { return *this; }
|
||||
CounterIterator & operator*() { return *this; }
|
||||
uint32_t Count() const { return m_count; }
|
||||
|
||||
uint32_t m_count = 0;
|
||||
};
|
||||
|
||||
// Returns a symmetric word-overlap score for |a| and |b| in the range [0, 1].
//
// Both strings are converted to sets of words; the score is
//   (|A ∩ B| / |A|) * (|A ∩ B| / |B|),
// i.e. the share of a's words found in b multiplied by the share of b's words found in a.
// Returns 0.0 when either string produces no words.
double StringSimilarityScore(string const & a, string const & b)
{
  auto const aWords = StringToSetOfWords(a);
  auto const bWords = StringToSetOfWords(b);

  // A string with no words has nothing in common with any other string. Without this
  // guard the ratios below would be 0.0 / 0, which yields NaN instead of a usable score.
  if (aWords.empty() || bWords.empty())
    return 0.0;

  // Only the cardinality of the intersection is needed, so count the elements instead of
  // collecting them.
  auto const intersectionCard = set_intersection(begin(aWords), end(aWords),
                                                 begin(bWords), end(bWords),
                                                 CounterIterator()).Count();
  auto const aLikeBScore = static_cast<double>(intersectionCard) / aWords.size();
  auto const bLikeAScore = static_cast<double>(intersectionCard) / bWords.size();

  return aLikeBScore * bLikeAScore;
}
|
||||
|
||||
double GetLinearNormDistanceScrore(double distance)
|
||||
{
|
||||
distance = my::clamp(distance, 0, BookingDataset::kDistanceLimitInMeters);
|
||||
return 1.0 - distance / BookingDataset::kDistanceLimitInMeters;
|
||||
}
|
||||
|
||||
// Scores how similar a booking hotel name is to an OSM name; this simply forwards both
// names to StringSimilarityScore.
double GetNameSimilarityScore(string const & booking_name, string const & osm_name)
{
  double const similarity = StringSimilarityScore(booking_name, osm_name);
  return similarity;
}
|
||||
} // namespace
|
||||
|
||||
double BookingMatchScore::GetMatchingScore() const
|
||||
{
|
||||
return m_linearNormDistanceScore * m_nameSimilarityScore;
|
||||
}
|
||||
|
||||
bool BookingMatchScore::IsMatched() const
|
||||
{
|
||||
return GetMatchingScore() > kOptimalThreshold;
|
||||
}
|
||||
|
||||
// Builds the match score for booking hotel |h| against OSM element |e|.
//
// The distance component comes from the distance between the two coordinates
// (ms::DistanceOnEarth). The name component compares h.name with the element's "name" tag
// and is 0 when the element has no such tag.
BookingMatchScore Match(BookingDataset::Hotel const & h, OsmElement const & e)
{
  BookingMatchScore result;

  result.m_linearNormDistanceScore =
      GetLinearNormDistanceScrore(ms::DistanceOnEarth(e.lat, e.lon, h.lat, h.lon));

  string osmName;
  if (e.GetTag("name", osmName))
    result.m_nameSimilarityScore = GetNameSimilarityScore(h.name, osmName);
  else
    result.m_nameSimilarityScore = 0;

  return result;
}
|
||||
} // namespace booking_scoring
|
||||
} // namespace generator
|
21
generator/booking_scoring.hpp
Normal file
21
generator/booking_scoring.hpp
Normal file
|
@ -0,0 +1,21 @@
|
|||
#pragma once
|
||||
|
||||
#include "generator/booking_dataset.hpp"
|
||||
#include "generator/osm_element.hpp"
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace booking_scoring
|
||||
{
|
||||
// Result of comparing a booking hotel with an OSM element: two partial scores, both
// initialized to zero, plus helpers that combine and evaluate them.
struct BookingMatchScore
{
  // Combined score computed from the two components below.
  double GetMatchingScore() const;
  // Whether the combined score is high enough to treat the pair as the same object.
  bool IsMatched() const;

  // Component derived from the distance between the two objects.
  double m_linearNormDistanceScore = 0.0;
  // Component derived from how similar the two names are.
  double m_nameSimilarityScore = 0.0;
};
|
||||
|
||||
BookingMatchScore Match(BookingDataset::Hotel const & h, OsmElement const & e);
|
||||
} // namespace booking_scoring
|
||||
} // namespace generator
|
|
@ -121,6 +121,20 @@ string OsmElement::ToString(string const & shift) const
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
// Looks up the first tag whose key equals |key|. On success copies its value into |value|
// and returns true; otherwise returns false and leaves |value| untouched.
bool OsmElement::GetTag(string const & key, string & value) const
{
  for (Tag const & tag : m_tags)
  {
    if (tag.key == key)
    {
      value = tag.value;
      return true;
    }
  }

  return false;
}
|
||||
|
||||
string DebugPrint(OsmElement const & e)
|
||||
{
|
||||
return e.ToString();
|
||||
|
|
|
@ -152,7 +152,8 @@ struct OsmElement
|
|||
if (!v.empty())
|
||||
AddTag(k, v);
|
||||
}
|
||||
|
||||
bool GetTag(string const & key, string & value) const;
|
||||
};
|
||||
|
||||
string DebugPrint(OsmElement const & e);
|
||||
|
||||
|
|
|
@ -63,6 +63,8 @@
|
|||
67BC92E31D1A9ED800A4A378 /* test_feature.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 67BC92D91D1A9E9800A4A378 /* test_feature.hpp */; };
|
||||
67BC92E41D1A9ED800A4A378 /* test_mwm_builder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 67BC92DA1D1A9E9800A4A378 /* test_mwm_builder.cpp */; };
|
||||
67BC92E51D1A9ED800A4A378 /* test_mwm_builder.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 67BC92DB1D1A9E9800A4A378 /* test_mwm_builder.hpp */; };
|
||||
E9502E331D34012200CAB86B /* booking_scoring.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E9502E311D34012200CAB86B /* booking_scoring.cpp */; };
|
||||
E9502E341D34012200CAB86B /* booking_scoring.hpp in Headers */ = {isa = PBXBuildFile; fileRef = E9502E321D34012200CAB86B /* booking_scoring.hpp */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
|
@ -127,6 +129,8 @@
|
|||
67BC92DA1D1A9E9800A4A378 /* test_mwm_builder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = test_mwm_builder.cpp; sourceTree = "<group>"; };
|
||||
67BC92DB1D1A9E9800A4A378 /* test_mwm_builder.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = test_mwm_builder.hpp; sourceTree = "<group>"; };
|
||||
67F0F6761B8C9DCE003F52FF /* osm_xml_source.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = osm_xml_source.hpp; sourceTree = "<group>"; };
|
||||
E9502E311D34012200CAB86B /* booking_scoring.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = booking_scoring.cpp; sourceTree = "<group>"; };
|
||||
E9502E321D34012200CAB86B /* booking_scoring.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = booking_scoring.hpp; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
|
@ -169,6 +173,8 @@
|
|||
6753401D1A3F2A1B00A0A8C3 /* generator */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
E9502E311D34012200CAB86B /* booking_scoring.cpp */,
|
||||
E9502E321D34012200CAB86B /* booking_scoring.hpp */,
|
||||
677E2A111CAACC5F001DC42A /* tag_admixer.hpp */,
|
||||
677E2A121CAACC5F001DC42A /* towns_dumper.cpp */,
|
||||
677E2A131CAACC5F001DC42A /* towns_dumper.hpp */,
|
||||
|
@ -253,6 +259,7 @@
|
|||
6753407F1A3F2A7400A0A8C3 /* osm2type.hpp in Headers */,
|
||||
670B84BD1A8CDB0000CE4492 /* osm_source.hpp in Headers */,
|
||||
675340631A3F2A7400A0A8C3 /* coastlines_generator.hpp in Headers */,
|
||||
E9502E341D34012200CAB86B /* booking_scoring.hpp in Headers */,
|
||||
675340641A3F2A7400A0A8C3 /* intermediate_data.hpp in Headers */,
|
||||
675340781A3F2A7400A0A8C3 /* intermediate_elements.hpp in Headers */,
|
||||
6753406B1A3F2A7400A0A8C3 /* feature_emitter_iface.hpp in Headers */,
|
||||
|
@ -384,6 +391,7 @@
|
|||
677E2A171CAACC5F001DC42A /* towns_dumper.cpp in Sources */,
|
||||
6753405C1A3F2A7400A0A8C3 /* borders_generator.cpp in Sources */,
|
||||
675340671A3F2A7400A0A8C3 /* dumper.cpp in Sources */,
|
||||
E9502E331D34012200CAB86B /* booking_scoring.cpp in Sources */,
|
||||
675340831A3F2A7400A0A8C3 /* statistics.cpp in Sources */,
|
||||
6753407E1A3F2A7400A0A8C3 /* osm2type.cpp in Sources */,
|
||||
675340601A3F2A7400A0A8C3 /* check_model.cpp in Sources */,
|
||||
|
@ -581,6 +589,7 @@
|
|||
67BC92D51D1A9E5F00A4A378 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
};
|
||||
|
|
Loading…
Add table
Reference in a new issue