forked from organicmaps/organicmaps
[search] A probabilistic tweak to the locality scorer.
This commit is contained in:
parent
eeb41dadb8
commit
90fe2ee57b
5 changed files with 33 additions and 9 deletions
|
@ -694,9 +694,14 @@ void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter
|
|||
for (size_t startToken = 0; startToken < m_numTokens; ++startToken)
|
||||
{
|
||||
CBVPtr intersection;
|
||||
CBVPtr unfilteredIntersection;
|
||||
intersection.SetFull();
|
||||
unfilteredIntersection.SetFull();
|
||||
if (filter)
|
||||
{
|
||||
intersection.Intersect(filter);
|
||||
unfilteredIntersection.Intersect(m_addressFeatures[startToken].get());
|
||||
}
|
||||
intersection.Intersect(m_addressFeatures[startToken].get());
|
||||
if (intersection.IsEmpty())
|
||||
continue;
|
||||
|
@ -713,6 +718,11 @@ void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter
|
|||
l.m_featureId = featureId;
|
||||
l.m_startToken = startToken;
|
||||
l.m_endToken = endToken;
|
||||
if (filter)
|
||||
{
|
||||
l.m_prob = static_cast<double>(intersection->PopCount()) /
|
||||
static_cast<double>(unfilteredIntersection->PopCount());
|
||||
}
|
||||
preLocalities.push_back(l);
|
||||
});
|
||||
}
|
||||
|
@ -720,6 +730,8 @@ void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter
|
|||
if (endToken < m_numTokens)
|
||||
{
|
||||
intersection.Intersect(m_addressFeatures[endToken].get());
|
||||
if (filter)
|
||||
unfilteredIntersection.Intersect(m_addressFeatures[endToken].get());
|
||||
if (intersection.IsEmpty())
|
||||
break;
|
||||
}
|
||||
|
@ -829,7 +841,7 @@ void Geocoder::FillVillageLocalities()
|
|||
|
||||
#if defined(DEBUG)
|
||||
ft.GetName(StringUtf8Multilang::kDefaultCode, village.m_defaultName);
|
||||
LOG(LDEBUG, ("Village =", village.m_defaultName));
|
||||
LOG(LDEBUG, ("Village =", village.m_defaultName, "prob =", village.m_prob));
|
||||
#endif
|
||||
|
||||
m_cities[{l.m_startToken, l.m_endToken}].push_back(village);
|
||||
|
|
|
@ -99,10 +99,10 @@ public:
|
|||
|
||||
struct Locality
|
||||
{
|
||||
Locality() : m_featureId(0), m_startToken(0), m_endToken(0) {}
|
||||
Locality() : m_featureId(0), m_startToken(0), m_endToken(0), m_prob(0.0) {}
|
||||
|
||||
Locality(uint32_t featureId, size_t startToken, size_t endToken)
|
||||
: m_featureId(featureId), m_startToken(startToken), m_endToken(endToken)
|
||||
: m_featureId(featureId), m_startToken(startToken), m_endToken(endToken), m_prob(0.0)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -110,6 +110,12 @@ public:
|
|||
uint32_t m_featureId;
|
||||
size_t m_startToken;
|
||||
size_t m_endToken;
|
||||
|
||||
// Measures our belief in the fact that tokens in the range [m_startToken, m_endToken)
|
||||
// indeed specify a locality. Currently it is set only for villages.
|
||||
double m_prob;
|
||||
|
||||
string m_name;
|
||||
};
|
||||
|
||||
// This struct represents a country or US- or Canadian- state. It
|
||||
|
|
|
@ -44,8 +44,8 @@ void LocalityScorer::GetTopLocalities(size_t limit, vector<Geocoder::Locality> &
|
|||
ls.emplace_back(locality);
|
||||
|
||||
RemoveDuplicates(ls);
|
||||
LeaveTopByRank(std::max(limit, kDefaultReadLimit), ls);
|
||||
SortByName(ls);
|
||||
LeaveTopByRankAndProb(std::max(limit, kDefaultReadLimit), ls);
|
||||
SortByNameAndProb(ls);
|
||||
if (ls.size() > limit)
|
||||
ls.resize(limit);
|
||||
|
||||
|
@ -71,7 +71,7 @@ void LocalityScorer::RemoveDuplicates(vector<ExLocality> & ls) const
|
|||
ls.end());
|
||||
}
|
||||
|
||||
void LocalityScorer::LeaveTopByRank(size_t limit, vector<ExLocality> & ls) const
|
||||
void LocalityScorer::LeaveTopByRankAndProb(size_t limit, vector<ExLocality> & ls) const
|
||||
{
|
||||
if (ls.size() <= limit)
|
||||
return;
|
||||
|
@ -81,6 +81,8 @@ void LocalityScorer::LeaveTopByRank(size_t limit, vector<ExLocality> & ls) const
|
|||
|
||||
sort(ls.begin(), ls.end(), [](ExLocality const & lhs, ExLocality const & rhs)
|
||||
{
|
||||
if (lhs.m_locality.m_prob != rhs.m_locality.m_prob)
|
||||
return lhs.m_locality.m_prob > rhs.m_locality.m_prob;
|
||||
if (lhs.m_rank != rhs.m_rank)
|
||||
return lhs.m_rank > rhs.m_rank;
|
||||
return lhs.m_numTokens > rhs.m_numTokens;
|
||||
|
@ -88,7 +90,7 @@ void LocalityScorer::LeaveTopByRank(size_t limit, vector<ExLocality> & ls) const
|
|||
ls.resize(limit);
|
||||
}
|
||||
|
||||
void LocalityScorer::SortByName(vector<ExLocality> & ls) const
|
||||
void LocalityScorer::SortByNameAndProb(vector<ExLocality> & ls) const
|
||||
{
|
||||
vector<string> names;
|
||||
for (auto & l : ls)
|
||||
|
@ -107,6 +109,9 @@ void LocalityScorer::SortByName(vector<ExLocality> & ls) const
|
|||
|
||||
sort(ls.begin(), ls.end(), [](ExLocality const & lhs, ExLocality const & rhs)
|
||||
{
|
||||
// Probabilities form a stronger signal than name scores do.
|
||||
if (lhs.m_locality.m_prob != rhs.m_locality.m_prob)
|
||||
return lhs.m_locality.m_prob > rhs.m_locality.m_prob;
|
||||
if (IsAlmostFullMatch(lhs.m_nameScore) && IsAlmostFullMatch(rhs.m_nameScore))
|
||||
{
|
||||
// When both localities match well, e.g. full or full prefix
|
||||
|
|
|
@ -45,8 +45,8 @@ private:
|
|||
};
|
||||
|
||||
void RemoveDuplicates(vector<ExLocality> & ls) const;
|
||||
void LeaveTopByRank(size_t limit, vector<ExLocality> & ls) const;
|
||||
void SortByName(vector<ExLocality> & ls) const;
|
||||
void LeaveTopByRankAndProb(size_t limit, vector<ExLocality> & ls) const;
|
||||
void SortByNameAndProb(vector<ExLocality> & ls) const;
|
||||
|
||||
QueryParams const & m_params;
|
||||
Delegate const & m_delegate;
|
||||
|
|
|
@ -51,6 +51,7 @@ fi
|
|||
|
||||
NAMES=("Australia_Brisbane.mwm"
|
||||
"Belarus_Minsk*.mwm"
|
||||
"Canada_Quebek_Montreal.mwm"
|
||||
"Germany_*.mwm"
|
||||
"Russia_*.mwm"
|
||||
"UK_England_*.mwm"
|
||||
|
|
Loading…
Add table
Reference in a new issue