forked from organicmaps/organicmaps
[search] Fixed bug with feature names scoring.
This commit is contained in:
parent
419c12b2b7
commit
fb7494df03
6 changed files with 53 additions and 29 deletions
|
@ -11,6 +11,10 @@
|
|||
namespace search
|
||||
{
|
||||
|
||||
// Default-constructed score: the language score starts at the smallest
// representable int; m_parentScore is not listed here and therefore uses its
// own default constructor.
KeywordLangMatcher::ScoreT::ScoreT()
  : m_langScore(numeric_limits<int>::min())
{
}
|
||||
|
||||
KeywordLangMatcher::ScoreT::ScoreT(KeywordMatcher::ScoreT const & score, int langScore)
|
||||
: m_parentScore(score), m_langScore(langScore)
|
||||
{
|
||||
|
@ -71,4 +75,11 @@ KeywordLangMatcher::ScoreT KeywordLangMatcher::Score(int8_t lang,
|
|||
return ScoreT(m_keywordMatcher.Score(tokens, count), GetLangScore(lang));
|
||||
}
|
||||
|
||||
// Builds a textual form of a KeywordLangMatcher score for debug output:
// "KLM::ScoreT(<parent score>, LS=<language score>)", where the parent part is
// produced by the DebugPrint overload for the nested keyword-matcher score.
string DebugPrint(KeywordLangMatcher::ScoreT const & score)
{
  ostringstream out;
  out << "KLM::ScoreT(";
  out << DebugPrint(score.m_parentScore);
  out << ", LS=";
  out << score.m_langScore;
  out << ")";
  return out.str();
}
|
||||
|
||||
} // namespace search
|
||||
|
|
|
@ -9,14 +9,15 @@ namespace search
|
|||
class KeywordLangMatcher
|
||||
{
|
||||
public:
|
||||
|
||||
class ScoreT
|
||||
{
|
||||
public:
|
||||
ScoreT() {}
|
||||
ScoreT();
|
||||
bool operator < (ScoreT const & s) const;
|
||||
|
||||
private:
|
||||
friend class KeywordLangMatcher;
|
||||
friend string DebugPrint(ScoreT const & score);
|
||||
|
||||
ScoreT(KeywordMatcher::ScoreT const & score, int langScore);
|
||||
|
||||
|
|
|
@ -38,16 +38,17 @@ KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const & name) const
|
|||
SplitUniString(name, MakeBackInsertFunctor(tokens), Delimiters());
|
||||
|
||||
// Some names can have too many tokens. Trim them.
|
||||
return Score(tokens.data(), min(size_t(MAX_TOKENS), tokens.size()));
|
||||
return Score(tokens.data(), tokens.size());
|
||||
}
|
||||
|
||||
KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const * tokens, size_t count) const
|
||||
{
|
||||
count = min(count, size_t(MAX_TOKENS));
|
||||
|
||||
vector<bool> isQueryTokenMatched(m_keywords.size());
|
||||
vector<bool> isNameTokenMatched(count);
|
||||
uint32_t numQueryTokensMatched = 0;
|
||||
uint32_t sumTokenMatchDistance = 0;
|
||||
int32_t prevTokenMatchDistance = 0;
|
||||
int8_t prevTokenMatchDistance = 0;
|
||||
bool bPrefixMatched = true;
|
||||
|
||||
for (int i = 0; i < m_keywords.size(); ++i)
|
||||
|
@ -55,7 +56,7 @@ KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const * tokens, size_t coun
|
|||
if (!isNameTokenMatched[j] && m_keywords[i] == tokens[j])
|
||||
{
|
||||
isQueryTokenMatched[i] = isNameTokenMatched[j] = true;
|
||||
int32_t const tokenMatchDistance = i - j;
|
||||
int8_t const tokenMatchDistance = i - j;
|
||||
sumTokenMatchDistance += abs(tokenMatchDistance - prevTokenMatchDistance);
|
||||
prevTokenMatchDistance = tokenMatchDistance;
|
||||
}
|
||||
|
@ -68,31 +69,37 @@ KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const * tokens, size_t coun
|
|||
StartsWith(tokens[j].begin(), tokens[j].end(), m_prefix.begin(), m_prefix.end()))
|
||||
{
|
||||
isNameTokenMatched[j] = bPrefixMatched = true;
|
||||
int32_t const tokenMatchDistance = int(m_keywords.size()) - j;
|
||||
int8_t const tokenMatchDistance = int(m_keywords.size()) - j;
|
||||
sumTokenMatchDistance += abs(tokenMatchDistance - prevTokenMatchDistance);
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t numQueryTokensMatched = 0;
|
||||
for (size_t i = 0; i < isQueryTokenMatched.size(); ++i)
|
||||
if (isQueryTokenMatched[i])
|
||||
++numQueryTokensMatched;
|
||||
|
||||
ScoreT score = ScoreT();
|
||||
ScoreT score;
|
||||
score.m_bFullQueryMatched = bPrefixMatched && (numQueryTokensMatched == isQueryTokenMatched.size());
|
||||
score.m_bPrefixMatched = bPrefixMatched;
|
||||
score.m_numQueryTokensAndPrefixMatched = numQueryTokensMatched + (bPrefixMatched ? 1 : 0);
|
||||
score.m_nameTokensMatched = 0xFFFFFFFF;
|
||||
for (uint32_t i = 0; i < min(size_t(32), isNameTokenMatched.size()); ++i)
|
||||
if (!isNameTokenMatched[i])
|
||||
score.m_nameTokensMatched &= ~(1 << (31 - i));
|
||||
score.m_sumTokenMatchDistance = sumTokenMatchDistance;
|
||||
|
||||
score.m_nameTokensMatched = 0;
|
||||
score.m_nameTokensLength = 0;
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
if (isNameTokenMatched[i])
|
||||
score.m_nameTokensMatched |= (1 << (MAX_TOKENS-1 - i));
|
||||
score.m_nameTokensLength += tokens[i].size();
|
||||
}
|
||||
|
||||
score.m_sumTokenMatchDistance = sumTokenMatchDistance;
|
||||
return score;
|
||||
}
|
||||
|
||||
KeywordMatcher::ScoreT::ScoreT()
|
||||
: m_sumTokenMatchDistance(0), m_nameTokensMatched(0), m_numQueryTokensAndPrefixMatched(0),
|
||||
m_bFullQueryMatched(false), m_bPrefixMatched(false)
|
||||
: m_sumTokenMatchDistance(0), m_nameTokensMatched(0), m_nameTokensLength(0),
|
||||
m_numQueryTokensAndPrefixMatched(0), m_bFullQueryMatched(false), m_bPrefixMatched(false)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -108,7 +115,11 @@ bool KeywordMatcher::ScoreT::operator < (KeywordMatcher::ScoreT const & s) const
|
|||
return m_nameTokensMatched < s.m_nameTokensMatched;
|
||||
if (m_sumTokenMatchDistance != s.m_sumTokenMatchDistance)
|
||||
return m_sumTokenMatchDistance > s.m_sumTokenMatchDistance;
|
||||
return false;
|
||||
|
||||
if (m_bFullQueryMatched)
|
||||
return m_nameTokensLength > s.m_nameTokensLength;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
string DebugPrint(KeywordMatcher::ScoreT const & score)
|
||||
|
@ -119,7 +130,8 @@ string DebugPrint(KeywordMatcher::ScoreT const & score)
|
|||
out << ",nQTM=" << static_cast<int>(score.m_numQueryTokensAndPrefixMatched);
|
||||
out << ",PM=" << score.m_bPrefixMatched;
|
||||
out << ",NTM=";
|
||||
for (int i = 31; i >= 0; --i) out << ((score.m_nameTokensMatched >> i) & 1);
|
||||
for (int i = MAX_TOKENS-1; i >= 0; --i)
|
||||
out << ((score.m_nameTokensMatched >> i) & 1);
|
||||
out << ",STMD=" << score.m_sumTokenMatchDistance;
|
||||
out << ")";
|
||||
return out.str();
|
||||
|
|
|
@ -28,6 +28,7 @@ public:
|
|||
|
||||
uint32_t m_sumTokenMatchDistance;
|
||||
uint32_t m_nameTokensMatched;
|
||||
uint32_t m_nameTokensLength;
|
||||
uint8_t m_numQueryTokensAndPrefixMatched;
|
||||
bool m_bFullQueryMatched : 1;
|
||||
bool m_bPrefixMatched : 1;
|
||||
|
|
|
@ -95,8 +95,8 @@ Query::Query(Index const * pIndex,
|
|||
// Initialize keywords scorer.
|
||||
// Note! This order should match the indexes arrays above.
|
||||
vector<vector<int8_t> > langPriorities(4);
|
||||
langPriorities[0].push_back(0); // future current lang
|
||||
langPriorities[1].push_back(0); // future input lang
|
||||
langPriorities[0].push_back(-1); // future current lang
|
||||
langPriorities[1].push_back(-1); // future input lang
|
||||
langPriorities[2].push_back(StringUtf8Multilang::GetLangIndex("int_name"));
|
||||
langPriorities[2].push_back(StringUtf8Multilang::GetLangIndex("en"));
|
||||
langPriorities[3].push_back(StringUtf8Multilang::GetLangIndex("default"));
|
||||
|
|
|
@ -117,11 +117,11 @@ UNIT_TEST(KeywordMatcher_Prefix)
|
|||
|
||||
{MATCHES, STRONGLY_BETTER, "new york gym"},
|
||||
{MATCHES, BETTER_OR_EQUAL, "new new york"},
|
||||
|
||||
{MATCHES, STRONGLY_BETTER, "new york"},
|
||||
|
||||
{MATCHES, STRONGLY_BETTER, "newark"},
|
||||
{MATCHES, BETTER_OR_EQUAL, "new"},
|
||||
|
||||
{MATCHES, STRONGLY_BETTER, "new"},
|
||||
};
|
||||
TestKeywordMatcher(query, testCases);
|
||||
}
|
||||
|
@ -135,8 +135,7 @@ UNIT_TEST(KeywordMatcher_Keyword)
|
|||
{NOMATCH, DOES_NOT_MATTER, "zzz"},
|
||||
{NOMATCH, DOES_NOT_MATTER, "ne"},
|
||||
{NOMATCH, DOES_NOT_MATTER, "the netherlands"},
|
||||
|
||||
{NOMATCH, STRONGLY_BETTER, "newark"},
|
||||
{NOMATCH, DOES_NOT_MATTER, "newark"},
|
||||
|
||||
{MATCHES, STRONGLY_BETTER, "york new"},
|
||||
|
||||
|
@ -174,14 +173,15 @@ UNIT_TEST(KeywordMatcher_KeywordAndPrefix)
|
|||
|
||||
{MATCHES, STRONGLY_BETTER, "the new york"},
|
||||
{MATCHES, BETTER_OR_EQUAL, "york new the"},
|
||||
{MATCHES, BETTER_OR_EQUAL, "york new"},
|
||||
|
||||
{MATCHES, STRONGLY_BETTER, "yo new"},
|
||||
|
||||
{MATCHES, STRONGLY_BETTER, "new york pizza"},
|
||||
|
||||
{MATCHES, STRONGLY_BETTER, "york new"},
|
||||
{MATCHES, BETTER_OR_EQUAL, "yo new"},
|
||||
|
||||
{MATCHES, STRONGLY_BETTER, "new york"},
|
||||
{MATCHES, BETTER_OR_EQUAL, "new yo"},
|
||||
|
||||
{MATCHES, STRONGLY_BETTER, "new yo"},
|
||||
};
|
||||
TestKeywordMatcher(query, testCases);
|
||||
}
|
||||
|
@ -203,10 +203,9 @@ UNIT_TEST(KeywordMatcher_KeywordAndKeyword)
|
|||
{MATCHES, STRONGLY_BETTER, "the new york"},
|
||||
{MATCHES, BETTER_OR_EQUAL, "york new the"},
|
||||
|
||||
{MATCHES, STRONGLY_BETTER, "new york pizza"},
|
||||
|
||||
{MATCHES, STRONGLY_BETTER, "york new"},
|
||||
|
||||
{MATCHES, STRONGLY_BETTER, "new york pizza"},
|
||||
{MATCHES, STRONGLY_BETTER, "new york"},
|
||||
};
|
||||
TestKeywordMatcher(query, testCases);
|
||||
|
|
Loading…
Add table
Reference in a new issue