forked from organicmaps/organicmaps
[search] KeywordMatcher rewrite code review comments: simplify SetKeywords() semantics and fix a bug in tests, remove useless constants and asserts, other minor fixes.
This commit is contained in:
parent
056f14ef4e
commit
5a6d08106a
7 changed files with 19 additions and 46 deletions
|
@ -32,43 +32,26 @@ bool KeywordLangMatcher::ScoreT::operator <(KeywordLangMatcher::ScoreT const & s
|
|||
void KeywordLangMatcher::SetLanguages(vector<vector<int8_t> > const & languagePriorities)
|
||||
{
|
||||
m_languagePriorities = languagePriorities;
|
||||
|
||||
#ifdef DEBUG
|
||||
ASSERT_EQUAL ( static_cast<size_t>(NUM_LANG_PRIORITY_TIERS), m_languagePriorities.size(), () );
|
||||
for (int i = 0; i < NUM_LANG_PRIORITY_TIERS; ++i)
|
||||
ASSERT_LESS_OR_EQUAL ( m_languagePriorities[i].size(), static_cast<size_t>(MAX_LANGS_IN_TIER), () );
|
||||
#endif
|
||||
}
|
||||
|
||||
bool KeywordLangMatcher::AssertIndex(pair<int, int> const & ind) const
|
||||
{
|
||||
ASSERT_LESS ( static_cast<size_t>(ind.first), m_languagePriorities.size(), () );
|
||||
ASSERT_LESS ( static_cast<size_t>(ind.second), m_languagePriorities[ind.first].size(), () );
|
||||
return true;
|
||||
}
|
||||
|
||||
void KeywordLangMatcher::SetLanguage(pair<int, int> const & ind, int8_t lang)
|
||||
{
|
||||
ASSERT ( AssertIndex(ind), () );
|
||||
m_languagePriorities[ind.first][ind.second] = lang;
|
||||
}
|
||||
|
||||
int8_t KeywordLangMatcher::GetLanguage(pair<int, int> const & ind) const
|
||||
{
|
||||
ASSERT ( AssertIndex(ind), () );
|
||||
return m_languagePriorities[ind.first][ind.second];
|
||||
}
|
||||
|
||||
int KeywordLangMatcher::GetLangScore(int8_t lang) const
|
||||
{
|
||||
int const LANG_TIER_COUNT = static_cast<int>(m_languagePriorities.size());
|
||||
|
||||
for (int i = 0; i < m_languagePriorities.size(); ++i)
|
||||
for (int j = 0; j < m_languagePriorities[i].size(); ++j)
|
||||
if (m_languagePriorities[i][j] == lang)
|
||||
return -i; // All languages in the same tier are equal.
|
||||
|
||||
return -LANG_TIER_COUNT;
|
||||
return -static_cast<int>(m_languagePriorities.size());
|
||||
}
|
||||
|
||||
KeywordLangMatcher::ScoreT KeywordLangMatcher::Score(int8_t lang, string const & name) const
|
||||
|
|
|
@ -25,9 +25,6 @@ public:
|
|||
};
|
||||
|
||||
private:
|
||||
enum { NUM_LANG_PRIORITY_TIERS = 4 };
|
||||
enum { MAX_LANGS_IN_TIER = 2 };
|
||||
|
||||
typedef KeywordMatcher::StringT StringT;
|
||||
|
||||
public:
|
||||
|
@ -37,7 +34,7 @@ public:
|
|||
int8_t GetLanguage(pair<int, int> const & ind) const;
|
||||
|
||||
/// Store references to keywords from source array of strings.
|
||||
inline void SetKeywords(StringT const * keywords, size_t count, StringT const * prefix)
|
||||
inline void SetKeywords(StringT const * keywords, size_t count, StringT const & prefix)
|
||||
{
|
||||
m_keywordMatcher.SetKeywords(keywords, count, prefix);
|
||||
}
|
||||
|
@ -50,7 +47,6 @@ public:
|
|||
//@}
|
||||
|
||||
private:
|
||||
bool AssertIndex(pair<int, int> const & ind) const;
|
||||
int GetLangScore(int8_t lang) const;
|
||||
|
||||
vector<vector<int8_t> > m_languagePriorities;
|
||||
|
|
|
@ -17,19 +17,14 @@ KeywordMatcher::KeywordMatcher()
|
|||
|
||||
void KeywordMatcher::Clear()
|
||||
{
|
||||
m_keywords = NULL;
|
||||
m_keywordsCount = 0;
|
||||
m_prefix = NULL;
|
||||
m_keywords.clear();
|
||||
m_prefix.clear();
|
||||
}
|
||||
|
||||
void KeywordMatcher::SetKeywords(StringT const * keywords, size_t count, StringT const * prefix)
|
||||
void KeywordMatcher::SetKeywords(StringT const * keywords, size_t count, StringT const & prefix)
|
||||
{
|
||||
m_keywords = keywords;
|
||||
m_keywordsCount = min(static_cast<size_t>(MAX_TOKENS), count);
|
||||
|
||||
m_keywords.assign(keywords, keywords + count);
|
||||
m_prefix = prefix;
|
||||
if (m_prefix && m_prefix->empty())
|
||||
m_prefix = NULL;
|
||||
}
|
||||
|
||||
KeywordMatcher::ScoreT KeywordMatcher::Score(string const & name) const
|
||||
|
@ -48,14 +43,14 @@ KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const & name) const
|
|||
|
||||
KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const * tokens, size_t count) const
|
||||
{
|
||||
vector<bool> isQueryTokenMatched(m_keywordsCount);
|
||||
vector<bool> isQueryTokenMatched(m_keywords.size());
|
||||
vector<bool> isNameTokenMatched(count);
|
||||
uint32_t numQueryTokensMatched = 0;
|
||||
uint32_t sumTokenMatchDistance = 0;
|
||||
uint32_t prevTokenMatchDistance = 0;
|
||||
bool bPrefixMatched = true;
|
||||
|
||||
for (int i = 0; i < m_keywordsCount; ++i)
|
||||
for (int i = 0; i < m_keywords.size(); ++i)
|
||||
for (int j = 0; j < count && !isQueryTokenMatched[i]; ++j)
|
||||
if (!isNameTokenMatched[j] && m_keywords[i] == tokens[j])
|
||||
{
|
||||
|
@ -65,15 +60,15 @@ KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const * tokens, size_t coun
|
|||
prevTokenMatchDistance = tokenMatchDistance;
|
||||
}
|
||||
|
||||
if (m_prefix)
|
||||
if (!m_prefix.empty())
|
||||
{
|
||||
bPrefixMatched = false;
|
||||
for (int j = 0; j < count && !bPrefixMatched; ++j)
|
||||
if (!isNameTokenMatched[j] &&
|
||||
StartsWith(tokens[j].begin(), tokens[j].end(), m_prefix->begin(), m_prefix->end()))
|
||||
StartsWith(tokens[j].begin(), tokens[j].end(), m_prefix.begin(), m_prefix.end()))
|
||||
{
|
||||
isNameTokenMatched[j] = bPrefixMatched = true;
|
||||
uint32_t const tokenMatchDistance = int(m_keywordsCount) - j;
|
||||
uint32_t const tokenMatchDistance = int(m_keywords.size()) - j;
|
||||
sumTokenMatchDistance += abs(tokenMatchDistance - prevTokenMatchDistance);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,8 +37,8 @@ public:
|
|||
|
||||
void Clear();
|
||||
|
||||
/// Store references to keywords from source array of strings.
|
||||
void SetKeywords(StringT const * keywords, size_t count, StringT const * prefix);
|
||||
/// Internal copy of keywords is made.
|
||||
void SetKeywords(StringT const * keywords, size_t count, StringT const & prefix);
|
||||
|
||||
/// @return Score of the name (greater is better).
|
||||
//@{
|
||||
|
@ -50,9 +50,8 @@ public:
|
|||
static bool IsQueryMatched(ScoreT const & score) { return score.IsQueryMatched(); }
|
||||
|
||||
private:
|
||||
StringT const * m_keywords;
|
||||
size_t m_keywordsCount;
|
||||
StringT const * m_prefix;
|
||||
vector<StringT> m_keywords;
|
||||
StringT m_prefix;
|
||||
};
|
||||
|
||||
} // namespace search
|
||||
|
|
|
@ -281,7 +281,7 @@ void Query::SetQuery(string const & query)
|
|||
m_tokens.resize(maxTokensCount);
|
||||
|
||||
// assign tokens and prefix to scorer
|
||||
m_keywordsScorer.SetKeywords(m_tokens.data(), m_tokens.size(), &m_prefix);
|
||||
m_keywordsScorer.SetKeywords(m_tokens.data(), m_tokens.size(), m_prefix);
|
||||
}
|
||||
|
||||
void Query::SearchCoordinates(string const & query, Results & res) const
|
||||
|
|
|
@ -41,7 +41,7 @@ KeywordLangMatcher CreateMatcher(string const & query)
|
|||
prefix = keywords.back();
|
||||
keywords.pop_back();
|
||||
}
|
||||
matcher.SetKeywords(keywords.data(), keywords.size(), &prefix);
|
||||
matcher.SetKeywords(keywords.data(), keywords.size(), prefix);
|
||||
|
||||
return matcher;
|
||||
}
|
||||
|
|
|
@ -54,7 +54,7 @@ void TestKeywordMatcher(char const * const query, KeywordMatcherTestCase const (
|
|||
}
|
||||
|
||||
KeywordMatcher matcher;
|
||||
matcher.SetKeywords(keywords.data(), keywords.size(), &prefix);
|
||||
matcher.SetKeywords(keywords.data(), keywords.size(), prefix);
|
||||
ScoreT prevScore = ScoreT();
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
{
|
||||
|
@ -65,7 +65,7 @@ void TestKeywordMatcher(char const * const query, KeywordMatcherTestCase const (
|
|||
// Test that a newly created matcher returns the same result
|
||||
{
|
||||
KeywordMatcher freshMatcher;
|
||||
freshMatcher.SetKeywords(keywords.data(), keywords.size(), &prefix);
|
||||
freshMatcher.SetKeywords(keywords.data(), keywords.size(), prefix);
|
||||
ScoreT const freshScore = freshMatcher.Score(name);
|
||||
// TEST_EQUAL(testScore, freshScore, (query, name));
|
||||
TEST(!(testScore < freshScore), (query, name));
|
||||
|
|
Loading…
Add table
Reference in a new issue