[search] KeywordMatcher rewrite code review comments: simplify SetKeywords() semantics and fix a bug in tests, remove useless constants and asserts, other minor fixes.

This commit is contained in:
Yury Melnichek 2013-02-11 13:27:01 +01:00 committed by Alex Zolotarev
parent 056f14ef4e
commit 5a6d08106a
7 changed files with 19 additions and 46 deletions

View file

@ -32,43 +32,26 @@ bool KeywordLangMatcher::ScoreT::operator <(KeywordLangMatcher::ScoreT const & s
void KeywordLangMatcher::SetLanguages(vector<vector<int8_t> > const & languagePriorities)
{
m_languagePriorities = languagePriorities;
#ifdef DEBUG
ASSERT_EQUAL ( static_cast<size_t>(NUM_LANG_PRIORITY_TIERS), m_languagePriorities.size(), () );
for (int i = 0; i < NUM_LANG_PRIORITY_TIERS; ++i)
ASSERT_LESS_OR_EQUAL ( m_languagePriorities[i].size(), static_cast<size_t>(MAX_LANGS_IN_TIER), () );
#endif
}
bool KeywordLangMatcher::AssertIndex(pair<int, int> const & ind) const
{
ASSERT_LESS ( static_cast<size_t>(ind.first), m_languagePriorities.size(), () );
ASSERT_LESS ( static_cast<size_t>(ind.second), m_languagePriorities[ind.first].size(), () );
return true;
}
void KeywordLangMatcher::SetLanguage(pair<int, int> const & ind, int8_t lang)
{
ASSERT ( AssertIndex(ind), () );
m_languagePriorities[ind.first][ind.second] = lang;
}
int8_t KeywordLangMatcher::GetLanguage(pair<int, int> const & ind) const
{
ASSERT ( AssertIndex(ind), () );
return m_languagePriorities[ind.first][ind.second];
}
int KeywordLangMatcher::GetLangScore(int8_t lang) const
{
int const LANG_TIER_COUNT = static_cast<int>(m_languagePriorities.size());
for (int i = 0; i < m_languagePriorities.size(); ++i)
for (int j = 0; j < m_languagePriorities[i].size(); ++j)
if (m_languagePriorities[i][j] == lang)
return -i; // All languages in the same tier are equal.
return -LANG_TIER_COUNT;
return -static_cast<int>(m_languagePriorities.size());
}
KeywordLangMatcher::ScoreT KeywordLangMatcher::Score(int8_t lang, string const & name) const

View file

@ -25,9 +25,6 @@ public:
};
private:
enum { NUM_LANG_PRIORITY_TIERS = 4 };
enum { MAX_LANGS_IN_TIER = 2 };
typedef KeywordMatcher::StringT StringT;
public:
@ -37,7 +34,7 @@ public:
int8_t GetLanguage(pair<int, int> const & ind) const;
/// Store references to keywords from source array of strings.
inline void SetKeywords(StringT const * keywords, size_t count, StringT const * prefix)
inline void SetKeywords(StringT const * keywords, size_t count, StringT const & prefix)
{
m_keywordMatcher.SetKeywords(keywords, count, prefix);
}
@ -50,7 +47,6 @@ public:
//@}
private:
bool AssertIndex(pair<int, int> const & ind) const;
int GetLangScore(int8_t lang) const;
vector<vector<int8_t> > m_languagePriorities;

View file

@ -17,19 +17,14 @@ KeywordMatcher::KeywordMatcher()
void KeywordMatcher::Clear()
{
m_keywords = NULL;
m_keywordsCount = 0;
m_prefix = NULL;
m_keywords.clear();
m_prefix.clear();
}
void KeywordMatcher::SetKeywords(StringT const * keywords, size_t count, StringT const * prefix)
void KeywordMatcher::SetKeywords(StringT const * keywords, size_t count, StringT const & prefix)
{
m_keywords = keywords;
m_keywordsCount = min(static_cast<size_t>(MAX_TOKENS), count);
m_keywords.assign(keywords, keywords + count);
m_prefix = prefix;
if (m_prefix && m_prefix->empty())
m_prefix = NULL;
}
KeywordMatcher::ScoreT KeywordMatcher::Score(string const & name) const
@ -48,14 +43,14 @@ KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const & name) const
KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const * tokens, size_t count) const
{
vector<bool> isQueryTokenMatched(m_keywordsCount);
vector<bool> isQueryTokenMatched(m_keywords.size());
vector<bool> isNameTokenMatched(count);
uint32_t numQueryTokensMatched = 0;
uint32_t sumTokenMatchDistance = 0;
uint32_t prevTokenMatchDistance = 0;
bool bPrefixMatched = true;
for (int i = 0; i < m_keywordsCount; ++i)
for (int i = 0; i < m_keywords.size(); ++i)
for (int j = 0; j < count && !isQueryTokenMatched[i]; ++j)
if (!isNameTokenMatched[j] && m_keywords[i] == tokens[j])
{
@ -65,15 +60,15 @@ KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const * tokens, size_t coun
prevTokenMatchDistance = tokenMatchDistance;
}
if (m_prefix)
if (!m_prefix.empty())
{
bPrefixMatched = false;
for (int j = 0; j < count && !bPrefixMatched; ++j)
if (!isNameTokenMatched[j] &&
StartsWith(tokens[j].begin(), tokens[j].end(), m_prefix->begin(), m_prefix->end()))
StartsWith(tokens[j].begin(), tokens[j].end(), m_prefix.begin(), m_prefix.end()))
{
isNameTokenMatched[j] = bPrefixMatched = true;
uint32_t const tokenMatchDistance = int(m_keywordsCount) - j;
uint32_t const tokenMatchDistance = int(m_keywords.size()) - j;
sumTokenMatchDistance += abs(tokenMatchDistance - prevTokenMatchDistance);
}
}

View file

@ -37,8 +37,8 @@ public:
void Clear();
/// Store references to keywords from source array of strings.
void SetKeywords(StringT const * keywords, size_t count, StringT const * prefix);
/// Internal copy of keywords is made.
void SetKeywords(StringT const * keywords, size_t count, StringT const & prefix);
/// @return Score of the name (greater is better).
//@{
@ -50,9 +50,8 @@ public:
static bool IsQueryMatched(ScoreT const & score) { return score.IsQueryMatched(); }
private:
StringT const * m_keywords;
size_t m_keywordsCount;
StringT const * m_prefix;
vector<StringT> m_keywords;
StringT m_prefix;
};
} // namespace search

View file

@ -281,7 +281,7 @@ void Query::SetQuery(string const & query)
m_tokens.resize(maxTokensCount);
// assign tokens and prefix to scorer
m_keywordsScorer.SetKeywords(m_tokens.data(), m_tokens.size(), &m_prefix);
m_keywordsScorer.SetKeywords(m_tokens.data(), m_tokens.size(), m_prefix);
}
void Query::SearchCoordinates(string const & query, Results & res) const

View file

@ -41,7 +41,7 @@ KeywordLangMatcher CreateMatcher(string const & query)
prefix = keywords.back();
keywords.pop_back();
}
matcher.SetKeywords(keywords.data(), keywords.size(), &prefix);
matcher.SetKeywords(keywords.data(), keywords.size(), prefix);
return matcher;
}

View file

@ -54,7 +54,7 @@ void TestKeywordMatcher(char const * const query, KeywordMatcherTestCase const (
}
KeywordMatcher matcher;
matcher.SetKeywords(keywords.data(), keywords.size(), &prefix);
matcher.SetKeywords(keywords.data(), keywords.size(), prefix);
ScoreT prevScore = ScoreT();
for (size_t i = 0; i < N; ++i)
{
@ -65,7 +65,7 @@ void TestKeywordMatcher(char const * const query, KeywordMatcherTestCase const (
// Test that a newly created matcher returns the same result
{
KeywordMatcher freshMatcher;
freshMatcher.SetKeywords(keywords.data(), keywords.size(), &prefix);
freshMatcher.SetKeywords(keywords.data(), keywords.size(), prefix);
ScoreT const freshScore = freshMatcher.Score(name);
// TEST_EQUAL(testScore, freshScore, (query, name));
TEST(!(testScore < freshScore), (query, name));