[search] Use vector of pointers in KeywordMatcher to avoid copying the keyword strings.

This commit is contained in:
Yury Melnichek 2011-07-22 17:01:21 +02:00 committed by Alex Zolotarev
parent 8087c60161
commit 0afb0f3b08
4 changed files with 48 additions and 34 deletions

View file

@ -11,7 +11,7 @@ namespace search
namespace impl
{
KeywordMatcher::KeywordMatcher(strings::UniString const * pKeywords,
KeywordMatcher::KeywordMatcher(strings::UniString const * const * pKeywords,
size_t keywordsCount,
strings::UniString const & prefix,
uint32_t maxKeywordMatchCost, uint32_t maxPrefixMatchCost,
@ -25,6 +25,10 @@ KeywordMatcher::KeywordMatcher(strings::UniString const * pKeywords,
m_minPrefixMatchCost(m_maxPrefixMatchCost + 1),
m_bestMatchNamePenalty(-1)
{
#ifdef DEBUG
for (size_t i = 0; i < keywordsCount; ++i)
ASSERT(!m_pKeywords[i]->empty(), (i));
#endif
}
void KeywordMatcher::ProcessName(string const & name)
@ -38,7 +42,8 @@ void KeywordMatcher::ProcessNameToken(string const & name, strings::UniString co
uint32_t matchPenalty = 0;
for (size_t i = 0; i < m_minKeywordMatchCost.size(); ++i)
{
uint32_t const matchCost = m_keywordMatchFn(&m_pKeywords[i][0], m_pKeywords[i].size(),
strings::UniString const & keyword = *(m_pKeywords[i]);
uint32_t const matchCost = m_keywordMatchFn(&keyword[0], keyword.size(),
&s[0], s.size(), m_minKeywordMatchCost[i]);
matchPenalty += matchCost;
if (matchCost <= m_maxKeywordMatchCost)

View file

@ -17,7 +17,7 @@ typedef uint32_t (* StringMatchFn)(strings::UniChar const * sA, uint32_t sizeA,
// Matches keywords against given names.
class KeywordMatcher
{
strings::UniString const * m_pKeywords;
strings::UniString const * const * m_pKeywords;
strings::UniString const & m_prefix;
uint32_t m_maxKeywordMatchCost, m_maxPrefixMatchCost;
StringMatchFn m_keywordMatchFn, m_prefixMatchFn;
@ -27,7 +27,7 @@ class KeywordMatcher
uint32_t m_bestMatchNamePenalty;
public:
KeywordMatcher(strings::UniString const * pKeywords,
KeywordMatcher(strings::UniString const * const * pKeywords,
size_t keywordsCount,
strings::UniString const & prefix,
uint32_t maxKeywordMatchCost, uint32_t maxPrefixMatchCost,

View file

@ -56,7 +56,8 @@ inline uint32_t GetMaxPrefixMatchScore(int size)
return 512;
}
inline KeywordMatcher MakeMatcher(vector<strings::UniString> const & tokens,
template <typename UniStringPtrVectorT>
inline KeywordMatcher MakeMatcher(UniStringPtrVectorT const & tokens,
strings::UniString const & prefix)
{
return KeywordMatcher(tokens.empty() ? NULL : &tokens[0], tokens.size(),
@ -87,14 +88,13 @@ struct FeatureProcessor
for (int i = 0; i < types.m_size; ++i)
keywordsSkipMask |= m_query.GetKeywordsToSkipForType(types.m_types[i]);
// TODO: Make faster.
vector<strings::UniString> const & queryKeywords = m_query.GetKeywords();
ASSERT_LESS(queryKeywords.size(), 32, ());
vector<strings::UniString> keywords;
buffer_vector<strings::UniString const *, 32> keywords;
keywords.reserve(queryKeywords.size());
for (size_t i = 0; i < queryKeywords.size() && i < 32; ++i)
if (!(keywordsSkipMask & (1 << i)))
keywords.push_back(queryKeywords[i]);
keywords.push_back(&queryKeywords[i]);
KeywordMatcher matcher(MakeMatcher(keywords, m_query.GetPrefix()));
feature.ForEachNameRef(matcher);
@ -202,7 +202,7 @@ void Query::Search(function<void (Result const &)> const & f)
// TODO: Prefer user languages here.
if (m_prefix.size() >= iName->m_prefixLengthToSuggest)
{
KeywordMatcher matcher = MakeMatcher(vector<strings::UniString>(), m_prefix);
KeywordMatcher matcher = MakeMatcher(vector<strings::UniString const *>(), m_prefix);
matcher.ProcessNameToken(string(), NormalizeAndSimplifyString(iName->m_name));
ASSERT_LESS(iName->m_prefixLengthToSuggest, 1 << PREFIX_LEN_BITS, ());
int const penalty =

View file

@ -4,6 +4,7 @@
#include "../string_match.hpp"
#include "../../testing/testing_utils.hpp"
#include "../../base/string_utils.hpp"
#include "../../std/scoped_ptr.hpp"
#include "../../std/vector.hpp"
namespace
@ -25,73 +26,81 @@ uint32_t PrefixMatchForTest(strings::UniChar const * sA, uint32_t sizeA,
maxCost, true);
}
struct KeywordMatcherAdaptor
{
explicit KeywordMatcherAdaptor(char const * prefix,
uint32_t maxKeywordMatchCost, uint32_t maxPrefixMatchCost,
char const * s0, char const * s1 = NULL)
{
m_keywords.push_back(strings::MakeUniString(s0));
if (s1)
m_keywords.push_back(strings::MakeUniString(s1));
for (size_t i = 0; i < m_keywords.size(); ++i)
m_keywordPtrs.push_back(&m_keywords[i]);
m_pMatcher.reset(new search::impl::KeywordMatcher(&m_keywordPtrs[0], m_keywordPtrs.size(),
strings::MakeUniString(prefix),
maxKeywordMatchCost, maxPrefixMatchCost,
&KeywordMatchForTest, &PrefixMatchForTest));
}
vector<strings::UniString> m_keywords;
vector<strings::UniString const *> m_keywordPtrs;
scoped_ptr<search::impl::KeywordMatcher> m_pMatcher;
};
} // unnamed namespace
// Smoke test: two keywords ("minsk", "belarus"), prefix "l", both max costs 3.
// Checks the prefix score, the per-keyword scores and the total score before any
// name is processed and after each of three successive ProcessName() calls.
UNIT_TEST(KeywordMatcher_Smoke)
{
vector<strings::UniString> keywords;
keywords.push_back(strings::MakeUniString("minsk"));
keywords.push_back(strings::MakeUniString("belarus"));
search::impl::KeywordMatcher matcher(&keywords[0], keywords.size(),
strings::MakeUniString("l"),
3, 3,
&KeywordMatchForTest, &PrefixMatchForTest);
KeywordMatcherAdaptor matcherAdaptor("l", 3, 3, "minsk", "belarus");
search::impl::KeywordMatcher & matcher = *matcherAdaptor.m_pMatcher;
// Nothing processed yet: prefix score and both keyword scores are expected to be 4.
TEST_EQUAL(matcher.GetPrefixMatchScore(), 4, ());
TEST_EQUAL(vector<uint32_t>(matcher.GetKeywordMatchScores(),
matcher.GetKeywordMatchScores() + keywords.size()),
matcher.GetKeywordMatchScores() + 2),
Vec<uint32_t>(4, 4), ());
TEST_EQUAL(matcher.GetMatchScore(), 4 + 4 + 4, ());
// Single-token name: expected prefix score 1, keyword scores (4, 2).
matcher.ProcessName("belarrr");
TEST_EQUAL(matcher.GetPrefixMatchScore(), 1, ());
TEST_EQUAL(vector<uint32_t>(matcher.GetKeywordMatchScores(),
matcher.GetKeywordMatchScores() + keywords.size()),
matcher.GetKeywordMatchScores() + 2),
Vec<uint32_t>(4, 2), ());
TEST_EQUAL(matcher.GetMatchScore(), 1 + 4 + 2, ());
// Two-token name: expected prefix score stays 1, keyword scores become (2, 1).
matcher.ProcessName("belaruu minnn");
TEST_EQUAL(matcher.GetPrefixMatchScore(), 1, ());
TEST_EQUAL(vector<uint32_t>(matcher.GetKeywordMatchScores(),
matcher.GetKeywordMatchScores() + keywords.size()),
matcher.GetKeywordMatchScores() + 2),
Vec<uint32_t>(2, 1), ());
TEST_EQUAL(matcher.GetMatchScore(), 1 + 2 + 1, ());
// Name with an extra token "les": expected prefix score drops to 0, keyword scores unchanged.
matcher.ProcessName("belaruu les minnn");
TEST_EQUAL(matcher.GetPrefixMatchScore(), 0, ());
TEST_EQUAL(vector<uint32_t>(matcher.GetKeywordMatchScores(),
matcher.GetKeywordMatchScores() + keywords.size()),
matcher.GetKeywordMatchScores() + 2),
Vec<uint32_t>(2, 1), ());
TEST_EQUAL(matcher.GetMatchScore(), 0 + 2 + 1, ());
}
// Two keywords ("minsk", "belarus") with an empty prefix and both max costs 3.
// Checks the scores before any name is processed and after one ProcessName() call.
UNIT_TEST(KeywordMatcher_NoPrefix)
{
vector<strings::UniString> keywords;
keywords.push_back(strings::MakeUniString("minsk"));
keywords.push_back(strings::MakeUniString("belarus"));
search::impl::KeywordMatcher matcher(&keywords[0], keywords.size(),
strings::MakeUniString(""),
3, 3,
&KeywordMatchForTest, &PrefixMatchForTest);
KeywordMatcherAdaptor matcherAdaptor("", 3, 3, "minsk", "belarus");
search::impl::KeywordMatcher & matcher = *matcherAdaptor.m_pMatcher;
// Nothing processed yet: expected prefix score 4 and total 4 + 4 + 4.
TEST_EQUAL(matcher.GetPrefixMatchScore(), 4, ());
TEST_EQUAL(matcher.GetMatchScore(), 4 + 4 + 4, ());
matcher.ProcessName("belaruu zzz minnn");
TEST_EQUAL(matcher.GetPrefixMatchScore(), 0, ());
TEST_EQUAL(vector<uint32_t>(matcher.GetKeywordMatchScores(),
matcher.GetKeywordMatchScores() + keywords.size()),
// Two keywords are in play, so both scores must be read; a range of one element
// could never equal the two-value Vec below (Vec appears to build a vector of its
// arguments, as in KeywordMatcher_Smoke).
matcher.GetKeywordMatchScores() + 2),
Vec<uint32_t>(2, 1), ());
TEST_EQUAL(matcher.GetMatchScore(), 0 + 2 + 1, ());
}
// Single keyword ("minsk"), empty prefix, both max costs 4.
// After processing the unrelated name "Suomi" the total match score is expected to be 5.
UNIT_TEST(KeywordMatcher_Suomi)
{
vector<strings::UniString> keywords;
keywords.push_back(strings::MakeUniString("minsk"));
search::impl::KeywordMatcher matcher(&keywords[0], keywords.size(),
strings::MakeUniString(""),
4, 4,
&KeywordMatchForTest, &PrefixMatchForTest);
KeywordMatcherAdaptor matcherAdaptor("", 4, 4, "minsk");
search::impl::KeywordMatcher & matcher = *matcherAdaptor.m_pMatcher;
matcher.ProcessName("Suomi");
TEST_EQUAL(matcher.GetMatchScore(), 5, ());
}