forked from organicmaps/organicmaps
[search] Use vector of pointers in KeywordMatcher to avoid copying the keywords strings.
This commit is contained in:
parent
8087c60161
commit
0afb0f3b08
4 changed files with 48 additions and 34 deletions
|
@ -11,7 +11,7 @@ namespace search
|
|||
namespace impl
|
||||
{
|
||||
|
||||
KeywordMatcher::KeywordMatcher(strings::UniString const * pKeywords,
|
||||
KeywordMatcher::KeywordMatcher(strings::UniString const * const * pKeywords,
|
||||
size_t keywordsCount,
|
||||
strings::UniString const & prefix,
|
||||
uint32_t maxKeywordMatchCost, uint32_t maxPrefixMatchCost,
|
||||
|
@ -25,6 +25,10 @@ KeywordMatcher::KeywordMatcher(strings::UniString const * pKeywords,
|
|||
m_minPrefixMatchCost(m_maxPrefixMatchCost + 1),
|
||||
m_bestMatchNamePenalty(-1)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
for (size_t i = 0; i < keywordsCount; ++i)
|
||||
ASSERT(!m_pKeywords[i]->empty(), (i));
|
||||
#endif
|
||||
}
|
||||
|
||||
void KeywordMatcher::ProcessName(string const & name)
|
||||
|
@ -38,7 +42,8 @@ void KeywordMatcher::ProcessNameToken(string const & name, strings::UniString co
|
|||
uint32_t matchPenalty = 0;
|
||||
for (size_t i = 0; i < m_minKeywordMatchCost.size(); ++i)
|
||||
{
|
||||
uint32_t const matchCost = m_keywordMatchFn(&m_pKeywords[i][0], m_pKeywords[i].size(),
|
||||
strings::UniString const & keyword = *(m_pKeywords[i]);
|
||||
uint32_t const matchCost = m_keywordMatchFn(&keyword[0], keyword.size(),
|
||||
&s[0], s.size(), m_minKeywordMatchCost[i]);
|
||||
matchPenalty += matchCost;
|
||||
if (matchCost <= m_maxKeywordMatchCost)
|
||||
|
|
|
@ -17,7 +17,7 @@ typedef uint32_t (* StringMatchFn)(strings::UniChar const * sA, uint32_t sizeA,
|
|||
// Matches keywords against given names.
|
||||
class KeywordMatcher
|
||||
{
|
||||
strings::UniString const * m_pKeywords;
|
||||
strings::UniString const * const * m_pKeywords;
|
||||
strings::UniString const & m_prefix;
|
||||
uint32_t m_maxKeywordMatchCost, m_maxPrefixMatchCost;
|
||||
StringMatchFn m_keywordMatchFn, m_prefixMatchFn;
|
||||
|
@ -27,7 +27,7 @@ class KeywordMatcher
|
|||
uint32_t m_bestMatchNamePenalty;
|
||||
|
||||
public:
|
||||
KeywordMatcher(strings::UniString const * pKeywords,
|
||||
KeywordMatcher(strings::UniString const * const * pKeywords,
|
||||
size_t keywordsCount,
|
||||
strings::UniString const & prefix,
|
||||
uint32_t maxKeywordMatchCost, uint32_t maxPrefixMatchCost,
|
||||
|
|
|
@ -56,7 +56,8 @@ inline uint32_t GetMaxPrefixMatchScore(int size)
|
|||
return 512;
|
||||
}
|
||||
|
||||
inline KeywordMatcher MakeMatcher(vector<strings::UniString> const & tokens,
|
||||
template <typename UniStringPtrVectorT>
|
||||
inline KeywordMatcher MakeMatcher(UniStringPtrVectorT const & tokens,
|
||||
strings::UniString const & prefix)
|
||||
{
|
||||
return KeywordMatcher(tokens.empty() ? NULL : &tokens[0], tokens.size(),
|
||||
|
@ -87,14 +88,13 @@ struct FeatureProcessor
|
|||
for (int i = 0; i < types.m_size; ++i)
|
||||
keywordsSkipMask |= m_query.GetKeywordsToSkipForType(types.m_types[i]);
|
||||
|
||||
// TODO: Make faster.
|
||||
vector<strings::UniString> const & queryKeywords = m_query.GetKeywords();
|
||||
ASSERT_LESS(queryKeywords.size(), 32, ());
|
||||
vector<strings::UniString> keywords;
|
||||
buffer_vector<strings::UniString const *, 32> keywords;
|
||||
keywords.reserve(queryKeywords.size());
|
||||
for (size_t i = 0; i < queryKeywords.size() && i < 32; ++i)
|
||||
if (!(keywordsSkipMask & (1 << i)))
|
||||
keywords.push_back(queryKeywords[i]);
|
||||
keywords.push_back(&queryKeywords[i]);
|
||||
|
||||
KeywordMatcher matcher(MakeMatcher(keywords, m_query.GetPrefix()));
|
||||
feature.ForEachNameRef(matcher);
|
||||
|
@ -202,7 +202,7 @@ void Query::Search(function<void (Result const &)> const & f)
|
|||
// TODO: Prefer user languages here.
|
||||
if (m_prefix.size() >= iName->m_prefixLengthToSuggest)
|
||||
{
|
||||
KeywordMatcher matcher = MakeMatcher(vector<strings::UniString>(), m_prefix);
|
||||
KeywordMatcher matcher = MakeMatcher(vector<strings::UniString const *>(), m_prefix);
|
||||
matcher.ProcessNameToken(string(), NormalizeAndSimplifyString(iName->m_name));
|
||||
ASSERT_LESS(iName->m_prefixLengthToSuggest, 1 << PREFIX_LEN_BITS, ());
|
||||
int const penalty =
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include "../string_match.hpp"
|
||||
#include "../../testing/testing_utils.hpp"
|
||||
#include "../../base/string_utils.hpp"
|
||||
#include "../../std/scoped_ptr.hpp"
|
||||
#include "../../std/vector.hpp"
|
||||
|
||||
namespace
|
||||
|
@ -25,73 +26,81 @@ uint32_t PrefixMatchForTest(strings::UniChar const * sA, uint32_t sizeA,
|
|||
maxCost, true);
|
||||
}
|
||||
|
||||
struct KeywordMatcherAdaptor
|
||||
{
|
||||
explicit KeywordMatcherAdaptor(char const * prefix,
|
||||
uint32_t maxKeywordMatchCost, uint32_t maxPrefixMatchCost,
|
||||
char const * s0, char const * s1 = NULL)
|
||||
{
|
||||
m_keywords.push_back(strings::MakeUniString(s0));
|
||||
if (s1)
|
||||
m_keywords.push_back(strings::MakeUniString(s1));
|
||||
for (size_t i = 0; i < m_keywords.size(); ++i)
|
||||
m_keywordPtrs.push_back(&m_keywords[i]);
|
||||
m_pMatcher.reset(new search::impl::KeywordMatcher(&m_keywordPtrs[0], m_keywordPtrs.size(),
|
||||
strings::MakeUniString(prefix),
|
||||
maxKeywordMatchCost, maxPrefixMatchCost,
|
||||
&KeywordMatchForTest, &PrefixMatchForTest));
|
||||
}
|
||||
|
||||
vector<strings::UniString> m_keywords;
|
||||
vector<strings::UniString const *> m_keywordPtrs;
|
||||
scoped_ptr<search::impl::KeywordMatcher> m_pMatcher;
|
||||
};
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
// Smoke test: keywords "minsk" and "belarus", prefix "l", both max costs set to 3.
// Verifies the prefix score, the per-keyword scores and the total match score
// before any name is processed and after each successive ProcessName() call.
UNIT_TEST(KeywordMatcher_Smoke)
{
  KeywordMatcherAdaptor matcherAdaptor("l", 3, 3, "minsk", "belarus");
  search::impl::KeywordMatcher & matcher = *matcherAdaptor.m_pMatcher;
  // Number of per-keyword scores to read back; taken from the adaptor so it always
  // agrees with the keywords actually passed to the matcher.
  size_t const keywordsCount = matcherAdaptor.m_keywords.size();

  // Nothing processed yet: the test expects 4 for the prefix and for each keyword.
  TEST_EQUAL(matcher.GetPrefixMatchScore(), 4, ());
  TEST_EQUAL(vector<uint32_t>(matcher.GetKeywordMatchScores(),
                              matcher.GetKeywordMatchScores() + keywordsCount),
             Vec<uint32_t>(4, 4), ());
  TEST_EQUAL(matcher.GetMatchScore(), 4 + 4 + 4, ());

  matcher.ProcessName("belarrr");
  TEST_EQUAL(matcher.GetPrefixMatchScore(), 1, ());
  TEST_EQUAL(vector<uint32_t>(matcher.GetKeywordMatchScores(),
                              matcher.GetKeywordMatchScores() + keywordsCount),
             Vec<uint32_t>(4, 2), ());
  TEST_EQUAL(matcher.GetMatchScore(), 1 + 4 + 2, ());

  matcher.ProcessName("belaruu minnn");
  TEST_EQUAL(matcher.GetPrefixMatchScore(), 1, ());
  TEST_EQUAL(vector<uint32_t>(matcher.GetKeywordMatchScores(),
                              matcher.GetKeywordMatchScores() + keywordsCount),
             Vec<uint32_t>(2, 1), ());
  TEST_EQUAL(matcher.GetMatchScore(), 1 + 2 + 1, ());

  matcher.ProcessName("belaruu les minnn");
  TEST_EQUAL(matcher.GetPrefixMatchScore(), 0, ());
  TEST_EQUAL(vector<uint32_t>(matcher.GetKeywordMatchScores(),
                              matcher.GetKeywordMatchScores() + keywordsCount),
             Vec<uint32_t>(2, 1), ());
  TEST_EQUAL(matcher.GetMatchScore(), 0 + 2 + 1, ());
}
|
||||
|
||||
// Same two keywords as the smoke test ("minsk", "belarus") but with an empty prefix.
// Verifies the scores before any name is processed and after one ProcessName() call.
UNIT_TEST(KeywordMatcher_NoPrefix)
{
  KeywordMatcherAdaptor matcherAdaptor("", 3, 3, "minsk", "belarus");
  search::impl::KeywordMatcher & matcher = *matcherAdaptor.m_pMatcher;
  // Two keywords were supplied, so two scores must be read back. Taking the count
  // from the adaptor keeps the compared range the same length as the expected
  // two-element vector below.
  size_t const keywordsCount = matcherAdaptor.m_keywords.size();

  // Nothing processed yet.
  TEST_EQUAL(matcher.GetPrefixMatchScore(), 4, ());
  TEST_EQUAL(matcher.GetMatchScore(), 4 + 4 + 4, ());

  matcher.ProcessName("belaruu zzz minnn");
  TEST_EQUAL(matcher.GetPrefixMatchScore(), 0, ());
  TEST_EQUAL(vector<uint32_t>(matcher.GetKeywordMatchScores(),
                              matcher.GetKeywordMatchScores() + keywordsCount),
             Vec<uint32_t>(2, 1), ());
  TEST_EQUAL(matcher.GetMatchScore(), 0 + 2 + 1, ());
}
|
||||
|
||||
// Single keyword "minsk", empty prefix, both max costs set to 4.
// After processing the name "Suomi" the test expects a total match score of 5.
UNIT_TEST(KeywordMatcher_Suomi)
{
  KeywordMatcherAdaptor matcherAdaptor("", 4, 4, "minsk");
  search::impl::KeywordMatcher & matcher = *matcherAdaptor.m_pMatcher;

  matcher.ProcessName("Suomi");
  TEST_EQUAL(matcher.GetMatchScore(), 5, ());
}
|
||||
|
|
Loading…
Add table
Reference in a new issue