From a1f0d166338770f1d3febeba88fa361e9acc98f6 Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Tue, 31 Oct 2017 14:24:19 +0300 Subject: [PATCH] Review fixes. --- search/keyword_lang_matcher.cpp | 30 ++++--- search/keyword_lang_matcher.hpp | 27 +++--- search/keyword_matcher.cpp | 89 +++++++++++-------- search/keyword_matcher.hpp | 40 ++++----- search/ranker.cpp | 7 +- .../keyword_lang_matcher_test.cpp | 16 ++-- search/search_tests/keyword_matcher_test.cpp | 14 +-- 7 files changed, 121 insertions(+), 102 deletions(-) diff --git a/search/keyword_lang_matcher.cpp b/search/keyword_lang_matcher.cpp index e65c2aaf8c..5639af2e8f 100644 --- a/search/keyword_lang_matcher.cpp +++ b/search/keyword_lang_matcher.cpp @@ -14,17 +14,17 @@ using namespace std; namespace search { -// KeywordLangMatcher::ScoreT ---------------------------------------------------------------------- -KeywordLangMatcher::ScoreT::ScoreT() : m_langScore(numeric_limits::min()) +// KeywordLangMatcher::Score ---------------------------------------------------------------------- +KeywordLangMatcher::Score::Score() : m_langScore(numeric_limits::min()) { } -KeywordLangMatcher::ScoreT::ScoreT(KeywordMatcher::ScoreT const & score, int langScore) +KeywordLangMatcher::Score::Score(KeywordMatcher::Score const & score, int langScore) : m_parentScore(score), m_langScore(langScore) { } -bool KeywordLangMatcher::ScoreT::operator<(KeywordLangMatcher::ScoreT const & score) const +bool KeywordLangMatcher::Score::operator<(KeywordLangMatcher::Score const & score) const { if (m_parentScore != score.m_parentScore) return m_parentScore < score.m_parentScore; @@ -49,7 +49,7 @@ void KeywordLangMatcher::SetLanguages(size_t tier, std::vector && langua m_languagePriorities[tier] = move(languages); } -int KeywordLangMatcher::GetLangScore(int8_t lang) const +int KeywordLangMatcher::CalcLangScore(int8_t lang) const { int const numTiers = static_cast(m_languagePriorities.size()); for (int i = 0; i < numTiers; ++i) @@ -64,27 +64,29 @@ int KeywordLangMatcher::GetLangScore(int8_t lang) const return -numTiers; } -KeywordLangMatcher::ScoreT KeywordLangMatcher::Score(int8_t lang, string const & name) const +KeywordLangMatcher::Score KeywordLangMatcher::CalcScore(int8_t lang, string const & name) const { - return ScoreT(m_keywordMatcher.Score(name), GetLangScore(lang)); + return Score(m_keywordMatcher.CalcScore(name), CalcLangScore(lang)); } -KeywordLangMatcher::ScoreT KeywordLangMatcher::Score(int8_t lang, StringT const & name) const +KeywordLangMatcher::Score KeywordLangMatcher::CalcScore(int8_t lang, + strings::UniString const & name) const { - return ScoreT(m_keywordMatcher.Score(name), GetLangScore(lang)); + return Score(m_keywordMatcher.CalcScore(name), CalcLangScore(lang)); } -KeywordLangMatcher::ScoreT KeywordLangMatcher::Score(int8_t lang, - StringT const * tokens, size_t count) const +KeywordLangMatcher::Score KeywordLangMatcher::CalcScore(int8_t lang, + strings::UniString const * tokens, + size_t count) const { - return ScoreT(m_keywordMatcher.Score(tokens, count), GetLangScore(lang)); + return Score(m_keywordMatcher.CalcScore(tokens, count), CalcLangScore(lang)); } // Functions --------------------------------------------------------------------------------------- -string DebugPrint(KeywordLangMatcher::ScoreT const & score) +string DebugPrint(KeywordLangMatcher::Score const & score) { ostringstream ss; - ss << "KLM::ScoreT(" << DebugPrint(score.m_parentScore) << ", LS=" << score.m_langScore << ")"; + ss << "KLM::Score(" << DebugPrint(score.m_parentScore) << ", LS=" << score.m_langScore << ")"; return ss.str(); } } // namespace search diff --git a/search/keyword_lang_matcher.hpp b/search/keyword_lang_matcher.hpp index a55de89f31..ac64d4c69d 100644 --- a/search/keyword_lang_matcher.hpp +++ b/search/keyword_lang_matcher.hpp @@ -2,6 +2,8 @@ #include "search/keyword_matcher.hpp" +#include "base/string_utils.hpp" + #include #include #include @@ -11,21 +13,19 @@ namespace search class KeywordLangMatcher { public: - using StringT = KeywordMatcher::StringT; - - class ScoreT + class Score { public: - ScoreT(); - bool operator<(ScoreT const & s) const; + Score(); + bool operator<(Score const & s) const; private: friend class KeywordLangMatcher; - friend string DebugPrint(ScoreT const & score); + friend string DebugPrint(Score const & score); - ScoreT(KeywordMatcher::ScoreT const & score, int langScore); + Score(KeywordMatcher::Score const & score, int langScore); - KeywordMatcher::ScoreT m_parentScore; + KeywordMatcher::Score m_parentScore; int m_langScore; }; @@ -50,18 +50,19 @@ public: } // Store references to keywords from source array of strings. - inline void SetKeywords(StringT const * keywords, size_t count, StringT const & prefix) + inline void SetKeywords(strings::UniString const * keywords, size_t count, + strings::UniString const & prefix) { m_keywordMatcher.SetKeywords(keywords, count, prefix); } // Returns the Score of the name (greater is better). - ScoreT Score(int8_t lang, string const & name) const; - ScoreT Score(int8_t lang, StringT const & name) const; - ScoreT Score(int8_t lang, StringT const * tokens, size_t count) const; + Score CalcScore(int8_t lang, string const & name) const; + Score CalcScore(int8_t lang, strings::UniString const & name) const; + Score CalcScore(int8_t lang, strings::UniString const * tokens, size_t count) const; private: - int GetLangScore(int8_t lang) const; + int CalcLangScore(int8_t lang) const; std::vector> m_languagePriorities; KeywordMatcher m_keywordMatcher; diff --git a/search/keyword_matcher.cpp b/search/keyword_matcher.cpp index 4be49eeea4..3c344fe1a3 100644 --- a/search/keyword_matcher.cpp +++ b/search/keyword_matcher.cpp @@ -3,11 +3,14 @@ #include "indexer/search_delimiters.hpp" #include "indexer/search_string_utils.hpp" +#include "base/assert.hpp" +#include "base/buffer_vector.hpp" #include "base/stl_add.hpp" -#include "base/string_utils.hpp" -#include "std/algorithm.hpp" -#include "std/sstream.hpp" +#include +#include + +using namespace std; namespace search { @@ -22,26 +25,28 @@ void KeywordMatcher::Clear() m_prefix.clear(); } -void KeywordMatcher::SetKeywords(StringT const * keywords, size_t count, StringT const & prefix) +void KeywordMatcher::SetKeywords(strings::UniString const * keywords, size_t count, + strings::UniString const & prefix) { m_keywords.assign(keywords, keywords + count); m_prefix = prefix; } -KeywordMatcher::ScoreT KeywordMatcher::Score(string const & name) const +KeywordMatcher::Score KeywordMatcher::CalcScore(string const & name) const { - return Score(NormalizeAndSimplifyString(name)); + return CalcScore(NormalizeAndSimplifyString(name)); } -KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const & name) const +KeywordMatcher::Score KeywordMatcher::CalcScore(strings::UniString const & name) const { - buffer_vector tokens; + buffer_vector tokens; SplitUniString(name, MakeBackInsertFunctor(tokens), Delimiters()); - return Score(tokens.data(), tokens.size()); + return CalcScore(tokens.data(), tokens.size()); } -KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const * tokens, size_t count) const +KeywordMatcher::Score KeywordMatcher::CalcScore(strings::UniString const * tokens, + size_t count) const { // Some names can have too many tokens. Trim them. count = min(count, static_cast(MAX_TOKENS)); @@ -50,10 +55,12 @@ KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const * tokens, size_t coun vector isNameTokenMatched(count); uint32_t sumTokenMatchDistance = 0; int8_t prevTokenMatchDistance = 0; - bool bPrefixMatched = true; + bool prefixMatched = true; for (int i = 0; i < m_keywords.size(); ++i) + { for (int j = 0; j < count && !isQueryTokenMatched[i]; ++j) + { if (!isNameTokenMatched[j] && m_keywords[i] == tokens[j]) { isQueryTokenMatched[i] = isNameTokenMatched[j] = true; @@ -61,29 +68,35 @@ KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const * tokens, size_t coun sumTokenMatchDistance += abs(tokenMatchDistance - prevTokenMatchDistance); prevTokenMatchDistance = tokenMatchDistance; } + } + } if (!m_prefix.empty()) { - bPrefixMatched = false; - for (int j = 0; j < count && !bPrefixMatched; ++j) + prefixMatched = false; + for (int j = 0; j < count && !prefixMatched; ++j) + { if (!isNameTokenMatched[j] && strings::StartsWith(tokens[j].begin(), tokens[j].end(), m_prefix.begin(), m_prefix.end())) { - isNameTokenMatched[j] = bPrefixMatched = true; + isNameTokenMatched[j] = prefixMatched = true; int8_t const tokenMatchDistance = int(m_keywords.size()) - j; sumTokenMatchDistance += abs(tokenMatchDistance - prevTokenMatchDistance); } + } } uint8_t numQueryTokensMatched = 0; for (size_t i = 0; i < isQueryTokenMatched.size(); ++i) + { if (isQueryTokenMatched[i]) ++numQueryTokensMatched; + } - ScoreT score; - score.m_bFullQueryMatched = bPrefixMatched && (numQueryTokensMatched == isQueryTokenMatched.size()); - score.m_bPrefixMatched = bPrefixMatched; - score.m_numQueryTokensAndPrefixMatched = numQueryTokensMatched + (bPrefixMatched ? 1 : 0); + Score score; + score.m_fullQueryMatched = prefixMatched && (numQueryTokensMatched == isQueryTokenMatched.size()); + score.m_prefixMatched = prefixMatched; + score.m_numQueryTokensAndPrefixMatched = numQueryTokensMatched + (prefixMatched ? 1 : 0); score.m_nameTokensMatched = 0; score.m_nameTokensLength = 0; @@ -98,20 +111,24 @@ KeywordMatcher::ScoreT KeywordMatcher::Score(StringT const * tokens, size_t coun return score; } -KeywordMatcher::ScoreT::ScoreT() - : m_sumTokenMatchDistance(0), m_nameTokensMatched(0), m_nameTokensLength(0), - m_numQueryTokensAndPrefixMatched(0), m_bFullQueryMatched(false), m_bPrefixMatched(false) +KeywordMatcher::Score::Score() + : m_sumTokenMatchDistance(0) + , m_nameTokensMatched(0) + , m_nameTokensLength(0) + , m_numQueryTokensAndPrefixMatched(0) + , m_fullQueryMatched(false) + , m_prefixMatched(false) { } -bool KeywordMatcher::ScoreT::operator<(KeywordMatcher::ScoreT const & s) const +bool KeywordMatcher::Score::operator<(KeywordMatcher::Score const & s) const { - if (m_bFullQueryMatched != s.m_bFullQueryMatched) - return m_bFullQueryMatched < s.m_bFullQueryMatched; + if (m_fullQueryMatched != s.m_fullQueryMatched) + return m_fullQueryMatched < s.m_fullQueryMatched; if (m_numQueryTokensAndPrefixMatched != s.m_numQueryTokensAndPrefixMatched) return m_numQueryTokensAndPrefixMatched < s.m_numQueryTokensAndPrefixMatched; - if (m_bPrefixMatched != s.m_bPrefixMatched) - return m_bPrefixMatched < s.m_bPrefixMatched; + if (m_prefixMatched != s.m_prefixMatched) + return m_prefixMatched < s.m_prefixMatched; if (m_nameTokensMatched != s.m_nameTokensMatched) return m_nameTokensMatched < s.m_nameTokensMatched; if (m_sumTokenMatchDistance != s.m_sumTokenMatchDistance) @@ -120,32 +137,32 @@ bool KeywordMatcher::ScoreT::operator<(KeywordMatcher::ScoreT const & s) const return false; } -bool KeywordMatcher::ScoreT::operator==(KeywordMatcher::ScoreT const & s) const +bool KeywordMatcher::Score::operator==(KeywordMatcher::Score const & s) const { return m_sumTokenMatchDistance == s.m_sumTokenMatchDistance && m_nameTokensMatched == s.m_nameTokensMatched && m_numQueryTokensAndPrefixMatched == s.m_numQueryTokensAndPrefixMatched - && m_bFullQueryMatched == s.m_bFullQueryMatched - && m_bPrefixMatched == s.m_bPrefixMatched; + && m_fullQueryMatched == s.m_fullQueryMatched + && m_prefixMatched == s.m_prefixMatched; } -bool KeywordMatcher::ScoreT::LessInTokensLength(ScoreT const & s) const +bool KeywordMatcher::Score::LessInTokensLength(Score const & s) const { - if (m_bFullQueryMatched) + if (m_fullQueryMatched) { - ASSERT(s.m_bFullQueryMatched, ()); + ASSERT(s.m_fullQueryMatched, ()); return m_nameTokensLength > s.m_nameTokensLength; } return false; } -string DebugPrint(KeywordMatcher::ScoreT const & score) +string DebugPrint(KeywordMatcher::Score const & score) { ostringstream out; - out << "KeywordMatcher::ScoreT("; - out << "FQM=" << score.m_bFullQueryMatched; + out << "KeywordMatcher::Score("; + out << "FQM=" << score.m_fullQueryMatched; out << ",nQTM=" << static_cast(score.m_numQueryTokensAndPrefixMatched); - out << ",PM=" << score.m_bPrefixMatched; + out << ",PM=" << score.m_prefixMatched; out << ",NTM="; for (int i = MAX_TOKENS-1; i >= 0; --i) out << ((score.m_nameTokensMatched >> i) & 1); diff --git a/search/keyword_matcher.hpp b/search/keyword_matcher.hpp index 442e82f32d..ded6c3da05 100644 --- a/search/keyword_matcher.hpp +++ b/search/keyword_matcher.hpp @@ -4,41 +4,40 @@ #include "base/string_utils.hpp" -#include "std/string.hpp" -#include "std/vector.hpp" +#include +#include +#include namespace search { class KeywordMatcher { public: - using StringT = strings::UniString; - - class ScoreT + class Score { public: - ScoreT(); + Score(); // *NOTE* m_nameTokensLength is usually used as a late stage tiebreaker // and does not take part in the operators. - bool operator<(ScoreT const & s) const; - bool operator==(ScoreT const & s) const; - bool operator!=(ScoreT const & s) const { return !(*this == s); } + bool operator<(Score const & s) const; + bool operator==(Score const & s) const; + bool operator!=(Score const & s) const { return !(*this == s); } - bool LessInTokensLength(ScoreT const & s) const; + bool LessInTokensLength(Score const & s) const; - bool IsQueryMatched() const { return m_bFullQueryMatched; } + bool IsQueryMatched() const { return m_fullQueryMatched; } private: friend class KeywordMatcher; - friend string DebugPrint(ScoreT const & score); + friend std::string DebugPrint(Score const & score); uint32_t m_sumTokenMatchDistance; uint32_t m_nameTokensMatched; uint32_t m_nameTokensLength; uint8_t m_numQueryTokensAndPrefixMatched; - bool m_bFullQueryMatched : 1; - bool m_bPrefixMatched : 1; + bool m_fullQueryMatched : 1; + bool m_prefixMatched : 1; }; KeywordMatcher(); @@ -46,17 +45,18 @@ public: void Clear(); /// Internal copy of keywords is made. - void SetKeywords(StringT const * keywords, size_t count, StringT const & prefix); + void SetKeywords(strings::UniString const * keywords, size_t count, + strings::UniString const & prefix); /// @return Score of the name (greater is better). //@{ - ScoreT Score(string const & name) const; - ScoreT Score(StringT const & name) const; - ScoreT Score(StringT const * tokens, size_t count) const; + Score CalcScore(std::string const & name) const; + Score CalcScore(strings::UniString const & name) const; + Score CalcScore(strings::UniString const * tokens, size_t count) const; //@} private: - vector m_keywords; - StringT m_prefix; + std::vector m_keywords; + strings::UniString m_prefix; }; } // namespace search diff --git a/search/ranker.cpp b/search/ranker.cpp index a8f5b7dc1d..5fc77d64a0 100644 --- a/search/ranker.cpp +++ b/search/ranker.cpp @@ -510,10 +510,9 @@ void Ranker::MakeRankerResults(Geocoder::Params const & geocoderParams, void Ranker::GetBestMatchName(FeatureType const & f, string & name) const { - KeywordLangMatcher::ScoreT bestScore; - auto bestNameFinder = [&](int8_t lang, string const & s) -> bool - { - auto const score = m_keywordsScorer.Score(lang, s); + KeywordLangMatcher::Score bestScore; + auto bestNameFinder = [&](int8_t lang, string const & s) -> bool { + auto const score = m_keywordsScorer.CalcScore(lang, s); if (bestScore < score) { bestScore = score; diff --git a/search/search_tests/keyword_lang_matcher_test.cpp b/search/search_tests/keyword_lang_matcher_test.cpp index 983185678a..738116468a 100644 --- a/search/search_tests/keyword_lang_matcher_test.cpp +++ b/search/search_tests/keyword_lang_matcher_test.cpp @@ -12,7 +12,7 @@ using namespace std; namespace { using search::KeywordLangMatcher; -using ScoreT = search::KeywordLangMatcher::ScoreT; +using Score = search::KeywordLangMatcher::Score; enum { @@ -62,13 +62,13 @@ UNIT_TEST(KeywordMatcher_LanguageMatchIsUsedWhenTokenMatchIsTheSame) char const * name = "test"; KeywordLangMatcher matcher = CreateMatcher(query); - TEST(matcher.Score(LANG_UNKNOWN, name) < matcher.Score(LANG_SOME, name), ()); - TEST(matcher.Score(LANG_UNKNOWN, name) < matcher.Score(LANG_SOME_OTHER, name), ()); - TEST(matcher.Score(LANG_UNKNOWN, name) < matcher.Score(LANG_HIGH_PRIORITY, name), ()); + TEST(matcher.CalcScore(LANG_UNKNOWN, name) < matcher.CalcScore(LANG_SOME, name), ()); + TEST(matcher.CalcScore(LANG_UNKNOWN, name) < matcher.CalcScore(LANG_SOME_OTHER, name), ()); + TEST(matcher.CalcScore(LANG_UNKNOWN, name) < matcher.CalcScore(LANG_HIGH_PRIORITY, name), ()); - TEST(!(matcher.Score(LANG_SOME, name) < matcher.Score(LANG_SOME_OTHER, name)), ()); - TEST(!(matcher.Score(LANG_SOME_OTHER, name) < matcher.Score(LANG_SOME, name)), ()); + TEST(!(matcher.CalcScore(LANG_SOME, name) < matcher.CalcScore(LANG_SOME_OTHER, name)), ()); + TEST(!(matcher.CalcScore(LANG_SOME_OTHER, name) < matcher.CalcScore(LANG_SOME, name)), ()); - TEST(matcher.Score(LANG_SOME, name) < matcher.Score(LANG_HIGH_PRIORITY, name), ()); - TEST(matcher.Score(LANG_SOME_OTHER, name) < matcher.Score(LANG_HIGH_PRIORITY, name), ()); + TEST(matcher.CalcScore(LANG_SOME, name) < matcher.CalcScore(LANG_HIGH_PRIORITY, name), ()); + TEST(matcher.CalcScore(LANG_SOME_OTHER, name) < matcher.CalcScore(LANG_HIGH_PRIORITY, name), ()); } diff --git a/search/search_tests/keyword_matcher_test.cpp b/search/search_tests/keyword_matcher_test.cpp index bd52b53f4b..5414223d2b 100644 --- a/search/search_tests/keyword_matcher_test.cpp +++ b/search/search_tests/keyword_matcher_test.cpp @@ -55,12 +55,12 @@ void InitMatcher(char const * query, KeywordMatcher & matcher) class TestScore { - typedef KeywordMatcher::ScoreT ScoreT; - ScoreT m_score; + typedef KeywordMatcher::Score Score; + Score m_score; public: TestScore() {} - TestScore(ScoreT const & score) : m_score(score) {} + TestScore(Score const & score) : m_score(score) {} bool operator<(TestScore const & s) const { @@ -90,14 +90,14 @@ void TestKeywordMatcher(char const * const query, KeywordMatcherTestCase const ( { char const * const name = testCases[i].m_name; char const * const prevName = (i == 0 ? "N/A" : testCases[i-1].m_name); - TestScore const testScore = matcher.Score(name); + TestScore const testScore = matcher.CalcScore(name); // Test that a newly created matcher returns the same result { KeywordMatcher freshMatcher; InitMatcher(query, freshMatcher); - TestScore const freshScore = freshMatcher.Score(name); + TestScore const freshScore = freshMatcher.CalcScore(name); // TEST_EQUAL(testScore, freshScore, (query, name)); TEST(!(testScore < freshScore), (query, name)); TEST(!(freshScore < testScore), (query, name)); @@ -337,6 +337,6 @@ UNIT_TEST(KeywordMatcher_DifferentLangs) InitMatcher("не", matcher); char const * arr[] = { "Невский переулок", "Неўскі завулак" }; - TEST(!(matcher.Score(arr[0]) < matcher.Score(arr[1])), ()); - TEST(!(matcher.Score(arr[1]) < matcher.Score(arr[0])), ()); + TEST(!(matcher.CalcScore(arr[0]) < matcher.CalcScore(arr[1])), ()); + TEST(!(matcher.CalcScore(arr[1]) < matcher.CalcScore(arr[0])), ()); }