From 00aafcdeaeca75e35deeb5f4d1b7b436f4c7e921 Mon Sep 17 00:00:00 2001 From: vng Date: Thu, 23 Jan 2014 22:29:54 +0300 Subject: [PATCH] [search] Fixed keyword matching: compare matched name's length only after language comparison. --- search/keyword_lang_matcher.cpp | 2 +- search/keyword_matcher.cpp | 11 +++- search/keyword_matcher.hpp | 7 +-- search/search_tests/keyword_matcher_test.cpp | 62 +++++++++++++++++--- 4 files changed, 66 insertions(+), 16 deletions(-) diff --git a/search/keyword_lang_matcher.cpp b/search/keyword_lang_matcher.cpp index 0ac697c12b..5abe7f0924 100644 --- a/search/keyword_lang_matcher.cpp +++ b/search/keyword_lang_matcher.cpp @@ -30,7 +30,7 @@ bool KeywordLangMatcher::ScoreT::operator <(KeywordLangMatcher::ScoreT const & s if (m_langScore != score.m_langScore) return m_langScore < score.m_langScore; - return false; + return m_parentScore.LessInTokensLength(score.m_parentScore); } void KeywordLangMatcher::SetLanguages(vector > const & languagePriorities) diff --git a/search/keyword_matcher.cpp b/search/keyword_matcher.cpp index 72335aad38..bb580cf9b7 100644 --- a/search/keyword_matcher.cpp +++ b/search/keyword_matcher.cpp @@ -116,10 +116,17 @@ bool KeywordMatcher::ScoreT::operator < (KeywordMatcher::ScoreT const & s) const if (m_sumTokenMatchDistance != s.m_sumTokenMatchDistance) return m_sumTokenMatchDistance > s.m_sumTokenMatchDistance; + return false; +} + +bool KeywordMatcher::ScoreT::LessInTokensLength(ScoreT const & s) const +{ if (m_bFullQueryMatched) + { + ASSERT(s.m_bFullQueryMatched, ()); return m_nameTokensLength > s.m_nameTokensLength; - else - return false; + } + return false; } string DebugPrint(KeywordMatcher::ScoreT const & score) diff --git a/search/keyword_matcher.hpp b/search/keyword_matcher.hpp index 28b7d36e12..0ad1114bdf 100644 --- a/search/keyword_matcher.hpp +++ b/search/keyword_matcher.hpp @@ -19,13 +19,14 @@ public: public: ScoreT(); bool operator < (ScoreT const & s) const; + bool LessInTokensLength(ScoreT const & s) const; + + bool IsQueryMatched() const { return m_bFullQueryMatched; } private: friend class KeywordMatcher; friend string DebugPrint(ScoreT const & score); - bool IsQueryMatched() const { return m_bFullQueryMatched; } - uint32_t m_sumTokenMatchDistance; uint32_t m_nameTokensMatched; uint32_t m_nameTokensLength; @@ -48,8 +49,6 @@ public: ScoreT Score(StringT const * tokens, size_t count) const; //@} - static bool IsQueryMatched(ScoreT const & score) { return score.IsQueryMatched(); } - private: vector m_keywords; StringT m_prefix; diff --git a/search/search_tests/keyword_matcher_test.cpp b/search/search_tests/keyword_matcher_test.cpp index 7bbb0691a9..8531200bbf 100644 --- a/search/search_tests/keyword_matcher_test.cpp +++ b/search/search_tests/keyword_matcher_test.cpp @@ -17,7 +17,6 @@ namespace { using search::KeywordMatcher; -typedef search::KeywordMatcher::ScoreT ScoreT; using search::MAX_TOKENS; enum ExpectedMatchResult @@ -42,8 +41,7 @@ struct KeywordMatcherTestCase char const * m_name; }; -template -void TestKeywordMatcher(char const * const query, KeywordMatcherTestCase const (&testCases)[N]) +void InitMatcher(char const * query, KeywordMatcher & matcher) { vector keywords; strings::UniString prefix; @@ -53,20 +51,54 @@ void TestKeywordMatcher(char const * const query, KeywordMatcherTestCase const ( keywords.pop_back(); } - KeywordMatcher matcher; matcher.SetKeywords(&keywords[0], keywords.size(), prefix); - ScoreT prevScore = ScoreT(); +} + +class TestScore +{ + typedef KeywordMatcher::ScoreT ScoreT; + ScoreT m_score; + +public: + TestScore() {} + TestScore(ScoreT const & score) : m_score(score) {} + + bool operator<(TestScore const & s) const + { + if (m_score < s.m_score) + return true; + return m_score.LessInTokensLength(s.m_score); + } + + bool IsQueryMatched() const { return m_score.IsQueryMatched(); } + + friend string DebugPrint(TestScore const & score); +}; + +string DebugPrint(TestScore const & s) +{ + return DebugPrint(s.m_score); +} + +template +void TestKeywordMatcher(char const * const query, KeywordMatcherTestCase const (&testCases)[N]) +{ + KeywordMatcher matcher; + InitMatcher(query, matcher); + + TestScore prevScore; for (size_t i = 0; i < N; ++i) { char const * const name = testCases[i].m_name; char const * const prevName = (i == 0 ? "N/A" : testCases[i-1].m_name); - ScoreT const testScore = matcher.Score(name); + TestScore const testScore = matcher.Score(name); // Test that a newly created matcher returns the same result { KeywordMatcher freshMatcher; - freshMatcher.SetKeywords(&keywords[0], keywords.size(), prefix); - ScoreT const freshScore = freshMatcher.Score(name); + InitMatcher(query, freshMatcher); + + TestScore const freshScore = freshMatcher.Score(name); // TEST_EQUAL(testScore, freshScore, (query, name)); TEST(!(testScore < freshScore), (query, name)); TEST(!(freshScore < testScore), (query, name)); @@ -75,7 +107,7 @@ void TestKeywordMatcher(char const * const query, KeywordMatcherTestCase const ( if (testCases[i].m_eMatch != ANY_RES) { TEST_EQUAL(testCases[i].m_eMatch == MATCHES, - KeywordMatcher::IsQueryMatched(testScore), + testScore.IsQueryMatched(), (query, name, testScore)); } @@ -103,6 +135,7 @@ void TestKeywordMatcher(char const * const query, KeywordMatcherTestCase const ( } // unnamed namespace + UNIT_TEST(KeywordMatcher_Prefix) { char const query[] = "new"; @@ -297,3 +330,14 @@ UNIT_TEST(KeywordMatcher_ManyTokensInReverseOrder) }; TestKeywordMatcher(query.c_str(), testCases); } + +UNIT_TEST(KeywordMatcher_DifferentLangs) +{ + KeywordMatcher matcher; + + InitMatcher("не", matcher); + + char const * arr[] = { "Невский переулок", "Неўскі завулак" }; + TEST(!(matcher.Score(arr[0]) < matcher.Score(arr[1])), ()); + TEST(!(matcher.Score(arr[1]) < matcher.Score(arr[0])), ()); +}