From bd48d71ceb9587241e66d2907ce17b1b8afa37e9 Mon Sep 17 00:00:00 2001 From: Yury Melnichek Date: Mon, 23 May 2011 17:44:32 +0200 Subject: [PATCH] Refactored and improved search: approximate matching and splitting feature names into tokens. --- map/framework.cpp | 18 ++-- map/framework.hpp | 3 +- qt/mainwindow.cpp | 10 +-- search/engine.cpp | 23 +++++ search/engine.hpp | 31 +++++++ search/keyword_matcher.cpp | 60 +++++++++++++ search/keyword_matcher.hpp | 54 ++++++++++++ search/query.cpp | 78 +++++++++++++++-- search/query.hpp | 43 +++++++-- search/result.cpp | 21 +++++ search/result.hpp | 24 +++++ search/search.pro | 22 +++-- search/search_processor.cpp | 2 +- search/search_processor.hpp | 12 +-- search/search_tests/keyword_matcher_test.cpp | 85 ++++++++++++++++++ search/search_tests/match_cost_mock.hpp | 23 +++++ search/search_tests/query_test.cpp | 28 ++++++ search/search_tests/search_tests.pro | 5 ++ search/search_tests/string_match_test.cpp | 92 +++++++++++--------- search/string_match.cpp | 14 +-- search/string_match.hpp | 7 +- 21 files changed, 554 insertions(+), 101 deletions(-) create mode 100644 search/engine.cpp create mode 100644 search/engine.hpp create mode 100644 search/keyword_matcher.cpp create mode 100644 search/keyword_matcher.hpp create mode 100644 search/result.cpp create mode 100644 search/result.hpp create mode 100644 search/search_tests/keyword_matcher_test.cpp create mode 100644 search/search_tests/match_cost_mock.hpp create mode 100644 search/search_tests/query_test.cpp diff --git a/map/framework.cpp b/map/framework.cpp index 83f3c28e4b..ef762a57d1 100644 --- a/map/framework.cpp +++ b/map/framework.cpp @@ -7,6 +7,9 @@ #include "benchmark_provider.hpp" #include "languages.hpp" +#include "../search/engine.hpp" +#include "../search/result.hpp" + #include "../indexer/feature_visibility.hpp" #include "../indexer/feature.hpp" #include "../indexer/scales.hpp" @@ -1062,16 +1065,11 @@ void FrameWork::AddRedrawCommandSure() { threads::MutexGuard lock(m_modelSyn); - search::Query query(text); - search::Processor doClass(query); - m_model.ForEachFeature(m_navigator.Screen().GlobalRect() - /*m2::RectD(MercatorBounds::minX, - MercatorBounds::minY, - MercatorBounds::maxX, - MercatorBounds::maxY)*/, doClass); - query.ForEachResultRef(callback); - // empty name indicates last element - callback(search::Result(string(), m2::RectD())); + search::Engine engine(&m_model.GetIndex()); + engine.Search(text, m_navigator.Screen().GlobalRect(), callback); + + // Empty name indicates last element. + callback(search::Result(string(), m2::RectD(), 0)); } template class FrameWork; diff --git a/map/framework.hpp b/map/framework.hpp index b1b4e76691..0ccbac9da6 100644 --- a/map/framework.hpp +++ b/map/framework.hpp @@ -10,8 +10,6 @@ #include "../defines.hpp" -#include "../search/search_processor.hpp" - #include "../indexer/drawing_rule_def.hpp" #include "../indexer/mercator.hpp" #include "../indexer/data_header.hpp" @@ -54,6 +52,7 @@ class redraw_operation_cancelled {}; struct BenchmarkRectProvider; +namespace search { class Result; } typedef function SearchCallbackT; namespace fwork diff --git a/qt/mainwindow.cpp b/qt/mainwindow.cpp index 246ac8ce98..c77dd28c8c 100644 --- a/qt/mainwindow.cpp +++ b/qt/mainwindow.cpp @@ -6,7 +6,7 @@ #include "../defines.hpp" -#include "../search/search_processor.hpp" +#include "../search/result.hpp" #include "../map/settings.hpp" @@ -360,7 +360,7 @@ void MainWindow::OnSearchTextChanged(QString const & str) void MainWindow::OnSearchResult(search::Result const & result) { - if (result.m_name.empty()) // last element + if (result.GetString().empty()) // last element { if (!m_Docks[3]->isVisible()) m_Docks[3]->show(); @@ -372,9 +372,9 @@ void MainWindow::OnSearchResult(search::Result const & result) int const rowCount = table->rowCount(); table->setRowCount(rowCount + 1); - QTableWidgetItem * item = new QTableWidgetItem(QString::fromUtf8(result.m_name.c_str())); - item->setData(Qt::UserRole, QRectF(QPointF(result.m_rect.minX(), result.m_rect.maxY()), - QPointF(result.m_rect.maxX(), result.m_rect.minY()))); + QTableWidgetItem * item = new QTableWidgetItem(QString::fromUtf8(result.GetString().c_str())); + item->setData(Qt::UserRole, QRectF(QPointF(result.GetRect().minX(), result.GetRect().maxY()), + QPointF(result.GetRect().maxX(), result.GetRect().minY()))); item->setFlags(Qt::ItemIsSelectable | Qt::ItemIsEnabled); table->setItem(rowCount, 0, item); } diff --git a/search/engine.cpp b/search/engine.cpp new file mode 100644 index 0000000000..e4e87dc06d --- /dev/null +++ b/search/engine.cpp @@ -0,0 +1,23 @@ +#include "engine.hpp" +#include "query.hpp" +#include "result.hpp" +#include "../indexer/feature.hpp" +#include "../std/string.hpp" +#include "../std/vector.hpp" + +namespace search +{ + +Engine::Engine(IndexType const * pIndex) : m_pIndex(pIndex) +{ +} + +void Engine::Search(string const & queryText, + m2::RectD const & rect, + function const & f) +{ + impl::Query query(queryText, rect, m_pIndex); + query.Search(f); +} + +} // namespace search diff --git a/search/engine.hpp b/search/engine.hpp new file mode 100644 index 0000000000..6da1b99020 --- /dev/null +++ b/search/engine.hpp @@ -0,0 +1,31 @@ +#pragma once +#include "../indexer/index.hpp" +#include "../geometry/rect2d.hpp" +#include "../base/base.hpp" +#include "../std/function.hpp" +#include "../std/string.hpp" + +class FileReader; +class FeatureType; + +namespace search +{ + +class Result; + +class Engine +{ +public: + typedef Index::Type IndexType; + + explicit Engine(IndexType const * pIndex); + + void Search(string const & query, + m2::RectD const & rect, + function const & f); + +private: + IndexType const * m_pIndex; +}; + +} // namespace search diff --git a/search/keyword_matcher.cpp b/search/keyword_matcher.cpp new file mode 100644 index 0000000000..54553c7a39 --- /dev/null +++ b/search/keyword_matcher.cpp @@ -0,0 +1,60 @@ +#include "keyword_matcher.hpp" +#include "delimiters.hpp" +#include "../base/string_utils.hpp" +#include "../std/numeric.hpp" + +namespace search +{ +namespace impl +{ + +KeywordMatcher::KeywordMatcher(strings::UniString * pKeywords, + size_t keywordsCount, + strings::UniString const & prefix, + uint32_t maxKeywordMatchCost, uint32_t maxPrefixMatchCost, + StringMatchFn keywordMatchFn, StringMatchFn prefixMatchFn) + : m_pKewords(pKeywords), m_prefix(prefix), + m_maxKeywordMatchCost(maxKeywordMatchCost), + m_maxPrefixMatchCost(maxPrefixMatchCost), + m_keywordMatchFn(keywordMatchFn), + m_prefixMatchFn(prefixMatchFn), + m_minKeywordMatchCost(keywordsCount, m_maxKeywordMatchCost + 1), + m_minPrefixMatchCost(m_maxPrefixMatchCost + 1) +{ +} + +void KeywordMatcher::ProcessName(string const & name) +{ + search::Delimiters delims; + for (strings::TokenizeIterator iter(name, delims); iter; ++iter) + { + strings::UniString const s = iter.GetUniString(); + + for (size_t i = 0; i < m_minKeywordMatchCost.size(); ++i) + { + m_minKeywordMatchCost[i] = min(m_minKeywordMatchCost[i], + m_keywordMatchFn(&m_pKewords[i][0], m_pKewords[i].size(), + &s[0], s.size(), + m_minKeywordMatchCost[i])); + } + + if (!m_prefix.empty()) + { + m_minPrefixMatchCost = min(m_minPrefixMatchCost, + m_prefixMatchFn(&m_prefix[0], m_prefix.size(), + &s[0], s.size(), + m_minPrefixMatchCost)); + } + else + m_minPrefixMatchCost = 0; + } +} + +uint32_t KeywordMatcher::GetMatchScore() const +{ + return accumulate(m_minKeywordMatchCost.begin(), m_minKeywordMatchCost.end(), + m_minPrefixMatchCost); +} + +} // namespace search::impl +} // namespace search diff --git a/search/keyword_matcher.hpp b/search/keyword_matcher.hpp new file mode 100644 index 0000000000..e6968ea338 --- /dev/null +++ b/search/keyword_matcher.hpp @@ -0,0 +1,54 @@ +#pragma once +#include "../base/base.hpp" +#include "../base/buffer_vector.hpp" +#include "../base/string_utils.hpp" +#include "../std/string.hpp" + +namespace search +{ +namespace impl +{ + +typedef uint32_t (* StringMatchFn)(strings::UniChar const * sA, uint32_t sizeA, + strings::UniChar const * sB, uint32_t sizeB, + uint32_t maxCost); + + +// Matches keywords agains given names. +class KeywordMatcher +{ + strings::UniString * m_pKewords; + strings::UniString const & m_prefix; + uint32_t m_maxKeywordMatchCost, m_maxPrefixMatchCost; + StringMatchFn m_keywordMatchFn, m_prefixMatchFn; + buffer_vector m_minKeywordMatchCost; + uint32_t m_minPrefixMatchCost; + +public: + KeywordMatcher(strings::UniString * pKeywords, + size_t keywordsCount, + strings::UniString const & prefix, + uint32_t maxKeywordMatchCost, uint32_t maxPrefixMatchCost, + StringMatchFn keywordMatchFn, StringMatchFn prefixMatchFn); + + void ProcessName(string const & name); + + // Useful for FeatureType.ForEachName(), calls ProcessName() and always returns true. + bool operator () (int /*lang*/, string const & name) + { + ProcessName(name); + return true; + } + + // Get total feature match score. + uint32_t GetMatchScore() const; + + // Get prefix match score. + uint32_t GetPrefixMatchScore() const { return m_minPrefixMatchCost; } + + // Get match score for each keyword. + uint32_t const * GetKeywordMatchScores() const { return &m_minKeywordMatchCost[0]; } +}; + +} // namespace search::impl +} // namespace search diff --git a/search/query.cpp b/search/query.cpp index abdb558d1c..c8c35f85ab 100644 --- a/search/query.cpp +++ b/search/query.cpp @@ -1,22 +1,84 @@ #include "query.hpp" #include "delimiters.hpp" +#include "keyword_matcher.hpp" +#include "string_match.hpp" -#include "../base/string_utils.hpp" - -namespace search1 +namespace search +{ +namespace impl { -Query::Query(string const & query) +uint32_t KeywordMatch(strings::UniChar const * sA, uint32_t sizeA, + strings::UniChar const * sB, uint32_t sizeB, + uint32_t maxCost) +{ + return StringMatchCost(sA, sizeA, sB, sizeB, DefaultMatchCost(), maxCost, false); +} + +uint32_t PrefixMatch(strings::UniChar const * sA, uint32_t sizeA, + strings::UniChar const * sB, uint32_t sizeB, + uint32_t maxCost) +{ + return StringMatchCost(sA, sizeA, sB, sizeB, DefaultMatchCost(), maxCost, true); +} + + +Query::Query(string const & query, m2::RectD const & rect, IndexType const * pIndex) + : m_queryText(query), m_rect(rect), m_pIndex(pIndex) { search::Delimiters delims; - strings::TokenizeIterator iter(query, delims); - while (iter) + for (strings::TokenizeIterator iter(query, delims); iter; ++iter) { if (iter.IsLast() && !delims(strings::LastUniChar(query))) - m_prefix = *iter; + m_prefix = iter.GetUniString(); else - m_keywords.push_back(*iter); + m_keywords.push_back(iter.GetUniString()); } } +struct FeatureProcessor +{ + Query & m_query; + + explicit FeatureProcessor(Query & query) : m_query(query) {} + + void operator () (FeatureType const & feature) const + { + KeywordMatcher matcher(&m_query.m_keywords[0], m_query.m_keywords.size(), + m_query.m_prefix, 1000, 1000, + &KeywordMatch, &PrefixMatch); + feature.ForEachNameRef(matcher); + m_query.AddResult(Result(feature.GetPreferredDrawableName(), feature.GetLimitRect(-1), + matcher.GetMatchScore())); + } +}; + +void Query::Search(function const & f) +{ + FeatureProcessor featureProcessor(*this); + m_pIndex->ForEachInViewport(featureProcessor, m_rect); + vector results; + results.reserve(m_resuts.size()); + while (!m_resuts.empty()) + { + results.push_back(m_resuts.top()); + m_resuts.pop(); + } + for (vector::const_reverse_iterator it = results.rbegin(); it != results.rend(); ++it) + f(*it); } + +void Query::AddResult(Result const & result) +{ + m_resuts.push(result); + while (m_resuts.size() > 10) + m_resuts.pop(); +} + +bool Query::ResultBetter::operator ()(Result const & r1, Result const & r2) const +{ + return r1.GetPenalty() < r2.GetPenalty(); +} + +} // namespace search::impl +} // namespace search diff --git a/search/query.hpp b/search/query.hpp index d950b87e69..1d58f05aa9 100644 --- a/search/query.hpp +++ b/search/query.hpp @@ -1,19 +1,48 @@ #pragma once -#include "../base/base.hpp" +#include "engine.hpp" +#include "result.hpp" +#include "../geometry/rect2d.hpp" +#include "../base/string_utils.hpp" +#include "../std/function.hpp" +#include "../std/queue.hpp" #include "../std/string.hpp" #include "../std/vector.hpp" -namespace search1 +namespace search +{ +namespace impl { class Query { public: - explicit Query(string const & query); -private: - vector m_keywords; - string m_prefix; + typedef Engine::IndexType IndexType; + + Query(string const & query, m2::RectD const & rect, IndexType const * pIndex); + + // Search with parameters, passed in constructor. + void Search(function const & f); + + // Add result for scoring. + void AddResult(Result const & result); + + struct ResultBetter + { + bool operator() (Result const & r1, Result const & r2) const; + }; + + string m_queryText; + vector m_keywords; + strings::UniString m_prefix; + + m2::RectD m_rect; + + IndexType const * m_pIndex; + IndexType::Query m_indexQuery; + + priority_queue, ResultBetter> m_resuts; }; -} // namespace search1 +} // namespace search::impl +} // namespace search diff --git a/search/result.cpp b/search/result.cpp new file mode 100644 index 0000000000..2a2af69caa --- /dev/null +++ b/search/result.cpp @@ -0,0 +1,21 @@ +#include "result.hpp" +#include "../base/base.hpp" +#include "../base/string_utils.hpp" + +namespace search +{ + +Result::Result(string const & str, m2::RectD const & rect, int penalty) + : m_str(str), m_rect(rect), m_penalty(penalty) +{ +#ifdef DEBUG + if (!str.empty()) + { + m_str.push_back(' '); + m_str += strings::to_string(penalty); + } +#endif +} + + +} // namespace search diff --git a/search/result.hpp b/search/result.hpp new file mode 100644 index 0000000000..44bb7957fe --- /dev/null +++ b/search/result.hpp @@ -0,0 +1,24 @@ +#pragma once +#include "../geometry/rect2d.hpp" +#include "../std/string.hpp" + +namespace search +{ + +// Search result. Search returns a list of them, ordered by score. +class Result +{ +public: + Result(string const & str, m2::RectD const & rect, int penalty); + + string GetString() const { return m_str; } + m2::RectD GetRect() const { return m_rect; } + int GetPenalty() const { return m_penalty; } + +private: + string m_str; + m2::RectD m_rect; + int m_penalty; +}; + +} diff --git a/search/search.pro b/search/search.pro index 086453d34f..0234e07e2e 100644 --- a/search/search.pro +++ b/search/search.pro @@ -10,13 +10,19 @@ DEPENDENCIES = indexer geometry coding base include($$ROOT_DIR/common.pri) HEADERS += \ - query.hpp \ - search_processor.hpp \ - string_match.hpp \ - delimiters.hpp \ + delimiters.hpp \ + engine.hpp \ + keyword_matcher.hpp \ + query.hpp \ + result.hpp \ + search_processor.hpp \ + string_match.hpp \ SOURCES += \ - query.cpp \ - search_processor.cpp \ - string_match.cpp \ - delimiters.cpp \ + delimiters.cpp \ + engine.cpp \ + keyword_matcher.cpp \ + query.cpp \ + result.cpp \ + search_processor.cpp \ + string_match.cpp \ diff --git a/search/search_processor.cpp b/search/search_processor.cpp index a0900a2f8e..e9d70ff3bc 100644 --- a/search/search_processor.cpp +++ b/search/search_processor.cpp @@ -47,7 +47,7 @@ namespace search } if (score > 0) { - m_queue.push(make_pair(score, Result(utf8s, m_currFeature->GetLimitRect(-1)))); + m_queue.push(make_pair(score, Result(utf8s, m_currFeature->GetLimitRect(-1), 0))); return false; } return true; diff --git a/search/search_processor.hpp b/search/search_processor.hpp index a9c05f7151..ef36df3747 100644 --- a/search/search_processor.hpp +++ b/search/search_processor.hpp @@ -1,7 +1,6 @@ #pragma once - +#include "result.hpp" #include "../geometry/rect2d.hpp" - #include "../std/string.hpp" #include "../std/vector.hpp" #include "../std/queue.hpp" @@ -10,15 +9,6 @@ class FeatureType; namespace search { - class Result - { - public: - Result(string const & name, m2::RectD const & rect) - : m_name(name), m_rect(rect) {} - string m_name; - m2::RectD m_rect; - }; - typedef pair elem_type; struct QueueComparer { diff --git a/search/search_tests/keyword_matcher_test.cpp b/search/search_tests/keyword_matcher_test.cpp new file mode 100644 index 0000000000..95459a7eca --- /dev/null +++ b/search/search_tests/keyword_matcher_test.cpp @@ -0,0 +1,85 @@ +#include "../../testing/testing.hpp" +#include "../keyword_matcher.hpp" +#include "match_cost_mock.hpp" +#include "../string_match.hpp" +#include "../../testing/testing_utils.hpp" +#include "../../base/string_utils.hpp" +#include "../../std/vector.hpp" + +namespace +{ + +uint32_t KeywordMatchForTest(strings::UniChar const * sA, uint32_t sizeA, + strings::UniChar const * sB, uint32_t sizeB, + uint32_t maxCost) +{ + return StringMatchCost(sA, sizeA, sB, sizeB, search::MatchCostMock(), + maxCost, false); +} + +uint32_t PrefixMatchForTest(strings::UniChar const * sA, uint32_t sizeA, + strings::UniChar const * sB, uint32_t sizeB, + uint32_t maxCost) +{ + return StringMatchCost(sA, sizeA, sB, sizeB, search::MatchCostMock(), + maxCost, true); +} + +} // unnamed namespace + +UNIT_TEST(KeywordMatcher_Smoke) +{ + vector keywords; + keywords.push_back(strings::MakeUniString("minsk")); + keywords.push_back(strings::MakeUniString("belarus")); + search::impl::KeywordMatcher matcher(&keywords[0], keywords.size(), + strings::MakeUniString("l"), + 3, 3, + &KeywordMatchForTest, &PrefixMatchForTest); + TEST_EQUAL(matcher.GetPrefixMatchScore(), 4, ()); + TEST_EQUAL(vector(matcher.GetKeywordMatchScores(), + matcher.GetKeywordMatchScores() + keywords.size()), + Vec(4, 4), ()); + TEST_EQUAL(matcher.GetMatchScore(), 4 + 4 + 4, ()); + + matcher.ProcessName("belarrr"); + TEST_EQUAL(matcher.GetPrefixMatchScore(), 1, ()); + TEST_EQUAL(vector(matcher.GetKeywordMatchScores(), + matcher.GetKeywordMatchScores() + keywords.size()), + Vec(4, 2), ()); + TEST_EQUAL(matcher.GetMatchScore(), 1 + 4 + 2, ()); + + matcher.ProcessName("belaruu minnn"); + TEST_EQUAL(matcher.GetPrefixMatchScore(), 1, ()); + TEST_EQUAL(vector(matcher.GetKeywordMatchScores(), + matcher.GetKeywordMatchScores() + keywords.size()), + Vec(2, 1), ()); + TEST_EQUAL(matcher.GetMatchScore(), 1 + 2 + 1, ()); + + matcher.ProcessName("belaruu les minnn"); + TEST_EQUAL(matcher.GetPrefixMatchScore(), 0, ()); + TEST_EQUAL(vector(matcher.GetKeywordMatchScores(), + matcher.GetKeywordMatchScores() + keywords.size()), + Vec(2, 1), ()); + TEST_EQUAL(matcher.GetMatchScore(), 0 + 2 + 1, ()); +} + +UNIT_TEST(KeywordMatcher_NoPrefix) +{ + vector keywords; + keywords.push_back(strings::MakeUniString("minsk")); + keywords.push_back(strings::MakeUniString("belarus")); + search::impl::KeywordMatcher matcher(&keywords[0], keywords.size(), + strings::MakeUniString(""), + 3, 3, + &KeywordMatchForTest, &PrefixMatchForTest); + TEST_EQUAL(matcher.GetPrefixMatchScore(), 4, ()); + TEST_EQUAL(matcher.GetMatchScore(), 4 + 4 + 4, ()); + + matcher.ProcessName("belaruu zzz minnn"); + TEST_EQUAL(matcher.GetPrefixMatchScore(), 0, ()); + TEST_EQUAL(vector(matcher.GetKeywordMatchScores(), + matcher.GetKeywordMatchScores() + keywords.size()), + Vec(2, 1), ()); + TEST_EQUAL(matcher.GetMatchScore(), 0 + 2 + 1, ()); +} diff --git a/search/search_tests/match_cost_mock.hpp b/search/search_tests/match_cost_mock.hpp new file mode 100644 index 0000000000..c4a6fef902 --- /dev/null +++ b/search/search_tests/match_cost_mock.hpp @@ -0,0 +1,23 @@ +#pragma once + +namespace search +{ + +template class MatchCostMock +{ +public: + uint32_t Cost10(T) const { return 1; } + uint32_t Cost01(T) const { return 1; } + uint32_t Cost11(T, T) const { return 1; } + uint32_t Cost12(T a, T const * pB) const + { + if (a == 'X' && pB[0] == '>' && pB[1] == '<') + return 0; + return 2; + } + uint32_t Cost21(T const * pA, T b) const { return Cost12(b, pA); } + uint32_t Cost22(T const *, T const *) const { return 2; } + uint32_t SwapCost(T, T) const { return 1; } +}; + +} // namespace search diff --git a/search/search_tests/query_test.cpp b/search/search_tests/query_test.cpp new file mode 100644 index 0000000000..aed5fbb6df --- /dev/null +++ b/search/search_tests/query_test.cpp @@ -0,0 +1,28 @@ +#include "../../testing/testing.hpp" +#include "../query.hpp" +#include "../../base/string_utils.hpp" +#include "../../std/memcpy.hpp" +#include "../../std/string.hpp" + +using search::impl::Query; +using strings::MakeUniString; +using strings::UniString; + +UNIT_TEST(QueryParseKeywords_Smoke) +{ + vector expected; + expected.push_back(MakeUniString("minsk")); + expected.push_back(MakeUniString("belarus")); + TEST_EQUAL(expected, Query("minsk belarus ", m2::RectD(), NULL).m_keywords, ()); + TEST_EQUAL(MakeUniString(""), Query("minsk belarus ", m2::RectD(), NULL).m_prefix, ()); + TEST_EQUAL(expected, Query("minsk belarus ma", m2::RectD(), NULL).m_keywords, ()); + TEST_EQUAL(MakeUniString("ma"), Query("minsk belarus ma", m2::RectD(), NULL).m_prefix, ()); +} + +UNIT_TEST(QueryParseKeywords_Empty) +{ + TEST_EQUAL(vector(), Query("", m2::RectD(), NULL).m_keywords, ()); + TEST_EQUAL(MakeUniString(""), Query("", m2::RectD(), NULL).m_prefix, ()); + TEST_EQUAL(vector(), Query("Z", m2::RectD(), NULL).m_keywords, ()); + TEST_EQUAL(MakeUniString("Z"), Query("Z", m2::RectD(), NULL).m_prefix, ()); +} diff --git a/search/search_tests/search_tests.pro b/search/search_tests/search_tests.pro index cbb09573f0..b5a30dd80f 100644 --- a/search/search_tests/search_tests.pro +++ b/search/search_tests/search_tests.pro @@ -19,4 +19,9 @@ win32 { SOURCES += \ ../../testing/testingmain.cpp \ + keyword_matcher_test.cpp \ + query_test.cpp \ string_match_test.cpp \ + +HEADERS += \ + match_cost_mock.hpp \ diff --git a/search/search_tests/string_match_test.cpp b/search/search_tests/string_match_test.cpp index 0c50bfbc38..3385a68b34 100644 --- a/search/search_tests/string_match_test.cpp +++ b/search/search_tests/string_match_test.cpp @@ -1,55 +1,69 @@ #include "../../testing/testing.hpp" #include "../string_match.hpp" - +#include "match_cost_mock.hpp" #include "../../std/memcpy.hpp" namespace { -class TestMatchCost +uint32_t FullMatchCost(char const * a, char const * b, uint32_t maxCost = 1000) { -public: - uint32_t Cost10(char) const { return 1; } - uint32_t Cost01(char) const { return 1; } - uint32_t Cost11(char, char) const { return 1; } - uint32_t Cost12(char a, char const * pB) const - { - if (a == 'X' && pB[0] == '>' && pB[1] == '<') - return 0; - return 2; - } - uint32_t Cost21(char const * pA, char b) const { return Cost12(b, pA); } - uint32_t Cost22(char const *, char const *) const { return 2; } - uint32_t SwapCost(char, char) const { return 1; } -}; + return ::search::StringMatchCost(a, strlen(a), b, strlen(b), + search::MatchCostMock(), maxCost); +} -uint32_t MatchCost(char const * a, char const * b, uint32_t maxCost = 1000) +uint32_t PrefixMatchCost(char const * a, char const * b) { - return ::search::StringMatchCost(a, strlen(a), b, strlen(b), TestMatchCost(), maxCost); + return ::search::StringMatchCost(a, strlen(a), b, strlen(b), + search::MatchCostMock(), 1000, true); } } -UNIT_TEST(StringMatchCost) +UNIT_TEST(StringMatchCost_FullMatch) { - TEST_EQUAL(MatchCost("", ""), 0, ()); - TEST_EQUAL(MatchCost("a", "b"), 1, ()); - TEST_EQUAL(MatchCost("a", ""), 1, ()); - TEST_EQUAL(MatchCost("", "b"), 1, ()); - TEST_EQUAL(MatchCost("ab", "cd"), 2, ()); - TEST_EQUAL(MatchCost("ab", "ba"), 1, ()); - TEST_EQUAL(MatchCost("abcd", "efgh"), 4, ()); - TEST_EQUAL(MatchCost("Hello!", "Hello!"), 0, ()); - TEST_EQUAL(MatchCost("Hello!", "Helo!"), 1, ()); - TEST_EQUAL(MatchCost("X", "X"), 0, ()); - TEST_EQUAL(MatchCost("X", "><"), 0, ()); - TEST_EQUAL(MatchCost("XXX", "><><><"), 0, ()); - TEST_EQUAL(MatchCost("XXX", "><"), 0, ()); - TEST_EQUAL(MatchCost("TeXt", "Te><"), 1, ()); - TEST_EQUAL(MatchCost("TeXt", "TetX"), 1, ()); - TEST_EQUAL(MatchCost("TeXt", "Tet><"), 2, ()); - TEST_EQUAL(MatchCost("", "ALongString"), 11, ()); - TEST_EQUAL(MatchCost("x", "ALongString"), 11, ()); - TEST_EQUAL(MatchCost("g", "ALongString"), 10, ()); + TEST_EQUAL(FullMatchCost("", ""), 0, ()); + TEST_EQUAL(FullMatchCost("a", "b"), 1, ()); + TEST_EQUAL(FullMatchCost("a", ""), 1, ()); + TEST_EQUAL(FullMatchCost("", "b"), 1, ()); + TEST_EQUAL(FullMatchCost("ab", "cd"), 2, ()); + TEST_EQUAL(FullMatchCost("ab", "ba"), 1, ()); + TEST_EQUAL(FullMatchCost("abcd", "efgh"), 4, ()); + TEST_EQUAL(FullMatchCost("Hello!", "Hello!"), 0, ()); + TEST_EQUAL(FullMatchCost("Hello!", "Helo!"), 1, ()); + TEST_EQUAL(FullMatchCost("X", "X"), 0, ()); + TEST_EQUAL(FullMatchCost("X", "><"), 0, ()); + TEST_EQUAL(FullMatchCost("XXX", "><><><"), 0, ()); + TEST_EQUAL(FullMatchCost("XXX", "><"), 0, ()); + TEST_EQUAL(FullMatchCost("TeXt", "Te><"), 1, ()); + TEST_EQUAL(FullMatchCost("TeXt", "TetX"), 1, ()); + TEST_EQUAL(FullMatchCost("TeXt", "Tet><"), 2, ()); + TEST_EQUAL(FullMatchCost("", "ALongString"), 11, ()); + TEST_EQUAL(FullMatchCost("x", "ALongString"), 11, ()); + TEST_EQUAL(FullMatchCost("g", "ALongString"), 10, ()); +} + +UNIT_TEST(StringMatchCost_MaxCost) +{ + TEST_EQUAL(FullMatchCost("g", "ALongString", 1), 2, ()); + TEST_EQUAL(FullMatchCost("g", "ALongString", 5), 6, ()); + TEST_EQUAL(FullMatchCost("g", "ALongString", 9), 10, ()); + TEST_EQUAL(FullMatchCost("g", "ALongString", 9), 10, ()); + TEST_EQUAL(FullMatchCost("g", "ALongString", 10), 10, ()); + TEST_EQUAL(FullMatchCost("g", "ALongString", 11), 10, ()); +} + +UNIT_TEST(StringMatchCost_PrefixMatch) +{ + TEST_EQUAL(PrefixMatchCost("", "Hello!"), 0, ()); + TEST_EQUAL(PrefixMatchCost("H", "Hello!"), 0, ()); + TEST_EQUAL(PrefixMatchCost("He", "Hello!"), 0, ()); + TEST_EQUAL(PrefixMatchCost("Hel", "Hello!"), 0, ()); + TEST_EQUAL(PrefixMatchCost("Hell", "Hello!"), 0, ()); + TEST_EQUAL(PrefixMatchCost("Hello", "Hello!"), 0, ()); + TEST_EQUAL(PrefixMatchCost("Hello!", "Hello!"), 0, ()); + TEST_EQUAL(PrefixMatchCost("Hx", "Hello!"), 1, ()); + TEST_EQUAL(PrefixMatchCost("Helpo", "Hello!"), 1, ()); + TEST_EQUAL(PrefixMatchCost("Happo", "Hello!"), 3, ()); } diff --git a/search/string_match.cpp b/search/string_match.cpp index bf3e00b13d..aa3812faea 100644 --- a/search/string_match.cpp +++ b/search/string_match.cpp @@ -10,37 +10,37 @@ namespace search uint32_t DefaultMatchCost::Cost10(UniChar) const { - return 128; + return 256; } uint32_t DefaultMatchCost::Cost01(UniChar) const { - return 128; + return 256; } uint32_t DefaultMatchCost::Cost11(UniChar, UniChar) const { - return 128; + return 256; } uint32_t DefaultMatchCost::Cost12(UniChar, UniChar const *) const { - return 256; + return 512; } uint32_t DefaultMatchCost::Cost21(UniChar const *, UniChar) const { - return 256; + return 512; } uint32_t DefaultMatchCost::Cost22(UniChar const *, UniChar const *) const { - return 256; + return 512; } uint32_t DefaultMatchCost::SwapCost(UniChar, UniChar) const { - return 128; + return 256; } } // namespace search diff --git a/search/string_match.hpp b/search/string_match.hpp index fa482d2dbf..adaf99990a 100644 --- a/search/string_match.hpp +++ b/search/string_match.hpp @@ -29,7 +29,7 @@ struct MatchCostData template void PushMatchCost(PriorityQueyeT & q, uint32_t maxCost, uint32_t a, uint32_t b, uint32_t cost) { - if (cost < maxCost) + if (cost <= maxCost) q.push(MatchCostData(a, b, cost)); } @@ -50,7 +50,8 @@ public: template uint32_t StringMatchCost(CharT const * sA, uint32_t sizeA, CharT const * sB, uint32_t sizeB, - CostF const & costF, uint32_t maxCost) + CostF const & costF, uint32_t maxCost, + bool bPrefixMatch = false) { priority_queue > q; q.push(impl::MatchCostData(0, 0, 0)); @@ -63,7 +64,7 @@ uint32_t StringMatchCost(CharT const * sA, uint32_t sizeA, while (a < sizeA && b < sizeB && sA[a] == sB[b]) ++a, ++b; - if (a == sizeA && b == sizeB) + if (a == sizeA && (bPrefixMatch || b == sizeB)) return c; if (a < sizeA)