diff --git a/base/base_tests/levenshtein_dfa_test.cpp b/base/base_tests/levenshtein_dfa_test.cpp index 0560d4f535..84374246de 100644 --- a/base/base_tests/levenshtein_dfa_test.cpp +++ b/base/base_tests/levenshtein_dfa_test.cpp @@ -203,4 +203,75 @@ UNIT_TEST(LevenshteinDFA_ErrorsMade) TEST_EQUAL(GetResult(dfa, "кафер"), Result(Status::Accepts, 1 /* errorsMade */), ()); } } + +UNIT_TEST(LevenshteinDFA_PrefixDFAModifier) +{ + { + PrefixDFAModifier dfa(LevenshteinDFA("abcde", 2 /* maxErrors */)); + + auto it = dfa.Begin(); + DFAMove(it, "ab"); + + TEST(!it.Accepts(), ()); + TEST(!it.Rejects(), ()); + + DFAMove(it, "c"); + TEST(it.Accepts(), ()); + TEST(!it.Rejects(), ()); + TEST_EQUAL(it.ErrorsMade(), 2, ()); + + DFAMove(it, "d"); + TEST(it.Accepts(), ()); + TEST(!it.Rejects(), ()); + TEST_EQUAL(it.ErrorsMade(), 1, ()); + + DFAMove(it, "e"); + TEST(it.Accepts(), ()); + TEST(!it.Rejects(), ()); + TEST_EQUAL(it.ErrorsMade(), 0, ()); + + DFAMove(it, "fghijklmn"); + TEST(it.Accepts(), ()); + TEST(!it.Rejects(), ()); + TEST_EQUAL(it.ErrorsMade(), 0, ()); + } +} + +UNIT_TEST(LevenshteinDFA_PrefixDFASmoke) +{ + vector const kAlphabet = {'a', 'b', 'c'}; + vector sources; + vector queries; + auto generate = [](vector const & alphabet, size_t size, vector & result) + { + result.clear(); + result.resize(pow(alphabet.size(), size)); + for (size_t letterNumber = 0; letterNumber < size; ++letterNumber) + { + for (size_t i = 0; i < result.size(); ++i) + { + auto const letterIndex = + static_cast(i / pow(alphabet.size(), size - letterNumber - 1)) % + alphabet.size(); + result[i].push_back(alphabet[letterIndex]); + } + } + }; + { + generate(kAlphabet, 4, sources); + generate(kAlphabet, 2, queries); + + for (auto const & source : sources) + { + for (auto const & query : queries) + { + LOG(LINFO, (source, query)); + PrefixDFAModifier dfa(LevenshteinDFA(source, 2 /* maxErrors */)); + auto it = dfa.Begin(); + for (auto const c : query) + DFAMove(it, strings::MakeUniString({c})); + } + } + 
} +} } // namespace diff --git a/base/dfa_helpers.hpp b/base/dfa_helpers.hpp index ed0bcddc82..87ee1de2a6 100644 --- a/base/dfa_helpers.hpp +++ b/base/dfa_helpers.hpp @@ -16,9 +16,42 @@ public: public: Iterator & Move(strings::UniChar c) { - if (Accepts() || Rejects()) + if (Rejects()) return *this; + if (Accepts()) + { + auto currentIt = m_it; + currentIt.Move(c); + + // While m_it moves, the number of errors decreases as unmatched symbols get matched: + // source: a b c d e f + // query: a b c d e f + // errors: 5 4 3 2 1 0 + // + // After a mistyped symbol the number of errors remains the same: + // source: a b c d e f + // query: a b z d e f + // errors: 5 4 3 3 2 1 + // + // source: a b c d e f + // query: a b d c e f + // errors: 5 4 3 3 2 1 + // + // source: a b c d e f + // query: a b d e f + // errors: 5 4 3 3 2 + // + // source: a b c d e f + // query: a b c z d e f + // errors: 5 4 3 3 3 2 1 + // + // The number of errors cannot decrease after it has increased once. + + if (currentIt.ErrorsMade() > ErrorsMade()) + return *this; + } + m_it.Move(c); if (m_it.Accepts()) m_accepts = true; diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp index 35563471dd..dc0a20422e 100644 --- a/search/search_integration_tests/processor_test.cpp +++ b/search/search_integration_tests/processor_test.cpp @@ -1915,6 +1915,30 @@ UNIT_CLASS_TEST(ProcessorTest, ExactMatchTest) TEST(!results[0].GetRankingInfo().m_exactMatch, ()); TEST(!results[1].GetRankingInfo().m_exactMatch, ()); } + + { + auto request = MakeRequest("cafe лермонтов"); + auto const & results = request->Results(); + + Rules rules{ExactMatch(wonderlandId, cafe), ExactMatch(wonderlandId, lermontov)}; + TEST(ResultsMatch(results, rules), ()); + + TEST_EQUAL(2, results.size(), ("Unexpected number of retrieved cafes.")); + TEST(results[0].GetRankingInfo().m_exactMatch, ()); + TEST(results[1].GetRankingInfo().m_exactMatch, ()); + } + + { + auto request = MakeRequest("cafe 
лер"); + auto const & results = request->Results(); + + Rules rules{ExactMatch(wonderlandId, cafe), ExactMatch(wonderlandId, lermontov)}; + TEST(ResultsMatch(results, rules), ()); + + TEST_EQUAL(2, results.size(), ("Unexpected number of retrieved cafes.")); + TEST(results[0].GetRankingInfo().m_exactMatch, ()); + TEST(results[1].GetRankingInfo().m_exactMatch, ()); + } } } // namespace } // namespace search diff --git a/search/search_tests/feature_offset_match_tests.cpp b/search/search_tests/feature_offset_match_tests.cpp index ed081025c0..5365e3195e 100644 --- a/search/search_tests/feature_offset_match_tests.cpp +++ b/search/search_tests/feature_offset_match_tests.cpp @@ -4,6 +4,7 @@ #include "indexer/trie.hpp" +#include "base/dfa_helpers.hpp" #include "base/mem_trie.hpp" #include "base/string_utils.hpp" @@ -21,6 +22,8 @@ using Key = strings::UniString; using Value = uint32_t; using ValueList = VectorValues; using Trie = MemTrie; +using DFA = strings::LevenshteinDFA; +using PrefixDFA = strings::PrefixDFAModifier; UNIT_TEST(MatchInTrieTest) { @@ -35,17 +38,60 @@ UNIT_TEST(MatchInTrieTest) map vals; auto saveResult = [&vals](uint32_t v, bool exactMatch) { vals[v] = exactMatch; }; - auto const hotelDFA = strings::LevenshteinDFA("hotel", 1 /* maxErrors */); + auto const hotelDFA = DFA("hotel", 1 /* maxErrors */); search::impl::MatchInTrie(rootIterator, nullptr, 0 /* prefixSize */, hotelDFA, saveResult); TEST(vals.at(1), (vals)); TEST(vals.at(3), (vals)); TEST(!vals.at(2), (vals)); vals.clear(); - auto const homelDFA = strings::LevenshteinDFA("homel", 1 /* maxErrors */); + auto const homelDFA = DFA("homel", 1 /* maxErrors */); search::impl::MatchInTrie(rootIterator, nullptr, 0 /* prefixSize */, homelDFA, saveResult); TEST(vals.at(2), (vals)); TEST(!vals.at(1), (vals)); TEST(!vals.at(3), (vals)); } + +UNIT_TEST(MatchPrefixInTrieTest) +{ + Trie trie; + + vector> const data = {{"лермонтовъ", 1}, {"лермонтово", 2}}; + + for (auto const & kv : data) + 
trie.Add(strings::MakeUniString(kv.first), kv.second); + + trie::MemTrieIterator const rootIterator(trie.GetRootIterator()); + map vals; + auto saveResult = [&vals](uint32_t v, bool exactMatch) { vals[v] = exactMatch; }; + + { + vals.clear(); + auto const lermontov = PrefixDFA(DFA("лермонтовъ", 2 /* maxErrors */)); + search::impl::MatchInTrie(rootIterator, nullptr, 0 /* prefixSize */, lermontov, saveResult); + TEST(vals.at(1), (vals)); + TEST(!vals.at(2), (vals)); + } + { + vals.clear(); + auto const lermontovo = PrefixDFA(DFA("лермонтово", 2 /* maxErrors */)); + search::impl::MatchInTrie(rootIterator, nullptr, 0 /* prefixSize */, lermontovo, saveResult); + TEST(vals.at(2), (vals)); + TEST(!vals.at(1), (vals)); + } + { + vals.clear(); + auto const commonPrexif = PrefixDFA(DFA("лермонтов", 2 /* maxErrors */)); + search::impl::MatchInTrie(rootIterator, nullptr, 0 /* prefixSize */, commonPrexif, saveResult); + TEST(vals.at(2), (vals)); + TEST(vals.at(1), (vals)); + } + { + vals.clear(); + auto const commonPrexif = PrefixDFA(DFA("лер", 2 /* maxErrors */)); + search::impl::MatchInTrie(rootIterator, nullptr, 0 /* prefixSize */, commonPrexif, saveResult); + TEST(vals.at(2), (vals)); + TEST(vals.at(1), (vals)); + } +} } // namespace