[base] Fix ErrorsMade() for PrefixDFAModifier, add tests.

This commit is contained in:
tatiana-yan 2019-06-05 18:37:31 +03:00 committed by mpimenov
parent 5999493ed4
commit 86e540688e
4 changed files with 177 additions and 3 deletions

View file

@ -203,4 +203,75 @@ UNIT_TEST(LevenshteinDFA_ErrorsMade)
TEST_EQUAL(GetResult(dfa, "кафер"), Result(Status::Accepts, 1 /* errorsMade */), ());
}
}
// Walks a prefix-modified Levenshtein DFA built for "abcde" (at most 2 errors) through the
// query "abcdefghijklmn" piece by piece and checks the accept/reject state and the reported
// number of errors after each piece.
UNIT_TEST(LevenshteinDFA_PrefixDFAModifier)
{
  PrefixDFAModifier<LevenshteinDFA> prefixDfa(LevenshteinDFA("abcde", 2 /* maxErrors */));
  auto state = prefixDfa.Begin();

  // "ab": expected to be neither accepted nor rejected yet.
  DFAMove(state, "ab");
  TEST(!state.Accepts(), ());
  TEST(!state.Rejects(), ());

  // "abc": expected to be accepted with 2 errors.
  DFAMove(state, "c");
  TEST(state.Accepts(), ());
  TEST(!state.Rejects(), ());
  TEST_EQUAL(state.ErrorsMade(), 2, ());

  // "abcd": expected to be accepted with 1 error.
  DFAMove(state, "d");
  TEST(state.Accepts(), ());
  TEST(!state.Rejects(), ());
  TEST_EQUAL(state.ErrorsMade(), 1, ());

  // "abcde": the whole source is matched, no errors expected.
  DFAMove(state, "e");
  TEST(state.Accepts(), ());
  TEST(!state.Rejects(), ());
  TEST_EQUAL(state.ErrorsMade(), 0, ());

  // Any continuation after the full match must stay accepted with the same error count.
  DFAMove(state, "fghijklmn");
  TEST(state.Accepts(), ());
  TEST(!state.Rejects(), ());
  TEST_EQUAL(state.ErrorsMade(), 0, ());
}
// Smoke test: for every 4-letter word over {a, b, c} builds a prefix-modified Levenshtein DFA
// (at most 2 errors) and feeds it every 2-letter query over the same alphabet, one symbol at a
// time. Nothing is asserted; the test only checks that construction and moves complete.
UNIT_TEST(LevenshteinDFA_PrefixDFASmoke)
{
  vector<char> const kAlphabet = {'a', 'b', 'c'};
  vector<string> sources;
  vector<string> queries;

  // Replaces the contents of |result| with all words of length |size| over |alphabet|.
  // The word with index i is i written in base alphabet.size(), most significant digit first,
  // with digits mapped to letters of |alphabet|.
  // Only integer arithmetic is used: floating-point pow() may return an inexact value, which
  // would give a wrong word count or a wrong letter index after truncation.
  auto generate = [](vector<char> const & alphabet, size_t size, vector<string> & result)
  {
    size_t const base = alphabet.size();

    // total = base ^ size (1 when |size| is 0, matching pow(base, 0)).
    size_t total = 1;
    for (size_t i = 0; i < size; ++i)
      total *= base;

    result.assign(total, string(size, '\0'));
    for (size_t i = 0; i < total; ++i)
    {
      // Fill the word from the last letter to the first, peeling off base-|base| digits of i.
      size_t rest = i;
      for (size_t pos = size; pos > 0; --pos)
      {
        result[i][pos - 1] = alphabet[rest % base];
        rest /= base;
      }
    }
  };

  generate(kAlphabet, 4, sources);
  generate(kAlphabet, 2, queries);

  for (auto const & source : sources)
  {
    for (auto const & query : queries)
    {
      // Logged before the run so the offending pair is visible if the DFA crashes.
      LOG(LINFO, (source, query));
      PrefixDFAModifier<LevenshteinDFA> dfa(LevenshteinDFA(source, 2 /* maxErrors */));
      auto it = dfa.Begin();
      for (auto const c : query)
        DFAMove(it, strings::MakeUniString({c}));
    }
  }
}
} // namespace

View file

@ -16,9 +16,42 @@ public:
public:
Iterator & Move(strings::UniChar c)
{
if (Accepts() || Rejects())
if (Rejects())
return *this;
if (Accepts())
{
auto currentIt = m_it;
currentIt.Move(c);
// While moving m_it, the number of errors decreases as previously unmatched symbols get matched:
// source: a b c d e f
// query: a b c d e f
// errors: 5 4 3 2 1 0
//
// After a misprinted symbol the number of errors remains the same:
// source: a b c d e f
// query: a b z d e f
// errors: 5 4 3 3 2 1
//
// source: a b c d e f
// query: a b d c e f
// errors: 5 4 3 3 2 1
//
// source: a b c d e f
// query: a b d e f
// errors: 5 4 3 3 2
//
// source: a b c d e f
// query: a b c z d e f
// errors: 5 4 3 3 3 2 1
//
// The number of errors cannot decrease after it has increased once.
if (currentIt.ErrorsMade() > ErrorsMade())
return *this;
}
m_it.Move(c);
if (m_it.Accepts())
m_accepts = true;

View file

@ -1915,6 +1915,30 @@ UNIT_CLASS_TEST(ProcessorTest, ExactMatchTest)
TEST(!results[0].GetRankingInfo().m_exactMatch, ());
TEST(!results[1].GetRankingInfo().m_exactMatch, ());
}
{
auto request = MakeRequest("cafe лермонтов");
auto const & results = request->Results();
Rules rules{ExactMatch(wonderlandId, cafe), ExactMatch(wonderlandId, lermontov)};
TEST(ResultsMatch(results, rules), ());
TEST_EQUAL(2, results.size(), ("Unexpected number of retrieved cafes."));
TEST(results[0].GetRankingInfo().m_exactMatch, ());
TEST(results[1].GetRankingInfo().m_exactMatch, ());
}
{
auto request = MakeRequest("cafe лер");
auto const & results = request->Results();
Rules rules{ExactMatch(wonderlandId, cafe), ExactMatch(wonderlandId, lermontov)};
TEST(ResultsMatch(results, rules), ());
TEST_EQUAL(2, results.size(), ("Unexpected number of retrieved cafes."));
TEST(results[0].GetRankingInfo().m_exactMatch, ());
TEST(results[1].GetRankingInfo().m_exactMatch, ());
}
}
} // namespace
} // namespace search

View file

@ -4,6 +4,7 @@
#include "indexer/trie.hpp"
#include "base/dfa_helpers.hpp"
#include "base/mem_trie.hpp"
#include "base/string_utils.hpp"
@ -21,6 +22,8 @@ using Key = strings::UniString;
using Value = uint32_t;
using ValueList = VectorValues<Value>;
using Trie = MemTrie<Key, ValueList>;
using DFA = strings::LevenshteinDFA;
using PrefixDFA = strings::PrefixDFAModifier<DFA>;
UNIT_TEST(MatchInTrieTest)
{
@ -35,17 +38,60 @@ UNIT_TEST(MatchInTrieTest)
map<uint32_t, bool> vals;
auto saveResult = [&vals](uint32_t v, bool exactMatch) { vals[v] = exactMatch; };
auto const hotelDFA = strings::LevenshteinDFA("hotel", 1 /* maxErrors */);
auto const hotelDFA = DFA("hotel", 1 /* maxErrors */);
search::impl::MatchInTrie(rootIterator, nullptr, 0 /* prefixSize */, hotelDFA, saveResult);
TEST(vals.at(1), (vals));
TEST(vals.at(3), (vals));
TEST(!vals.at(2), (vals));
vals.clear();
auto const homelDFA = strings::LevenshteinDFA("homel", 1 /* maxErrors */);
auto const homelDFA = DFA("homel", 1 /* maxErrors */);
search::impl::MatchInTrie(rootIterator, nullptr, 0 /* prefixSize */, homelDFA, saveResult);
TEST(vals.at(2), (vals));
TEST(!vals.at(1), (vals));
TEST(!vals.at(3), (vals));
}
// Matches prefix-modified Levenshtein DFAs (at most 2 errors) against a trie holding
// "лермонтовъ" (value 1) and "лермонтово" (value 2) and checks the exactMatch flag that
// MatchInTrie reports for each value.
UNIT_TEST(MatchPrefixInTrieTest)
{
  vector<pair<string, uint32_t>> const entries = {{"лермонтовъ", 1}, {"лермонтово", 2}};

  Trie trie;
  for (auto const & entry : entries)
    trie.Add(strings::MakeUniString(entry.first), entry.second);

  trie::MemTrieIterator<Key, ValueList> const rootIterator(trie.GetRootIterator());

  // Value -> exactMatch flag reported for it during the latest matching run.
  map<uint32_t, bool> vals;
  auto saveResult = [&vals](uint32_t v, bool exactMatch) { vals[v] = exactMatch; };

  // Drops the results of the previous run and matches the prefix DFA of |query| against the trie.
  auto const matchPrefix = [&](string const & query)
  {
    vals.clear();
    auto const prefixDfa = PrefixDFA(DFA(query, 2 /* maxErrors */));
    search::impl::MatchInTrie(rootIterator, nullptr, 0 /* prefixSize */, prefixDfa, saveResult);
  };

  // The full first key: exact for value 1 only.
  matchPrefix("лермонтовъ");
  TEST(vals.at(1), (vals));
  TEST(!vals.at(2), (vals));

  // The full second key: exact for value 2 only.
  matchPrefix("лермонтово");
  TEST(vals.at(2), (vals));
  TEST(!vals.at(1), (vals));

  // The longest common prefix of both keys: exact for both values.
  matchPrefix("лермонтов");
  TEST(vals.at(2), (vals));
  TEST(vals.at(1), (vals));

  // A short common prefix: exact for both values as well.
  matchPrefix("лер");
  TEST(vals.at(2), (vals));
  TEST(vals.at(1), (vals));
}
} // namespace