forked from organicmaps/organicmaps-tmp
[base] Fix ErrorsMade() for PrefixDFAModifier, add tests.
This commit is contained in:
parent
5999493ed4
commit
86e540688e
4 changed files with 177 additions and 3 deletions
|
@ -203,4 +203,75 @@ UNIT_TEST(LevenshteinDFA_ErrorsMade)
|
|||
TEST_EQUAL(GetResult(dfa, "кафер"), Result(Status::Accepts, 1 /* errorsMade */), ());
|
||||
}
|
||||
}
|
||||
|
||||
// Walks a prefix-matching Levenshtein automaton built for "abcde" (at most 2
// errors) through a query that grows step by step, checking the accept/reject
// state and the reported number of errors after every extension.
UNIT_TEST(LevenshteinDFA_PrefixDFAModifier)
{
  struct Step
  {
    string m_suffix;      // Symbols appended to the query on this step.
    size_t m_errorsMade;  // Expected ErrorsMade() once the suffix is consumed.
  };

  PrefixDFAModifier<LevenshteinDFA> prefixDfa(LevenshteinDFA("abcde", 2 /* maxErrors */));
  auto cursor = prefixDfa.Begin();

  // After "ab" the automaton is undecided: neither accepting nor rejecting.
  DFAMove(cursor, "ab");
  TEST(!cursor.Accepts(), ());
  TEST(!cursor.Rejects(), ());

  // From "abc" on, every extension must leave the automaton accepting.
  Step const kSteps[] = {{"c", 2}, {"d", 1}, {"e", 0}, {"fghijklmn", 0}};
  for (auto const & step : kSteps)
  {
    DFAMove(cursor, step.m_suffix);
    TEST(cursor.Accepts(), ());
    TEST(!cursor.Rejects(), ());
    TEST_EQUAL(cursor.ErrorsMade(), step.m_errorsMade, ());
  }
}
|
||||
|
||||
// Smoke test: feeds every two-letter query over {a, b, c} into a prefix
// automaton built for every four-letter source over the same alphabet. It makes
// no assertions about the outcome; it only drives the automata through each
// source/query combination.
UNIT_TEST(LevenshteinDFA_PrefixDFASmoke)
{
  vector<char> const kAlphabet = {'a', 'b', 'c'};
  vector<string> sources;
  vector<string> queries;

  // Fills |result| with all words of length |size| over |alphabet|. result[i] spells
  // |i| in base alphabet.size(), most significant digit first. Only integer arithmetic
  // is used, so the word count and the digits never pass through floating point.
  auto generate = [](vector<char> const & alphabet, size_t size, vector<string> & result)
  {
    size_t const base = alphabet.size();

    size_t wordsCount = 1;
    for (size_t i = 0; i < size; ++i)
      wordsCount *= base;

    result.assign(wordsCount, string());
    for (size_t i = 0; i < result.size(); ++i)
    {
      auto & word = result[i];
      word.resize(size);

      // Peel digits off the least significant end and write them right to left.
      // |base| cannot be zero here: an empty alphabet with size > 0 yields no words,
      // and with size == 0 this loop body is never entered.
      size_t rest = i;
      for (size_t pos = size; pos > 0; --pos)
      {
        word[pos - 1] = alphabet[rest % base];
        rest /= base;
      }
    }
  };

  generate(kAlphabet, 4, sources);
  generate(kAlphabet, 2, queries);

  for (auto const & source : sources)
  {
    for (auto const & query : queries)
    {
      LOG(LINFO, (source, query));
      PrefixDFAModifier<LevenshteinDFA> dfa(LevenshteinDFA(source, 2 /* maxErrors */));
      auto it = dfa.Begin();
      for (auto const c : query)
        DFAMove(it, strings::MakeUniString({c}));
    }
  }
}
|
||||
} // namespace
|
||||
|
|
|
@ -16,9 +16,42 @@ public:
|
|||
public:
|
||||
Iterator & Move(strings::UniChar c)
|
||||
{
|
||||
if (Accepts() || Rejects())
|
||||
if (Rejects())
|
||||
return *this;
|
||||
|
||||
if (Accepts())
|
||||
{
|
||||
auto currentIt = m_it;
|
||||
currentIt.Move(c);
|
||||
|
||||
// While m_it moves, the number of errors decreases as each remaining unmatched symbol gets matched:
|
||||
// source: a b c d e f
|
||||
// query: a b c d e f
|
||||
// errors: 5 4 3 2 1 0
|
||||
//
|
||||
// After a misprinted symbol the number of errors stays the same:
|
||||
// source: a b c d e f
|
||||
// query: a b z d e f
|
||||
// errors: 5 4 3 3 2 1
|
||||
//
|
||||
// source: a b c d e f
|
||||
// query: a b d c e f
|
||||
// errors: 5 4 3 3 2 1
|
||||
//
|
||||
// source: a b c d e f
|
||||
// query: a b d e f
|
||||
// errors: 5 4 3 3 2
|
||||
//
|
||||
// source: a b c d e f
|
||||
// query: a b c z d e f
|
||||
// errors: 5 4 3 3 3 2 1
|
||||
//
|
||||
// The number of errors cannot decrease after it has increased once.
|
||||
|
||||
if (currentIt.ErrorsMade() > ErrorsMade())
|
||||
return *this;
|
||||
}
|
||||
|
||||
m_it.Move(c);
|
||||
if (m_it.Accepts())
|
||||
m_accepts = true;
|
||||
|
|
|
@ -1915,6 +1915,30 @@ UNIT_CLASS_TEST(ProcessorTest, ExactMatchTest)
|
|||
TEST(!results[0].GetRankingInfo().m_exactMatch, ());
|
||||
TEST(!results[1].GetRankingInfo().m_exactMatch, ());
|
||||
}
|
||||
|
||||
{
|
||||
auto request = MakeRequest("cafe лермонтов");
|
||||
auto const & results = request->Results();
|
||||
|
||||
Rules rules{ExactMatch(wonderlandId, cafe), ExactMatch(wonderlandId, lermontov)};
|
||||
TEST(ResultsMatch(results, rules), ());
|
||||
|
||||
TEST_EQUAL(2, results.size(), ("Unexpected number of retrieved cafes."));
|
||||
TEST(results[0].GetRankingInfo().m_exactMatch, ());
|
||||
TEST(results[1].GetRankingInfo().m_exactMatch, ());
|
||||
}
|
||||
|
||||
{
|
||||
auto request = MakeRequest("cafe лер");
|
||||
auto const & results = request->Results();
|
||||
|
||||
Rules rules{ExactMatch(wonderlandId, cafe), ExactMatch(wonderlandId, lermontov)};
|
||||
TEST(ResultsMatch(results, rules), ());
|
||||
|
||||
TEST_EQUAL(2, results.size(), ("Unexpected number of retrieved cafes."));
|
||||
TEST(results[0].GetRankingInfo().m_exactMatch, ());
|
||||
TEST(results[1].GetRankingInfo().m_exactMatch, ());
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
} // namespace search
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include "indexer/trie.hpp"
|
||||
|
||||
#include "base/dfa_helpers.hpp"
|
||||
#include "base/mem_trie.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
|
@ -21,6 +22,8 @@ using Key = strings::UniString;
|
|||
using Value = uint32_t;
|
||||
using ValueList = VectorValues<Value>;
|
||||
using Trie = MemTrie<Key, ValueList>;
|
||||
using DFA = strings::LevenshteinDFA;
|
||||
using PrefixDFA = strings::PrefixDFAModifier<DFA>;
|
||||
|
||||
UNIT_TEST(MatchInTrieTest)
|
||||
{
|
||||
|
@ -35,17 +38,60 @@ UNIT_TEST(MatchInTrieTest)
|
|||
map<uint32_t, bool> vals;
|
||||
auto saveResult = [&vals](uint32_t v, bool exactMatch) { vals[v] = exactMatch; };
|
||||
|
||||
auto const hotelDFA = strings::LevenshteinDFA("hotel", 1 /* maxErrors */);
|
||||
auto const hotelDFA = DFA("hotel", 1 /* maxErrors */);
|
||||
search::impl::MatchInTrie(rootIterator, nullptr, 0 /* prefixSize */, hotelDFA, saveResult);
|
||||
TEST(vals.at(1), (vals));
|
||||
TEST(vals.at(3), (vals));
|
||||
TEST(!vals.at(2), (vals));
|
||||
|
||||
vals.clear();
|
||||
auto const homelDFA = strings::LevenshteinDFA("homel", 1 /* maxErrors */);
|
||||
auto const homelDFA = DFA("homel", 1 /* maxErrors */);
|
||||
search::impl::MatchInTrie(rootIterator, nullptr, 0 /* prefixSize */, homelDFA, saveResult);
|
||||
TEST(vals.at(2), (vals));
|
||||
TEST(!vals.at(1), (vals));
|
||||
TEST(!vals.at(3), (vals));
|
||||
}
|
||||
|
||||
// Runs prefix automata (at most 2 errors) against a trie holding two tokens
// that differ only in their last letter, and checks the exactMatch flag that
// MatchInTrie reports for each stored value.
UNIT_TEST(MatchPrefixInTrieTest)
{
  Trie trie;
  vector<pair<string, uint32_t>> const data = {{"лермонтовъ", 1}, {"лермонтово", 2}};
  for (auto const & entry : data)
    trie.Add(strings::MakeUniString(entry.first), entry.second);

  trie::MemTrieIterator<Key, ValueList> const rootIterator(trie.GetRootIterator());

  // Value -> exactMatch flag reported during the most recent query.
  map<uint32_t, bool> vals;
  auto saveResult = [&vals](uint32_t v, bool exactMatch) { vals[v] = exactMatch; };

  // Discards earlier results, then matches |query| against the trie as a prefix.
  auto const runQuery = [&](string const & query)
  {
    vals.clear();
    auto const prefixDfa = PrefixDFA(DFA(query, 2 /* maxErrors */));
    search::impl::MatchInTrie(rootIterator, nullptr, 0 /* prefixSize */, prefixDfa, saveResult);
  };

  // A full token: only that token is flagged as an exact match.
  runQuery("лермонтовъ");
  TEST(vals.at(1), (vals));
  TEST(!vals.at(2), (vals));

  runQuery("лермонтово");
  TEST(vals.at(2), (vals));
  TEST(!vals.at(1), (vals));

  // A prefix shared by both tokens: both are flagged.
  runQuery("лермонтов");
  TEST(vals.at(2), (vals));
  TEST(vals.at(1), (vals));

  runQuery("лер");
  TEST(vals.at(2), (vals));
  TEST(vals.at(1), (vals));
}
|
||||
} // namespace
|
||||
|
|
Loading…
Add table
Reference in a new issue