forked from organicmaps/organicmaps
Add fuzzy string matching.
This commit is contained in:
parent
ab47cb8632
commit
4a2fb4f338
5 changed files with 191 additions and 0 deletions
|
@ -12,7 +12,9 @@ include($$ROOT_DIR/common.pri)
|
|||
HEADERS += \
|
||||
query.hpp \
|
||||
search_processor.hpp \
|
||||
string_match.hpp \
|
||||
|
||||
SOURCES += \
|
||||
query.cpp \
|
||||
search_processor.cpp \
|
||||
string_match.cpp \
|
||||
|
|
|
@ -19,3 +19,4 @@ win32 {
|
|||
|
||||
SOURCES += \
|
||||
../../testing/testingmain.cpp \
|
||||
string_match_test.cpp \
|
||||
|
|
53
search/search_tests/string_match_test.cpp
Normal file
53
search/search_tests/string_match_test.cpp
Normal file
|
@ -0,0 +1,53 @@
|
|||
#include "../../testing/testing.hpp"
|
||||
#include "../string_match.hpp"
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
// Cost model used by the unit tests below.
// Every single-character edit and every adjacent swap costs 1, every edit that
// involves a two-character sequence costs 2, with one exception: the character
// 'X' matched against the sequence "><" (in either direction) is free.
class TestMatchCost
{
public:
  uint32_t Cost10(char) const { return 1; }
  uint32_t Cost01(char) const { return 1; }
  uint32_t Cost11(char, char) const { return 1; }

  // One character against two characters: free only for 'X' versus "><".
  uint32_t Cost12(char a, char const * pB) const
  {
    // Short-circuit evaluation keeps pB[1] unread unless pB[0] already matched.
    bool const isFreePair = (a == 'X') && (pB[0] == '>') && (pB[1] == '<');
    return isFreePair ? 0u : 2u;
  }

  // Mirror of Cost12 with the roles of the two strings exchanged.
  uint32_t Cost21(char const * pA, char b) const { return Cost12(b, pA); }

  uint32_t Cost22(char const *, char const *) const { return 2; }
  uint32_t SwapCost(char, char) const { return 1; }
};
|
||||
|
||||
uint32_t MatchCost(char const * a, char const * b, uint32_t maxCost = 1000)
|
||||
{
|
||||
return ::search::StringMatchCost(a, strlen(a), b, strlen(b), TestMatchCost(), maxCost);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Checks StringMatchCost against the TestMatchCost model (see MatchCost above).
UNIT_TEST(StringMatchCost)
{
  // Plain single-character edits: substitution, deletion, insertion, swap.
  TEST_EQUAL(MatchCost("", ""), 0, ());
  TEST_EQUAL(MatchCost("a", "b"), 1, ());
  TEST_EQUAL(MatchCost("a", ""), 1, ());
  TEST_EQUAL(MatchCost("", "b"), 1, ());
  TEST_EQUAL(MatchCost("ab", "cd"), 2, ());
  TEST_EQUAL(MatchCost("ab", "ba"), 1, ());
  TEST_EQUAL(MatchCost("abcd", "efgh"), 4, ());

  // Identical and nearly identical strings.
  TEST_EQUAL(MatchCost("Hello!", "Hello!"), 0, ());
  TEST_EQUAL(MatchCost("Hello!", "Helo!"), 1, ());

  // 'X' versus "><" is free in the test cost model.
  TEST_EQUAL(MatchCost("X", "X"), 0, ());
  TEST_EQUAL(MatchCost("X", "><"), 0, ());
  TEST_EQUAL(MatchCost("XXX", "><><><"), 0, ());
  TEST_EQUAL(MatchCost("XXX", "><X><"), 0, ());
  TEST_EQUAL(MatchCost("TeXt", "Te><t"), 0, ());
  TEST_EQUAL(MatchCost("TeXt", "Te><"), 1, ());
  TEST_EQUAL(MatchCost("TeXt", "TetX"), 1, ());
  TEST_EQUAL(MatchCost("TeXt", "Tet><"), 2, ());

  // Very different lengths.
  TEST_EQUAL(MatchCost("", "ALongString"), 11, ());
  TEST_EQUAL(MatchCost("x", "ALongString"), 11, ());
  TEST_EQUAL(MatchCost("g", "ALongString"), 10, ());
}
|
46
search/string_match.cpp
Normal file
46
search/string_match.cpp
Normal file
|
@ -0,0 +1,46 @@
|
|||
#include "string_match.hpp"
|
||||
|
||||
// TODO: Build an error model.
// Take adjacent keyboard keys into account:
// 1. A neighboring key pressed instead of the intended one.
// 2. A neighboring key pressed before or after the intended one.
|
||||
|
||||
namespace search
|
||||
{
|
||||
|
||||
uint32_t DefaultMatchCost::Cost10(UniChar) const
|
||||
{
|
||||
return 128;
|
||||
}
|
||||
|
||||
uint32_t DefaultMatchCost::Cost01(UniChar) const
|
||||
{
|
||||
return 128;
|
||||
}
|
||||
|
||||
uint32_t DefaultMatchCost::Cost11(UniChar, UniChar) const
|
||||
{
|
||||
return 128;
|
||||
}
|
||||
|
||||
uint32_t DefaultMatchCost::Cost12(UniChar, UniChar const *) const
|
||||
{
|
||||
return 256;
|
||||
}
|
||||
|
||||
uint32_t DefaultMatchCost::Cost21(UniChar const *, UniChar) const
|
||||
{
|
||||
return 256;
|
||||
}
|
||||
|
||||
uint32_t DefaultMatchCost::Cost22(UniChar const *, UniChar const *) const
|
||||
{
|
||||
return 256;
|
||||
}
|
||||
|
||||
uint32_t DefaultMatchCost::SwapCost(UniChar, UniChar) const
|
||||
{
|
||||
return 128;
|
||||
}
|
||||
|
||||
} // namespace search
|
89
search/string_match.hpp
Normal file
89
search/string_match.hpp
Normal file
|
@ -0,0 +1,89 @@
|
|||
#pragma once
|
||||
|
||||
#include "../base/base.hpp"
|
||||
#include "../base/buffer_vector.hpp"
|
||||
#include "../std/queue.hpp"
|
||||
|
||||
namespace search
|
||||
{
|
||||
|
||||
typedef uint32_t UniChar;
|
||||
|
||||
namespace impl
{

// One state of the best-first search done by StringMatchCost:
// m_A and m_B are the positions reached in strings A and B,
// m_Cost is the total penalty accumulated on the way there.
struct MatchCostData
{
  uint32_t m_A, m_B;
  uint32_t m_Cost;

  MatchCostData() : m_A(0), m_B(0), m_Cost(0) {}
  MatchCostData(uint32_t a, uint32_t b, uint32_t cost) : m_A(a), m_B(b), m_Cost(cost) {}

  // Deliberately inverted: a priority queue keeps its "largest" element on top,
  // so ordering by descending cost makes the cheapest state come out first.
  bool operator < (MatchCostData const & o) const
  {
    return m_Cost > o.m_Cost;
  }
};

// Pushes the state (a, b, cost) into q unless it is over budget.
// maxCost is inclusive: a state costing exactly maxCost is still acceptable,
// which agrees with StringMatchCost reporting failure as maxCost + 1.
template <typename PriorityQueueT>
void PushMatchCost(PriorityQueueT & q, uint32_t maxCost, uint32_t a, uint32_t b, uint32_t cost)
{
  if (cost <= maxCost)
    q.push(MatchCostData(a, b, cost));
}

}  // namespace search::impl
|
||||
|
||||
class DefaultMatchCost
|
||||
{
|
||||
public:
|
||||
uint32_t Cost10(UniChar a) const;
|
||||
uint32_t Cost01(UniChar b) const;
|
||||
uint32_t Cost11(UniChar a, UniChar b) const;
|
||||
uint32_t Cost12(UniChar a, UniChar const * pB) const;
|
||||
uint32_t Cost21(UniChar const * pA, UniChar b) const;
|
||||
uint32_t Cost22(UniChar const * pA, UniChar const * pB) const;
|
||||
uint32_t SwapCost(UniChar a1, UniChar a2) const;
|
||||
};
|
||||
|
||||
template <typename CharT, typename CostF>
|
||||
uint32_t StringMatchCost(CharT const * sA, uint32_t sizeA,
|
||||
CharT const * sB, uint32_t sizeB,
|
||||
CostF const & costF, uint32_t maxCost)
|
||||
{
|
||||
priority_queue<impl::MatchCostData, buffer_vector<impl::MatchCostData, 256> > q;
|
||||
q.push(impl::MatchCostData(0, 0, 0));
|
||||
while (!q.empty())
|
||||
{
|
||||
uint32_t a = q.top().m_A;
|
||||
uint32_t b = q.top().m_B;
|
||||
uint32_t const c = q.top().m_Cost;
|
||||
q.pop();
|
||||
while (a < sizeA && b < sizeB && sA[a] == sB[b])
|
||||
++a, ++b;
|
||||
|
||||
if (a == sizeA && b == sizeB)
|
||||
return c;
|
||||
|
||||
if (a < sizeA)
|
||||
impl::PushMatchCost(q, maxCost, a + 1, b, c + costF.Cost10(sA[a]));
|
||||
if (b < sizeB)
|
||||
impl::PushMatchCost(q, maxCost, a, b + 1, c + costF.Cost01(sB[b]));
|
||||
if (a < sizeA && b < sizeB)
|
||||
impl::PushMatchCost(q, maxCost, a + 1, b + 1, c + costF.Cost11(sA[a], sB[b]));
|
||||
if (a + 1 < sizeA && b < sizeB)
|
||||
impl::PushMatchCost(q, maxCost, a + 2, b + 1, c + costF.Cost21(&sA[a], sB[b]));
|
||||
if (a < sizeA && b + 1 < sizeB)
|
||||
impl::PushMatchCost(q, maxCost, a + 1, b + 2, c + costF.Cost12(sA[a], &sB[b]));
|
||||
if (a + 1 < sizeA && b + 1 < sizeB)
|
||||
{
|
||||
impl::PushMatchCost(q, maxCost, a + 2, b + 2, c + costF.Cost22(&sA[a], &sB[b]));
|
||||
if (sA[a] == sB[b + 1] && sA[a + 1] == sB[b])
|
||||
impl::PushMatchCost(q, maxCost, a + 2, b + 2, c + costF.SwapCost(sA[a], sA[a + 1]));
|
||||
}
|
||||
}
|
||||
return maxCost + 1;
|
||||
}
|
||||
|
||||
} // namespace search
|
Loading…
Add table
Reference in a new issue