[search] Factor out approximate string matching and stop using it. There will be no spelling correction, but search works much faster.

This commit is contained in:
Yury Melnichek 2011-08-21 18:13:52 +02:00 committed by Alex Zolotarev
parent c5ae9bf2e1
commit 490f954951
9 changed files with 42 additions and 36 deletions

View file

@ -1,7 +1,7 @@
#include "search_index_builder.hpp"
#include "features_vector.hpp"
#include "../search/search_trie.hpp"
#include "../search/string_match.hpp"
#include "../search/string_search_utils.hpp"
#include "../coding/trie_builder.hpp"
#include "../coding/writer.hpp"
#include "../base/string_utils.hpp"

View file

@ -1,4 +1,4 @@
#include "string_match.hpp"
#include "approximate_string_match.hpp"
// TODO: Build an error model.
// Take adjacent keys on the keyboard into account.

View file

@ -1,28 +1,12 @@
#pragma once
#include "string_search_utils.hpp"
#include "../base/base.hpp"
#include "../base/buffer_vector.hpp"
#include "../base/string_utils.hpp"
#include "../std/queue.hpp"
namespace search
{
using strings::UniChar;
// Produces the form of |s| used for search matching: the string is converted
// with strings::MakeUniString, lower-cased with strings::MakeLowerCase and then
// passed through strings::Normalize, which modifies it in place.
inline strings::UniString NormalizeAndSimplifyString(string const & s)
{
  strings::UniString const converted = strings::MakeUniString(s);
  strings::UniString simplified = strings::MakeLowerCase(converted);
  strings::Normalize(simplified);
  return simplified;
}
// Tokenizes |uniS| with a strings::TokenizeIterator built from |delims| and
// calls |f| once per token, in iteration order, passing the token's UniString.
template <class DelimsT, typename F>
void SplitUniString(strings::UniString const & uniS, F f, DelimsT const & delims)
{
  strings::TokenizeIterator<DelimsT> token(uniS, delims);
  while (token)
  {
    f(token.GetUniString());
    ++token;
  }
}
namespace impl
{

View file

@ -1,6 +1,6 @@
#include "keyword_matcher.hpp"
#include "delimiters.hpp"
#include "string_match.hpp"
#include "string_search_utils.hpp"
#include "../base/logging.hpp"
#include "../base/string_utils.hpp"
#include "../std/bind.hpp"

View file

@ -2,7 +2,7 @@
#include "categories_holder.hpp"
#include "delimiters.hpp"
#include "latlon_match.hpp"
#include "string_match.hpp"
#include "string_search_utils.hpp"
#include "search_trie_matching.hpp"
#include "../indexer/feature_visibility.hpp"
#include "../base/exception.hpp"
@ -21,30 +21,26 @@ uint32_t KeywordMatch(strings::UniChar const * sA, uint32_t sizeA,
strings::UniChar const * sB, uint32_t sizeB,
uint32_t maxCost)
{
// Computes a match cost between the sequences (sA, sizeA) and (sB, sizeB).
// NOTE(review): this listing appears to be a diff hunk whose +/- markers were
// stripped, so lines of the old and the new body are interleaved. Read
// literally, everything between /* and */ is disabled and the only live
// statement is the StringMatchCost() call at the bottom - confirm against the
// real file which variant is actually compiled.
//
// The disabled text is an exact comparison: it yields maxCost + 1 when the
// sizes differ or when any pair of elements differs, and 0 otherwise. It shows
// both an index-based loop and a pointer-based loop over the same range.
/*
if (sizeA != sizeB)
return maxCost + 1;
for (uint32_t i = 0; i< sizeA; ++i)
if (sA[i] != sB[i])
strings::UniChar const * const endA = sA + sizeA;
while (sA != endA)
if (*sA++ != *sB++)
return maxCost + 1;
return 0;
*/
// Delegates to StringMatchCost with DefaultMatchCost() and the caller's maxCost.
// NOTE(review): the trailing `false` is presumably the "is prefix match" flag
// (PrefixMatch passes `true` in the same position) - confirm in the
// StringMatchCost declaration.
return StringMatchCost(sA, sizeA, sB, sizeB, DefaultMatchCost(), maxCost, false);
}
// Computes a match cost for (sA, sizeA) against (sB, sizeB), the prefix
// counterpart of KeywordMatch.
// NOTE(review): this listing appears to be a diff hunk whose +/- markers were
// stripped, so lines of the old and the new body are interleaved. Read
// literally, everything between /* and */ is disabled and the only live
// statement is the StringMatchCost() call at the bottom - confirm against the
// real file which variant is actually compiled.
uint32_t PrefixMatch(strings::UniChar const * sA, uint32_t sizeA,
strings::UniChar const * sB, uint32_t sizeB,
uint32_t maxCost)
{
// The disabled text is an exact prefix comparison: it yields maxCost + 1 when
// sA is longer than sB or when any of the first sizeA element pairs differs,
// and 0 otherwise. It shows both an index-based and a pointer-based loop.
/*
if (sizeA > sizeB)
return maxCost + 1;
for (uint32_t i = 0; i< sizeA; ++i)
if (sA[i] != sB[i])
strings::UniChar const * const endA = sA + sizeA;
while (sA != endA)
if (*sA++ != *sB++)
return maxCost + 1;
return 0;
*/
// Delegates to StringMatchCost with DefaultMatchCost() and the caller's maxCost.
// NOTE(review): the trailing `true` is presumably the "is prefix match" flag
// (KeywordMatch passes `false` in the same position) - confirm in the
// StringMatchCost declaration.
return StringMatchCost(sA, sizeA, sB, sizeB, DefaultMatchCost(), maxCost, true);
}
// Returns the fixed value 512.
// NOTE(review): presumably the largest score a single keyword match may
// produce - confirm against the callers.
inline uint32_t GetMaxKeywordMatchScore()
{
  uint32_t const maxScore = 512;
  return maxScore;
}
@ -243,7 +239,7 @@ void Query::Search(function<void (Result const &)> const & f)
{
FeatureProcessor featureProcessor(*this);
/// @todo Tune depth scale search (1 is not enough)
m_pIndex->ForEachInRect(featureProcessor, m_viewport, min(scales::GetUpperScale(), scale + 1));
m_pIndex->ForEachInRect(featureProcessor, m_viewport, min(scales::GetUpperScale(), scale + 7));
}
catch (FeatureProcessor::StopException &)
{

View file

@ -16,11 +16,12 @@ HEADERS += \
keyword_matcher.hpp \
query.hpp \
result.hpp \
string_match.hpp \
latlon_match.hpp \
categories_holder.hpp \
search_trie.hpp \
search_trie_matching.hpp \
string_search_utils.hpp \
approximate_string_match.hpp \
SOURCES += \
delimiters.cpp \
@ -29,7 +30,7 @@ SOURCES += \
keyword_matcher.cpp \
query.cpp \
result.cpp \
string_match.cpp \
latlon_match.cpp \
categories_holder.cpp \
search_trie_matching.cpp \
approximate_string_match.cpp \

View file

@ -1,7 +1,8 @@
#include "../../testing/testing.hpp"
#include "../keyword_matcher.hpp"
#include "match_cost_mock.hpp"
#include "../string_match.hpp"
#include "../approximate_string_match.hpp"
#include "../string_search_utils.hpp"
#include "../../testing/testing_utils.hpp"
#include "../../base/string_utils.hpp"
#include "../../std/scoped_ptr.hpp"

View file

@ -1,6 +1,6 @@
#include "search_trie_matching.hpp"
#include "query.hpp"
#include "string_match.hpp"
#include "string_search_utils.hpp"
#include "../indexer/feature_visibility.hpp"

View file

@ -0,0 +1,24 @@
#pragma once
#include "../base/string_utils.hpp"
#include "../base/base.hpp"
namespace search
{

using strings::UniChar;

// Produces the form of |s| used for search matching: the string is converted
// with strings::MakeUniString, lower-cased with strings::MakeLowerCase and then
// passed through strings::Normalize, which modifies it in place.
inline strings::UniString NormalizeAndSimplifyString(string const & s)
{
  strings::UniString const converted = strings::MakeUniString(s);
  strings::UniString simplified = strings::MakeLowerCase(converted);
  strings::Normalize(simplified);
  return simplified;
}

// Tokenizes |uniS| with a strings::TokenizeIterator built from |delims| and
// calls |f| once per token, in iteration order, passing the token's UniString.
template <class DelimsT, typename F>
void SplitUniString(strings::UniString const & uniS, F f, DelimsT const & delims)
{
  strings::TokenizeIterator<DelimsT> token(uniS, delims);
  while (token)
  {
    f(token.GetUniString());
    ++token;
  }
}

} // namespace search