[search] Search query params are extracted into individual hpp/cpp files.

This commit is contained in:
Yuri Gorshenin 2015-07-13 11:43:26 +03:00 committed by Alex Zolotarev
parent 48153a92f6
commit 3532ba29cf
5 changed files with 218 additions and 198 deletions

View file

@ -26,6 +26,7 @@ HEADERS += \
search_engine.hpp \
search_query.hpp \
search_query_factory.hpp \
search_query_params.hpp \
search_string_intersection.hpp \
SOURCES += \
@ -41,3 +42,4 @@ SOURCES += \
result.cpp \
search_engine.cpp \
search_query.cpp \
search_query_params.cpp \

View file

@ -1,9 +1,11 @@
#include "search_query.hpp"
#include "feature_offset_match.hpp"
#include "geometry_utils.hpp"
#include "indexed_value.hpp"
#include "latlon_match.hpp"
#include "search_common.hpp"
#include "indexed_value.hpp"
#include "geometry_utils.hpp"
#include "search_query_params.hpp"
#include "search_string_intersection.hpp"
#include "storage/country_info.hpp"
@ -1241,188 +1243,47 @@ namespace
}
}
Query::Params::Params(Query const & q, bool isLocalities/* = false*/)
void Query::InitParams(bool localitySearch, SearchQueryParams & params)
{
if (!q.m_prefix.empty())
m_prefixTokens.push_back(q.m_prefix);
if (!m_prefix.empty())
params.m_prefixTokens.push_back(m_prefix);
size_t const tokensCount = q.m_tokens.size();
m_tokens.resize(tokensCount);
size_t const tokensCount = m_tokens.size();
params.m_tokens.resize(tokensCount);
// Add normal tokens.
for (size_t i = 0; i < tokensCount; ++i)
m_tokens[i].push_back(q.m_tokens[i]);
params.m_tokens[i].push_back(m_tokens[i]);
// Add names of categories (and synonyms).
if (!isLocalities)
if (!localitySearch)
{
Classificator const & cl = classif();
q.ForEachCategoryTypes([&] (size_t i, uint32_t t)
auto addSyms = [&](size_t i, uint32_t t)
{
TokensVectorT & v = (i < tokensCount ? m_tokens[i] : m_prefixTokens);
SearchQueryParams::TSynonymsVector & v =
(i < tokensCount ? params.m_tokens[i] : params.m_prefixTokens);
uint32_t const index = cl.GetIndexForType(t);
v.push_back(FeatureTypeToString(index));
// v2-version MWM has raw classificator types in search index prefix, so
// do the hack: add synonyms for old convention if needed.
if (q.m_supportOldFormat)
if (m_supportOldFormat)
{
int const type = GetOldTypeFromIndex(index);
if (type >= 0)
{
ASSERT ( type == 70 || type > 4000, (type));
ASSERT(type == 70 || type > 4000, (type));
v.push_back(FeatureTypeToString(static_cast<uint32_t>(type)));
}
}
});
};
ForEachCategoryTypes(addSyms);
}
FillLanguages(q);
}
void Query::Params::EraseTokens(vector<size_t> & eraseInds)
{
eraseInds.erase(unique(eraseInds.begin(), eraseInds.end()), eraseInds.end());
// fill temporary vector
vector<TokensVectorT> newTokens;
size_t skipI = 0;
size_t const count = m_tokens.size();
size_t const eraseCount = eraseInds.size();
for (size_t i = 0; i < count; ++i)
{
if (skipI < eraseCount && eraseInds[skipI] == i)
{
++skipI;
}
else
{
newTokens.push_back(TokensVectorT());
newTokens.back().swap(m_tokens[i]);
}
}
// assign to m_tokens
newTokens.swap(m_tokens);
if (skipI < eraseCount)
{
// it means that we need to skip prefix tokens
ASSERT_EQUAL ( skipI+1, eraseCount, (eraseInds) );
ASSERT_EQUAL ( eraseInds[skipI], count, (eraseInds) );
m_prefixTokens.clear();
}
}
template <class ToDo> void Query::Params::ForEachToken(ToDo toDo)
{
size_t const count = m_tokens.size();
for (size_t i = 0; i < count; ++i)
{
ASSERT ( !m_tokens[i].empty(), () );
ASSERT ( !m_tokens[i].front().empty(), () );
toDo(m_tokens[i].front(), i);
}
if (!m_prefixTokens.empty())
{
ASSERT ( !m_prefixTokens.front().empty(), () );
toDo(m_prefixTokens.front(), count);
}
}
string DebugPrint(Query::Params const & p)
{
return ("Query::Params: Tokens = " + DebugPrint(p.m_tokens) +
"; Prefixes = " + DebugPrint(p.m_prefixTokens));
}
namespace
{
class DoStoreNumbers
{
vector<size_t> & m_vec;
public:
DoStoreNumbers(vector<size_t> & vec) : m_vec(vec) {}
void operator() (Query::Params::StringT const & s, size_t i)
{
/// @todo Do smart filtering of house numbers and zipcodes.
if (feature::IsNumber(s))
m_vec.push_back(i);
}
};
class DoAddStreetSynonyms
{
Query::Params & m_params;
Query::Params::TokensVectorT & GetTokens(size_t i)
{
size_t const count = m_params.m_tokens.size();
if (i < count)
return m_params.m_tokens[i];
else
{
ASSERT_EQUAL ( i, count, () );
return m_params.m_prefixTokens;
}
}
void AddSynonym(size_t i, string const & sym)
{
GetTokens(i).push_back(strings::MakeUniString(sym));
}
public:
DoAddStreetSynonyms(Query::Params & params) : m_params(params) {}
void operator() (Query::Params::StringT const & s, size_t i)
{
if (s.size() <= 2)
{
string const ss = strings::ToUtf8(strings::MakeLowerCase(s));
// All synonyms should be lowercase!
if (ss == "n")
AddSynonym(i, "north");
else if (ss == "w")
AddSynonym(i, "west");
else if (ss == "s")
AddSynonym(i, "south");
else if (ss == "e")
AddSynonym(i, "east");
else if (ss == "nw")
AddSynonym(i, "northwest");
else if (ss == "ne")
AddSynonym(i, "northeast");
else if (ss == "sw")
AddSynonym(i, "southwest");
else if (ss == "se")
AddSynonym(i, "southeast");
}
}
};
}
void Query::Params::ProcessAddressTokens()
{
// 1. Do simple stuff - erase all number tokens.
// Assume that USA street name numbers are end with "st, nd, rd, th" suffixes.
vector<size_t> toErase;
ForEachToken(DoStoreNumbers(toErase));
EraseTokens(toErase);
// 2. Add synonyms for N, NE, NW, etc.
ForEachToken(DoAddStreetSynonyms(*this));
}
void Query::Params::FillLanguages(Query const & q)
{
for (int i = 0; i < LANG_COUNT; ++i)
m_langs.insert(q.GetLanguage(i));
params.m_langs.insert(GetLanguage(i));
}
namespace impl
@ -1622,7 +1483,8 @@ void Query::SearchAddress(Results & res)
{
LOG(LDEBUG, ("Final city-locality = ", city));
Params params(*this);
SearchQueryParams params;
InitParams(false /* localitySearch */, params);
params.EraseTokens(city.m_matchedTokens);
if (params.CanSuggest())
@ -1652,7 +1514,8 @@ void Query::SearchAddress(Results & res)
{
LOG(LDEBUG, ("Final region-locality = ", region));
Params params(*this);
SearchQueryParams params;
InitParams(false /* localitySearch */, params);
params.EraseTokens(region.m_matchedTokens);
if (params.CanSuggest())
@ -1897,7 +1760,8 @@ namespace impl
void Query::SearchLocality(MwmValue * pMwm, impl::Locality & res1, impl::Region & res2)
{
Params params(*this, true);
SearchQueryParams params;
InitParams(true /* localitySearch */, params);
serial::CodingParams cp(GetCPForTrie(pMwm->GetHeader().GetDefCodingParams()));
@ -1957,7 +1821,8 @@ void Query::SearchFeatures()
MWMVectorT mwmsInfo;
m_pIndex->GetMwmsInfo(mwmsInfo);
Params params(*this);
SearchQueryParams params;
InitParams(false /* localitySearch */, params);
// do usual search in viewport and near me (without last rect)
for (int i = 0; i < LOCALITY_V; ++i)
@ -2023,7 +1888,8 @@ namespace
};
}
void Query::SearchFeatures(Params const & params, MWMVectorT const & mwmsInfo, ViewportID vID)
void Query::SearchFeatures(SearchQueryParams const & params, MWMVectorT const & mwmsInfo,
ViewportID vID)
{
for (shared_ptr<MwmInfo> const & info : mwmsInfo)
{
@ -2038,8 +1904,7 @@ void Query::SearchFeatures(Params const & params, MWMVectorT const & mwmsInfo, V
namespace
{
void FillCategories(Query::Params const & params, TrieIterator const * pTrieRoot,
void FillCategories(SearchQueryParams const & params, TrieIterator const * pTrieRoot,
TrieValuesHolder<FeaturesFilter> & categoriesHolder)
{
unique_ptr<TrieIterator> pCategoriesRoot;
@ -2069,7 +1934,7 @@ void FillCategories(Query::Params const & params, TrieIterator const * pTrieRoot
}
void Query::SearchInMWM(Index::MwmHandle const & mwmHandle, Params const & params,
void Query::SearchInMWM(Index::MwmHandle const & mwmHandle, SearchQueryParams const & params,
ViewportID vID /*= DEFAULT_V*/)
{
if (MwmValue const * const pMwm = mwmHandle.GetValue<MwmValue>())
@ -2195,7 +2060,8 @@ void Query::SearchAdditional(Results & res, size_t resCount)
MWMVectorT mwmsInfo;
m_pIndex->GetMwmsInfo(mwmsInfo);
Params params(*this);
SearchQueryParams params;
InitParams(false /* localitySearch */, params);
for (shared_ptr<MwmInfo> const & info : mwmsInfo)
{

View file

@ -38,6 +38,7 @@ namespace storage { class CountryInfoGetter; }
namespace search
{
struct SearchQueryParams;
namespace impl
{
@ -116,35 +117,7 @@ public:
//@{
typedef trie::ValueReader::ValueType TrieValueT;
struct Params
{
typedef strings::UniString StringT;
typedef vector<StringT> TokensVectorT;
typedef unordered_set<int8_t> LangsSetT;
vector<TokensVectorT> m_tokens;
TokensVectorT m_prefixTokens;
LangsSetT m_langs;
/// Initialize search params (tokens, languages).
/// @param[in] isLocalities Use true when search for locality in World.
Params(Query const & q, bool isLocalities = false);
/// @param[in] eraseInds Sorted vector of token's indexes.
void EraseTokens(vector<size_t> & eraseInds);
void ProcessAddressTokens();
bool IsEmpty() const { return (m_tokens.empty() && m_prefixTokens.empty()); }
bool CanSuggest() const { return (m_tokens.empty() && !m_prefixTokens.empty()); }
bool IsLangExist(int8_t l) const { return (m_langs.count(l) > 0); }
private:
template <class ToDo> void ForEachToken(ToDo toDo);
void FillLanguages(Query const & q);
};
//@}
void InitParams(bool localitySearch, SearchQueryParams & params);
private:
friend class impl::FeatureLoader;
@ -202,9 +175,10 @@ private:
/// If ind == -1, don't do any matching with features in viewport (@see m_offsetsInViewport).
//@{
/// Do search in all maps from mwmInfo.
void SearchFeatures(Params const & params, MWMVectorT const & mwmsInfo, ViewportID vID);
void SearchFeatures(SearchQueryParams const & params, MWMVectorT const & mwmsInfo,
ViewportID vID);
/// Do search in particular map (mwmHandle).
void SearchInMWM(Index::MwmHandle const & mwmHandle, Params const & params,
void SearchInMWM(Index::MwmHandle const & mwmHandle, SearchQueryParams const & params,
ViewportID vID = DEFAULT_V);
//@}

View file

@ -0,0 +1,142 @@
#include "search/search_query_params.hpp"
#include "indexer/feature_impl.hpp"
#include "base/assert.hpp"
#include "std/algorithm.hpp"
namespace search
{
namespace
{
class DoStoreNumbers
{
public:
DoStoreNumbers(vector<size_t> & vec) : m_vec(vec) {}
void operator()(SearchQueryParams::TString const & s, size_t i)
{
/// @todo Do smart filtering of house numbers and zipcodes.
if (feature::IsNumber(s))
m_vec.push_back(i);
}
private:
vector<size_t> & m_vec;
};
class DoAddStreetSynonyms
{
public:
DoAddStreetSynonyms(SearchQueryParams & params) : m_params(params) {}
void operator()(SearchQueryParams::TString const & s, size_t i)
{
if (s.size() > 2)
return;
string const ss = strings::ToUtf8(strings::MakeLowerCase(s));
// All synonyms should be lowercase!
if (ss == "n")
AddSym(i, "north");
else if (ss == "w")
AddSym(i, "west");
else if (ss == "s")
AddSym(i, "south");
else if (ss == "e")
AddSym(i, "east");
else if (ss == "nw")
AddSym(i, "northwest");
else if (ss == "ne")
AddSym(i, "northeast");
else if (ss == "sw")
AddSym(i, "southwest");
else if (ss == "se")
AddSym(i, "southeast");
}
private:
SearchQueryParams::TSynonymsVector & GetSyms(size_t i)
{
size_t const count = m_params.m_tokens.size();
if (i < count)
return m_params.m_tokens[i];
ASSERT_EQUAL(i, count, ());
return m_params.m_prefixTokens;
}
void AddSym(size_t i, string const & sym) { GetSyms(i).push_back(strings::MakeUniString(sym)); }
SearchQueryParams & m_params;
};
} // namespace
/// Removes the tokens whose indexes are listed in |eraseInds|.
/// An index equal to the number of tokens denotes the prefix and clears
/// m_prefixTokens instead.
/// @param[in,out] eraseInds Sorted token indexes; adjacent duplicates are
/// removed from the vector in place.
void SearchQueryParams::EraseTokens(vector<size_t> & eraseInds)
{
  eraseInds.erase(unique(eraseInds.begin(), eraseInds.end()), eraseInds.end());

  size_t const tokensCount = m_tokens.size();
  size_t const eraseCount = eraseInds.size();

  // Collect the surviving tokens, walking both sequences in lockstep.
  vector<TSynonymsVector> kept;
  size_t nextErase = 0;
  for (size_t i = 0; i < tokensCount; ++i)
  {
    bool const erased = (nextErase < eraseCount && eraseInds[nextErase] == i);
    if (erased)
    {
      ++nextErase;
      continue;
    }
    kept.push_back(move(m_tokens[i]));
  }

  m_tokens.swap(kept);

  if (nextErase >= eraseCount)
    return;

  // Exactly one index may be left, and it must denote the prefix tokens.
  ASSERT_EQUAL(nextErase + 1, eraseCount, (eraseInds));
  ASSERT_EQUAL(eraseInds[nextErase], tokensCount, (eraseInds));
  m_prefixTokens.clear();
}
void SearchQueryParams::ProcessAddressTokens()
{
// Erases all number tokens.
// Assumes that USA street name numbers are end with "st, nd, rd, th" suffixes.
vector<size_t> toErase;
ForEachToken(DoStoreNumbers(toErase));
EraseTokens(toErase);
// Adds synonyms for N, NE, NW, etc.
ForEachToken(DoAddStreetSynonyms(*this));
}
template <class ToDo>
void SearchQueryParams::ForEachToken(ToDo && toDo)
{
size_t const count = m_tokens.size();
for (size_t i = 0; i < count; ++i)
{
ASSERT(!m_tokens[i].empty(), ());
ASSERT(!m_tokens[i].front().empty(), ());
toDo(m_tokens[i].front(), i);
}
if (!m_prefixTokens.empty())
{
ASSERT(!m_prefixTokens.front().empty(), ());
toDo(m_prefixTokens.front(), count);
}
}
} // namespace search
/// @return Human-readable dump of the tokens and prefix tokens of |params|.
string DebugPrint(search::SearchQueryParams const & params)
{
  string const tokens = DebugPrint(params.m_tokens);
  string const prefixTokens = DebugPrint(params.m_prefixTokens);
  return "SearchQueryParams [ m_tokens=" + tokens + ", m_prefixTokens=" + prefixTokens + "]";
}

View file

@ -0,0 +1,36 @@
#pragma once
#include "base/string_utils.hpp"
#include "std/cstdint.hpp"
#include "std/unordered_set.hpp"
#include "std/vector.hpp"
namespace search
{
/// Tokens, prefix tokens and languages of a search query.
struct SearchQueryParams
{
  using TString = strings::UniString;
  using TSynonymsVector = vector<TString>;
  using TLangsSet = unordered_set<int8_t>;

  /// Removes the tokens with the given indexes; an index equal to the number
  /// of tokens clears the prefix tokens.
  /// @param[in] eraseInds Sorted vector of token indexes.
  void EraseTokens(vector<size_t> & eraseInds);

  /// Erases number tokens and adds synonyms for N, NE, NW, etc.
  void ProcessAddressTokens();

  /// @return true when there are neither tokens nor prefix tokens.
  bool IsEmpty() const { return m_tokens.empty() && m_prefixTokens.empty(); }

  /// @return true when only prefix tokens are present.
  bool CanSuggest() const { return m_tokens.empty() && !m_prefixTokens.empty(); }

  /// @return true when language |l| is in m_langs.
  bool IsLangExist(int8_t l) const { return m_langs.count(l) != 0; }

  /// One synonyms vector per token; its first entry is the one passed to
  /// ForEachToken() callbacks.
  vector<TSynonymsVector> m_tokens;
  /// Synonyms vector of the prefix; empty when there is no prefix.
  TSynonymsVector m_prefixTokens;
  /// Languages checked by IsLangExist().
  TLangsSet m_langs;

private:
  /// Calls toDo(token, index) for the first entry of every token and of the
  /// prefix tokens.
  template <class ToDo>
  void ForEachToken(ToDo && toDo);
};
} // namespace search
string DebugPrint(search::SearchQueryParams const & params);