From 3532ba29cf1e305150d8082c9751bfd68d78a304 Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Mon, 13 Jul 2015 11:43:26 +0300 Subject: [PATCH] [search] Search query params is extracted into individual hpp/cpp files. --- search/search.pro | 2 + search/search_query.cpp | 200 ++++++--------------------------- search/search_query.hpp | 36 +----- search/search_query_params.cpp | 142 +++++++++++++++++++++++ search/search_query_params.hpp | 36 ++++++ 5 files changed, 218 insertions(+), 198 deletions(-) create mode 100644 search/search_query_params.cpp create mode 100644 search/search_query_params.hpp diff --git a/search/search.pro b/search/search.pro index 996a2832b3..697013c988 100644 --- a/search/search.pro +++ b/search/search.pro @@ -26,6 +26,7 @@ HEADERS += \ search_engine.hpp \ search_query.hpp \ search_query_factory.hpp \ + search_query_params.hpp \ search_string_intersection.hpp \ SOURCES += \ @@ -41,3 +42,4 @@ SOURCES += \ result.cpp \ search_engine.cpp \ search_query.cpp \ + search_query_params.cpp \ diff --git a/search/search_query.cpp b/search/search_query.cpp index 48faf7968e..cc48999517 100644 --- a/search/search_query.cpp +++ b/search/search_query.cpp @@ -1,9 +1,11 @@ #include "search_query.hpp" + #include "feature_offset_match.hpp" +#include "geometry_utils.hpp" +#include "indexed_value.hpp" #include "latlon_match.hpp" #include "search_common.hpp" -#include "indexed_value.hpp" -#include "geometry_utils.hpp" +#include "search_query_params.hpp" #include "search_string_intersection.hpp" #include "storage/country_info.hpp" @@ -1241,188 +1243,47 @@ namespace } } -Query::Params::Params(Query const & q, bool isLocalities/* = false*/) +void Query::InitParams(bool localitySearch, SearchQueryParams & params) { - if (!q.m_prefix.empty()) - m_prefixTokens.push_back(q.m_prefix); + if (!m_prefix.empty()) + params.m_prefixTokens.push_back(m_prefix); - size_t const tokensCount = q.m_tokens.size(); - m_tokens.resize(tokensCount); + size_t const tokensCount = 
m_tokens.size(); + params.m_tokens.resize(tokensCount); // Add normal tokens. for (size_t i = 0; i < tokensCount; ++i) - m_tokens[i].push_back(q.m_tokens[i]); + params.m_tokens[i].push_back(m_tokens[i]); // Add names of categories (and synonyms). - if (!isLocalities) + if (!localitySearch) { Classificator const & cl = classif(); - q.ForEachCategoryTypes([&] (size_t i, uint32_t t) + auto addSyms = [&](size_t i, uint32_t t) { - TokensVectorT & v = (i < tokensCount ? m_tokens[i] : m_prefixTokens); + SearchQueryParams::TSynonymsVector & v = + (i < tokensCount ? params.m_tokens[i] : params.m_prefixTokens); uint32_t const index = cl.GetIndexForType(t); v.push_back(FeatureTypeToString(index)); // v2-version MWM has raw classificator types in search index prefix, so // do the hack: add synonyms for old convention if needed. - if (q.m_supportOldFormat) + if (m_supportOldFormat) { int const type = GetOldTypeFromIndex(index); if (type >= 0) { - ASSERT ( type == 70 || type > 4000, (type)); + ASSERT(type == 70 || type > 4000, (type)); v.push_back(FeatureTypeToString(static_cast<uint32_t>(type))); } } - }); + }; + ForEachCategoryTypes(addSyms); } - FillLanguages(q); -} - -void Query::Params::EraseTokens(vector<size_t> & eraseInds) -{ - eraseInds.erase(unique(eraseInds.begin(), eraseInds.end()), eraseInds.end()); - - // fill temporary vector - vector<TokensVectorT> newTokens; - - size_t skipI = 0; - size_t const count = m_tokens.size(); - size_t const eraseCount = eraseInds.size(); - for (size_t i = 0; i < count; ++i) - { - if (skipI < eraseCount && eraseInds[skipI] == i) - { - ++skipI; - } - else - { - newTokens.push_back(TokensVectorT()); - newTokens.back().swap(m_tokens[i]); - } - } - - // assign to m_tokens - newTokens.swap(m_tokens); - - if (skipI < eraseCount) - { - // it means that we need to skip prefix tokens - ASSERT_EQUAL ( skipI+1, eraseCount, (eraseInds) ); - ASSERT_EQUAL ( eraseInds[skipI], count, (eraseInds) ); - m_prefixTokens.clear(); - } -} - -template <class ToDo> void Query::Params::ForEachToken(ToDo 
toDo) -{ - size_t const count = m_tokens.size(); - for (size_t i = 0; i < count; ++i) - { - ASSERT ( !m_tokens[i].empty(), () ); - ASSERT ( !m_tokens[i].front().empty(), () ); - toDo(m_tokens[i].front(), i); - } - - if (!m_prefixTokens.empty()) - { - ASSERT ( !m_prefixTokens.front().empty(), () ); - toDo(m_prefixTokens.front(), count); - } -} - -string DebugPrint(Query::Params const & p) -{ - return ("Query::Params: Tokens = " + DebugPrint(p.m_tokens) + - "; Prefixes = " + DebugPrint(p.m_prefixTokens)); -} - -namespace -{ - class DoStoreNumbers - { - vector<size_t> & m_vec; - public: - DoStoreNumbers(vector<size_t> & vec) : m_vec(vec) {} - void operator() (Query::Params::StringT const & s, size_t i) - { - /// @todo Do smart filtering of house numbers and zipcodes. - if (feature::IsNumber(s)) - m_vec.push_back(i); - } - }; - - class DoAddStreetSynonyms - { - Query::Params & m_params; - - Query::Params::TokensVectorT & GetTokens(size_t i) - { - size_t const count = m_params.m_tokens.size(); - if (i < count) - return m_params.m_tokens[i]; - else - { - ASSERT_EQUAL ( i, count, () ); - return m_params.m_prefixTokens; - } - } - - void AddSynonym(size_t i, string const & sym) - { - GetTokens(i).push_back(strings::MakeUniString(sym)); - } - - public: - DoAddStreetSynonyms(Query::Params & params) : m_params(params) {} - - void operator() (Query::Params::StringT const & s, size_t i) - { - if (s.size() <= 2) - { - string const ss = strings::ToUtf8(strings::MakeLowerCase(s)); - - // All synonyms should be lowercase! - if (ss == "n") - AddSynonym(i, "north"); - else if (ss == "w") - AddSynonym(i, "west"); - else if (ss == "s") - AddSynonym(i, "south"); - else if (ss == "e") - AddSynonym(i, "east"); - else if (ss == "nw") - AddSynonym(i, "northwest"); - else if (ss == "ne") - AddSynonym(i, "northeast"); - else if (ss == "sw") - AddSynonym(i, "southwest"); - else if (ss == "se") - AddSynonym(i, "southeast"); - } - } - }; -} - -void Query::Params::ProcessAddressTokens() -{ - // 1. 
Do simple stuff - erase all number tokens. - // Assume that USA street name numbers are end with "st, nd, rd, th" suffixes. - - vector<size_t> toErase; - ForEachToken(DoStoreNumbers(toErase)); - EraseTokens(toErase); - - // 2. Add synonyms for N, NE, NW, etc. - ForEachToken(DoAddStreetSynonyms(*this)); -} - -void Query::Params::FillLanguages(Query const & q) -{ for (int i = 0; i < LANG_COUNT; ++i) - m_langs.insert(q.GetLanguage(i)); + params.m_langs.insert(GetLanguage(i)); } namespace impl @@ -1622,7 +1483,8 @@ void Query::SearchAddress(Results & res) { LOG(LDEBUG, ("Final city-locality = ", city)); - Params params(*this); + SearchQueryParams params; + InitParams(false /* localitySearch */, params); params.EraseTokens(city.m_matchedTokens); if (params.CanSuggest()) @@ -1652,7 +1514,8 @@ void Query::SearchAddress(Results & res) { LOG(LDEBUG, ("Final region-locality = ", region)); - Params params(*this); + SearchQueryParams params; + InitParams(false /* localitySearch */, params); params.EraseTokens(region.m_matchedTokens); if (params.CanSuggest()) @@ -1897,7 +1760,8 @@ namespace impl void Query::SearchLocality(MwmValue * pMwm, impl::Locality & res1, impl::Region & res2) { - Params params(*this, true); + SearchQueryParams params; + InitParams(true /* localitySearch */, params); serial::CodingParams cp(GetCPForTrie(pMwm->GetHeader().GetDefCodingParams())); @@ -1957,7 +1821,8 @@ void Query::SearchFeatures() MWMVectorT mwmsInfo; m_pIndex->GetMwmsInfo(mwmsInfo); - Params params(*this); + SearchQueryParams params; + InitParams(false /* localitySearch */, params); // do usual search in viewport and near me (without last rect) for (int i = 0; i < LOCALITY_V; ++i) @@ -2023,7 +1888,8 @@ namespace }; } -void Query::SearchFeatures(Params const & params, MWMVectorT const & mwmsInfo, ViewportID vID) +void Query::SearchFeatures(SearchQueryParams const & params, MWMVectorT const & mwmsInfo, + ViewportID vID) { for (shared_ptr const & info : mwmsInfo) { @@ -2038,8 +1904,7 @@ void 
Query::SearchFeatures(Params const & params, MWMVectorT const & mwmsInfo, V namespace { - -void FillCategories(Query::Params const & params, TrieIterator const * pTrieRoot, +void FillCategories(SearchQueryParams const & params, TrieIterator const * pTrieRoot, TrieValuesHolder & categoriesHolder) { unique_ptr pCategoriesRoot; @@ -2069,7 +1934,7 @@ void FillCategories(Query::Params const & params, TrieIterator const * pTrieRoot } -void Query::SearchInMWM(Index::MwmHandle const & mwmHandle, Params const & params, +void Query::SearchInMWM(Index::MwmHandle const & mwmHandle, SearchQueryParams const & params, ViewportID vID /*= DEFAULT_V*/) { if (MwmValue const * const pMwm = mwmHandle.GetValue()) @@ -2195,7 +2060,8 @@ void Query::SearchAdditional(Results & res, size_t resCount) MWMVectorT mwmsInfo; m_pIndex->GetMwmsInfo(mwmsInfo); - Params params(*this); + SearchQueryParams params; + InitParams(false /* localitySearch */, params); for (shared_ptr const & info : mwmsInfo) { diff --git a/search/search_query.hpp b/search/search_query.hpp index 866c8b5b50..77dac826ed 100644 --- a/search/search_query.hpp +++ b/search/search_query.hpp @@ -38,6 +38,7 @@ namespace storage { class CountryInfoGetter; } namespace search { +struct SearchQueryParams; namespace impl { @@ -116,35 +117,7 @@ public: //@{ typedef trie::ValueReader::ValueType TrieValueT; - struct Params - { - typedef strings::UniString StringT; - typedef vector<StringT> TokensVectorT; - typedef unordered_set<int8_t> LangsSetT; - - vector<TokensVectorT> m_tokens; - TokensVectorT m_prefixTokens; - LangsSetT m_langs; - - /// Initialize search params (tokens, languages). - /// @param[in] isLocalities Use true when search for locality in World. - Params(Query const & q, bool isLocalities = false); - - /// @param[in] eraseInds Sorted vector of token's indexes. 
- void EraseTokens(vector<size_t> & eraseInds); - - void ProcessAddressTokens(); - - bool IsEmpty() const { return (m_tokens.empty() && m_prefixTokens.empty()); } - bool CanSuggest() const { return (m_tokens.empty() && !m_prefixTokens.empty()); } - bool IsLangExist(int8_t l) const { return (m_langs.count(l) > 0); } - - private: - template <class ToDo> void ForEachToken(ToDo toDo); - - void FillLanguages(Query const & q); - }; - //@} + void InitParams(bool localitySearch, SearchQueryParams & params); private: friend class impl::FeatureLoader; @@ -202,9 +175,10 @@ private: /// If ind == -1, don't do any matching with features in viewport (@see m_offsetsInViewport). //@{ /// Do search in all maps from mwmInfo. - void SearchFeatures(Params const & params, MWMVectorT const & mwmsInfo, ViewportID vID); + void SearchFeatures(SearchQueryParams const & params, MWMVectorT const & mwmsInfo, + ViewportID vID); /// Do search in particular map (mwmHandle). - void SearchInMWM(Index::MwmHandle const & mwmHandle, Params const & params, + void SearchInMWM(Index::MwmHandle const & mwmHandle, SearchQueryParams const & params, ViewportID vID = DEFAULT_V); //@} diff --git a/search/search_query_params.cpp b/search/search_query_params.cpp new file mode 100644 index 0000000000..fe3e0398b1 --- /dev/null +++ b/search/search_query_params.cpp @@ -0,0 +1,142 @@ +#include "search/search_query_params.hpp" + +#include "indexer/feature_impl.hpp" + +#include "base/assert.hpp" + +#include "std/algorithm.hpp" + +namespace search +{ +namespace +{ +class DoStoreNumbers +{ +public: + DoStoreNumbers(vector<size_t> & vec) : m_vec(vec) {} + + void operator()(SearchQueryParams::TString const & s, size_t i) + { + /// @todo Do smart filtering of house numbers and zipcodes. 
+ if (feature::IsNumber(s)) + m_vec.push_back(i); + } + +private: + vector<size_t> & m_vec; +}; + +class DoAddStreetSynonyms +{ +public: + DoAddStreetSynonyms(SearchQueryParams & params) : m_params(params) {} + + void operator()(SearchQueryParams::TString const & s, size_t i) + { + if (s.size() > 2) + return; + string const ss = strings::ToUtf8(strings::MakeLowerCase(s)); + + // All synonyms should be lowercase! + if (ss == "n") + AddSym(i, "north"); + else if (ss == "w") + AddSym(i, "west"); + else if (ss == "s") + AddSym(i, "south"); + else if (ss == "e") + AddSym(i, "east"); + else if (ss == "nw") + AddSym(i, "northwest"); + else if (ss == "ne") + AddSym(i, "northeast"); + else if (ss == "sw") + AddSym(i, "southwest"); + else if (ss == "se") + AddSym(i, "southeast"); + } + +private: + SearchQueryParams::TSynonymsVector & GetSyms(size_t i) + { + size_t const count = m_params.m_tokens.size(); + if (i < count) + return m_params.m_tokens[i]; + ASSERT_EQUAL(i, count, ()); + return m_params.m_prefixTokens; + } + + void AddSym(size_t i, string const & sym) { GetSyms(i).push_back(strings::MakeUniString(sym)); } + + SearchQueryParams & m_params; +}; +} // namespace + +void SearchQueryParams::EraseTokens(vector<size_t> & eraseInds) +{ + eraseInds.erase(unique(eraseInds.begin(), eraseInds.end()), eraseInds.end()); + + // fill temporary vector + vector<TSynonymsVector> newTokens; + + size_t skipI = 0; + size_t const count = m_tokens.size(); + size_t const eraseCount = eraseInds.size(); + for (size_t i = 0; i < count; ++i) + { + if (skipI < eraseCount && eraseInds[skipI] == i) + ++skipI; + else + newTokens.push_back(move(m_tokens[i])); + } + + // assign to m_tokens + newTokens.swap(m_tokens); + + if (skipI < eraseCount) + { + // it means that we need to skip prefix tokens + ASSERT_EQUAL(skipI + 1, eraseCount, (eraseInds)); + ASSERT_EQUAL(eraseInds[skipI], count, (eraseInds)); + m_prefixTokens.clear(); + } +} + +void SearchQueryParams::ProcessAddressTokens() +{ + // Erases all number tokens. 
+ // Assumes that USA street name numbers are end with "st, nd, rd, th" suffixes. + vector<size_t> toErase; + ForEachToken(DoStoreNumbers(toErase)); + EraseTokens(toErase); + + // Adds synonyms for N, NE, NW, etc. + ForEachToken(DoAddStreetSynonyms(*this)); +} + +template <class ToDo> +void SearchQueryParams::ForEachToken(ToDo && toDo) +{ + size_t const count = m_tokens.size(); + for (size_t i = 0; i < count; ++i) + { + ASSERT(!m_tokens[i].empty(), ()); + ASSERT(!m_tokens[i].front().empty(), ()); + toDo(m_tokens[i].front(), i); + } + + if (!m_prefixTokens.empty()) + { + ASSERT(!m_prefixTokens.front().empty(), ()); + toDo(m_prefixTokens.front(), count); + } +} +} // namespace search + +string DebugPrint(search::SearchQueryParams const & params) +{ + ostringstream os; + os << "SearchQueryParams [ m_tokens=" << DebugPrint(params.m_tokens) + << ", m_prefixTokens=" << DebugPrint(params.m_prefixTokens) << "]"; + return os.str(); +} diff --git a/search/search_query_params.hpp b/search/search_query_params.hpp new file mode 100644 index 0000000000..cc6f64c7e7 --- /dev/null +++ b/search/search_query_params.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include "base/string_utils.hpp" + +#include "std/cstdint.hpp" +#include "std/unordered_set.hpp" +#include "std/vector.hpp" + +namespace search +{ +struct SearchQueryParams +{ + using TString = strings::UniString; + using TSynonymsVector = vector<TString>; + using TLangsSet = unordered_set<int8_t>; + + /// @param[in] eraseInds Sorted vector of token's indexes. 
+ void EraseTokens(vector<size_t> & eraseInds); + + void ProcessAddressTokens(); + + inline bool IsEmpty() const { return (m_tokens.empty() && m_prefixTokens.empty()); } + inline bool CanSuggest() const { return (m_tokens.empty() && !m_prefixTokens.empty()); } + inline bool IsLangExist(int8_t l) const { return (m_langs.count(l) > 0); } + + vector<TSynonymsVector> m_tokens; + TSynonymsVector m_prefixTokens; + TLangsSet m_langs; + +private: + template <class ToDo> + void ForEachToken(ToDo && toDo); +}; +} // namespace search + +string DebugPrint(search::SearchQueryParams const & params);