diff --git a/search/engine.cpp b/search/engine.cpp index a761e3e556..0f39d0ecba 100644 --- a/search/engine.cpp +++ b/search/engine.cpp @@ -1,7 +1,8 @@ #include "search/engine.hpp" -#include "geometry_utils.hpp" -#include "processor.hpp" +#include "search/geometry_utils.hpp" +#include "search/params.hpp" +#include "search/processor.hpp" #include "storage/country_info_getter.hpp" @@ -126,16 +127,16 @@ Engine::Params::Params(string const & locale, size_t numThreads) Engine::Engine(Index & index, CategoriesHolder const & categories, storage::CountryInfoGetter const & infoGetter, unique_ptr factory, Params const & params) - : m_categories(categories), m_shutdown(false) + : m_shutdown(false) { InitSuggestions doInit; - m_categories.ForEachName(bind(ref(doInit), _1)); + categories.ForEachName(bind(ref(doInit), _1)); doInit.GetSuggests(m_suggests); m_contexts.resize(params.m_numThreads); for (size_t i = 0; i < params.m_numThreads; ++i) { - auto processor = factory->Build(index, m_categories, m_suggests, infoGetter); + auto processor = factory->Build(index, categories, m_suggests, infoGetter); processor->SetPreferredLocale(params.m_locale); m_contexts[i].m_processor = move(processor); } @@ -308,6 +309,7 @@ void Engine::DoSearch(SearchParams const & params, m2::RectD const & viewport, processor.SetMode(params.GetMode()); processor.SetSuggestsEnabled(params.GetSuggestsEnabled()); + processor.SetOnResults(params.m_onResults); // This flag is needed for consistency with old search algorithm // only. It will be gone when we remove old search code. diff --git a/search/engine.hpp b/search/engine.hpp index 2462a4c616..07f66119e2 100644 --- a/search/engine.hpp +++ b/search/engine.hpp @@ -91,7 +91,7 @@ public: size_t m_numThreads; }; - // Doesn't take ownership of index. Takes ownership of categoriesR. + // Doesn't take ownership of index and categories. Engine(Index & index, CategoriesHolder const & categories, storage::CountryInfoGetter const & infoGetter, unique_ptr factory, Params const & params); @@ -161,7 +161,6 @@ private: void DoSearch(SearchParams const & params, m2::RectD const & viewport, shared_ptr handle, Processor & processor); - CategoriesHolder const & m_categories; vector m_suggests; bool m_shutdown; diff --git a/search/features_layer_matcher.cpp b/search/features_layer_matcher.cpp index 1f98b22ffb..0132857e76 100644 --- a/search/features_layer_matcher.cpp +++ b/search/features_layer_matcher.cpp @@ -13,7 +13,7 @@ namespace search /// even if there is no exact street written for this house. int constexpr kMaxApproxStreetDistanceM = 100; -FeaturesLayerMatcher::FeaturesLayerMatcher(Index & index, my::Cancellable const & cancellable) +FeaturesLayerMatcher::FeaturesLayerMatcher(Index const & index, my::Cancellable const & cancellable) : m_context(nullptr) , m_postcodes(nullptr) , m_reverseGeocoder(index) diff --git a/search/features_layer_matcher.hpp b/search/features_layer_matcher.hpp index f9ae2df657..5d69b58bbc 100644 --- a/search/features_layer_matcher.hpp +++ b/search/features_layer_matcher.hpp @@ -59,7 +59,7 @@ public: static int constexpr kBuildingRadiusMeters = 50; static int constexpr kStreetRadiusMeters = 100; - FeaturesLayerMatcher(Index & index, my::Cancellable const & cancellable); + FeaturesLayerMatcher(Index const & index, my::Cancellable const & cancellable); void SetContext(MwmContext * context); void SetPostcodes(coding::CompressedBitVector const * postcodes); diff --git a/search/geocoder.cpp b/search/geocoder.cpp index 29a9af2409..a2ae1e262c 100644 --- a/search/geocoder.cpp +++ b/search/geocoder.cpp @@ -258,7 +258,7 @@ bool HasSearchIndex(MwmValue const & value) { return value.m_cont.IsExist(SEARCH bool HasGeometryIndex(MwmValue & value) { return value.m_cont.IsExist(INDEX_FILE_TAG); } -MwmSet::MwmHandle FindWorld(Index & index, vector> const & infos) +MwmSet::MwmHandle FindWorld(Index const & index, vector> const & infos) { MwmSet::MwmHandle handle; for (auto const & info : infos) @@ -406,7 +406,7 @@ void UniteCBVs(vector> & cbvs) Geocoder::Params::Params() : m_mode(Mode::Everywhere), m_accuratePivotCenter(0, 0) {} // Geocoder::Geocoder ------------------------------------------------------------------------------ -Geocoder::Geocoder(Index & index, storage::CountryInfoGetter const & infoGetter, +Geocoder::Geocoder(Index const & index, storage::CountryInfoGetter const & infoGetter, my::Cancellable const & cancellable) : m_index(index) , m_infoGetter(infoGetter) diff --git a/search/geocoder.hpp b/search/geocoder.hpp index 79e549bba6..c771402ad9 100644 --- a/search/geocoder.hpp +++ b/search/geocoder.hpp @@ -141,7 +141,7 @@ public: #endif }; - Geocoder(Index & index, storage::CountryInfoGetter const & infoGetter, + Geocoder(Index const & index, storage::CountryInfoGetter const & infoGetter, my::Cancellable const & cancellable); ~Geocoder(); @@ -306,7 +306,7 @@ private: // |m_usedTokens| if there are no unused tokens. size_t SkipUsedTokens(size_t curToken) const; - Index & m_index; + Index const & m_index; storage::CountryInfoGetter const & m_infoGetter; diff --git a/search/nested_rects_cache.cpp b/search/nested_rects_cache.cpp index 3e9f37abcc..5cecd69070 100644 --- a/search/nested_rects_cache.cpp +++ b/search/nested_rects_cache.cpp @@ -19,7 +19,7 @@ namespace double const kPositionToleranceMeters = 15.0; } // namespace -NestedRectsCache::NestedRectsCache(Index & index) +NestedRectsCache::NestedRectsCache(Index const & index) : m_index(index), m_scale(0), m_position(0, 0), m_valid(false) { } diff --git a/search/nested_rects_cache.hpp b/search/nested_rects_cache.hpp index 077c37b34c..62c454da40 100644 --- a/search/nested_rects_cache.hpp +++ b/search/nested_rects_cache.hpp @@ -14,7 +14,7 @@ namespace search class NestedRectsCache { public: - explicit NestedRectsCache(Index & index); + explicit NestedRectsCache(Index const & index); void SetPosition(m2::PointD const & position, int scale); @@ -37,7 +37,7 @@ private: void Update(); - Index & m_index; + Index const & m_index; int m_scale; m2::PointD m_position; bool m_valid; diff --git a/search/params.hpp b/search/params.hpp index 211ffb6c17..0c688a1e3b 100644 --- a/search/params.hpp +++ b/search/params.hpp @@ -50,7 +50,6 @@ namespace search inline void Clear() { m_query.clear(); } - public: TOnStarted m_onStarted; TOnResults m_onResults; diff --git a/search/pre_ranker.cpp b/search/pre_ranker.cpp index b5782fbbfb..f333b5efcd 100644 --- a/search/pre_ranker.cpp +++ b/search/pre_ranker.cpp @@ -44,8 +44,6 @@ struct ComparePreResult1 PreRanker::PreRanker(size_t limit) : m_limit(limit) {} -void PreRanker::Add(PreResult1 const & result) { m_results.push_back(result); } - void PreRanker::Filter(bool viewportSearch) { using TSet = set; diff --git a/search/pre_ranker.hpp b/search/pre_ranker.hpp index 2bb61f6901..10db7de81f 100644 --- a/search/pre_ranker.hpp +++ b/search/pre_ranker.hpp @@ -1,7 +1,9 @@ #pragma once #include "search/intermediate_result.hpp" +#include "search/ranker.hpp" +#include "base/logging.hpp" #include "base/macros.hpp" #include "std/algorithm.hpp" @@ -11,14 +13,13 @@ namespace search { +class Ranker; // Fast and simple pre-ranker for search results. class PreRanker { public: explicit PreRanker(size_t limit); - void Add(PreResult1 const & result); - template void Emplace(TArgs &&... args) { @@ -45,6 +46,8 @@ public: fn(result.GetId(), result.GetInfo()); } + Ranker * m_ranker; + private: vector m_results; size_t const m_limit; diff --git a/search/processor.cpp b/search/processor.cpp index 50ff2cc7a3..183966ff18 100644 --- a/search/processor.cpp +++ b/search/processor.cpp @@ -12,7 +12,7 @@ #include "search/ranking_utils.hpp" #include "search/region.hpp" #include "search/search_index_values.hpp" -#include "search/string_intersection.hpp" +#include "search/utils.hpp" #include "storage/country_info_getter.hpp" #include "storage/index.hpp" @@ -53,13 +53,6 @@ #include "std/iterator.hpp" #include "std/limits.hpp" -#define LONG_OP(op) \ - { \ - if (IsCancelled()) \ - return; \ - op; \ - } - namespace search { namespace @@ -84,24 +77,6 @@ pair GetLangIndex(int id) return make_pair(g_arrLang1[id], g_arrLang2[id]); } -ftypes::Type GetLocalityIndex(feature::TypesHolder const & types) -{ - using namespace ftypes; - - // Inner logic of SearchAddress expects COUNTRY, STATE and CITY only. - Type const type = IsLocalityChecker::Instance().GetType(types); - switch (type) - { - case NONE: - case COUNTRY: - case STATE: - case CITY: return type; - case TOWN: return CITY; - case VILLAGE: return NONE; - case LOCALITY_COUNT: return type; - } -} - m2::RectD NormalizeViewport(m2::RectD viewport) { m2::RectD minViewport = MercatorBounds::RectByCenterXYAndSizeInMeters( @@ -119,15 +94,6 @@ m2::RectD GetRectAroundPosition(m2::PointD const & position) double constexpr kMaxPositionRadiusM = 50.0 * 1000; return MercatorBounds::RectByCenterXYAndSizeInMeters(position, kMaxPositionRadiusM); } - -inline bool IsHashtagged(strings::UniString const & s) { return !s.empty() && s[0] == '#'; } - -inline strings::UniString RemoveHashtag(strings::UniString const & s) -{ - if (IsHashtagged(s)) - return strings::UniString(s.begin() + 1, s.end()); - return s; -} } // namespace // static @@ -137,24 +103,19 @@ size_t const Processor::kPreResultsCount; double const Processor::kMinViewportRadiusM = 5.0 * 1000; double const Processor::kMaxViewportRadiusM = 50.0 * 1000; -Processor::Processor(Index & index, CategoriesHolder const & categories, +Processor::Processor(Index const & index, CategoriesHolder const & categories, vector const & suggests, storage::CountryInfoGetter const & infoGetter) - : m_index(index) - , m_categories(categories) - , m_suggests(suggests) + : m_categories(categories) , m_infoGetter(infoGetter) -#ifdef FIND_LOCALITY_TEST - , m_locality(&index) -#endif , m_position(0, 0) , m_mode(Mode::Everywhere) , m_worldSearch(true) , m_suggestsEnabled(true) , m_preRanker(kPreResultsCount) - , m_ranker(m_preRanker, *this) + , m_ranker(m_preRanker, index, infoGetter, categories, suggests, + static_cast(*this)) , m_geocoder(index, infoGetter, static_cast(*this)) - , m_reverseGeocoder(index) { // Initialize keywords scorer. // Note! This order should match the indexes arrays above. @@ -164,7 +125,8 @@ Processor::Processor(Index & index, CategoriesHolder const & categories, {StringUtf8Multilang::kInternationalCode, StringUtf8Multilang::kEnglishCode}, {StringUtf8Multilang::kDefaultCode}}; - m_keywordsScorer.SetLanguages(langPriorities); + m_ranker.m_keywordsScorer.SetLanguages(langPriorities); + m_preRanker.m_ranker = &m_ranker; SetPreferredLocale("en"); } @@ -186,7 +148,7 @@ void Processor::SetPreferredLocale(string const & locale) { ASSERT(!locale.empty(), ()); - LOG(LINFO, ("New preffered locale:", locale)); + LOG(LINFO, ("New preferred locale:", locale)); int8_t const code = StringUtf8Multilang::GetLangIndex(languages::Normalize(locale)); SetLanguage(LANG_CURRENT, code); @@ -196,10 +158,7 @@ void Processor::SetPreferredLocale(string const & locale) // Default initialization. // If you want to reset input language, call SetInputLocale before search. SetInputLocale(locale); - -#ifdef FIND_LOCALITY_TEST - m_locality.SetLanguage(code); -#endif + m_ranker.SetLocalityFinderLanguage(code); } void Processor::SetInputLocale(string const & locale) @@ -269,15 +228,15 @@ void Processor::SetQuery(string const & query) m_tokens.resize(maxTokensCount); // Assign tokens and prefix to scorer. - m_keywordsScorer.SetKeywords(m_tokens.data(), m_tokens.size(), m_prefix); + m_ranker.m_keywordsScorer.SetKeywords(m_tokens.data(), m_tokens.size(), m_prefix); - // get preffered types to show in results - m_prefferedTypes.clear(); + // get preferred types to show in results + m_preferredTypes.clear(); ForEachCategoryType(QuerySliceOnRawStrings(m_tokens, m_prefix), - [&](size_t, uint32_t t) - { - m_prefferedTypes.insert(t); - }); + [&](size_t, uint32_t t) + { + m_preferredTypes.insert(t); + }); } void Processor::SetRankPivot(m2::PointD const & pivot) @@ -294,12 +253,12 @@ void Processor::SetRankPivot(m2::PointD const & pivot) void Processor::SetLanguage(int id, int8_t lang) { - m_keywordsScorer.SetLanguage(GetLangIndex(id), lang); + m_ranker.m_keywordsScorer.SetLanguage(GetLangIndex(id), lang); } int8_t Processor::GetLanguage(int id) const { - return m_keywordsScorer.GetLanguage(GetLangIndex(id)); + return m_ranker.m_keywordsScorer.GetLanguage(GetLangIndex(id)); } m2::PointD Processor::GetPivotPoint() const @@ -360,12 +319,12 @@ void Processor::SetViewportByIndex(m2::RectD const & viewport, size_t idx, bool void Processor::ClearCache(size_t ind) { m_viewport[ind].MakeEmpty(); } -int Processor::GetCategoryLocales(int8_t(&arr)[3]) const +size_t Processor::GetCategoryLocales(int8_t(&arr)[3]) const { static int8_t const enLocaleCode = CategoriesHolder::MapLocaleToInteger("en"); // Prepare array of processing locales. English locale is always present for category matching. - int count = 0; + size_t count = 0; if (m_currentLocaleCode != -1) arr[count++] = m_currentLocaleCode; if (m_inputLocaleCode != -1 && m_inputLocaleCode != m_currentLocaleCode) @@ -376,52 +335,43 @@ int Processor::GetCategoryLocales(int8_t(&arr)[3]) const return count; } -void Processor::ForEachCategoryType(StringSliceBase const & slice, - function const & fn) const +template +void Processor::ForEachCategoryType(StringSliceBase const & slice, ToDo && todo) const { int8_t arrLocales[3]; int const localesCount = GetCategoryLocales(arrLocales); - for (size_t i = 0; i < slice.Size(); ++i) - { - auto token = RemoveHashtag(slice.Get(i)); - for (int j = 0; j < localesCount; ++j) - m_categories.ForEachTypeByName(arrLocales[j], token, bind(fn, i, _1)); - ProcessEmojiIfNeeded(token, i, fn); - } -} - -// template -void Processor::ProcessEmojiIfNeeded(strings::UniString const & token, size_t index, - function const & fn) const -{ - // Special process of 2 codepoints emoji (e.g. black guy on a bike). - // Only emoji synonyms can have one codepoint. - if (token.size() > 1) - { - static int8_t const enLocaleCode = CategoriesHolder::MapLocaleToInteger("en"); - - m_categories.ForEachTypeByName(enLocaleCode, strings::UniString(1, token[0]), - bind(fn, index, _1)); - } + ::search::ForEachCategoryType(slice, arrLocales, localesCount, m_categories, forward(todo)); } void Processor::Search(Results & results, size_t limit) { + Geocoder::Params geocoderParams; + InitParams(geocoderParams); + geocoderParams.m_mode = m_mode; + geocoderParams.m_pivot = GetPivotRect(); + geocoderParams.m_accuratePivotCenter = GetPivotPoint(); + m_geocoder.SetParams(geocoderParams); + + Ranker::Params rankerParams; + rankerParams.m_currentLocaleCode = m_currentLocaleCode; + if (m_mode == Mode::Viewport) + rankerParams.m_viewport = GetViewport(); + rankerParams.m_position = GetPosition(); + rankerParams.m_pivotRegion = GetPivotRegion(); + rankerParams.m_preferredTypes = m_preferredTypes; + rankerParams.m_suggestsEnabled = m_suggestsEnabled; + rankerParams.m_query = m_query; + rankerParams.m_tokens = m_tokens; + rankerParams.m_prefix = m_prefix; + rankerParams.m_numCategoryLocales = GetCategoryLocales(rankerParams.m_categoryLocales); + m_ranker.SetParams(rankerParams); + if (m_tokens.empty()) - SuggestStrings(results); - - Geocoder::Params params; - - InitParams(params); - params.m_mode = m_mode; - params.m_pivot = GetPivotRect(); - params.m_accuratePivotCenter = GetPivotPoint(); - m_geocoder.SetParams(params); + m_ranker.SuggestStrings(results); m_geocoder.GoEverywhere(m_preRanker); - - m_ranker.FlushResults(params, results, limit); + m_ranker.FlushResults(geocoderParams, results, limit); } void Processor::SearchViewportPoints(Results & results) @@ -429,6 +379,7 @@ void Processor::SearchViewportPoints(Results & results) Geocoder::Params params; InitParams(params); + params.m_mode = m_mode; params.m_pivot = m_viewport[CURRENT_V]; params.m_accuratePivotCenter = params.m_pivot.Center(); m_geocoder.SetParams(params); @@ -444,212 +395,12 @@ void Processor::SearchCoordinates(Results & res) const if (MatchLatLonDegree(m_query, lat, lon)) { ASSERT_EQUAL(res.GetCount(), 0, ()); - res.AddResultNoChecks(MakeResult(PreResult2(lat, lon))); + // Note that ranker's locale is not set up here but + // it is never used when making lat-lon results anyway. + res.AddResultNoChecks(m_ranker.MakeResult(PreResult2(lat, lon))); } } -void Processor::RemoveStringPrefix(string const & str, string & res) const -{ - search::Delimiters delims; - // Find start iterator of prefix in input query. - using TIter = utf8::unchecked::iterator; - TIter iter(str.end()); - while (iter.base() != str.begin()) - { - TIter prev = iter; - --prev; - - if (delims(*prev)) - break; - else - iter = prev; - } - - // Assign result with input string without prefix. - res.assign(str.begin(), iter.base()); -} - -void Processor::GetSuggestion(string const & name, string & suggest) const -{ - // Splits result's name. - search::Delimiters delims; - vector tokens; - SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), delims); - - // Finds tokens that are already present in the input query. - vector tokensMatched(tokens.size()); - bool prefixMatched = false; - bool fullPrefixMatched = false; - - for (size_t i = 0; i < tokens.size(); ++i) - { - auto const & token = tokens[i]; - - if (find(m_tokens.begin(), m_tokens.end(), token) != m_tokens.end()) - { - tokensMatched[i] = true; - } - else if (StartsWith(token, m_prefix)) - { - prefixMatched = true; - fullPrefixMatched = token.size() == m_prefix.size(); - } - } - - // When |name| does not match prefix or when prefix equals to some - // token of the |name| (for example, when user entered "Moscow" - // without space at the end), we should not suggest anything. - if (!prefixMatched || fullPrefixMatched) - return; - - RemoveStringPrefix(m_query, suggest); - - // Appends unmatched result's tokens to the suggestion. - for (size_t i = 0; i < tokens.size(); ++i) - { - if (tokensMatched[i]) - continue; - suggest.append(strings::ToUtf8(tokens[i])); - suggest.push_back(' '); - } -} - -void Processor::ProcessSuggestions(vector & vec, Results & res) const -{ - if (m_prefix.empty() || !m_suggestsEnabled) - return; - - int added = 0; - for (auto i = vec.begin(); i != vec.end();) - { - PreResult2 const & r = **i; - - ftypes::Type const type = GetLocalityIndex(r.GetTypes()); - if ((type == ftypes::COUNTRY || type == ftypes::CITY) || r.IsStreet()) - { - string suggest; - GetSuggestion(r.GetName(), suggest); - if (!suggest.empty() && added < MAX_SUGGESTS_COUNT) - { - if (res.AddResult((Result(MakeResult(r), suggest)))) - ++added; - - i = vec.erase(i); - continue; - } - } - ++i; - } -} - -class BestNameFinder -{ - KeywordLangMatcher::ScoreT m_score; - string & m_name; - KeywordLangMatcher const & m_keywordsScorer; - -public: - BestNameFinder(string & name, KeywordLangMatcher const & keywordsScorer) - : m_score(), m_name(name), m_keywordsScorer(keywordsScorer) - { - } - - bool operator()(int8_t lang, string const & name) - { - KeywordLangMatcher::ScoreT const score = m_keywordsScorer.Score(lang, name); - if (m_score < score) - { - m_score = score; - m_name = name; - } - return true; - } -}; - -void Processor::GetBestMatchName(FeatureType const & f, string & name) const -{ - BestNameFinder finder(name, m_keywordsScorer); - UNUSED_VALUE(f.ForEachName(finder)); -} - -/// Makes continuous range for tokens and prefix. -template -class CombinedIter -{ - TIter m_i, m_end; - ValueT const * m_val; - -public: - CombinedIter(TIter i, TIter end, ValueT const * val) : m_i(i), m_end(end), m_val(val) {} - - ValueT const & operator*() const - { - ASSERT(m_val != 0 || m_i != m_end, ("dereferencing of empty iterator")); - if (m_i != m_end) - return *m_i; - - return *m_val; - } - - CombinedIter & operator++() - { - if (m_i != m_end) - ++m_i; - else - m_val = 0; - return *this; - } - - bool operator==(CombinedIter const & other) const - { - return m_val == other.m_val && m_i == other.m_i; - } - - bool operator!=(CombinedIter const & other) const - { - return m_val != other.m_val || m_i != other.m_i; - } -}; - -class AssignHighlightRange -{ - Result & m_res; - -public: - AssignHighlightRange(Result & res) : m_res(res) {} - - void operator()(pair const & range) { m_res.AddHighlightRange(range); } -}; - -Result Processor::MakeResult(PreResult2 const & r) const -{ - Result res = r.GenerateFinalResult(m_infoGetter, &m_categories, &m_prefferedTypes, - m_currentLocaleCode, &m_reverseGeocoder); - MakeResultHighlight(res); -#ifdef FIND_LOCALITY_TEST - if (ftypes::IsLocalityChecker::Instance().GetType(r.GetTypes()) == ftypes::NONE) - { - string city; - m_locality.GetLocality(res.GetFeatureCenter(), city); - res.AppendCity(city); - } -#endif - - res.SetRankingInfo(r.GetRankingInfo()); - return res; -} - -void Processor::MakeResultHighlight(Result & res) const -{ - using TIter = buffer_vector::const_iterator; - using TCombinedIter = CombinedIter; - - TCombinedIter beg(m_tokens.begin(), m_tokens.end(), m_prefix.empty() ? 0 : &m_prefix); - TCombinedIter end(m_tokens.end(), m_tokens.end(), 0); - - SearchStringTokensIntersectionRanges(res.GetString(), beg, end, AssignHighlightRange(res)); -} - namespace { int GetOldTypeFromIndex(size_t index) @@ -857,41 +608,8 @@ void Processor::ClearCaches() for (size_t i = 0; i < COUNT_V; ++i) ClearCache(i); - m_locality.ClearCache(); m_geocoder.ClearCaches(); -} - -void Processor::SuggestStrings(Results & res) -{ - if (m_prefix.empty() || !m_suggestsEnabled) - return; - int8_t arrLocales[3]; - int const localesCount = GetCategoryLocales(arrLocales); - - string prolog; - RemoveStringPrefix(m_query, prolog); - - for (int i = 0; i < localesCount; ++i) - MatchForSuggestionsImpl(m_prefix, arrLocales[i], prolog, res); -} - -void Processor::MatchForSuggestionsImpl(strings::UniString const & token, int8_t locale, - string const & prolog, Results & res) -{ - for (auto const & suggest : m_suggests) - { - strings::UniString const & s = suggest.m_name; - if ((suggest.m_prefixLength <= token.size()) && - (token != s) && // do not push suggestion if it already equals to token - (suggest.m_locale == locale) && // push suggestions only for needed language - StartsWith(s.begin(), s.end(), token.begin(), token.end())) - { - string const utf8Str = strings::ToUtf8(s); - Result r(utf8Str, prolog + utf8Str + " "); - MakeResultHighlight(r); - res.AddResult(move(r)); - } - } + m_ranker.ClearCaches(); } m2::RectD const & Processor::GetViewport(ViewportID vID /*= DEFAULT_V*/) const diff --git a/search/processor.hpp b/search/processor.hpp index 0a0de1b566..913e0ef7d1 100644 --- a/search/processor.hpp +++ b/search/processor.hpp @@ -1,11 +1,10 @@ #pragma once #include "search/geocoder.hpp" -#include "search/keyword_lang_matcher.hpp" #include "search/mode.hpp" +#include "search/params.hpp" #include "search/pre_ranker.hpp" #include "search/ranker.hpp" #include "search/rank_table_cache.hpp" -#include "search/reverse_geocoder.hpp" #include "search/search_trie.hpp" #include "search/suggest.hpp" #include "search/token_slice.hpp" @@ -29,12 +28,6 @@ #include "std/unique_ptr.hpp" #include "std/vector.hpp" -#define FIND_LOCALITY_TEST - -#ifdef FIND_LOCALITY_TEST -#include "search/locality_finder.hpp" -#endif - class FeatureType; class CategoriesHolder; @@ -61,7 +54,6 @@ class Ranker; class PreResult2Maker; class FeatureLoader; -class BestNameFinder; class DoFindLocality; class HouseCompFactory; @@ -74,8 +66,8 @@ public: static double const kMinViewportRadiusM; static double const kMaxViewportRadiusM; - Processor(Index & index, CategoriesHolder const & categories, vector const & suggests, - storage::CountryInfoGetter const & infoGetter); + Processor(Index const & index, CategoriesHolder const & categories, + vector const & suggests, storage::CountryInfoGetter const & infoGetter); inline void SupportOldFormat(bool b) { m_supportOldFormat = b; } @@ -87,6 +79,7 @@ public: void SetPreferredLocale(string const & locale); void SetInputLocale(string const & locale); void SetQuery(string const & query); + void SetOnResults(TOnResults const & onResults) { m_onResults = onResults; } // TODO (@y): this function must be removed. void SetRankPivot(m2::PointD const & pivot); inline void SetMode(Mode mode) { m_mode = mode; } @@ -137,15 +130,10 @@ protected: friend class PreResult2Maker; friend class Ranker; - int GetCategoryLocales(int8_t(&arr)[3]) const; + size_t GetCategoryLocales(int8_t(&arr)[3]) const; - void ForEachCategoryType( - StringSliceBase const & slice, - function const & fn) const; - - void ProcessEmojiIfNeeded( - strings::UniString const & token, size_t index, - function const & fn) const; + template + void ForEachCategoryType(StringSliceBase const & slice, ToDo && todo) const; using TMWMVector = vector>; using TOffsetsVector = map>; @@ -157,34 +145,15 @@ protected: void SetViewportByIndex(m2::RectD const & viewport, size_t idx, bool forceUpdate); void ClearCache(size_t ind); - void RemoveStringPrefix(string const & str, string & res) const; - void GetSuggestion(string const & name, string & suggest) const; - - void ProcessSuggestions(vector & vec, Results & res) const; - - void SuggestStrings(Results & res); - void MatchForSuggestionsImpl(strings::UniString const & token, int8_t locale, - string const & prolog, Results & res); - - void GetBestMatchName(FeatureType const & f, string & name) const; - - Result MakeResult(PreResult2 const & r) const; - void MakeResultHighlight(Result & res) const; - - Index & m_index; CategoriesHolder const & m_categories; - vector const & m_suggests; storage::CountryInfoGetter const & m_infoGetter; + TOnResults m_onResults; string m_region; string m_query; buffer_vector m_tokens; strings::UniString m_prefix; - set m_prefferedTypes; - -#ifdef FIND_LOCALITY_TEST - mutable LocalityFinder m_locality; -#endif + set m_preferredTypes; m2::RectD m_viewport[COUNT_V]; m2::PointD m_pivot; @@ -202,8 +171,6 @@ protected: void SetLanguage(int id, int8_t lang); int8_t GetLanguage(int id) const; - KeywordLangMatcher m_keywordsScorer; - bool m_supportOldFormat; protected: @@ -212,6 +179,5 @@ protected: PreRanker m_preRanker; Ranker m_ranker; Geocoder m_geocoder; - ReverseGeocoder const m_reverseGeocoder; }; } // namespace search diff --git a/search/processor_factory.hpp b/search/processor_factory.hpp index 5e0fc0fe78..4eeb8cec74 100644 --- a/search/processor_factory.hpp +++ b/search/processor_factory.hpp @@ -1,5 +1,6 @@ #pragma once +#include "search/params.hpp" #include "search/processor.hpp" #include "search/suggest.hpp" diff --git a/search/ranker.cpp b/search/ranker.cpp index 8507cc5ff4..ed43d73ef4 100644 --- a/search/ranker.cpp +++ b/search/ranker.cpp @@ -1,6 +1,8 @@ #include "search/ranker.hpp" -#include "search/processor.hpp" +#include "search/pre_ranker.hpp" +#include "search/string_intersection.hpp" #include "search/token_slice.hpp" +#include "search/utils.hpp" #include "indexer/feature_algo.hpp" @@ -46,12 +48,93 @@ void RemoveDuplicatingLinear(vector & values) }), values.end()); } + +void RemoveStringPrefix(string const & str, string & res) +{ + search::Delimiters delims; + // Find start iterator of prefix in input query. + using TIter = utf8::unchecked::iterator; + TIter iter(str.end()); + while (iter.base() != str.begin()) + { + TIter prev = iter; + --prev; + + if (delims(*prev)) + break; + else + iter = prev; + } + + // Assign result with input string without prefix. + res.assign(str.begin(), iter.base()); +} + +ftypes::Type GetLocalityIndex(feature::TypesHolder const & types) +{ + using namespace ftypes; + + // Inner logic of SearchAddress expects COUNTRY, STATE and CITY only. + Type const type = IsLocalityChecker::Instance().GetType(types); + switch (type) + { + case NONE: + case COUNTRY: + case STATE: + case CITY: return type; + case TOWN: return CITY; + case VILLAGE: return NONE; + case LOCALITY_COUNT: return type; + } +} + +/// Makes continuous range for tokens and prefix. +template +class CombinedIter +{ + TIter m_i, m_end; + ValueT const * m_val; + +public: + CombinedIter(TIter i, TIter end, ValueT const * val) : m_i(i), m_end(end), m_val(val) {} + + ValueT const & operator*() const + { + ASSERT(m_val != 0 || m_i != m_end, ("dereferencing of empty iterator")); + if (m_i != m_end) + return *m_i; + + return *m_val; + } + + CombinedIter & operator++() + { + if (m_i != m_end) + ++m_i; + else + m_val = 0; + return *this; + } + + bool operator==(CombinedIter const & other) const + { + return m_val == other.m_val && m_i == other.m_i; + } + + bool operator!=(CombinedIter const & other) const + { + return m_val != other.m_val || m_i != other.m_i; + } +}; } // namespace class PreResult2Maker { - Processor & m_processor; + Ranker & m_ranker; + Index const & m_index; Geocoder::Params const & m_params; + Ranker::Params const & m_rankerParams; + storage::CountryInfoGetter const & m_infoGetter; unique_ptr m_pFV; @@ -60,14 +143,14 @@ class PreResult2Maker string & country) { if (m_pFV.get() == 0 || m_pFV->GetId() != id.m_mwmId) - m_pFV.reset(new Index::FeaturesLoaderGuard(m_processor.m_index, id.m_mwmId)); + m_pFV.reset(new Index::FeaturesLoaderGuard(m_index, id.m_mwmId)); m_pFV->GetFeatureByIndex(id.m_index, f); f.SetID(id); center = feature::GetCenter(f); - m_processor.GetBestMatchName(f, name); + m_ranker.GetBestMatchName(f, name); // country (region) name is a file name if feature isn't from World.mwm if (m_pFV->IsWorld()) @@ -108,12 +191,14 @@ class PreResult2Maker feature::TypesHolder holder(ft); vector> matched(slice.Size()); - m_processor.ForEachCategoryType(QuerySlice(slice), [&](size_t i, uint32_t t) - { - ++matched[i].second; - if (holder.Has(t)) - ++matched[i].first; - }); + ForEachCategoryType(QuerySlice(slice), m_ranker.m_params.m_categoryLocales, + m_ranker.m_params.m_numCategoryLocales, m_ranker.m_categories, + [&](size_t i, uint32_t t) + { + ++matched[i].second; + if (holder.Has(t)) + ++matched[i].first; + }); info.m_pureCats = all_of(matched.begin(), matched.end(), [](pair const & m) { @@ -133,15 +218,15 @@ class PreResult2Maker case SearchModel::SEARCH_TYPE_VILLAGE: return rank /= 1.5; case SearchModel::SEARCH_TYPE_CITY: { - if (m_processor.GetViewport(Processor::CURRENT_V).IsPointInside(center)) + if (m_rankerParams.m_viewport.IsPointInside(center)) return rank * 2; storage::CountryInfo info; if (country.empty()) - m_processor.m_infoGetter.GetRegionInfo(center, info); + m_infoGetter.GetRegionInfo(center, info); else - m_processor.m_infoGetter.GetRegionInfo(country, info); - if (info.IsNotEmpty() && info.m_name == m_processor.GetPivotRegion()) + m_infoGetter.GetRegionInfo(country, info); + if (info.IsNotEmpty() && info.m_name == m_rankerParams.m_pivotRegion) return rank *= 1.7; } case SearchModel::SEARCH_TYPE_COUNTRY: @@ -153,8 +238,14 @@ class PreResult2Maker } public: - explicit PreResult2Maker(Processor & q, Geocoder::Params const & params) - : m_processor(q), m_params(params) + explicit PreResult2Maker(Ranker & ranker, Index const & index, + storage::CountryInfoGetter const & infoGetter, + Geocoder::Params const & params, Ranker::Params const & rankerParams) + : m_ranker(ranker) + , m_index(index) + , m_params(params) + , m_rankerParams(rankerParams) + , m_infoGetter(infoGetter) { } @@ -167,7 +258,7 @@ public: LoadFeature(res1.GetId(), ft, center, name, country); - auto res2 = make_unique(ft, &res1, center, m_processor.GetPosition() /* pivot */, + auto res2 = make_unique(ft, &res1, center, m_rankerParams.m_position /* pivot */, name, country); search::RankingInfo info; @@ -189,13 +280,13 @@ bool Ranker::IsResultExists(PreResult2 const & p, vector const & v }); } -void Ranker::MakePreResult2(Geocoder::Params const & params, vector & cont, +void Ranker::MakePreResult2(Geocoder::Params const & geocoderParams, vector & cont, vector & streets) { - m_preRanker.Filter(m_viewportSearch); + m_preRanker.Filter(m_params.m_viewportSearch); // Makes PreResult2 vector. - PreResult2Maker maker(m_processor, params); + PreResult2Maker maker(*this, m_index, m_infoGetter, geocoderParams, m_params); m_preRanker.ForEach( [&](PreResult1 const & r) { @@ -203,7 +294,8 @@ void Ranker::MakePreResult2(Geocoder::Params const & params, vectorGetCenter())) + if (geocoderParams.m_mode == Mode::Viewport && + !geocoderParams.m_pivot.IsPointInside(p->GetCenter())) return; if (p->IsStreet()) @@ -214,6 +306,160 @@ void Ranker::MakePreResult2(Geocoder::Params const & params, vector::const_iterator; + using TCombinedIter = CombinedIter; + + TCombinedIter beg(m_params.m_tokens.begin(), m_params.m_tokens.end(), + m_params.m_prefix.empty() ? 0 : &m_params.m_prefix); + TCombinedIter end(m_params.m_tokens.end(), m_params.m_tokens.end(), 0); + auto assignHighlightRange = [&](pair const & range) + { + res.AddHighlightRange(range); + }; + + SearchStringTokensIntersectionRanges(res.GetString(), beg, end, assignHighlightRange); +} + +void Ranker::GetSuggestion(string const & name, string & suggest) const +{ + // Splits result's name. + search::Delimiters delims; + vector tokens; + SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), delims); + + // Finds tokens that are already present in the input query. + vector tokensMatched(tokens.size()); + bool prefixMatched = false; + bool fullPrefixMatched = false; + + for (size_t i = 0; i < tokens.size(); ++i) + { + auto const & token = tokens[i]; + + if (find(m_params.m_tokens.begin(), m_params.m_tokens.end(), token) != m_params.m_tokens.end()) + { + tokensMatched[i] = true; + } + else if (StartsWith(token, m_params.m_prefix)) + { + prefixMatched = true; + fullPrefixMatched = token.size() == m_params.m_prefix.size(); + } + } + + // When |name| does not match prefix or when prefix equals to some + // token of the |name| (for example, when user entered "Moscow" + // without space at the end), we should not suggest anything. + if (!prefixMatched || fullPrefixMatched) + return; + + RemoveStringPrefix(m_params.m_query, suggest); + + // Appends unmatched result's tokens to the suggestion. + for (size_t i = 0; i < tokens.size(); ++i) + { + if (tokensMatched[i]) + continue; + suggest.append(strings::ToUtf8(tokens[i])); + suggest.push_back(' '); + } +} + +void Ranker::SuggestStrings(Results & res) +{ + if (m_params.m_prefix.empty() || !m_params.m_suggestsEnabled) + return; + + string prolog; + RemoveStringPrefix(m_params.m_query, prolog); + + for (int i = 0; i < m_params.m_numCategoryLocales; ++i) + MatchForSuggestions(m_params.m_prefix, m_params.m_categoryLocales[i], prolog, res); +} + +void Ranker::MatchForSuggestions(strings::UniString const & token, int8_t locale, + string const & prolog, Results & res) +{ + for (auto const & suggest : m_suggests) + { + strings::UniString const & s = suggest.m_name; + if ((suggest.m_prefixLength <= token.size()) && + (token != s) && // do not push suggestion if it already equals to token + (suggest.m_locale == locale) && // push suggestions only for needed language + StartsWith(s.begin(), s.end(), token.begin(), token.end())) + { + string const utf8Str = strings::ToUtf8(s); + Result r(utf8Str, prolog + utf8Str + " "); + MakeResultHighlight(r); + res.AddResult(move(r)); + } + } +} + +void Ranker::GetBestMatchName(FeatureType const & f, string & name) const +{ + KeywordLangMatcher::ScoreT bestScore; + auto bestNameFinder = [&](int8_t lang, string const & s) -> bool + { + auto const score = m_keywordsScorer.Score(lang, s); + if (bestScore < score) + { + bestScore = score; + name = s; + } + return true; + }; + UNUSED_VALUE(f.ForEachName(bestNameFinder)); +} + +void Ranker::ProcessSuggestions(vector & vec, Results & res) const +{ + if (m_params.m_prefix.empty() || !m_params.m_suggestsEnabled) + return; + + int added = 0; + for (auto i = vec.begin(); i != vec.end();) + { + PreResult2 const & r = **i; + + ftypes::Type const type = GetLocalityIndex(r.GetTypes()); + if ((type == ftypes::COUNTRY || type == ftypes::CITY) || r.IsStreet()) + { + string suggest; + GetSuggestion(r.GetName(), suggest); + if (!suggest.empty() && added < MAX_SUGGESTS_COUNT) + { + if (res.AddResult((Result(MakeResult(r), suggest)))) + ++added; + + i = vec.erase(i); + continue; + } + } + ++i; + } +} + void Ranker::FlushResults(Geocoder::Params const & params, Results & res, size_t resCount) { vector values; @@ -226,29 +472,28 @@ void Ranker::FlushResults(Geocoder::Params const & params, Results & res, size_t sort(values.rbegin(), values.rend(), my::LessBy(&IndexedValue::GetRank)); - m_processor.ProcessSuggestions(values, res); + ProcessSuggestions(values, res); // Emit feature results. size_t count = res.GetCount(); for (size_t i = 0; i < values.size() && count < resCount; ++i) { - if (m_processor.IsCancelled()) - break; + BailIfCancelled(); LOG(LDEBUG, (values[i])); auto const & preResult2 = *values[i]; - if (res.AddResult(m_processor.MakeResult(preResult2))) + if (res.AddResult(MakeResult(preResult2))) ++count; } } -void Ranker::FlushViewportResults(Geocoder::Params const & params, Results & res) +void Ranker::FlushViewportResults(Geocoder::Params const & geocoderParams, Results & res) { vector values; vector streets; - MakePreResult2(params, values, streets); + MakePreResult2(geocoderParams, values, streets); RemoveDuplicatingLinear(values); if (values.empty()) return; @@ -257,14 +502,27 @@ void Ranker::FlushViewportResults(Geocoder::Params const & params, Results & res for (size_t i = 0; i < values.size(); ++i) { - if (m_processor.IsCancelled()) - break; + BailIfCancelled(); res.AddResultNoChecks( (*(values[i])) - .GenerateFinalResult(m_processor.m_infoGetter, &m_processor.m_categories, - &m_processor.m_prefferedTypes, m_processor.m_currentLocaleCode, + .GenerateFinalResult(m_infoGetter, &m_categories, &m_params.m_preferredTypes, + m_params.m_currentLocaleCode, nullptr /* Viewport results don't need calculated address */)); } } + +void Ranker::ClearCaches() +{ +#ifdef FIND_LOCALITY_TEST + m_locality.ClearCache(); +#endif // FIND_LOCALITY_TEST +} + +void Ranker::SetLocalityFinderLanguage(int8_t code) +{ +#ifdef FIND_LOCALITY_TEST + m_locality.SetLanguage(code); +#endif // FIND_LOCALITY_TEST +} } // namespace search diff --git a/search/ranker.hpp b/search/ranker.hpp index 84b760acf0..8bdfcb991f 100644 --- a/search/ranker.hpp +++ b/search/ranker.hpp @@ -1,27 +1,83 @@ #pragma once +#include "search/cancel_exception.hpp" #include "search/geocoder.hpp" #include "search/intermediate_result.hpp" +#include "search/keyword_lang_matcher.hpp" #include "search/mode.hpp" +#include "search/reverse_geocoder.hpp" +#include "search/suggest.hpp" +#include "indexer/categories_holder.hpp" #include "indexer/feature_decl.hpp" +#include "geometry/point2d.hpp" +#include "geometry/rect2d.hpp" + +#include "base/string_utils.hpp" + +#include "std/set.hpp" +#include "std/string.hpp" #include "std/vector.hpp" +#define FIND_LOCALITY_TEST + +#ifdef FIND_LOCALITY_TEST +#include "search/locality_finder.hpp" +#endif // FIND_LOCALITY_TEST + +class CategoriesHolder; +class Index; +namespace storage +{ +class CountryInfoGetter; +} // namespace storage + namespace search { class PreResult2Maker; -class Processor; class Ranker { public: - Ranker(PreRanker & preRanker, Processor & processor) - : m_viewportSearch(false), m_preRanker(preRanker), m_processor(processor) + struct Params + { + bool m_viewportSearch = false; + + int8_t m_currentLocaleCode = CategoriesHolder::kEnglishCode; + m2::RectD m_viewport; + m2::PointD m_position; + string m_pivotRegion; + set m_preferredTypes; + bool m_suggestsEnabled = false; + + string m_query; + buffer_vector m_tokens; + // Prefix of the last token in the query. + // We need it here to make suggestions. + strings::UniString m_prefix; + + int8_t m_categoryLocales[3]; + size_t m_numCategoryLocales = 0; + }; + + Ranker(PreRanker & preRanker, Index const & index, storage::CountryInfoGetter const & infoGetter, + CategoriesHolder const & categories, vector const & suggests, + my::Cancellable const & cancellable) + : m_reverseGeocoder(index) + , m_preRanker(preRanker) + , m_cancellable(cancellable) +#ifdef FIND_LOCALITY_TEST + , m_locality(&index) +#endif // FIND_LOCALITY_TEST + , m_index(index) + , m_infoGetter(infoGetter) + , m_categories(categories) + , m_suggests(suggests) { } - inline void Init(bool viewportSearch) { m_viewportSearch = viewportSearch; } + inline void Init(bool viewportSearch) { m_params.m_viewportSearch = viewportSearch; } bool IsResultExists(PreResult2 const & p, vector const & values); @@ -29,15 +85,43 @@ public: vector & streets); Result MakeResult(PreResult2 const & r) const; + void MakeResultHighlight(Result & res) const; - void FlushResults(Geocoder::Params const & params, Results & res, size_t resCount); - void FlushViewportResults(Geocoder::Params const & params, Results & res); + void GetSuggestion(string const & name, string & suggest) const; + void SuggestStrings(Results & res); + void MatchForSuggestions(strings::UniString const & token, int8_t locale, string const & prolog, + Results & res); + void GetBestMatchName(FeatureType const & f, string & name) const; + void ProcessSuggestions(vector & vec, Results & res) const; + + void FlushResults(Geocoder::Params const & geocoderParams, Results & res, size_t resCount); + void FlushViewportResults(Geocoder::Params const & geocoderParams, Results & res); + + void SetParams(Params const & params) { m_params = params; } + + void ClearCaches(); + + void SetLocalityFinderLanguage(int8_t code); + + inline void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); } + + KeywordLangMatcher m_keywordsScorer; + + friend class PreResult2Maker; private: - bool m_viewportSearch; + Params m_params; + ReverseGeocoder const m_reverseGeocoder; PreRanker & m_preRanker; + my::Cancellable const & m_cancellable; - // todo(@m) Remove. - Processor & m_processor; +#ifdef FIND_LOCALITY_TEST + mutable LocalityFinder m_locality; +#endif // FIND_LOCALITY_TEST + + Index const & m_index; + storage::CountryInfoGetter const & m_infoGetter; + CategoriesHolder const & m_categories; + vector const & m_suggests; }; } // namespace search diff --git a/search/search.pro b/search/search.pro index 6be1bd7a85..eec3a48784 100644 --- a/search/search.pro +++ b/search/search.pro @@ -64,6 +64,7 @@ HEADERS += \ suggest.hpp \ token_slice.hpp \ types_skipper.hpp \ + utils.hpp \ SOURCES += \ approximate_string_match.cpp \