diff --git a/search/search_query.cpp b/search/search_query.cpp index 682419c431..9a28c4fbc8 100644 --- a/search/search_query.cpp +++ b/search/search_query.cpp @@ -181,6 +181,32 @@ m2::RectD GetRectAroundPosition(m2::PointD const & position) double constexpr kMaxPositionRadiusM = 50.0 * 1000; return MercatorBounds::RectByCenterXYAndSizeInMeters(position, kMaxPositionRadiusM); } + +template +void UpdateNameScore(string const & name, TSlice const & slice, search::v2::NameScore & bestScore) +{ + auto const score = v2::GetNameScore(name, slice); + if (score > bestScore) + bestScore = score; +} + +template +void UpdateNameScore(vector const & tokens, TSlice const & slice, + search::v2::NameScore & bestScore, double & bestCoverage) +{ + auto const score = v2::GetNameScore(tokens, slice); + auto const coverage = + tokens.empty() ? 0 : static_cast(slice.Size()) / static_cast(tokens.size()); + if (score > bestScore) + { + bestScore = score; + bestCoverage = coverage; + } + else if (score == bestScore && coverage > bestCoverage) + { + bestCoverage = coverage; + } +} } // namespace // static @@ -591,6 +617,9 @@ class PreResult2Maker info.m_searchType = preInfo.m_searchType; info.m_nameScore = v2::NAME_SCORE_ZERO; + + v2::TokensSliceNoCategories slice(m_params, preInfo.m_startToken, preInfo.m_endToken); + for (auto const & lang : m_params.m_langs) { string name; @@ -599,28 +628,11 @@ class PreResult2Maker vector tokens; SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters()); - auto score = GetNameScore(tokens, m_params, preInfo.m_startToken, preInfo.m_endToken); - auto coverage = - tokens.empty() ? 0 : static_cast(preInfo.m_endToken - preInfo.m_startToken) / - static_cast(tokens.size()); - if (score > info.m_nameScore) - { - info.m_nameScore = score; - info.m_nameCoverage = coverage; - } - else if (score == info.m_nameScore && coverage > info.m_nameCoverage) - { - info.m_nameCoverage = coverage; - } + UpdateNameScore(tokens, slice, info.m_nameScore, info.m_nameCoverage); } if (info.m_searchType == v2::SearchModel::SEARCH_TYPE_BUILDING) - { - string const houseNumber = ft.GetHouseNumber(); - auto score = GetNameScore(houseNumber, m_params, preInfo.m_startToken, preInfo.m_endToken); - if (score > info.m_nameScore) - info.m_nameScore = score; - } + UpdateNameScore(ft.GetHouseNumber(), slice, info.m_nameScore); } uint8_t NormalizeRank(uint8_t rank, v2::SearchModel::SearchType type, m2::PointD const & center, @@ -1164,6 +1176,8 @@ void Query::InitParams(bool localitySearch, SearchQueryParams & params) for (size_t i = 0; i < tokensCount; ++i) params.m_tokens[i].push_back(m_tokens[i]); + params.m_isCategorySynonym.assign(tokensCount + (m_prefix.empty() ? 0 : 1), false); + // Add names of categories (and synonyms). if (!localitySearch) { @@ -1175,6 +1189,7 @@ void Query::InitParams(bool localitySearch, SearchQueryParams & params) uint32_t const index = cl.GetIndexForType(t); v.push_back(FeatureTypeToString(index)); + params.m_isCategorySynonym[i] = true; // v2-version MWM has raw classificator types in search index prefix, so // do the hack: add synonyms for old convention if needed. diff --git a/search/search_query_params.cpp b/search/search_query_params.cpp index 3de965be4a..c358bce01f 100644 --- a/search/search_query_params.cpp +++ b/search/search_query_params.cpp @@ -63,6 +63,7 @@ void SearchQueryParams::Clear() { m_tokens.clear(); m_prefixTokens.clear(); + m_isCategorySynonym.clear(); m_langs.clear(); m_scale = scales::GetUpperScale(); } diff --git a/search/search_query_params.hpp b/search/search_query_params.hpp index 4b3f3ebb66..38a26b7011 100644 --- a/search/search_query_params.hpp +++ b/search/search_query_params.hpp @@ -16,6 +16,8 @@ struct SearchQueryParams vector m_tokens; TSynonymsVector m_prefixTokens; + vector m_isCategorySynonym; + TLangsSet m_langs; int m_scale; diff --git a/search/search_tests/ranking_tests.cpp b/search/search_tests/ranking_tests.cpp index 605900dfbb..b74e35a284 100644 --- a/search/search_tests/ranking_tests.cpp +++ b/search/search_tests/ranking_tests.cpp @@ -32,7 +32,7 @@ NameScore GetScore(string const & name, string const & query, size_t startToken, params.m_prefixTokens.swap(params.m_tokens.back()); params.m_tokens.pop_back(); } - return GetNameScore(name, params, startToken, endToken); + return GetNameScore(name, TokensSlice(params, startToken, endToken)); } UNIT_TEST(NameTest_Smoke) diff --git a/search/v2/locality_scorer.cpp b/search/v2/locality_scorer.cpp index 66f5d92d90..49e7e8a817 100644 --- a/search/v2/locality_scorer.cpp +++ b/search/v2/locality_scorer.cpp @@ -99,8 +99,8 @@ void LocalityScorer::SortByName(vector & ls) const auto score = NAME_SCORE_ZERO; for (auto const & name : names) { - score = max(score, - GetNameScore(name, m_params, l.m_locality.m_startToken, l.m_locality.m_endToken)); + score = max(score, GetNameScore(name, v2::TokensSlice(m_params, l.m_locality.m_startToken, + l.m_locality.m_endToken))); } l.m_nameScore = score; } diff --git a/search/v2/ranking_info.cpp b/search/v2/ranking_info.cpp index 59c571ec2e..f612e6be87 100644 --- a/search/v2/ranking_info.cpp +++ b/search/v2/ranking_info.cpp @@ -14,7 +14,7 @@ namespace double const kDistanceToPivot = 24.443; double const kRank = 11.010; double const kNameScore = 1.0; -double const kNameCoverage = 0.0; +double const kNameCoverage = 1.0; double const kSearchType = 22.378; double TransformDistance(double distance) diff --git a/search/v2/ranking_utils.cpp b/search/v2/ranking_utils.cpp index f2dc94bd06..f865f3336e 100644 --- a/search/v2/ranking_utils.cpp +++ b/search/v2/ranking_utils.cpp @@ -1,12 +1,5 @@ #include "search/v2/ranking_utils.hpp" -#include "search/search_query_params.hpp" - -#include "indexer/search_delimiters.hpp" -#include "indexer/search_string_utils.hpp" - -#include "base/stl_add.hpp" - #include "std/algorithm.hpp" using namespace strings; @@ -15,7 +8,7 @@ namespace search { namespace v2 { -namespace +namespace impl { bool Match(vector const & tokens, UniString const & token) { @@ -31,54 +24,7 @@ bool PrefixMatch(vector const & prefixes, UniString const & token) } return false; } -} // namespace - -NameScore GetNameScore(string const & name, SearchQueryParams const & params, size_t startToken, - size_t endToken) -{ - if (startToken >= endToken) - return NAME_SCORE_ZERO; - - vector tokens; - SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters()); - return GetNameScore(tokens, params, startToken, endToken); -} - -NameScore GetNameScore(vector const & tokens, SearchQueryParams const & params, - size_t startToken, size_t endToken) -{ - if (startToken >= endToken) - return NAME_SCORE_ZERO; - - size_t const n = tokens.size(); - size_t const m = endToken - startToken; - - bool const lastTokenIsPrefix = (endToken == params.m_tokens.size() + 1); - - NameScore score = NAME_SCORE_ZERO; - for (int offset = 0; offset + m <= n; ++offset) - { - bool match = true; - for (int i = 0; i + 1 < m && match; ++i) - match = match && Match(params.GetTokens(startToken + i), tokens[offset + i]); - if (!match) - continue; - - if (Match(params.GetTokens(endToken - 1), tokens[offset + m - 1])) - { - if (m == n) - return NAME_SCORE_FULL_MATCH; - score = max(score, NAME_SCORE_SUBSTRING); - } - if (lastTokenIsPrefix && PrefixMatch(params.GetTokens(endToken - 1), tokens[offset + m - 1])) - { - if (m == n) - return NAME_SCORE_FULL_MATCH_PREFIX; - score = max(score, NAME_SCORE_SUBSTRING_PREFIX); - } - } - return score; -} +} // namespace impl string DebugPrint(NameScore score) { diff --git a/search/v2/ranking_utils.hpp b/search/v2/ranking_utils.hpp index 4dd5099c6c..a559192930 100644 --- a/search/v2/ranking_utils.hpp +++ b/search/v2/ranking_utils.hpp @@ -1,8 +1,14 @@ #pragma once +#include "search/search_query_params.hpp" #include "search/v2/geocoder.hpp" #include "search/v2/search_model.hpp" +#include "indexer/search_delimiters.hpp" +#include "indexer/search_string_utils.hpp" + +#include "base/assert.hpp" +#include "base/stl_add.hpp" #include "base/string_utils.hpp" #include "std/cstdint.hpp" @@ -16,6 +22,13 @@ struct SearchQueryParams; namespace v2 { +namespace impl +{ +bool Match(vector const & tokens, strings::UniString const & token); + +bool PrefixMatch(vector const & prefixes, strings::UniString const & token); +} // namespace impl + // The order and numeric values are important here. Please, check all // use-cases before changing this enum. enum NameScore @@ -29,11 +42,120 @@ enum NameScore NAME_SCORE_COUNT }; -NameScore GetNameScore(string const & name, SearchQueryParams const & params, size_t startToken, - size_t endToken); +class TokensSlice +{ +public: + TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken) + : m_params(params), m_offset(startToken), m_size(endToken - startToken) + { + ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); + } -NameScore GetNameScore(vector const & tokens, SearchQueryParams const & params, - size_t startToken, size_t endToken); + inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const + { + ASSERT_LESS(i, Size(), ()); + return m_params.GetTokens(m_offset + i); + } + + inline size_t Size() const { return m_size; } + + inline bool Empty() const { return Size() == 0; } + + inline bool IsPrefix(size_t i) const + { + ASSERT_LESS(i, Size(), ()); + return m_offset + i == m_params.m_tokens.size(); + } + +private: + SearchQueryParams const & m_params; + size_t const m_offset; + size_t const m_size; +}; + +class TokensSliceNoCategories +{ +public: + TokensSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken) + : m_params(params) + { + ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); + + m_indexes.reserve(endToken - startToken); + for (size_t i = startToken; i < endToken; ++i) + { + if (!m_params.m_isCategorySynonym[i]) + m_indexes.push_back(i); + } + } + + inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const + { + ASSERT_LESS(i, Size(), ()); + return m_params.GetTokens(m_indexes[i]); + } + + inline size_t Size() const { return m_indexes.size(); } + + inline bool Empty() const { return Size() == 0; } + + inline bool IsPrefix(size_t i) const + { + ASSERT_LESS(i, Size(), ()); + return m_indexes[i] == m_params.m_tokens.size(); + } + +private: + SearchQueryParams const & m_params; + vector m_indexes; +}; + +template +NameScore GetNameScore(string const & name, TSlice const & slice) +{ + if (slice.Empty()) + return NAME_SCORE_ZERO; + + vector tokens; + SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters()); + return GetNameScore(tokens, slice); +} + +template +NameScore GetNameScore(vector const & tokens, TSlice const & slice) +{ + if (slice.Empty()) + return NAME_SCORE_ZERO; + + size_t const n = tokens.size(); + size_t const m = slice.Size(); + + bool const lastTokenIsPrefix = slice.IsPrefix(m - 1); + + NameScore score = NAME_SCORE_ZERO; + for (int offset = 0; offset + m <= n; ++offset) + { + bool match = true; + for (int i = 0; i < m - 1 && match; ++i) + match = match && impl::Match(slice.Get(i), tokens[offset + i]); + if (!match) + continue; + + if (impl::Match(slice.Get(m - 1), tokens[offset + m - 1])) + { + if (m == n) + return NAME_SCORE_FULL_MATCH; + score = max(score, NAME_SCORE_SUBSTRING); + } + if (lastTokenIsPrefix && impl::PrefixMatch(slice.Get(m - 1), tokens[offset + m - 1])) + { + if (m == n) + return NAME_SCORE_FULL_MATCH_PREFIX; + score = max(score, NAME_SCORE_SUBSTRING_PREFIX); + } + } + return score; +} string DebugPrint(NameScore score); } // namespace v2