diff --git a/base/buffer_vector.hpp b/base/buffer_vector.hpp index d9d664041c..99c2925c6c 100644 --- a/base/buffer_vector.hpp +++ b/base/buffer_vector.hpp @@ -403,12 +403,12 @@ public: insert(where, &value, &value + 1); } - template - void erase_if(TFn fn) + template + void erase_if(Fn && fn) { iterator b = begin(); iterator e = end(); - iterator i = std::remove_if(b, e, fn); + iterator i = std::remove_if(b, e, std::forward(fn)); if (i != e) resize(std::distance(b, i)); } diff --git a/generator/search_index_builder.cpp b/generator/search_index_builder.cpp index d5829e657f..b442c23423 100644 --- a/generator/search_index_builder.cpp +++ b/generator/search_index_builder.cpp @@ -178,7 +178,8 @@ struct FeatureNameInserter }); } - int const maxTokensCount = search::MAX_TOKENS - 1; + CHECK_GREATER(search::kMaxNumTokens, 0, ()); + size_t const maxTokensCount = search::kMaxNumTokens - 1; if (tokens.size() > maxTokensCount) { LOG(LWARNING, ("Name has too many tokens:", name)); diff --git a/search/common.hpp b/search/common.hpp index 41be032f6a..5a218fdd56 100644 --- a/search/common.hpp +++ b/search/common.hpp @@ -6,6 +6,8 @@ #include "base/small_set.hpp" #include "base/string_utils.hpp" +#include + namespace search { // The prefix is stored separately. @@ -17,6 +19,6 @@ using Locales = ::base::SafeSmallSet(CategoriesHolder::kMaxSupportedLocaleIndex) + 1>; /// Upper bound for max count of tokens for indexing and scoring. -int constexpr MAX_TOKENS = 32; -int constexpr MAX_SUGGESTS_COUNT = 5; +size_t constexpr kMaxNumTokens = 32; +size_t constexpr kMaxNumSuggests = 5; } // namespace search diff --git a/search/keyword_matcher.cpp b/search/keyword_matcher.cpp index 3c344fe1a3..68d20980f0 100644 --- a/search/keyword_matcher.cpp +++ b/search/keyword_matcher.cpp @@ -39,7 +39,7 @@ KeywordMatcher::Score KeywordMatcher::CalcScore(string const & name) const KeywordMatcher::Score KeywordMatcher::CalcScore(strings::UniString const & name) const { - buffer_vector tokens; + buffer_vector tokens; SplitUniString(name, MakeBackInsertFunctor(tokens), Delimiters()); return CalcScore(tokens.data(), tokens.size()); @@ -49,7 +49,7 @@ KeywordMatcher::Score KeywordMatcher::CalcScore(strings::UniString const * token size_t count) const { // Some names can have too many tokens. Trim them. - count = min(count, static_cast(MAX_TOKENS)); + count = min(count, kMaxNumTokens); vector isQueryTokenMatched(m_keywords.size()); vector isNameTokenMatched(count); @@ -103,7 +103,7 @@ KeywordMatcher::Score KeywordMatcher::CalcScore(strings::UniString const * token for (size_t i = 0; i < count; ++i) { if (isNameTokenMatched[i]) - score.m_nameTokensMatched |= (1 << (MAX_TOKENS-1 - i)); + score.m_nameTokensMatched |= (1 << (kMaxNumTokens - 1 - i)); score.m_nameTokensLength += tokens[i].size(); } @@ -164,7 +164,7 @@ string DebugPrint(KeywordMatcher::Score const & score) out << ",nQTM=" << static_cast(score.m_numQueryTokensAndPrefixMatched); out << ",PM=" << score.m_prefixMatched; out << ",NTM="; - for (int i = MAX_TOKENS-1; i >= 0; --i) + for (int i = static_cast(kMaxNumTokens) - 1; i >= 0; --i) out << ((score.m_nameTokensMatched >> i) & 1); out << ",STMD=" << score.m_sumTokenMatchDistance; out << ")"; diff --git a/search/processor.cpp b/search/processor.cpp index cfa7e50d88..79d32c70d3 100644 --- a/search/processor.cpp +++ b/search/processor.cpp @@ -118,40 +118,21 @@ void SendStatistics(SearchParams const & params, m2::RectD const & viewport, Res GetPlatform().GetMarketingService().SendMarketingEvent(marketing::kSearchEmitResultsAndCoords, {}); } -// Removes all full-token stop words from |params|. -// Does nothing if all tokens in |params| are non-prefix stop words. -void RemoveStopWordsIfNeeded(QueryParams & params) +// Removes all full-token stop words from |tokens|. +// Does nothing if all tokens in are non-prefix stop words. +void RemoveStopWordsIfNeeded(QueryTokens & tokens, strings::UniString & prefix) { size_t numStopWords = 0; - for (size_t i = 0; i < params.GetNumTokens(); ++i) + for (auto const & token : tokens) { - auto & token = params.GetToken(i); - if (!params.IsPrefixToken(i) && IsStopWord(token.m_original)) + if (IsStopWord(token)) ++numStopWords; } - if (numStopWords == params.GetNumTokens()) + if (numStopWords == tokens.size() && prefix.empty()) return; - for (size_t i = 0; i < params.GetNumTokens();) - { - if (params.IsPrefixToken(i)) - { - ++i; - continue; - } - - auto & token = params.GetToken(i); - if (IsStopWord(token.m_original)) - { - params.RemoveToken(i); - } - else - { - my::EraseIf(token.m_synonyms, &IsStopWord); - ++i; - } - } + tokens.erase_if(&IsStopWord); } } // namespace @@ -271,16 +252,23 @@ void Processor::SetQuery(string const & query) } } - // Assign prefix with last parsed token. - if (!m_tokens.empty() && !delims(strings::LastUniChar(query))) + size_t const maxTokensCount = kMaxNumTokens - 1; + ASSERT_GREATER(maxTokensCount, 0, ()); + if (m_tokens.size() > maxTokensCount) { - m_prefix.swap(m_tokens.back()); - m_tokens.pop_back(); + m_tokens.resize(maxTokensCount); + } + else + { + // Assign the last parsed token to prefix. + if (!m_tokens.empty() && !delims(strings::LastUniChar(query))) + { + m_prefix.swap(m_tokens.back()); + m_tokens.pop_back(); + } } - int const maxTokensCount = MAX_TOKENS - 1; - if (m_tokens.size() > maxTokensCount) - m_tokens.resize(maxTokensCount); + RemoveStopWordsIfNeeded(m_tokens, m_prefix); // Assign tokens and prefix to scorer. m_keywordsScorer.SetKeywords(m_tokens.data(), m_tokens.size(), m_prefix); @@ -440,11 +428,9 @@ void Processor::InitParams(QueryParams & params) else params.InitWithPrefix(m_tokens.begin(), m_tokens.end(), m_prefix); - RemoveStopWordsIfNeeded(params); - // Add names of categories (and synonyms). Classificator const & c = classif(); - auto addSynonyms = [&](size_t i, uint32_t t) { + auto addCategorySynonyms = [&](size_t i, uint32_t t) { uint32_t const index = c.GetIndexForType(t); params.GetTypeIndices(i).push_back(index); }; @@ -454,12 +440,12 @@ void Processor::InitParams(QueryParams & params) params.SetCategorialRequest(isCategorialRequest); if (isCategorialRequest) { - ForEachCategoryType(tokenSlice, addSynonyms); + ForEachCategoryType(tokenSlice, addCategorySynonyms); } else { // todo(@m, @y). Shall we match prefix tokens for categories? - ForEachCategoryTypeFuzzy(tokenSlice, addSynonyms); + ForEachCategoryTypeFuzzy(tokenSlice, addCategorySynonyms); } // Remove all type indices for streets, as they're considired diff --git a/search/query_params.cpp b/search/query_params.cpp index dec6d4ead4..c3016a2334 100644 --- a/search/query_params.cpp +++ b/search/query_params.cpp @@ -1,10 +1,13 @@ #include "search/query_params.hpp" +#include "search/ranking_utils.hpp" #include "search/token_range.hpp" #include "indexer/feature_impl.hpp" -#include "std/algorithm.hpp" +#include + +using namespace std; namespace search { @@ -48,6 +51,27 @@ private: }; } // namespace +// QueryParams::Token ------------------------------------------------------------------------------ +void QueryParams::Token::AddSynonym(std::string const & s) +{ + AddSynonym(strings::MakeUniString(s)); +} + +void QueryParams::Token::AddSynonym(String const & s) +{ + if (!IsStopWord(s)) + m_synonyms.push_back(s); +} + +string DebugPrint(QueryParams::Token const & token) +{ + ostringstream os; + os << "Token [ m_original=" << DebugPrint(token.m_original) + << ", m_synonyms=" << DebugPrint(token.m_synonyms) << " ]"; + return os.str(); +} + +// QueryParams ------------------------------------------------------------------------------------- void QueryParams::Clear() { m_tokens.clear(); @@ -138,12 +162,4 @@ string DebugPrint(QueryParams const & params) << ", m_langs=" << DebugPrint(params.m_langs) << " ]"; return os.str(); } - -string DebugPrint(QueryParams::Token const & token) -{ - ostringstream os; - os << "Token [ m_original=" << DebugPrint(token.m_original) - << ", m_synonyms=" << DebugPrint(token.m_synonyms) << " ]"; - return os.str(); -} } // namespace search diff --git a/search/query_params.hpp b/search/query_params.hpp index 9780fc6464..6a2dbdff09 100644 --- a/search/query_params.hpp +++ b/search/query_params.hpp @@ -8,11 +8,10 @@ #include "base/small_set.hpp" #include "base/string_utils.hpp" -#include "std/cstdint.hpp" -#include "std/type_traits.hpp" -#include "std/unordered_set.hpp" -#include "std/utility.hpp" -#include "std/vector.hpp" +#include +#include +#include +#include namespace search { @@ -22,7 +21,7 @@ class QueryParams { public: using String = strings::UniString; - using TypeIndices = vector; + using TypeIndices = std::vector; using Langs = ::base::SafeSmallSet; struct Token @@ -30,8 +29,8 @@ public: Token() = default; Token(String const & original) : m_original(original) {} - void AddSynonym(String const & s) { m_synonyms.push_back(s); } - void AddSynonym(string const & s) { m_synonyms.push_back(strings::MakeUniString(s)); } + void AddSynonym(std::string const & s); + void AddSynonym(String const & s); // Calls |fn| on the original token and on synonyms. template @@ -39,7 +38,7 @@ public: Fn && fn) const { fn(m_original); - for_each(m_synonyms.begin(), m_synonyms.end(), forward(fn)); + for_each(m_synonyms.begin(), m_synonyms.end(), std::forward(fn)); } // Calls |fn| on the original token and on synonyms until |fn| return false. @@ -63,7 +62,7 @@ public: } String m_original; - vector m_synonyms; + std::vector m_synonyms; }; QueryParams() = default; @@ -82,7 +81,7 @@ public: { Clear(); for (; tokenBegin != tokenEnd; ++tokenBegin) - m_tokens.push_back(*tokenBegin); + m_tokens.emplace_back(*tokenBegin); m_prefixToken.m_original = prefix; m_hasPrefix = true; m_typeIndices.resize(GetNumTokens()); @@ -121,18 +120,18 @@ public: inline int GetScale() const { return m_scale; } private: - friend string DebugPrint(QueryParams const & params); + friend std::string DebugPrint(QueryParams const & params); - vector m_tokens; + std::vector m_tokens; Token m_prefixToken; bool m_hasPrefix = false; bool m_isCategorialRequest = false; - vector m_typeIndices; + std::vector m_typeIndices; Langs m_langs; int m_scale = scales::GetUpperScale(); }; -string DebugPrint(QueryParams::Token const & token); +std::string DebugPrint(QueryParams::Token const & token); } // namespace search diff --git a/search/ranker.cpp b/search/ranker.cpp index f38fa2694b..6a0b808f0a 100644 --- a/search/ranker.cpp +++ b/search/ranker.cpp @@ -550,7 +550,7 @@ void Ranker::ProcessSuggestions(vector & vec) const if (m_params.m_prefix.empty() || !m_params.m_suggestsEnabled) return; - int added = 0; + size_t added = 0; for (auto i = vec.begin(); i != vec.end();) { RankerResult const & r = *i; @@ -560,12 +560,14 @@ void Ranker::ProcessSuggestions(vector & vec) const { string suggestion; GetSuggestion(r, m_params.m_query, m_params.m_tokens, m_params.m_prefix, suggestion); - if (!suggestion.empty() && added < MAX_SUGGESTS_COUNT) + if (!suggestion.empty() && added < kMaxNumSuggests) { // todo(@m) RankingInfo is lost here. Should it be? if (m_emitter.AddResult(Result( MakeResult(r, false /* needAddress */, true /* needHighlighting */), suggestion))) + { ++added; + } i = vec.erase(i); continue; diff --git a/search/ranking_utils.cpp b/search/ranking_utils.cpp index 0e8d1c8b52..565588ca06 100644 --- a/search/ranking_utils.cpp +++ b/search/ranking_utils.cpp @@ -114,7 +114,7 @@ bool IsStopWord(UniString const & s) { /// @todo Get all common used stop words and factor out this array into /// search_string_utils.cpp module for example. - static char const * arr[] = {"a", "de", "da", "la"}; + static char const * arr[] = {"a", "de", "da", "la", "le"}; static set const kStopWords( make_transform_iterator(arr, &AsciiToUniString), diff --git a/search/result.cpp b/search/result.cpp index 728ab43a45..810301f729 100644 --- a/search/result.cpp +++ b/search/result.cpp @@ -162,7 +162,7 @@ bool Results::AddResult(Result && result) if (result.IsSuggest()) { - if (distance(m_results.begin(), it) >= MAX_SUGGESTS_COUNT) + if (distance(m_results.begin(), it) >= kMaxNumSuggests) return false; for (auto i = m_results.begin(); i != it; ++i) diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp index a636503a1a..f892b5ac42 100644 --- a/search/search_integration_tests/processor_test.cpp +++ b/search/search_integration_tests/processor_test.cpp @@ -992,12 +992,18 @@ UNIT_CLASS_TEST(ProcessorTest, StopWords) vector{m2::PointD(-0.001, -0.001), m2::PointD(0, 0), m2::PointD(0.001, 0.001)}, "Rue de la Paix", "en"); + TestPOI bakery(m2::PointD(0.0, 0.0), "" /* name */, "en"); + bakery.SetTypes({{"shop", "bakery"}}); + BuildWorld([&](TestMwmBuilder & builder) { builder.Add(country); builder.Add(city); }); - auto id = BuildCountry(country.GetName(), [&](TestMwmBuilder & builder) { builder.Add(street); }); + auto id = BuildCountry(country.GetName(), [&](TestMwmBuilder & builder) { + builder.Add(street); + builder.Add(bakery); + }); { auto request = MakeRequest("la France à Paris Rue de la Paix"); @@ -1010,6 +1016,18 @@ UNIT_CLASS_TEST(ProcessorTest, StopWords) auto const & info = results[0].GetRankingInfo(); TEST_EQUAL(info.m_nameScore, NAME_SCORE_FULL_MATCH, ()); } + + { + TRules rules = {ExactMatch(id, bakery)}; + + TEST(ResultsMatch("la boulangerie ", "fr", rules), ()); + } + + { + TEST(ResultsMatch("la motviderie ", "fr", TRules{}), ()); + TEST(ResultsMatch("la la le la la la ", "fr", {ExactMatch(id, street)}), ()); + TEST(ResultsMatch("la la le la la la", "fr", TRules{}), ()); + } } UNIT_CLASS_TEST(ProcessorTest, Numerals) diff --git a/search/search_tests/keyword_matcher_test.cpp b/search/search_tests/keyword_matcher_test.cpp index 5414223d2b..d26c8fc051 100644 --- a/search/search_tests/keyword_matcher_test.cpp +++ b/search/search_tests/keyword_matcher_test.cpp @@ -16,7 +16,7 @@ namespace { using search::KeywordMatcher; -using search::MAX_TOKENS; +using search::kMaxNumTokens; enum ExpectedMatchResult { @@ -259,7 +259,7 @@ string GetManyTokens(string tokenPrefix, int tokenCount, bool countForward = tru UNIT_TEST(KeywordMatcher_QueryTooLong) { - for (int queryLength = MAX_TOKENS - 2; queryLength <= MAX_TOKENS + 2; ++queryLength) + for (int queryLength = kMaxNumTokens - 2; queryLength <= kMaxNumTokens + 2; ++queryLength) { string const query = GetManyTokens("Q", queryLength); string const queryWithPrefix = query + " Prefix"; @@ -292,11 +292,10 @@ UNIT_TEST(KeywordMatcher_QueryTooLong) UNIT_TEST(KeywordMatcher_NameTooLong) { - string const name[] = - { - "Aa Bb " + GetManyTokens("T", MAX_TOKENS + 1), - "Aa Bb " + GetManyTokens("T", MAX_TOKENS), - "Aa Bb " + GetManyTokens("T", MAX_TOKENS - 1), + string const name[] = { + "Aa Bb " + GetManyTokens("T", kMaxNumTokens + 1), + "Aa Bb " + GetManyTokens("T", kMaxNumTokens), + "Aa Bb " + GetManyTokens("T", kMaxNumTokens - 1), }; KeywordMatcherTestCase const testCases[] = @@ -315,9 +314,9 @@ UNIT_TEST(KeywordMatcher_NameTooLong) UNIT_TEST(KeywordMatcher_ManyTokensInReverseOrder) { - string const query = GetManyTokens("Q", MAX_TOKENS); - string const name = GetManyTokens("Q", MAX_TOKENS); - string const reversedName = GetManyTokens("Q", MAX_TOKENS, false); + string const query = GetManyTokens("Q", kMaxNumTokens); + string const name = GetManyTokens("Q", kMaxNumTokens); + string const reversedName = GetManyTokens("Q", kMaxNumTokens, false); KeywordMatcherTestCase const testCases[] = { diff --git a/search/token_range.hpp b/search/token_range.hpp index 0de51db8d7..03865fca5f 100644 --- a/search/token_range.hpp +++ b/search/token_range.hpp @@ -19,8 +19,8 @@ public: : m_begin(static_cast(begin)), m_end(static_cast(end)) { - ASSERT_LESS_OR_EQUAL(begin, MAX_TOKENS, ()); - ASSERT_LESS_OR_EQUAL(end, MAX_TOKENS, ()); + ASSERT_LESS_OR_EQUAL(begin, kMaxNumTokens, ()); + ASSERT_LESS_OR_EQUAL(end, kMaxNumTokens, ()); ASSERT(IsValid(), (*this)); }