diff --git a/base/mem_trie.hpp b/base/mem_trie.hpp index 1238ad951d..f70bbf9242 100644 --- a/base/mem_trie.hpp +++ b/base/mem_trie.hpp @@ -4,11 +4,9 @@ #include "base/macros.hpp" #include -#include -#include #include #include -#include +#include #include namespace base @@ -221,7 +219,7 @@ public: template void ForEachInNode(ToDo && toDo) const { - m_node.m_values.ForEach(std::forward(toDo)); + m_node.m_values.ForEach(toDo); } String GetLabel() const { return m_node.m_edge.template As(); } @@ -291,7 +289,7 @@ public: void ForEachInTrie(ToDo && toDo) const { String prefix; - ForEachInSubtree(m_root, prefix, std::forward(toDo)); + ForEachInSubtree(m_root, prefix, toDo); } // Calls |toDo| for each key-value pair in the node that is reachable @@ -302,7 +300,7 @@ public: { MoveTo(prefix, true /* fullMatch */, [&](Node const & node, Edge const & /* edge */, size_t /* offset */) { - node.m_values.ForEach(std::forward(toDo)); + node.m_values.ForEach(toDo); }); } @@ -323,7 +321,7 @@ public: String p = prefix; for (; offset < edge.Size(); ++offset) p.push_back(edge[offset]); - ForEachInSubtree(node, p, std::forward(toDo)); + ForEachInSubtree(node, p, toDo); }); } diff --git a/indexer/categories_holder.hpp b/indexer/categories_holder.hpp index 88ac3ea98e..111e54d489 100644 --- a/indexer/categories_holder.hpp +++ b/indexer/categories_holder.hpp @@ -1,18 +1,13 @@ #pragma once #include "base/mem_trie.hpp" -#include "base/stl_helpers.hpp" #include "base/string_utils.hpp" #include #include -#include -#include -#include #include #include #include -#include #include class Reader; @@ -173,7 +168,7 @@ public: void ForEachTypeByName(int8_t locale, strings::UniString const & name, ToDo && toDo) const { auto const localePrefix = strings::UniString(1, static_cast(locale)); - m_name2type.ForEachInNode(localePrefix + name, std::forward(toDo)); + m_name2type.ForEachInNode(localePrefix + name, toDo); } GroupTranslations const & GetGroupTranslations() const { return m_groupTranslations; } diff --git a/indexer/search_string_utils.cpp b/indexer/search_string_utils.cpp index 736542a189..1525c0caff 100644 --- a/indexer/search_string_utils.cpp +++ b/indexer/search_string_utils.cpp @@ -55,12 +55,26 @@ size_t GetMaxErrorsForToken(strings::UniString const & token) strings::LevenshteinDFA BuildLevenshteinDFA(strings::UniString const & s) { + ASSERT(!s.empty(), ()); // In search we use LevenshteinDFAs for fuzzy matching. But due to // performance reasons, we limit prefix misprints to fixed set of substitutions defined in // kAllowedMisprints and skipped letters. return strings::LevenshteinDFA(s, 1 /* prefixSize */, kAllowedMisprints, GetMaxErrorsForToken(s)); } +strings::LevenshteinDFA BuildLevenshteinDFA_Category(strings::UniString const & s) +{ + // https://github.com/organicmaps/organicmaps/issues/3655 + // Separate DFA for categories (token's length <= 4 means no errors allowed) to avoid fancy matchings like: + // cafe <-> care + // ecco -> eco + // shop <-> shoe + /// @todo "hote" doesn't match "hotel" now. Should allow _adding_ symbols when size == 4. + + ASSERT(!s.empty(), ()); + return strings::LevenshteinDFA(s, 1 /* prefixSize */, kAllowedMisprints, GetMaxErrorsForTokenLength(s.size() - 1)); +} + UniString NormalizeAndSimplifyString(string_view s) { UniString uniString = MakeUniString(s); diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp index 36ec0f0850..eebcf87b1f 100644 --- a/indexer/search_string_utils.hpp +++ b/indexer/search_string_utils.hpp @@ -22,6 +22,7 @@ inline constexpr size_t GetMaxErrorsForTokenLength(size_t length) size_t GetMaxErrorsForToken(strings::UniString const & token); strings::LevenshteinDFA BuildLevenshteinDFA(strings::UniString const & s); +strings::LevenshteinDFA BuildLevenshteinDFA_Category(strings::UniString const & s); // This function should be used for all search strings normalization. // It does some magic text transformation which greatly helps us to improve our search. diff --git a/search/processor.cpp b/search/processor.cpp index d10e05b2f5..4e3690e1b4 100644 --- a/search/processor.cpp +++ b/search/processor.cpp @@ -494,14 +494,13 @@ Locales Processor::GetCategoryLocales() const template void Processor::ForEachCategoryType(StringSliceBase const & slice, ToDo && toDo) const { - ::search::ForEachCategoryType(slice, GetCategoryLocales(), m_categories, forward(toDo)); + ::search::ForEachCategoryType(slice, GetCategoryLocales(), m_categories, toDo); } template void Processor::ForEachCategoryTypeFuzzy(StringSliceBase const & slice, ToDo && toDo) const { - ::search::ForEachCategoryTypeFuzzy(slice, GetCategoryLocales(), m_categories, - forward(toDo)); + ::search::ForEachCategoryTypeFuzzy(slice, GetCategoryLocales(), m_categories, toDo); } void Processor::Search(SearchParams params) diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp index ba093401e2..89ee90a425 100644 --- a/search/search_integration_tests/processor_test.cpp +++ b/search/search_integration_tests/processor_test.cpp @@ -917,7 +917,8 @@ UNIT_CLASS_TEST(ProcessorTest, TestCategorialSearch) } { - Rules const rules = {ExactMatch(wonderlandId, hotel1), ExactMatch(wonderlandId, hotel2), + /// @todo We updated fuzzy match for categories: hote -> hotel is not matched now (4 letters input token). + Rules const rules = {/*ExactMatch(wonderlandId, hotel1),*/ ExactMatch(wonderlandId, hotel2), ExactMatch(wonderlandId, hotelCafe), ExactMatch(testWorldId, homel), ExactMatch(wonderlandId, hotelDeVille)}; // A prefix token. @@ -3207,4 +3208,45 @@ UNIT_CLASS_TEST(ProcessorTest, Place_Region) TEST(ResultsMatch("carth", rules, "en"), ()); } +UNIT_CLASS_TEST(ProcessorTest, FuzzyCategories) +{ + TestPOI cafe({0, 0.01}, "xxx", "en"); + cafe.SetTypes({{"amenity", "cafe"}}); + + TestPOI cosmetics({0, 0.02}, "yyy", "en"); + cosmetics.SetTypes({{"shop", "cosmetics"}}); + + TestPOI shoes({0, 0.03}, "ecco", "en"); + shoes.SetTypes({{"shop", "shoes"}}); + + TestPOI organic({0, 0.04}, "zzz", "en"); + organic.SetTypes({{"shop", "grocery"}, {"organic", "yes"}}); + + auto wonderlandId = BuildCountry("Wonderland", [&](TestMwmBuilder & builder) + { + builder.Add(cafe); + builder.Add(cosmetics); + builder.Add(shoes); + builder.Add(organic); + }); + + SetViewport(m2::RectD(-0.5, -0.5, 0.5, 0.5)); + + { + Rules const rules = {ExactMatch(wonderlandId, cafe)}; + TEST(ResultsMatch("cafe", rules), ()); + } + + { + Rules const rules = {ExactMatch(wonderlandId, shoes)}; + TEST(ResultsMatch("shoe", rules), ()); + TEST(ResultsMatch("shoes", rules), ()); + } + + { + Rules const rules = {ExactMatch(wonderlandId, shoes)}; + TEST(ResultsMatch("ecco", rules), ()); + } +} + } // namespace processor_test diff --git a/search/utils.hpp b/search/utils.hpp index a7421a24c0..cabfcf71f4 100644 --- a/search/utils.hpp +++ b/search/utils.hpp @@ -6,19 +6,14 @@ #include "indexer/categories_holder.hpp" #include "indexer/feature_decl.hpp" -#include "indexer/mwm_set.hpp" -#include "indexer/search_delimiters.hpp" #include "indexer/search_string_utils.hpp" #include "indexer/trie.hpp" #include "geometry/rect2d.hpp" #include "base/levenshtein_dfa.hpp" -#include "base/stl_helpers.hpp" #include "base/string_utils.hpp" -#include -#include #include #include #include @@ -36,14 +31,14 @@ void ForEachCategoryType(StringSliceBase const & slice, Locales const & locales, { auto const & token = slice.Get(i); for (int8_t const locale : locales) - categories.ForEachTypeByName(locale, token, std::bind(todo, i, std::placeholders::_1)); + categories.ForEachTypeByName(locale, token, [&todo, i](uint32_t type) { todo(i, type); }); // Special case processing of 2 codepoints emoji (e.g. black guy on a bike). // Only emoji synonyms can have one codepoint. if (token.size() > 1) { categories.ForEachTypeByName(CategoriesHolder::kEnglishCode, strings::UniString(1, token[0]), - std::bind(todo, i, std::placeholders::_1)); + [&todo, i](uint32_t type) { todo(i, type); }); } } } @@ -68,11 +63,12 @@ void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, Locales const & loc // A possible optimization is to build each dfa once and save it. Note that // dfas for the prefix tokens differ, i.e. we ignore slice.IsPrefix(i) here. SearchTrieRequest request; - request.m_names.push_back(BuildLevenshteinDFA(slice.Get(i))); + request.m_names.push_back(BuildLevenshteinDFA_Category(slice.Get(i))); request.SetLangs(locales); - MatchFeaturesInTrie(request, iterator, [&](uint32_t /* type */) { return true; } /* filter */, - std::bind(todo, i, std::placeholders::_1)); + MatchFeaturesInTrie(request, iterator, + [](uint32_t) { return true; } /* filter */, + [&todo, i](uint32_t type, bool) { todo(i, type); } /* todo */); } }