diff --git a/indexer/search_string_utils.cpp b/indexer/search_string_utils.cpp
index 736542a189..1525c0caff 100644
--- a/indexer/search_string_utils.cpp
+++ b/indexer/search_string_utils.cpp
@@ -55,12 +55,26 @@ size_t GetMaxErrorsForToken(strings::UniString const & token)
 
 strings::LevenshteinDFA BuildLevenshteinDFA(strings::UniString const & s)
 {
+  ASSERT(!s.empty(), ());
   // In search we use LevenshteinDFAs for fuzzy matching. But due to
   // performance reasons, we limit prefix misprints to fixed set of substitutions defined in
   // kAllowedMisprints and skipped letters.
   return strings::LevenshteinDFA(s, 1 /* prefixSize */, kAllowedMisprints, GetMaxErrorsForToken(s));
 }
 
+strings::LevenshteinDFA BuildLevenshteinDFA_Category(strings::UniString const & s)
+{
+  // https://github.com/organicmaps/organicmaps/issues/3655
+  // Separate DFA for categories (token's length <= 4 means no errors allowed) to avoid fancy matchings like:
+  // cafe <-> care
+  // ecco -> eco
+  // shop <-> shoe
+  /// @todo "hote" doesn't match "hotel" now. Should allow _adding_ symbols when size == 4.
+
+  ASSERT(!s.empty(), ());
+  return strings::LevenshteinDFA(s, 1 /* prefixSize */, kAllowedMisprints, GetMaxErrorsForTokenLength(s.size() - 1));
+}
+
 UniString NormalizeAndSimplifyString(string_view s)
 {
   UniString uniString = MakeUniString(s);
diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp
index 36ec0f0850..eebcf87b1f 100644
--- a/indexer/search_string_utils.hpp
+++ b/indexer/search_string_utils.hpp
@@ -22,6 +22,7 @@ inline constexpr size_t GetMaxErrorsForTokenLength(size_t length)
 size_t GetMaxErrorsForToken(strings::UniString const & token);
 
 strings::LevenshteinDFA BuildLevenshteinDFA(strings::UniString const & s);
+strings::LevenshteinDFA BuildLevenshteinDFA_Category(strings::UniString const & s);
 
 // This function should be used for all search strings normalization.
 // It does some magic text transformation which greatly helps us to improve our search.
diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp
index ba093401e2..89ee90a425 100644
--- a/search/search_integration_tests/processor_test.cpp
+++ b/search/search_integration_tests/processor_test.cpp
@@ -917,7 +917,8 @@ UNIT_CLASS_TEST(ProcessorTest, TestCategorialSearch)
   }
 
   {
-    Rules const rules = {ExactMatch(wonderlandId, hotel1), ExactMatch(wonderlandId, hotel2),
+    /// @todo We updated fuzzy match for categories: hote -> hotel is not matched now (4 letters input token).
+    Rules const rules = {/*ExactMatch(wonderlandId, hotel1),*/ ExactMatch(wonderlandId, hotel2),
                          ExactMatch(wonderlandId, hotelCafe), ExactMatch(testWorldId, homel),
                          ExactMatch(wonderlandId, hotelDeVille)};
     // A prefix token.
@@ -3207,4 +3208,45 @@ UNIT_CLASS_TEST(ProcessorTest, Place_Region)
   TEST(ResultsMatch("carth", rules, "en"), ());
 }
 
+UNIT_CLASS_TEST(ProcessorTest, FuzzyCategories)
+{
+  TestPOI cafe({0, 0.01}, "xxx", "en");
+  cafe.SetTypes({{"amenity", "cafe"}});
+
+  TestPOI cosmetics({0, 0.02}, "yyy", "en");
+  cosmetics.SetTypes({{"shop", "cosmetics"}});
+
+  TestPOI shoes({0, 0.03}, "ecco", "en");
+  shoes.SetTypes({{"shop", "shoes"}});
+
+  TestPOI organic({0, 0.04}, "zzz", "en");
+  organic.SetTypes({{"shop", "grocery"}, {"organic", "yes"}});
+
+  auto wonderlandId = BuildCountry("Wonderland", [&](TestMwmBuilder & builder)
+  {
+    builder.Add(cafe);
+    builder.Add(cosmetics);
+    builder.Add(shoes);
+    builder.Add(organic);
+  });
+
+  SetViewport(m2::RectD(-0.5, -0.5, 0.5, 0.5));
+
+  {
+    Rules const rules = {ExactMatch(wonderlandId, cafe)};
+    TEST(ResultsMatch("cafe", rules), ());
+  }
+
+  {
+    Rules const rules = {ExactMatch(wonderlandId, shoes)};
+    TEST(ResultsMatch("shoe", rules), ());
+    TEST(ResultsMatch("shoes", rules), ());
+  }
+
+  {
+    Rules const rules = {ExactMatch(wonderlandId, shoes)};
+    TEST(ResultsMatch("ecco", rules), ());
+  }
+}
+
 }  // namespace processor_test
diff --git a/search/utils.hpp b/search/utils.hpp
index ed623db72f..cabfcf71f4 100644
--- a/search/utils.hpp
+++ b/search/utils.hpp
@@ -63,7 +63,7 @@ void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, Locales const & loc
     // A possible optimization is to build each dfa once and save it. Note that
     // dfas for the prefix tokens differ, i.e. we ignore slice.IsPrefix(i) here.
     SearchTrieRequest<strings::LevenshteinDFA> request;
-    request.m_names.push_back(BuildLevenshteinDFA(slice.Get(i)));
+    request.m_names.push_back(BuildLevenshteinDFA_Category(slice.Get(i)));
     request.SetLangs(locales);
 
     MatchFeaturesInTrie(request, iterator,
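
Note on the behavior change (not part of the patch itself): BuildLevenshteinDFA_Category feeds GetMaxErrorsForTokenLength(s.size() - 1) into the DFA instead of GetMaxErrorsForToken(s), so a category token gets the error budget of a token one character shorter. The snippet below is a minimal standalone sketch of that shift, not the actual organicmaps code: the thresholds inside GetMaxErrorsForTokenLength are assumptions inferred from the "token's length <= 4 means no errors allowed" comment, and the generic builder is assumed to reduce to GetMaxErrorsForTokenLength(token.size()) for ordinary words.

// Hypothetical sketch only; the thresholds below are assumptions, not the real implementation.
#include <cstddef>
#include <iostream>
#include <string>

constexpr std::size_t GetMaxErrorsForTokenLength(std::size_t length)
{
  // Assumed: very short tokens allow no misprints, mid-size tokens allow one, long tokens two.
  if (length < 4)
    return 0;
  if (length < 8)
    return 1;
  return 2;
}

int main()
{
  std::string const token = "cafe";  // a 4-letter category token
  // Generic DFA (BuildLevenshteinDFA): budget taken at the full token length -> 1 error,
  // which is what previously let "cafe" fuzzy-match "care".
  std::cout << "generic budget:  " << GetMaxErrorsForTokenLength(token.size()) << '\n';
  // Category DFA (BuildLevenshteinDFA_Category): budget shifted down by one, so a 4-letter
  // token is treated like a 3-letter one -> 0 errors, and "cafe" no longer matches "care".
  std::cout << "category budget: " << GetMaxErrorsForTokenLength(token.size() - 1) << '\n';
}

The same shift explains the commented-out hotel1 expectation in TestCategorialSearch: the 4-letter prefix "hote" now gets a zero-error DFA and no longer reaches "hotel", which is the @todo left in BuildLevenshteinDFA_Category.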