diff --git a/search/categories_cache.cpp b/search/categories_cache.cpp index 507dddca87..87095488d9 100644 --- a/search/categories_cache.cpp +++ b/search/categories_cache.cpp @@ -31,6 +31,27 @@ CBV CategoriesCache::Get(MwmContext const & context) return cbv; } +CBV CategoriesCache::GetFuzzy(MwmContext const & context) +{ + if (!context.m_handle.IsAlive() || !context.m_value.HasSearchIndex()) + return CBV(); + + auto id = context.m_handle.GetId(); + auto const it = m_cacheFuzzy.find(id); + if (it != m_cacheFuzzy.cend()) + return it->second; + + auto cbv = LoadFuzzy(context); + m_cacheFuzzy[id] = cbv; + return cbv; +} + +void CategoriesCache::Clear() +{ + m_cacheFuzzy.clear(); + m_cache.clear(); +} + CBV CategoriesCache::Load(MwmContext const & context) { ASSERT(context.m_handle.IsAlive(), ()); @@ -38,6 +59,23 @@ CBV CategoriesCache::Load(MwmContext const & context) auto const & c = classif(); + SearchTrieRequest request; + + m_categories.ForEach([&request, &c](uint32_t const type) { + request.m_categories.emplace_back(FeatureTypeToString(c.GetIndexForType(type))); + }); + + Retrieval retrieval(context, m_cancellable); + return CBV(retrieval.RetrieveAddressFeatures(request)); +} + +CBV CategoriesCache::LoadFuzzy(MwmContext const & context) +{ + ASSERT(context.m_handle.IsAlive(), ()); + ASSERT(context.m_value.HasSearchIndex(), ()); + + auto const & c = classif(); + SearchTrieRequest request; m_categories.ForEach([&request, &c](uint32_t const type) { @@ -45,7 +83,7 @@ CBV CategoriesCache::Load(MwmContext const & context) }); Retrieval retrieval(context, m_cancellable); - return CBV(retrieval.RetrieveAddressFeatures(request)); + return CBV(retrieval.RetrieveAddressFeaturesFuzzy(request)); } // StreetsCache ------------------------------------------------------------------------------------ diff --git a/search/categories_cache.hpp b/search/categories_cache.hpp index 64049e9a0c..feafa56a56 100644 --- a/search/categories_cache.hpp +++ b/search/categories_cache.hpp @@ -8,6 
+8,7 @@ #include "base/cancellable.hpp" #include "std/map.hpp" +#include "std/set.hpp" namespace search { @@ -23,18 +24,28 @@ public: source.ForEachType([this](uint32_t type) { m_categories.Add(type); }); } + CategoriesCache(set types, my::Cancellable const & cancellable) + : m_cancellable(cancellable) + { + for (uint32_t type : types) + m_categories.Add(type); + } + virtual ~CategoriesCache() = default; CBV Get(MwmContext const & context); + CBV GetFuzzy(MwmContext const & context); - inline void Clear() { m_cache.clear(); } + void Clear(); private: CBV Load(MwmContext const & context); + CBV LoadFuzzy(MwmContext const & context); CategoriesSet m_categories; my::Cancellable const & m_cancellable; map m_cache; + map m_cacheFuzzy; }; class StreetsCache : public CategoriesCache diff --git a/search/geocoder.cpp b/search/geocoder.cpp index 385bd4b8b0..5d6c8de8c9 100644 --- a/search/geocoder.cpp +++ b/search/geocoder.cpp @@ -219,7 +219,7 @@ MwmSet::MwmHandle FindWorld(Index const & index, vector> con double Area(m2::RectD const & rect) { return rect.IsValid() ? rect.SizeX() * rect.SizeY() : 0; } -// Computes an average similaty between |rect| and |pivot|. By +// Computes the average similarity between |rect| and |pivot|. By // similarity between two rects we mean a fraction of the area of // rects intersection to the area of the smallest rect. 
double GetSimilarity(m2::RectD const & pivot, m2::RectD const & rect) @@ -364,6 +364,12 @@ Geocoder::~Geocoder() {} void Geocoder::SetParams(Params const & params) { + if (params.IsCategorialRequest()) + { + SetParamsForCategorialSearch(params); + return; + } + m_params = params; m_model.SetCianEnabled(m_params.m_cianMode); @@ -434,6 +440,32 @@ void Geocoder::GoInViewport() GoImpl(infos, true /* inViewport */); } +void Geocoder::ClearCaches() +{ + m_pivotRectsCache.Clear(); + m_localityRectsCache.Clear(); + + m_matchersCache.clear(); + m_streetsCache.Clear(); + m_hotelsCache.Clear(); + m_hotelsFilter.ClearCaches(); + m_postcodes.Clear(); +} + +void Geocoder::SetParamsForCategorialSearch(Params const & params) +{ + m_params = params; + m_model.SetCianEnabled(m_params.m_cianMode); + + m_tokenRequests.clear(); + m_prefixTokenRequest.Clear(); + + ASSERT_EQUAL(m_params.GetNumTokens(), 1, ()); + ASSERT(!m_params.IsPrefixToken(0), ()); + + LOG(LDEBUG, (static_cast(m_params))); +} + void Geocoder::GoImpl(vector> & infos, bool inViewport) { // base::PProf pprof("/tmp/geocoder.prof"); @@ -515,7 +547,7 @@ void Geocoder::GoImpl(vector> & infos, bool inViewport) features = features.Intersect(viewportCBV); } - ctx.m_villages = m_villagesCache.Get(*m_context); + ctx.m_villages = m_villagesCache.GetFuzzy(*m_context); auto citiesFromWorld = m_cities; FillVillageLocalities(ctx); @@ -524,11 +556,17 @@ void Geocoder::GoImpl(vector> & infos, bool inViewport) m_cities = citiesFromWorld; }); + if (m_params.IsCategorialRequest()) + { + MatchCategories(ctx); + } + else + { + MatchRegions(ctx, Region::TYPE_COUNTRY); - MatchRegions(ctx, Region::TYPE_COUNTRY); - - if (index < numIntersectingMaps || m_preRanker.NumSentResults() == 0) - MatchAroundPivot(ctx); + if (index < numIntersectingMaps || m_preRanker.NumSentResults() == 0) + MatchAroundPivot(ctx); + } if (index + 1 >= numIntersectingMaps) m_preRanker.UpdateResults(false /* lastUpdate */); @@ -544,18 +582,6 @@ void 
Geocoder::GoImpl(vector> & infos, bool inViewport) } } -void Geocoder::ClearCaches() -{ - m_pivotRectsCache.Clear(); - m_localityRectsCache.Clear(); - - m_matchersCache.clear(); - m_streetsCache.Clear(); - m_hotelsCache.Clear(); - m_hotelsFilter.ClearCaches(); - m_postcodes.Clear(); -} - void Geocoder::InitBaseContext(BaseContext & ctx) { Retrieval retrieval(*m_context, m_cancellable); @@ -565,14 +591,22 @@ void Geocoder::InitBaseContext(BaseContext & ctx) ctx.m_features.resize(ctx.m_numTokens); for (size_t i = 0; i < ctx.m_features.size(); ++i) { - if (m_params.IsPrefixToken(i)) - ctx.m_features[i] = retrieval.RetrieveAddressFeatures(m_prefixTokenRequest); + if (m_params.IsCategorialRequest()) + { + // Implementation-wise, the simplest way to match a feature by + // its category bypassing the matching by name is by using a CategoriesCache. + CategoriesCache cache(m_params.m_preferredTypes, m_cancellable); + ctx.m_features[i] = cache.Get(*m_context); + } + else if (m_params.IsPrefixToken(i)) + ctx.m_features[i] = retrieval.RetrieveAddressFeaturesFuzzy(m_prefixTokenRequest); else - ctx.m_features[i] = retrieval.RetrieveAddressFeatures(m_tokenRequests[i]); + ctx.m_features[i] = retrieval.RetrieveAddressFeaturesFuzzy(m_tokenRequests[i]); if (m_params.m_cianMode) ctx.m_features[i] = DecimateCianResults(ctx.m_features[i]); } + ctx.m_hotelsFilter = m_hotelsFilter.MakeScopedFilter(*m_context, m_params.m_hotelsFilter); } @@ -591,6 +625,13 @@ void Geocoder::FillLocalityCandidates(BaseContext const & ctx, CBV const & filte size_t const maxNumLocalities, vector & preLocalities) { + // todo(@m) "food moscow" should be a valid categorial request. 
+ if (m_params.IsCategorialRequest()) + { + preLocalities.clear(); + return; + } + LocalityScorerDelegate delegate(*m_context, m_params); LocalityScorer scorer(m_params, delegate); scorer.GetTopLocalities(m_context->GetId(), ctx, filter, maxNumLocalities, preLocalities); @@ -735,6 +776,24 @@ void Geocoder::ForEachCountry(vector> const & infos, TFn && } } +void Geocoder::MatchCategories(BaseContext & ctx) +{ + auto emit = [&](uint64_t bit) { + auto const featureId = base::asserted_cast(bit); + Model::Type type; + if (!GetTypeInGeocoding(ctx, featureId, type)) + return; + + EmitResult(ctx, m_context->GetId(), featureId, type, TokenRange(0, 1), nullptr /* geoParts */); + }; + + // By now there's only one token and zero prefix tokens. + // Its features have been retrieved from the search index + // using the exact (non-fuzzy) matching and intersected + // with viewport, if needed. Every such feature is relevant. + ctx.m_features[0].ForEach(emit); +} + void Geocoder::MatchRegions(BaseContext & ctx, Region::Type type) { switch (type) @@ -871,7 +930,7 @@ void Geocoder::LimitedSearch(BaseContext & ctx, FeaturesFilter const & filter) }); if (!ctx.m_streets) - ctx.m_streets = m_streetsCache.Get(*m_context); + ctx.m_streets = m_streetsCache.GetFuzzy(*m_context); MatchUnclassified(ctx, 0 /* curToken */); diff --git a/search/geocoder.hpp b/search/geocoder.hpp index ea2fbaba8c..ae291eae4f 100644 --- a/search/geocoder.hpp +++ b/search/geocoder.hpp @@ -84,6 +84,7 @@ public: m2::RectD m_pivot; shared_ptr m_hotelsFilter; bool m_cianMode = false; + set m_preferredTypes; }; Geocoder(Index const & index, storage::CountryInfoGetter const & infoGetter, @@ -122,6 +123,9 @@ private: CBV m_features; }; + // Sets search query params for categorial search. + void SetParamsForCategorialSearch(Params const & params); + void GoImpl(vector> & infos, bool inViewport); template @@ -149,6 +153,9 @@ private: // Throws CancelException if cancelled. 
inline void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); } + // A fast-path branch for categorial requests. + void MatchCategories(BaseContext & ctx); + // Tries to find all countries and states in a search query and then // performs matching of cities in found maps. void MatchRegions(BaseContext & ctx, Region::Type type); diff --git a/search/processor.cpp b/search/processor.cpp index fc207f7337..464dfc6775 100644 --- a/search/processor.cpp +++ b/search/processor.cpp @@ -300,7 +300,7 @@ void Processor::SetQuery(string const & query) // Assign tokens and prefix to scorer. m_ranker.SetKeywords(m_tokens.data(), m_tokens.size(), m_prefix); - // get preferred types to show in results + // Get preferred types to show in results. m_preferredTypes.clear(); ForEachCategoryType(QuerySliceOnRawStrings(m_tokens, m_prefix), [&](size_t, uint32_t t) @@ -390,11 +390,10 @@ void Processor::SetViewportByIndex(m2::RectD const & viewport, size_t idx, bool } void Processor::ClearCache(size_t ind) { m_viewport[ind].MakeEmpty(); } - -TLocales Processor::GetCategoryLocales() const +Locales Processor::GetCategoryLocales() const { static int8_t const enLocaleCode = CategoriesHolder::MapLocaleToInteger("en"); - TLocales result; + Locales result; // Prepare array of processing locales. English locale is always present for category matching. if (m_currentLocaleCode != -1) @@ -522,18 +521,27 @@ void Processor::InitParams(QueryParams & params) else params.InitWithPrefix(m_tokens.begin(), m_tokens.end(), m_prefix); + RemoveStopWordsIfNeeded(params); + // Add names of categories (and synonyms). Classificator const & c = classif(); - auto addSyms = [&](size_t i, uint32_t t) - { + auto addSynonyms = [&](size_t i, uint32_t t) { uint32_t const index = c.GetIndexForType(t); params.GetTypeIndices(i).push_back(index); }; - - // todo(@m, @y). Shall we match prefix tokens for categories? 
- ForEachCategoryTypeFuzzy(QuerySliceOnRawStrings(m_tokens, m_prefix), addSyms); - - RemoveStopWordsIfNeeded(params); + auto const tokenSlice = QuerySliceOnRawStrings(m_tokens, m_prefix); + bool const isCategorialRequest = + IsCategorialRequest(tokenSlice, GetCategoryLocales(), m_categories); + params.SetCategorialRequest(isCategorialRequest); + if (isCategorialRequest) + { + ForEachCategoryType(tokenSlice, addSynonyms); + } + else + { + // todo(@m, @y). Shall we match prefix tokens for categories? + ForEachCategoryTypeFuzzy(tokenSlice, addSynonyms); + } // Remove all type indices for streets, as they're considired // individually. @@ -563,6 +571,8 @@ void Processor::InitGeocoder(Geocoder::Params & params) params.m_pivot = GetPivotRect(); params.m_hotelsFilter = m_hotelsFilter; params.m_cianMode = m_cianMode; + params.m_preferredTypes = m_preferredTypes; + m_geocoder.SetParams(params); } diff --git a/search/query_params.hpp b/search/query_params.hpp index d489deb6bc..e59c72d1c6 100644 --- a/search/query_params.hpp +++ b/search/query_params.hpp @@ -115,6 +115,9 @@ public: inline Langs const & GetLangs() const { return m_langs; } inline bool LangExists(int8_t lang) const { return m_langs.Contains(lang); } + inline void SetCategorialRequest(bool rhs) { m_isCategorialRequest = rhs; } + inline bool IsCategorialRequest() const { return m_isCategorialRequest; } + inline int GetScale() const { return m_scale; } private: @@ -123,6 +126,7 @@ private: vector m_tokens; Token m_prefixToken; bool m_hasPrefix = false; + bool m_isCategorialRequest = false; vector m_typeIndices; diff --git a/search/retrieval.cpp b/search/retrieval.cpp index aa7ab9a0ad..700fdf8da2 100644 --- a/search/retrieval.cpp +++ b/search/retrieval.cpp @@ -318,12 +318,24 @@ Retrieval::Retrieval(MwmContext const & context, my::Cancellable const & cancell } unique_ptr Retrieval::RetrieveAddressFeatures( - SearchTrieRequest const & request) + SearchTrieRequest const & request) { return Retrieve(request); } 
unique_ptr Retrieval::RetrieveAddressFeatures( + SearchTrieRequest> const & request) +{ + return Retrieve(request); +} + +unique_ptr Retrieval::RetrieveAddressFeaturesFuzzy( + SearchTrieRequest const & request) +{ + return Retrieve(request); +} + +unique_ptr Retrieval::RetrieveAddressFeaturesFuzzy( SearchTrieRequest> const & request) { return Retrieve(request); diff --git a/search/retrieval.hpp b/search/retrieval.hpp index 9203822656..0a99e2865f 100644 --- a/search/retrieval.hpp +++ b/search/retrieval.hpp @@ -38,9 +38,15 @@ public: // Following functions retrieve from the search index corresponding to // |value| all features matching to |request|. unique_ptr RetrieveAddressFeatures( - SearchTrieRequest const & request); + SearchTrieRequest const & request); unique_ptr RetrieveAddressFeatures( + SearchTrieRequest> const & request); + + unique_ptr RetrieveAddressFeaturesFuzzy( + SearchTrieRequest const & request); + + unique_ptr RetrieveAddressFeaturesFuzzy( SearchTrieRequest> const & request); // Retrieves from the search index corresponding to |value| all diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp index 0231bddb54..86b9ab4dc4 100644 --- a/search/search_integration_tests/processor_test.cpp +++ b/search/search_integration_tests/processor_test.cpp @@ -23,6 +23,7 @@ #include "base/checked_cast.hpp" #include "base/macros.hpp" #include "base/math.hpp" +#include "base/string_utils.hpp" #include "std/shared_ptr.hpp" #include "std/vector.hpp" @@ -696,6 +697,109 @@ UNIT_CLASS_TEST(ProcessorTest, TestCategories) ()); } +// A separate test for the categorial search branch in the geocoder. +UNIT_CLASS_TEST(ProcessorTest, TestCategorialSearch) +{ + string const countryName = "Wonderland"; + + TestCity sanDiego(m2::PointD(0, 0), "San Diego", "en", 100 /* rank */); + TestCity homel(m2::PointD(10, 10), "Homel", "en", 100 /* rank */); + + // No need for TestHotel here, TestPOI is enough. 
+ TestPOI hotel1(m2::PointD(0, 0.01), "", "ru"); + hotel1.SetTypes({{"tourism", "hotel"}}); + + TestPOI hotel2(m2::PointD(0, 0.02), "Hotel San Diego, California", "en"); + hotel2.SetTypes({{"tourism", "hotel"}}); + + TestPOI hotelCafe(m2::PointD(0, 0.03), "Hotel", "en"); + hotelCafe.SetTypes({{"amenity", "cafe"}}); + + TestPOI hotelDeVille(m2::PointD(0, 0.04), "Hôtel De Ville", "en"); + hotelDeVille.SetTypes({{"amenity", "townhall"}}); + + auto const testWorldId = BuildWorld([&](TestMwmBuilder & builder) { + builder.Add(sanDiego); + builder.Add(homel); + }); + auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder) { + builder.Add(hotel1); + builder.Add(hotel2); + builder.Add(hotelCafe); + builder.Add(hotelDeVille); + }); + + SetViewport(m2::RectD(m2::PointD(-0.5, -0.5), m2::PointD(0.5, 0.5))); + + { + TRules const rules = {ExactMatch(wonderlandId, hotel1), ExactMatch(wonderlandId, hotel2)}; + + auto request = MakeRequest("hotel "); + TEST(ResultsMatch(request->Results(), rules), ()); + } + + { + TRules const rules = {ExactMatch(wonderlandId, hotel1), ExactMatch(wonderlandId, hotel2)}; + + auto request = MakeRequest("гостиница ", "ru"); + TEST(ResultsMatch(request->Results(), rules), ()); + } + + { + TRules const rules = {ExactMatch(wonderlandId, hotel1), ExactMatch(wonderlandId, hotel2)}; + + // Hotel unicode character: both a synonym and an emoji. + uint32_t const hotelEmojiCodepoint = 0x0001F3E8; + strings::UniString const hotelUniString(1, hotelEmojiCodepoint); + auto request = MakeRequest(ToUtf8(hotelUniString)); + TEST(ResultsMatch(request->Results(), rules), ()); + } + + { + TRules const rules = {ExactMatch(wonderlandId, hotel1), ExactMatch(wonderlandId, hotel2), + ExactMatch(wonderlandId, hotelCafe), ExactMatch(testWorldId, homel), + ExactMatch(wonderlandId, hotelDeVille)}; + // A prefix token. 
+ auto request = MakeRequest("hotel"); + TEST(ResultsMatch(request->Results(), rules), ()); + } + + { + TRules const rules = {ExactMatch(wonderlandId, hotel1), ExactMatch(wonderlandId, hotel2), + ExactMatch(wonderlandId, hotelCafe), + ExactMatch(wonderlandId, hotelDeVille)}; + // It looks like a category search but we cannot tell it, so + // even the features that match only by name are emitted. + auto request = MakeRequest("hotel san diego "); + TEST(ResultsMatch(request->Results(), rules), ()); + } + + { + TRules const rules = {ExactMatch(wonderlandId, hotel1), ExactMatch(wonderlandId, hotel2), + ExactMatch(wonderlandId, hotelCafe), ExactMatch(testWorldId, homel), + ExactMatch(wonderlandId, hotelDeVille)}; + // Homel matches exactly, other features are matched by fuzzy names. + auto request = MakeRequest("homel "); + TEST(ResultsMatch(request->Results(), rules), ()); + } + + { + TRules const rules = {ExactMatch(wonderlandId, hotel1), ExactMatch(wonderlandId, hotel2), + ExactMatch(wonderlandId, hotelCafe), ExactMatch(testWorldId, homel), + ExactMatch(wonderlandId, hotelDeVille)}; + // A typo in search: all features fit. 
+ auto request = MakeRequest("hofel "); + TEST(ResultsMatch(request->Results(), rules), ()); + } + + { + TRules const rules = {ExactMatch(wonderlandId, hotelDeVille)}; + + auto request = MakeRequest("hotel de ville "); + TEST(ResultsMatch(request->Results(), rules), ()); + } +} + UNIT_CLASS_TEST(ProcessorTest, TestCoords) { auto request = MakeRequest("51.681644 39.183481"); diff --git a/search/token_slice.hpp b/search/token_slice.hpp index 789f3abf5e..e9f5d2b323 100644 --- a/search/token_slice.hpp +++ b/search/token_slice.hpp @@ -81,6 +81,8 @@ public: { } + bool HasPrefixToken() const { return !m_prefix.empty(); } + // QuerySlice overrides: QueryParams::String const & Get(size_t i) const override { diff --git a/search/utils.hpp b/search/utils.hpp index bbde779ea8..e152d842c8 100644 --- a/search/utils.hpp +++ b/search/utils.hpp @@ -16,6 +16,8 @@ #include #include +#include "base/logging.hpp" + namespace search { // todo(@m, @y). Unite with the similar function in search/feature_offset_match.hpp. @@ -62,14 +64,14 @@ bool MatchInTrie(TrieIt const & trieStartIt, DFA const & dfa, ToDo && toDo) return found; } -using TLocales = buffer_vector; +using Locales = buffer_vector; size_t GetMaxErrorsForToken(strings::UniString const & token); strings::LevenshteinDFA BuildLevenshteinDFA(strings::UniString const & s); template -void ForEachCategoryType(StringSliceBase const & slice, TLocales const & locales, +void ForEachCategoryType(StringSliceBase const & slice, Locales const & locales, CategoriesHolder const & categories, ToDo && todo) { for (size_t i = 0; i < slice.Size(); ++i) @@ -94,7 +96,7 @@ void ForEachCategoryType(StringSliceBase const & slice, TLocales const & locales // in all category synonyms in all |locales| in order to find a token // whose edit distance is close enough to the required token from |slice|. 
template -void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, TLocales const & locales, +void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, Locales const & locales, CategoriesHolder const & categories, ToDo && todo) { using Trie = my::MemTrie>; @@ -118,4 +120,31 @@ void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, TLocales const & lo }); } } + +// Returns whether the request specified by |slice| is categorial +// in any of the |locales|. We expect that categorial requests should +// mostly arise from clicking on a category button in the UI. +// It is assumed that typing a word that matches a category's name +// and a space after it means that no errors were made. +template +bool IsCategorialRequest(QuerySliceOnRawStrings const & slice, Locales const & locales, + CategoriesHolder const & catHolder) +{ + if (slice.Size() != 1 || slice.HasPrefixToken()) + return false; + + bool found = false; + auto token = slice.Get(0); + catHolder.ForEachName([&](CategoriesHolder::Category::Name const & categorySynonym) { + if (std::find(locales.begin(), locales.end(), categorySynonym.m_locale) == locales.end()) + return; + + if (token != strings::MakeUniString(categorySynonym.m_name)) + return; + + found = true; + }); + + return found; +} } // namespace search