From a1f4ac5953e858dc4199a54e0eef81fde41ffaa2 Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Mon, 13 Feb 2017 18:59:55 +0300 Subject: [PATCH] [indexer] United all tries-by-locale into one trie. --- indexer/categories_holder.cpp | 9 ++++----- indexer/categories_holder.hpp | 19 ++++++------------- search/utils.hpp | 27 ++++++++++++++++++--------- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/indexer/categories_holder.cpp b/indexer/categories_holder.cpp index 382d21f7f9..f4ef385381 100644 --- a/indexer/categories_holder.cpp +++ b/indexer/categories_holder.cpp @@ -203,6 +203,8 @@ void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types) auto const locale = synonym.m_locale; ASSERT_NOT_EQUAL(locale, kUnsupportedLocaleCode, ()); + auto const localePrefix = String(1, static_cast<strings::UniChar>(locale)); + auto const uniName = search::NormalizeAndSimplifyString(synonym.m_name); vector<String> tokens; @@ -213,10 +215,7 @@ void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types) if (!ValidKeyToken(token)) continue; for (uint32_t const t : types) - { - auto it = m_name2type.emplace(locale, make_unique<Trie>()).first; - it->second->Add(token, t); - } + m_name2type->Add(localePrefix + token, t); } } } @@ -243,7 +242,7 @@ bool CategoriesHolder::ValidKeyToken(String const & s) void CategoriesHolder::LoadFromStream(istream & s) { m_type2cat.clear(); - m_name2type.clear(); + m_name2type = make_unique<Trie>(); m_groupTranslations.clear(); State state = EParseTypes; diff --git a/indexer/categories_holder.hpp b/indexer/categories_holder.hpp index b40982e001..95050ca09e 100644 --- a/indexer/categories_holder.hpp +++ b/indexer/categories_holder.hpp @@ -56,7 +56,8 @@ private: Type2CategoryCont m_type2cat; // Maps locale and category token to the list of corresponding types. - map<int8_t, unique_ptr<Trie>> m_name2type; + // Locale is treated as a special symbol prepended to the token.
+ unique_ptr<Trie> m_name2type; GroupTranslations m_groupTranslations; @@ -109,10 +110,9 @@ public: template <class ToDo> void ForEachTypeByName(int8_t locale, String const & name, ToDo && toDo) const { - auto const it = m_name2type.find(locale); - if (it == m_name2type.end()) - return; - it->second->ForEachInNode(name, my::MakeIgnoreFirstArgument(forward<ToDo>(toDo))); + auto const localePrefix = String(1, static_cast<strings::UniChar>(locale)); + m_name2type->ForEachInNode(localePrefix + name, + my::MakeIgnoreFirstArgument(forward<ToDo>(toDo))); } inline GroupTranslations const & GetGroupTranslations() const { return m_groupTranslations; } @@ -126,14 +126,7 @@ public: string GetReadableFeatureType(uint32_t type, int8_t locale) const; // Exposes the tries that map category tokens to types. - Trie const * GetNameToTypesTrie(int8_t locale) const - { - auto const it = m_name2type.find(locale); - if (it == m_name2type.end()) - return nullptr; - return it->second.get(); - } - + Trie const & GetNameToTypesTrie() const { return *m_name2type; } bool IsTypeExist(uint32_t type) const; inline void Swap(CategoriesHolder & r) diff --git a/search/utils.hpp b/search/utils.hpp index 717f100eb9..6f2b6ac5e9 100644 --- a/search/utils.hpp +++ b/search/utils.hpp @@ -15,13 +15,14 @@ #include #include #include +#include <set> namespace search { -// my::MemTrie // todo(@m, @y). Unite with the similar function in search/feature_offset_match.hpp.
template <typename Trie, typename DFA, typename ToDo> -bool MatchInTrie(Trie const & trie, DFA const & dfa, ToDo && toDo) +bool MatchInTrie(Trie const & /* trie */, typename Trie::Iterator const & trieStartIt, + DFA const & dfa, ToDo && toDo) { using Char = typename Trie::Char; using TrieIt = typename Trie::Iterator; @@ -34,7 +35,7 @@ bool MatchInTrie(Trie const & trie, DFA const & dfa, ToDo && toDo) auto it = dfa.Begin(); if (it.Rejects()) return false; - q.emplace(trie.GetRootIterator(), it); + q.emplace(trieStartIt, it); } bool found = false; @@ -97,17 +98,25 @@ template <typename TLocales, typename ToDo> void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, TLocales const & locales, CategoriesHolder const & categories, ToDo && todo) { + using Trie = my::MemTrie; + + auto const & trie = categories.GetNameToTypesTrie(); + auto const & trieRootIt = trie.GetRootIterator(); + std::set<int8_t> localeSet(locales.begin(), locales.end()); + for (size_t i = 0; i < slice.Size(); ++i) { auto const & token = slice.Get(i); auto const & dfa = strings::LevenshteinDFA(token, 1 /* prefixCharsToKeep */, GetMaxErrorsForToken(token)); - for (int8_t const locale : locales) - { - auto const * trie = categories.GetNameToTypesTrie(locale); - if (trie != nullptr) - MatchInTrie(*trie, dfa, std::bind(todo, i, std::placeholders::_1)); - } + + trieRootIt.ForEachMove([&](Trie::Char const & c, Trie::Iterator const & moveIt) { + if (localeSet.count(static_cast<int8_t>(c)) != 0) + { + MatchInTrie(trie /* passed to infer the iterator's type */, moveIt, dfa, + std::bind(todo, i, std::placeholders::_1)); + } + }); } } } // namespace search