From 0ddbe71640f6b4b65221761204306711dd0158e1 Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Tue, 14 Feb 2017 17:38:13 +0300 Subject: [PATCH] Review fixes. --- base/mem_trie.hpp | 14 +++++++++++++- indexer/categories_holder.cpp | 6 +++--- indexer/categories_holder.hpp | 9 +++++---- search/utils.hpp | 13 ++++++++----- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/base/mem_trie.hpp b/base/mem_trie.hpp index 6bd161eed9..efc5a90cc0 100644 --- a/base/mem_trie.hpp +++ b/base/mem_trie.hpp @@ -29,7 +29,7 @@ public: { m_root = std::move(rhs.m_root); m_numNodes = rhs.m_numNodes; - rhs.m_numNodes = 1; + rhs.Clear(); return *this; } @@ -103,6 +103,12 @@ public: ForEachInSubtree(*root, prefix, std::forward(toDo)); } + void Clear() + { + m_root.Clear(); + m_numNodes = 1; + } + size_t GetNumNodes() const { return m_numNodes; } Iterator GetRootIterator() const { return Iterator(m_root); } Node const & GetRoot() const { return m_root; } @@ -134,6 +140,12 @@ private: void AddValue(Value const & value) { m_values.push_back(value); } + void Clear() + { + m_moves.clear(); + m_values.clear(); + } + std::map> m_moves; std::vector m_values; diff --git a/indexer/categories_holder.cpp b/indexer/categories_holder.cpp index f4ef385381..30771f4659 100644 --- a/indexer/categories_holder.cpp +++ b/indexer/categories_holder.cpp @@ -69,7 +69,7 @@ bool ParseEmoji(CategoriesHolder::Category::Name & name) return false; } - name.m_name = ToUtf8(UniString(1, static_cast(c))); + name.m_name = ToUtf8(UniString(1 /* numChars */, static_cast(c))); if (IsASCIIString(ToUtf8(search::NormalizeAndSimplifyString(name.m_name)))) { @@ -215,7 +215,7 @@ void CategoriesHolder::AddCategory(Category & cat, vector & types) if (!ValidKeyToken(token)) continue; for (uint32_t const t : types) - m_name2type->Add(localePrefix + token, t); + m_name2type.Add(localePrefix + token, t); } } } @@ -242,7 +242,7 @@ bool CategoriesHolder::ValidKeyToken(String const & s) void CategoriesHolder::LoadFromStream(istream & s) { m_type2cat.clear(); - m_name2type = make_unique(); + m_name2type.Clear(); m_groupTranslations.clear(); State state = EParseTypes; diff --git a/indexer/categories_holder.hpp b/indexer/categories_holder.hpp index 95050ca09e..03c773af09 100644 --- a/indexer/categories_holder.hpp +++ b/indexer/categories_holder.hpp @@ -4,6 +4,7 @@ #include "base/stl_helpers.hpp" #include "base/string_utils.hpp" +#include "std/algorithm.hpp" #include "std/deque.hpp" #include "std/iostream.hpp" #include "std/map.hpp" @@ -57,7 +58,7 @@ private: // Maps locale and category token to the list of corresponding types. // Locale is treated as a special symbol prepended to the token. - unique_ptr m_name2type; + Trie m_name2type; GroupTranslations m_groupTranslations; @@ -111,7 +112,7 @@ public: void ForEachTypeByName(int8_t locale, String const & name, ToDo && toDo) const { auto const localePrefix = String(1, static_cast(locale)); - m_name2type->ForEachInNode(localePrefix + name, + m_name2type.ForEachInNode(localePrefix + name, my::MakeIgnoreFirstArgument(forward(toDo))); } @@ -126,13 +127,13 @@ public: string GetReadableFeatureType(uint32_t type, int8_t locale) const; // Exposes the tries that map category tokens to types. - Trie const & GetNameToTypesTrie() const { return *m_name2type; } + Trie const & GetNameToTypesTrie() const { return m_name2type; } bool IsTypeExist(uint32_t type) const; inline void Swap(CategoriesHolder & r) { m_type2cat.swap(r.m_type2cat); - m_name2type.swap(r.m_name2type); + std::swap(m_name2type, r.m_name2type); } // Converts any language |locale| from UI to the corresponding diff --git a/search/utils.hpp b/search/utils.hpp index 6f2b6ac5e9..e64fc20a5c 100644 --- a/search/utils.hpp +++ b/search/utils.hpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include namespace search { @@ -102,16 +102,19 @@ void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, TLocales const & lo auto const & trie = categories.GetNameToTypesTrie(); auto const & trieRootIt = trie.GetRootIterator(); - std::set localeSet(locales.begin(), locales.end()); + vector sortedLocales(locales.begin(), locales.end()); + my::SortUnique(sortedLocales); for (size_t i = 0; i < slice.Size(); ++i) { auto const & token = slice.Get(i); - auto const & dfa = - strings::LevenshteinDFA(token, 1 /* prefixCharsToKeep */, GetMaxErrorsForToken(token)); + // todo(@m, @y). We build dfa twice for each token: here and in geocoder.cpp. + // A possible optimization is to build each dfa once and save it. Note that + // dfas for the prefix tokens differ, i.e. we ignore slice.IsPrefix(i) here. + strings::LevenshteinDFA const dfa(token, 1 /* prefixCharsToKeep */, GetMaxErrorsForToken(token)); trieRootIt.ForEachMove([&](Trie::Char const & c, Trie::Iterator const & moveIt) { - if (localeSet.count(static_cast(c)) != 0) + if (std::binary_search(sortedLocales.begin(), sortedLocales.end(), static_cast(c))) { MatchInTrie(trie /* passed to infer the iterator's type */, moveIt, dfa, std::bind(todo, i, std::placeholders::_1));