From b95690dcd7927babf961b48ec096b0712590ba43 Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Wed, 8 Feb 2017 16:21:10 +0300 Subject: [PATCH 1/2] [indexer] Changed a map to a trie in CategoriesHolder. Also, minor style refactorings. --- base/mem_trie.hpp | 20 ++++++++- indexer/categories_holder.cpp | 51 ++++++++++++++--------- indexer/categories_holder.hpp | 42 +++++++++---------- indexer/indexer_tests/categories_test.cpp | 43 +++++++++++++++++++ 4 files changed, 114 insertions(+), 42 deletions(-) diff --git a/base/mem_trie.hpp b/base/mem_trie.hpp index 3c8547b8ce..354675663d 100644 --- a/base/mem_trie.hpp +++ b/base/mem_trie.hpp @@ -49,7 +49,7 @@ public: ForEachInSubtree(m_root, prefix, std::forward(toDo)); } - // Calls |toDo| for each key-value pair in a node that is reachable + // Calls |toDo| for each key-value pair in the node that is reachable // by |prefix| from the trie root. Does nothing if such node does // not exist. template @@ -59,6 +59,16 @@ public: ForEachInNode(*root, prefix, std::forward(toDo)); } + // Calls |toDo| for each value in the node that is reachable + // by |prefix| from the trie root. Does nothing if such node does + // not exist. + template + void ForEachValueInNode(String const & prefix, ToDo && toDo) const + { + if (auto const * root = MoveTo(prefix)) + ForEachValueInNode(*root, std::forward(toDo)); + } + // Calls |toDo| for each key-value pair in a subtree that is // reachable by |prefix| from the trie root. Does nothing if such // subtree does not exist. @@ -126,6 +136,14 @@ private: toDo(prefix, value); } + // Calls |toDo| for each value in |node|. + template + void ForEachValueInNode(Node const & node, ToDo && toDo) const + { + for (auto const & value : node.m_values) + toDo(value); + } + // Calls |toDo| for each key-value pair in subtree where |node| is a // root of the subtree. |prefix| is a path from the trie root to the // |node|. diff --git a/indexer/categories_holder.cpp b/indexer/categories_holder.cpp index 71b78c58d6..ad2564f0ee 100644 --- a/indexer/categories_holder.cpp +++ b/indexer/categories_holder.cpp @@ -195,23 +195,32 @@ void CategoriesHolder::AddCategory(Category & cat, vector & types) shared_ptr p(new Category()); p->Swap(cat); - for (size_t i = 0; i < types.size(); ++i) - m_type2cat.insert(make_pair(types[i], p)); + for (uint32_t const t : types) + m_type2cat.insert(make_pair(t, p)); - for (size_t i = 0; i < p->m_synonyms.size(); ++i) + for (auto const & synonym : p->m_synonyms) { - ASSERT(p->m_synonyms[i].m_locale != kUnsupportedLocaleCode, ()); + auto const locale = synonym.m_locale; + ASSERT(locale != kUnsupportedLocaleCode, ()); - StringT const uniName = search::NormalizeAndSimplifyString(p->m_synonyms[i].m_name); + auto const uniName = search::NormalizeAndSimplifyString(synonym.m_name); - vector tokens; + vector tokens; SplitUniString(uniName, MakeBackInsertFunctor(tokens), search::Delimiters()); - for (size_t j = 0; j < tokens.size(); ++j) - for (size_t k = 0; k < types.size(); ++k) - if (ValidKeyToken(tokens[j])) - m_name2type.insert( - make_pair(make_pair(p->m_synonyms[i].m_locale, tokens[j]), types[k])); + for (auto const & token : tokens) + { + if (!ValidKeyToken(token)) + continue; + for (uint32_t const t : types) + { + if (m_name2type.find(locale) == m_name2type.end()) + m_name2type[locale] = make_unique(); + + auto * trie = m_name2type[locale].get(); + trie->Add(token, t); + } + } } } @@ -219,7 +228,7 @@ void CategoriesHolder::AddCategory(Category & cat, vector & types) types.clear(); } -bool CategoriesHolder::ValidKeyToken(StringT const & s) +bool CategoriesHolder::ValidKeyToken(String const & s) { if (s.size() > 2) return true; @@ -306,17 +315,19 @@ void CategoriesHolder::LoadFromStream(istream & s) bool CategoriesHolder::GetNameByType(uint32_t type, int8_t locale, string & name) const { - pair const range = m_type2cat.equal_range(type); + auto const range = m_type2cat.equal_range(type); - for (IteratorT i = range.first; i != range.second; ++i) + for (auto it = range.first; it != range.second; ++it) { - Category const & cat = *i->second; - for (size_t j = 0; j < cat.m_synonyms.size(); ++j) - if (cat.m_synonyms[j].m_locale == locale) + Category const & cat = *it->second; + for (auto const & synonym : cat.m_synonyms) + { + if (synonym.m_locale == locale) { - name = cat.m_synonyms[j].m_name; + name = synonym.m_name; return true; } + } } if (range.first != range.second) @@ -352,7 +363,7 @@ string CategoriesHolder::GetReadableFeatureType(uint32_t type, int8_t locale) co bool CategoriesHolder::IsTypeExist(uint32_t type) const { - pair const range = m_type2cat.equal_range(type); + auto const range = m_type2cat.equal_range(type); return range.first != range.second; } @@ -379,8 +390,10 @@ int8_t CategoriesHolder::MapLocaleToInteger(string const & locale) strings::AsciiToLower(lower); for (char const * s : {"hant", "tw", "hk", "mo"}) + { if (lower.find(s) != string::npos) return 12; // Traditional Chinese + } return 17; // Simplified Chinese by default for all other cases } diff --git a/indexer/categories_holder.hpp b/indexer/categories_holder.hpp index 0c12622d5e..0edb91906d 100644 --- a/indexer/categories_holder.hpp +++ b/indexer/categories_holder.hpp @@ -1,4 +1,5 @@ #pragma once +#include "base/mem_trie.hpp" #include "base/string_utils.hpp" #include "std/deque.hpp" @@ -11,7 +12,6 @@ #include "std/utility.hpp" #include "std/vector.hpp" - class Reader; class CategoriesHolder @@ -47,13 +47,12 @@ public: using GroupTranslations = unordered_map>; private: - typedef strings::UniString StringT; - typedef multimap > Type2CategoryContT; - typedef multimap, uint32_t> Name2CatContT; - typedef Type2CategoryContT::const_iterator IteratorT; + using String = strings::UniString; + using Type2CategoryCont = multimap>; + using Trie = my::MemTrie; - Type2CategoryContT m_type2cat; - Name2CatContT m_name2type; + Type2CategoryCont m_type2cat; + map> m_name2type; GroupTranslations m_groupTranslations; public: @@ -71,8 +70,8 @@ public: template void ForEachCategory(ToDo && toDo) const { - for (IteratorT i = m_type2cat.begin(); i != m_type2cat.end(); ++i) - toDo(*i->second); + for (auto & p : m_type2cat) + toDo(*p.second); } template @@ -85,9 +84,12 @@ public: template void ForEachName(ToDo && toDo) const { - for (IteratorT i = m_type2cat.begin(); i != m_type2cat.end(); ++i) - for (size_t j = 0; j < i->second->m_synonyms.size(); ++j) - toDo(i->second->m_synonyms[j]); + for (auto & p : m_type2cat) + { + shared_ptr cat = p.second; + for (auto const & synonym : cat->m_synonyms) + toDo(synonym); + } } template @@ -101,16 +103,12 @@ public: } template - void ForEachTypeByName(int8_t locale, StringT const & name, ToDo && toDo) const + void ForEachTypeByName(int8_t locale, String const & name, ToDo && toDo) const { - typedef typename Name2CatContT::const_iterator IterT; - - pair range = m_name2type.equal_range(make_pair(locale, name)); - while (range.first != range.second) - { - toDo(range.first->second); - ++range.first; - } + auto const it = m_name2type.find(locale); + if (it == m_name2type.end()) + return; + it->second->ForEachValueInNode(name, forward(toDo)); } inline GroupTranslations const & GetGroupTranslations() const { return m_groupTranslations; } @@ -142,7 +140,7 @@ public: private: void AddCategory(Category & cat, vector & types); - static bool ValidKeyToken(StringT const & s); + static bool ValidKeyToken(String const & s); }; inline void swap(CategoriesHolder & a, CategoriesHolder & b) diff --git a/indexer/indexer_tests/categories_test.cpp b/indexer/indexer_tests/categories_test.cpp index bea439e730..5bc2698013 100644 --- a/indexer/indexer_tests/categories_test.cpp +++ b/indexer/indexer_tests/categories_test.cpp @@ -20,6 +20,7 @@ #include "std/transform_iterator.hpp" #include "base/stl_helpers.hpp" +#include "base/string_utils.hpp" using namespace indexer; @@ -222,6 +223,48 @@ UNIT_TEST(CategoriesHolder_DisplayedName) }); } +UNIT_TEST(CategoriesHolder_ForEach) +{ + char const kCategories[] = + "amenity-bar\n" + "en:abc|ddd-eee\n" + "\n" + "amenity-pub\n" + "en:ddd\n" + "\n" + "amenity-cafe\n" + "en:abc eee\n" + "\n" + "amenity-restaurant\n" + "en:ddd|eee\n" + "\n" + ""; + + classificator::Load(); + CategoriesHolder holder(make_unique(kCategories, ARRAY_SIZE(kCategories) - 1)); + + { + uint32_t counter = 0; + holder.ForEachTypeByName(CategoriesHolder::kEnglishCode, strings::MakeUniString("abc"), + [&](uint32_t /* type */) { ++counter; }); + TEST_EQUAL(counter, 2, ()); + } + + { + uint32_t counter = 0; + holder.ForEachTypeByName(CategoriesHolder::kEnglishCode, strings::MakeUniString("ddd"), + [&](uint32_t /* type */) { ++counter; }); + TEST_EQUAL(counter, 3, ()); + } + + { + uint32_t counter = 0; + holder.ForEachTypeByName(CategoriesHolder::kEnglishCode, strings::MakeUniString("eee"), + [&](uint32_t /* type */) { ++counter; }); + TEST_EQUAL(counter, 3, ()); + } +} + UNIT_TEST(CategoriesIndex_Smoke) { classificator::Load(); From fa0d6036f342358218ef7950d6695e078c25f552 Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Wed, 8 Feb 2017 17:45:39 +0300 Subject: [PATCH 2/2] Review fixes. --- base/base_tests/stl_helpers_test.cpp | 24 ++++++++++++++++++++++++ base/mem_trie.hpp | 18 ------------------ base/stl_helpers.hpp | 26 ++++++++++++++++++++++++++ indexer/categories_holder.cpp | 9 +++------ indexer/categories_holder.hpp | 14 +++++++++----- 5 files changed, 62 insertions(+), 29 deletions(-) diff --git a/base/base_tests/stl_helpers_test.cpp b/base/base_tests/stl_helpers_test.cpp index 029df6af98..71462cfbaf 100644 --- a/base/base_tests/stl_helpers_test.cpp +++ b/base/base_tests/stl_helpers_test.cpp @@ -125,4 +125,28 @@ UNIT_TEST(SortUnique_VectorTest) TestSortUnique(); TestSortUnique(); } + +UNIT_TEST(IgnoreFirstArgument) +{ + { + int s = 0; + auto f1 = [&](int a, int b) { s += a + b; }; + auto f2 = [&](int a, int b) { s -= a + b; }; + auto f3 = my::MakeIgnoreFirstArgument(f2); + + f1(2, 3); + TEST_EQUAL(s, 5, ()); + f3(1, 2, 3); + TEST_EQUAL(s, 0, ()); + } + + { + auto f1 = [](int a, int b) -> int { return a + b; }; + auto f2 = my::MakeIgnoreFirstArgument(f1); + + auto const x = f1(2, 3); + auto const y = f2("ignored", 2, 3); + TEST_EQUAL(x, y, ()); + } +} } // namespace diff --git a/base/mem_trie.hpp b/base/mem_trie.hpp index 354675663d..fdd35af365 100644 --- a/base/mem_trie.hpp +++ b/base/mem_trie.hpp @@ -59,16 +59,6 @@ public: ForEachInNode(*root, prefix, std::forward(toDo)); } - // Calls |toDo| for each value in the node that is reachable - // by |prefix| from the trie root. Does nothing if such node does - // not exist. - template - void ForEachValueInNode(String const & prefix, ToDo && toDo) const - { - if (auto const * root = MoveTo(prefix)) - ForEachValueInNode(*root, std::forward(toDo)); - } - // Calls |toDo| for each key-value pair in a subtree that is // reachable by |prefix| from the trie root. Does nothing if such // subtree does not exist. @@ -136,14 +126,6 @@ private: toDo(prefix, value); } - // Calls |toDo| for each value in |node|. - template - void ForEachValueInNode(Node const & node, ToDo && toDo) const - { - for (auto const & value : node.m_values) - toDo(value); - } - // Calls |toDo| for each key-value pair in subtree where |node| is a // root of the subtree. |prefix| is a path from the trie root to the // |node|. diff --git a/base/stl_helpers.hpp b/base/stl_helpers.hpp index 2130dd03d7..5ee22b6897 100644 --- a/base/stl_helpers.hpp +++ b/base/stl_helpers.hpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -137,4 +138,29 @@ typename std::underlying_type::type Key(T value) { return static_cast::type>(value); } + +// Use this if you want to make a functor whose first +// argument is ignored and the rest are forwarded to |fn|. +template +class IgnoreFirstArgument +{ +public: + template + IgnoreFirstArgument(Gn && gn) : m_fn(std::forward(gn)) {} + + template + typename std::result_of::type operator()(Arg && arg, Args &&... args) + { + return m_fn(std::forward(args)...); + } + +private: + Fn m_fn; +}; + +template +IgnoreFirstArgument MakeIgnoreFirstArgument(Fn && fn) +{ + return IgnoreFirstArgument(std::forward(fn)); +} } // namespace my diff --git a/indexer/categories_holder.cpp b/indexer/categories_holder.cpp index ad2564f0ee..382d21f7f9 100644 --- a/indexer/categories_holder.cpp +++ b/indexer/categories_holder.cpp @@ -201,7 +201,7 @@ void CategoriesHolder::AddCategory(Category & cat, vector & types) for (auto const & synonym : p->m_synonyms) { auto const locale = synonym.m_locale; - ASSERT(locale != kUnsupportedLocaleCode, ()); + ASSERT_NOT_EQUAL(locale, kUnsupportedLocaleCode, ()); auto const uniName = search::NormalizeAndSimplifyString(synonym.m_name); @@ -214,11 +214,8 @@ void CategoriesHolder::AddCategory(Category & cat, vector & types) continue; for (uint32_t const t : types) { - if (m_name2type.find(locale) == m_name2type.end()) - m_name2type[locale] = make_unique(); - - auto * trie = m_name2type[locale].get(); - trie->Add(token, t); + auto it = m_name2type.emplace(locale, make_unique()).first; + it->second->Add(token, t); } } } diff --git a/indexer/categories_holder.hpp b/indexer/categories_holder.hpp index 0edb91906d..ba40e43460 100644 --- a/indexer/categories_holder.hpp +++ b/indexer/categories_holder.hpp @@ -1,5 +1,7 @@ #pragma once + #include "base/mem_trie.hpp" +#include "base/stl_helpers.hpp" #include "base/string_utils.hpp" #include "std/deque.hpp" @@ -52,7 +54,10 @@ private: using Trie = my::MemTrie; Type2CategoryCont m_type2cat; + + // Maps locale and category token to the list of corresponding types. map> m_name2type; + GroupTranslations m_groupTranslations; public: @@ -70,7 +75,7 @@ public: template void ForEachCategory(ToDo && toDo) const { - for (auto & p : m_type2cat) + for (auto const & p : m_type2cat) toDo(*p.second); } @@ -84,10 +89,9 @@ public: template void ForEachName(ToDo && toDo) const { - for (auto & p : m_type2cat) + for (auto const & p : m_type2cat) { - shared_ptr cat = p.second; - for (auto const & synonym : cat->m_synonyms) + for (auto const & synonym : p.second->m_synonyms) toDo(synonym); } } @@ -108,7 +112,7 @@ public: auto const it = m_name2type.find(locale); if (it == m_name2type.end()) return; - it->second->ForEachValueInNode(name, forward(toDo)); + it->second->ForEachInNode(name, my::MakeIgnoreFirstArgument(forward(toDo))); } inline GroupTranslations const & GetGroupTranslations() const { return m_groupTranslations; }